1 module navm.bytecode;
2 
3 import navm.defs;
4 
5 import utils.lists;
6 import utils.misc;
7 
8 import std.conv : to;
9 
10 /// Class used for storing/constructing bytecode
11 public class NaBytecode{
12 private:
13 	/// where the bytecode is actually stored
14 	string[] _instructions;
15 	string[] _arguments;
16 	/// stores number of elements sitting on stack right now after the last added instruction would be executed
17 	uinteger _stackLength;
18 	/// stores max number of elements sitting on stack at any time
19 	uinteger _stackLengthMax;
20 	/// stores any elements that have been "bookmarked". Useful for keeping track of elements during constructing byte code
21 	uinteger[uinteger] _bookmarks;
22 	/// stores the instruction table
23 	NaInstruction[] _instructionTable;
24 public:
25 	/// constructor
26 	this(NaInstruction[] instructionTable){
27 		_instructionTable = instructionTable.dup;
28 	}
29 	~this(){
30 		// nothing to do
31 	}
32 	/// Returns: true if a NaInstruction exists in instruction table
33 	bool hasInstruction(string name, ref NaInstruction instruction){
34 		foreach (inst; _instructionTable){
35 			if (name.lowercase == inst.name){
36 				instruction = inst;
37 				return true;
38 			}
39 		}
40 		return false;
41 	}
42 	/// ditto
43 	bool hasInstruction(string name){
44 		NaInstruction dummy;
45 		return hasInstruction(name, dummy);
46 	}
47 	/// adds an instruction to the instruction table
48 	/// 
49 	/// Returns: true if it was added, false if not due to name or code already used
50 	bool addInstructionToTable(NaInstruction instruction){
51 		foreach (inst; _instructionTable){
52 			if (inst.name == instruction.name || inst.code == instruction.code)
53 				return false;
54 		}
55 		_instructionTable ~= instruction;
56 		return true;
57 	}
58 	/// goes over bytecode, checks if there are any errors, and converts jump positions to indexes
59 	/// i.e: makes the byte code a bit more ready for execution
60 	/// 
61 	/// Returns: errors in a string[], or an empty array in case no errors
62 	string[] resolve(){
63 		string[] errors;
64 		uinteger[string] jumpPos;
65 		uinteger instCount = 0;
66 		for (uinteger i=0, instIndex=0; i < _instructions.length; i ++){
67 			string name = _instructions[i];
68 			if (name.length && name[$-1] == ':'){
69 				name = name[0 .. $-1];
70 				if (name.lowercase in jumpPos){
71 					errors ~= "line#"~(i+1).to!string~' '~name~" as jump postion declared multiple times";
72 					continue;
73 				}
74 				if (name.isNum(true)){
75 					errors ~= "line#"~(i+1).to!string~' '~name~" is an invalid jump position, cannot be digits only.";
76 					continue;
77 				}
78 				jumpPos[name.lowercase] = instIndex;
79 				continue;
80 			}
81 			instIndex ++;
82 			instCount = instIndex;
83 		}
84 		for (uinteger i=0, writeIndex=0; i < _instructions.length; i ++){
85 			string name = _instructions[i];
86 			if (name.length && name[$-1] == ':')
87 				continue;
88 			if (writeIndex != i){
89 				_instructions[writeIndex] = name;
90 				_arguments[writeIndex] = _arguments[i];
91 			}
92 			NaInstruction instInfo;
93 			if (this.hasInstruction(name, instInfo)){
94 				if (instInfo.needsArg && !_arguments[i].length)
95 					errors ~= "line#"~(i+1).to!string~' '~name~"  needs argument";
96 				if (instInfo.argIsJumpPos){
97 					string arg = _arguments[i].lowercase;
98 					if (arg.isNum(false)){ // skip it if its an integer already
99 						_arguments[writeIndex] = arg;
100 					}else if (jumpPos.keys.hasElement(arg))
101 						_arguments[writeIndex] = jumpPos[arg].to!string;
102 					else
103 						errors ~= "line#"~(i+1).to!string~' '~arg~" is not a valid jump position";
104 				}
105 			}else
106 				errors ~= "line#"~(i+1).to!string~" instruction does not exist";
107 			writeIndex ++;
108 		}
109 		_instructions.length = instCount;
110 		_arguments.length = instCount;
111 		return errors;
112 	}
113 	/// Returns: the bytecode in a readable format
114 	string[] getBytecodePretty(){
115 		string[] r;
116 		r.length = _instructions.length;
117 		foreach (i, inst; _instructions){
118 			r[i] = _instructions[i];
119 			if (_arguments[i].length)
120 				r[i] ~= "\t\t" ~ _arguments[i];
121 		}
122 		return r;
123 	}
124 	/// Call `resolve` before this or prepare for crashes
125 	/// 
126 	/// Returns: pointers for all instructions
127 	void delegate()[] getBytecodePointers(){
128 		void delegate()[] r;
129 		r.length = _instructions.length;
130 		foreach (i, inst; _instructions){
131 			NaInstruction instInfo;
132 			hasInstruction(inst, instInfo);
133 			r[i] = instInfo.pointer;
134 		}
135 		return r;
136 	}
137 	/// Call `resolve` before this or prepare for crashes
138 	/// 
139 	/// Returns: codes for all instructions
140 	ushort[] getBytecodeCodes(){
141 		ushort[] r;
142 		r.length = _instructions.length;
143 		foreach (i, inst; _instructions){
144 			NaInstruction instInfo;
145 			hasInstruction(inst, instInfo);
146 			r[i] = instInfo.code;
147 		}
148 		return r;
149 	}
150 	/// Call `resolve` before this.
151 	/// 
152 	/// Returns: arguments for each instruction NaData[]
153 	/// 
154 	/// Throws: Exception in case of error in argument
155 	NaData[] getArgumentsNaData(){
156 		NaData[] r;
157 		r.length = _arguments.length;
158 		foreach (i, arg; _arguments){
159 			try{
160 				r[i] = readData(arg);
161 			}catch (Exception e){
162 				e.msg = "Line#"~(i+1).to!string~' '~e.msg;
163 				throw e;
164 			}
165 		}
166 		return r;
167 	}
168 	/// Reads from a string[] (follows spec/syntax.md)
169 	/// 
170 	/// Returns: errors in a string[], or [] if no errors
171 	string[] readByteCode(string[] input){
172 		string[] errors;
173 		immutable string[][] words = cast(immutable string[][])input.removeWhitespace.readWords();
174 		foreach (i, lineWords; words){
175 			if (!lineWords.length)
176 				continue;
177 			if (lineWords[0].length){
178 				if (lineWords[0][$-1] == ':'){
179 					this.addJumpPos(lineWords[0][0..$-1]);
180 					continue;
181 				}
182 				string error = "";
183 				if (!this.addInstruction(lineWords[0],
184 					lineWords.length>1 && lineWords[1].length ? lineWords[1] : "", error))
185 					errors ~= "line#"~(i+1).to!string~':'~error;
186 			}
187 		}
188 		return errors;
189 	}
190 	// functions for generating byte code
191 	
192 	/// appends an instruction
193 	/// 
194 	/// Returns: true if successful, false if not (writes error in `error`)
195 	bool addInstruction(string instName, string argument, ref string error){
196 		import navm.bytecode : removeWhitespace;
197 		NaInstruction inst;
198 		if (hasInstruction(instName, inst)){
199 			if (inst.needsArg && !removeWhitespace(argument).length){
200 				error = "instruction needs an argument";
201 				return false;
202 			}
203 			NaData arg;
204 			if (inst.needsArg && !inst.argIsJumpPos){
205 				try{
206 					arg = readData(argument);
207 				}catch (Exception e){
208 					error = "invalid argument: "~e.msg;
209 					return false;
210 				}
211 				if (_stackLength < inst.popCount(arg)){
212 					error = "stack does not have enough elements for instruction";
213 					return false;
214 				}
215 			}
216 			_instructions ~= instName;
217 			_arguments ~= argument;
218 			_stackLength -= inst.popCount(arg);
219 			_stackLength += inst.pushCount;
220 			_stackLengthMax = _stackLength > _stackLengthMax ? _stackLength : _stackLengthMax;
221 			return true;
222 		}
223 		error = "instruction "~instName~" does not exist";
224 		return false;
225 	}
226 	/// ditto
227 	bool addInstruction(string instName, string argument){
228 		string dummy;
229 		return addInstruction(instName, argument, dummy);
230 	}
231 	/// ditto
232 	bool addInstruction(string instName){
233 		string dummy;
234 		return addInstruction(instName, "", dummy);
235 	}
236 	/// adds a jump position
237 	void addJumpPos(string name){
238 		_instructions ~= name~':';
239 		_arguments ~= "";
240 	}
241 	/// Returns: the number of elements on stack after executing the last added instruction
242 	@property uinteger elementCount(){
243 		return _stackLength;
244 	}
245 	/// adds a "bookmark" to the last element on stack, so later on, relative to current peek index, bookmark index
246 	/// can be read.
247 	/// 
248 	/// Returns: bookmark id, or -1 if stack empty
249 	integer addBookmark(){
250 		if (_stackLength == 0)
251 			return -1;
252 		integer bookmarkId;
253 		for (bookmarkId = 0; bookmarkId <= integer.max; bookmarkId ++)
254 			if (bookmarkId !in _bookmarks)
255 				break;
256 		_bookmarks[bookmarkId] = _stackLength-1;
257 		return bookmarkId;
258 	}
259 	/// removes a bookmark
260 	/// 
261 	/// Returns: true if successful, false if does not exists
262 	bool removeBookmark(uinteger id){
263 		if (id !in _bookmarks)
264 			return false;
265 		_bookmarks.remove(id);
266 		return true;
267 	}
268 	/// gets relative index from current stack index to a bookmark
269 	/// 
270 	/// Returns: relative index, or integer.max if bookmark does not exist
271 	integer bookmarkRelIndex(uinteger id){
272 		if (id !in _bookmarks)
273 			return integer.max;
274 		return _stackLength.to!integer - (_bookmarks[id]+1).to!integer;
275 	}
276 }
277 
278 /// stores an data about available instruction
279 public struct NaInstruction{
280 	bool argIsJumpPos = false; /// if the argument to this instruction is a jump position
281 	string name; /// name of instruction, in lowercase
282 	ushort code = 0x0000; /// value when read as a ubyte
283 	bool needsArg; /// if this instruction needs an argument
284 	ubyte pushCount = 0; /// number of elements it will push to stack
285 	private ubyte _popCount = 0; /// number of elements it will pop (if ==255, then the argument is the number of elements to pop)
286 	void delegate() pointer; /// pointer to the delegate behind this instruction
287 	/// Returns: number of elements it will pop
288 	ubyte popCount(NaData arg){
289 		if (_popCount < 255)
290 			return _popCount;
291 		return cast(ubyte)(arg.intVal);
292 	}
293 	/// constructor, for instruction with no arg, no push/pop
294 	this (string name, integer code, void delegate() pointer){
295 		this.name = name.lowercase;
296 		this.code = cast(ushort)code;
297 		this.pointer = pointer;
298 		this.needsArg = false;
299 		this.pushCount = 0;
300 		this._popCount = 0;
301 	}
302 	/// constructor, for instruction with no arg, but pop and push
303 	this(string name, integer code, integer popCount, integer pushCount, void delegate() pointer){
304 		this.name = name.lowercase;
305 		this.code = cast(ushort)code;
306 		this.needsArg = false;
307 		this.pushCount = cast(ubyte)pushCount;
308 		this._popCount = cast(ubyte)popCount;
309 		this.pointer = pointer;
310 	}
311 	/// full constructor but arg is not jump position
312 	this (string name, integer code, bool needsArg, integer popCount, integer pushCount, void delegate() pointer){
313 		this.name = name.lowercase;
314 		this.code = cast(ushort)code;
315 		this.needsArg = needsArg;
316 		this.argIsJumpPos = false;
317 		this.pushCount = cast(ubyte)pushCount;
318 		this._popCount = cast(ubyte)popCount;
319 		this.pointer = pointer;
320 	}
321 	/// full constructor
322 	this (string name, integer code, bool needsArg, bool argIsJumpPos, integer popCount, integer pushCount, void delegate() pointer){
323 		this.name = name.lowercase;
324 		this.code = cast(ushort)code;
325 		this.needsArg = needsArg;
326 		this.argIsJumpPos = argIsJumpPos;
327 		this.pushCount = cast(ubyte)pushCount;
328 		this._popCount = cast(ubyte)popCount;
329 		this.pointer = pointer;
330 	}
331 }
332 
333 /// Reads data from a string (which can be string, double, integer, or array of any of those types, or array of array...)
334 /// 
335 /// Does not care if elements in array are of same type or not.
336 /// 
337 /// Returns: the data in NaData
338 /// 
339 /// Throws: Exception if data is invalid
340 public NaData readData(string strData){
341 	static string readElement(string array, uinteger startIndex){
342 		if (array[startIndex] == '[')
343 			return array[startIndex .. bracketPos(cast(char[])array, startIndex)+1];
344 		// search for ] or ,
345 		uinteger i = startIndex;
346 		while (i < array.length && ! [',',']'].hasElement(array[i]))
347 			i ++;
348 		return array[startIndex .. i];
349 	}
350 	if (strData.length == 0)
351 		return NaData();
352 	if (strData.isNum(false))
353 		return NaData(to!integer(strData));
354 	if (strData.isNum(true))
355 		return NaData(to!double(strData));
356 	if (["true", "false"].hasElement(strData))
357 		return NaData(strData == "true");
358 	// now checking for arrays
359 	if (strData[0] == '['){
360 		NaData r = NaData(cast(NaData[])[]);
361 		string[] elements = [];
362 		for (uinteger i = 1, bracketEnd = bracketPos(cast(char[])strData, 0); i < bracketEnd; i ++){
363 			if (strData[i] == ' ')
364 				continue;
365 			if (strData[i] != ']'){
366 				elements ~= readElement(strData, i);
367 				i += elements[$-1].length;
368 				// skip till ','
369 				while (![',',']'].hasElement(strData[i]))
370 					i ++;
371 			}
372 		}
373 		// now convert each of those elements to NaData
374 		r.arrayVal = new NaData[elements.length];
375 		foreach (i, element; elements)
376 			r.arrayVal[i] = readData(element);
377 		return r;
378 	}
379 	if (strData[0] == '\"'){
380 		// assume the whole thing is string, no need to find string end index
381 		NaData r;
382 		r.strVal = cast(dchar[])strReplaceSpecial(strData[1 .. $-1]).to!dstring;
383 		return r;
384 	}
385 	if (strData[0] == '\''){
386 		NaData r;
387 		strData = strData.dup;
388 		strData = strReplaceSpecial(strData[1 .. $ -1]);
389 		if (strData.length > 1)
390 			throw new Exception("'' can only contain 1 character");
391 		if (strData.length < 1)
392 			throw new Exception("no character provided in ''");
393 		r.dcharVal = strData[0];
394 		return r;
395 	}
396 	throw new Exception("invalid data");
397 }
398 
399 /// Removes whitespace from a string. And the remaining whitespace is only of one type. e.g: whitespace is ' ' & '\t', 
400 /// it will replace '\t' with ' ' so less conditions are needed after this
401 /// 
402 /// Returns: the string with minimal whitespace (just enough to separate identifiers)
403 private string removeWhitespace(char[] whitespace=[' ','\t'], char comment='#')(string line){
404 	char[] r = [];
405 	bool lastWasWhite = true;
406 	for (uinteger i = 0; i < line.length; i ++){
407 		if (line[i] == comment){
408 			break;
409 		}
410 		if (line[i] == '\"'){
411 			integer endIndex = line.strEnd(i);
412 			if (endIndex == -1)
413 				throw new Exception("string not terminated");
414 			r ~= line[i .. endIndex + 1];
415 			i = endIndex;
416 			lastWasWhite = false;
417 			continue;
418 		}
419 		if (whitespace.hasElement(line[i])){
420 			if (!lastWasWhite){
421 				r ~= whitespace[0];
422 			}
423 			lastWasWhite = true;
424 		}else{
425 			lastWasWhite = false;
426 			r ~= line[i];
427 		}
428 	}
429 	if (r.length > 0 && whitespace.hasElement(r[$-1])){
430 		r = r[0 .. $-1];
431 	}
432 	return cast(string)r;
433 }
434 /// 
435 unittest{
436 	assert("potato    potato".removeWhitespace == "potato potato");
437 	assert("potato    \t\t".removeWhitespace == "potato");
438 	assert("potato  \t  \t  potato  \t#comment".removeWhitespace == "potato potato");
439 	assert ("   ".removeWhitespace == "");
440 	assert ("   \t \t".removeWhitespace == "");
441 	assert ("  # \t \t".removeWhitespace == "");
442 	assert ("#  # \t \t".removeWhitespace == "");
443 }
444 
445 /// Removes whitespace from multiple strings.
446 /// 
447 /// Returns: string[] with minimal whitespace
448 string[] removeWhitespace(char[] whitespace=[' ','\t'], char comment='#')(string[] input){
449 	input = input.dup;
450 	uinteger writeIndex = 0;
451 	for (uinteger i = 0; i < input.length; i ++){
452 		const string line = input[i].removeWhitespace;
453 		input[writeIndex] = line;
454 		writeIndex ++;
455 	}
456 	input.length = writeIndex;
457 	return input;
458 }
459 
460 /// reads each line into words (separated by tab and space)
461 /// 
462 /// Returns: the words read (stuff inside square brackets is considerd a sinle word)
463 /// 
464 /// Throws: Exception if incorrect syntax (in brackets usually)
465 private string[][] readWords(string[] input){
466 	List!(string[]) lines = new List!(string[]);
467 	List!string words = new List!string;
468 	foreach (line; input){
469 		for (uinteger i = 0, readFrom = 0; i < line.length; i++){
470 			if (line[i] == '['){
471 				if (readFrom < i){
472 					words.append(line[readFrom .. i]);
473 					readFrom = i;
474 				}
475 				i = bracketPos(cast(char[])line, i);
476 				words.append(line[readFrom .. i + 1]);
477 				i ++; // skip the bracket end, the increment done by for will skip the space too
478 				readFrom = i+1;
479 				continue;
480 			}
481 			if (line[i] == '\"' || line[i] == '\''){
482 				if (readFrom < i){
483 					words.append(line[readFrom .. i]);
484 					readFrom = i;
485 				}
486 				immutable integer endIndex = line[i] == '\"' ? strEnd(line,i) : strEnd!('\\','\'')(line, i);
487 				if (endIndex == -1)
488 					throw new Exception("string not terminated");
489 				i = endIndex;
490 				words.append(line[readFrom .. i+1]);
491 				readFrom = i + 1;
492 				continue;
493 			}
494 			if (line[i] == ' ' || line[i] == '\t'){
495 				if (readFrom <= i && removeWhitespace(line[readFrom .. i]).length > 0)
496 					words.append(line[readFrom .. i]);
497 				readFrom = i + 1;
498 			}else if (i +1 == line.length && readFrom <= i){
499 				words.append(line[readFrom .. i + 1]);
500 			}
501 		}
502 		lines.append(words.toArray);
503 		words.clear;
504 	}
505 	.destroy(words);
506 	string[][] r = lines.toArray;
507 	.destroy(lines);
508 	return r;
509 }
510 ///
511 unittest{
512 	assert(["potato potato",
513 		"potato [asdf, sdfsdf, [0, 1, 2], 2] asd",
514 		"   \t",
515 		"potato \"some String\" \'c\'"].readWords == [
516 			["potato", "potato"],
517 			["potato", "[asdf, sdfsdf, [0, 1, 2], 2]", "asd"],
518 			["potato","\"some String\"","\'c\'"]
519 		]);
520 }
521 
522 /// Returns: the index where a string ends, -1 if not terminated
523 private integer strEnd(char specialCharBegin='\\', char strTerminator='"')(string s, uinteger startIndex){
524 	uinteger i;
525 	for (i = startIndex+1; i < s.length; i ++){
526 		if (s[i] == strTerminator){
527 			return i;
528 		}
529 		if (s[i] == specialCharBegin){
530 			i ++;
531 			continue;
532 		}
533 	}
534 	return -1;
535 }
536 ///
537 unittest{
538 	assert("st\"sdfsdfsd\"0".strEnd(2) == 11);
539 }
540 
541 /// Returns: string with special characters replaced with their actual characters (i.e, \t replaced with tab, \n with newline...)
542 private string strReplaceSpecial(char specialCharBegin='\\', char[char] map = ['t' : '\t', 'n' : '\n','\\':'\\'])
543 (string s){
544 	char[] r = [];
545 	for (uinteger i = 0; i < s.length; i ++){
546 		if (s[i] == specialCharBegin && i + 1 < s.length && s[i+1] in map){
547 			r ~= map[s[i+1]];
548 			i++;
549 			continue;
550 		}
551 		r ~= s[i];
552 	}
553 	return r;
554 }
555 ///
556 unittest{
557 	assert("newline:\\ntab:\\t".strReplaceSpecial == "newline:\ntab:\t");
558 }