1 module navm.bytecode;
2 
3 import navm.defs;
4 
5 import utils.lists;
6 import utils.misc;
7 
8 import std.conv : to;
9 
10 /// Class used for storing/constructing bytecode
11 public class NaBytecode{
12 private:
13 	/// where the bytecode is actually stored
14 	string[] _instructions;
15 	string[] _arguments;
16 	/// stores number of elements sitting on stack right now after the last added instruction would be executed
17 	uinteger _stackLength;
18 	/// stores max number of elements sitting on stack at any time
19 	uinteger _stackLengthMax;
20 	/// stores any elements that have been "bookmarked". Useful for keeping track of elements during constructing byte code
21 	uinteger[uinteger] _bookmarks;
22 	/// stores the instruction table
23 	NaInstruction[] _instructionTable;
24 public:
25 	/// constructor
26 	this(NaInstruction[] instructionTable){
27 		_instructionTable = instructionTable.dup;
28 	}
29 	~this(){
30 		// nothing to do
31 	}
32 	/// Returns: true if a NaInstruction exists in instruction table
33 	bool hasInstruction(string name, ref NaInstruction instruction){
34 		foreach (inst; _instructionTable){
35 			if (name.lowercase == inst.name){
36 				instruction = inst;
37 				return true;
38 			}
39 		}
40 		return false;
41 	}
42 	/// ditto
43 	bool hasInstruction(string name){
44 		NaInstruction dummy;
45 		return hasInstruction(name, dummy);
46 	}
47 	/// adds an instruction to the instruction table
48 	/// 
49 	/// Returns: true if it was added, false if not due to name or code already used
50 	bool addInstructionToTable(NaInstruction instruction){
51 		foreach (inst; _instructionTable){
52 			if (inst.name == instruction.name || inst.code == instruction.code)
53 				return false;
54 		}
55 		_instructionTable ~= instruction;
56 		return true;
57 	}
58 	/// goes over bytecode, checks if there are any errors, and converts jump positions to indexes
59 	/// i.e: makes the byte code a bit more ready for execution
60 	/// 
61 	/// Returns: errors in a string[], or an empty array in case no errors
62 	string[] resolve(){
63 		string[] errors;
64 		uinteger[string] jumpPos;
65 		uinteger instCount = 0;
66 		for (uinteger i=0, instIndex=0; i < _instructions.length; i ++){
67 			string name = _instructions[i];
68 			if (name.length && name[$-1] == ':'){
69 				name = name[0 .. $-1];
70 				if (name.lowercase in jumpPos){
71 					errors ~= "line#"~(i+1).to!string~' '~name~" as jump postion declared multiple times";
72 					continue;
73 				}
74 				if (name.isNum(true)){
75 					errors ~= "line#"~(i+1).to!string~' '~name~" is an invalid jump position, cannot be digits only.";
76 					continue;
77 				}
78 				// check if its the last line
79 				bool isLastLine = true;
80 				foreach (j; i .. _instructions.length){
81 					if (!_instructions[j].length || _instructions[j][$-1] == ':')
82 						continue;
83 					isLastLine = false;
84 				}
85 				if (isLastLine){
86 					errors ~= "line#"~(i+1).to!string~" jump position at last line is now allowed, follow it by an instruction";
87 					continue;
88 				}
89 				jumpPos[name.lowercase] = instIndex;
90 				continue;
91 			}
92 			instIndex ++;
93 			instCount = instIndex;
94 		}
95 		for (uinteger i=0, writeIndex=0; i < _instructions.length; i ++){
96 			string name = _instructions[i];
97 			if (name.length && name[$-1] == ':')
98 				continue;
99 			if (writeIndex != i){
100 				_instructions[writeIndex] = name;
101 				_arguments[writeIndex] = _arguments[i];
102 			}
103 			NaInstruction instInfo;
104 			if (this.hasInstruction(name, instInfo)){
105 				if (instInfo.needsArg && !_arguments[i].length)
106 					errors ~= "line#"~(i+1).to!string~' '~name~"  needs argument";
107 				if (instInfo.argIsJumpPos){
108 					string arg = _arguments[i].lowercase;
109 					if (arg.isNum(false)){ // skip it if its an integer already
110 						_arguments[writeIndex] = arg;
111 					}else if (jumpPos.keys.hasElement(arg))
112 						_arguments[writeIndex] = jumpPos[arg].to!string;
113 					else
114 						errors ~= "line#"~(i+1).to!string~' '~arg~" is not a valid jump position";
115 				}
116 			}else
117 				errors ~= "line#"~(i+1).to!string~" instruction does not exist";
118 			writeIndex ++;
119 		}
120 		_instructions.length = instCount;
121 		_arguments.length = instCount;
122 		return errors;
123 	}
124 	/// Returns: the bytecode in a readable format
125 	string[] getBytecodePretty(){
126 		string[] r;
127 		r.length = _instructions.length;
128 		foreach (i, inst; _instructions){
129 			r[i] = _instructions[i];
130 			if (_arguments[i].length)
131 				r[i] ~= "\t\t" ~ _arguments[i];
132 		}
133 		return r;
134 	}
135 	/// Call `resolve` before this or prepare for crashes
136 	/// 
137 	/// Returns: pointers for all instructions
138 	void delegate()[] getBytecodePointers(){
139 		void delegate()[] r;
140 		r.length = _instructions.length;
141 		foreach (i, inst; _instructions){
142 			NaInstruction instInfo;
143 			hasInstruction(inst, instInfo);
144 			r[i] = instInfo.pointer;
145 		}
146 		return r;
147 	}
148 	/// Call `resolve` before this or prepare for crashes
149 	/// 
150 	/// Returns: codes for all instructions
151 	ushort[] getBytecodeCodes(){
152 		ushort[] r;
153 		r.length = _instructions.length;
154 		foreach (i, inst; _instructions){
155 			NaInstruction instInfo;
156 			hasInstruction(inst, instInfo);
157 			r[i] = instInfo.code;
158 		}
159 		return r;
160 	}
161 	/// Call `resolve` before this.
162 	/// 
163 	/// Returns: arguments for each instruction NaData[]
164 	/// 
165 	/// Throws: Exception in case of error in argument
166 	NaData[] getArgumentsNaData(){
167 		NaData[] r;
168 		r.length = _arguments.length;
169 		foreach (i, arg; _arguments){
170 			try{
171 				r[i] = readData(arg);
172 			}catch (Exception e){
173 				e.msg = "Line#"~(i+1).to!string~' '~e.msg;
174 				throw e;
175 			}
176 		}
177 		return r;
178 	}
179 	/// Reads from a string[] (follows spec/syntax.md)
180 	/// 
181 	/// Returns: errors in a string[], or [] if no errors
182 	string[] readByteCode(string[] input){
183 		string[] errors;
184 		immutable string[][] words = cast(immutable string[][])input.removeWhitespace.readWords();
185 		foreach (i, lineWords; words){
186 			if (!lineWords.length)
187 				continue;
188 			if (lineWords[0].length){
189 				if (lineWords[0][$-1] == ':'){
190 					this.addJumpPos(lineWords[0][0..$-1]);
191 					continue;
192 				}
193 				string error = "";
194 				if (!this.addInstruction(lineWords[0],
195 					lineWords.length>1 && lineWords[1].length ? lineWords[1] : "", error))
196 					errors ~= "line#"~(i+1).to!string~':'~error;
197 			}
198 		}
199 		return errors;
200 	}
201 	// functions for generating byte code
202 	
203 	/// appends an instruction
204 	/// 
205 	/// Returns: true if successful, false if not (writes error in `error`)
206 	bool addInstruction(string instName, string argument, ref string error){
207 		import navm.bytecode : removeWhitespace;
208 		NaInstruction inst;
209 		if (hasInstruction(instName, inst)){
210 			if (inst.needsArg && !removeWhitespace(argument).length){
211 				error = "instruction needs an argument";
212 				return false;
213 			}
214 			NaData arg;
215 			if (inst.needsArg && !inst.argIsJumpPos){
216 				try{
217 					arg = readData(argument);
218 				}catch (Exception e){
219 					error = "invalid argument: "~e.msg;
220 					return false;
221 				}
222 				if (_stackLength < inst.popCount(arg)){
223 					error = "stack does not have enough elements for instruction";
224 					return false;
225 				}
226 			}
227 			_instructions ~= instName;
228 			_arguments ~= argument;
229 			_stackLength -= inst.popCount(arg);
230 			_stackLength += inst.pushCount;
231 			_stackLengthMax = _stackLength > _stackLengthMax ? _stackLength : _stackLengthMax;
232 			return true;
233 		}
234 		error = "instruction "~instName~" does not exist";
235 		return false;
236 	}
237 	/// ditto
238 	bool addInstruction(string instName, string argument){
239 		string dummy;
240 		return addInstruction(instName, argument, dummy);
241 	}
242 	/// adds a jump position
243 	void addJumpPos(string name){
244 		_instructions ~= name~':';
245 		_arguments ~= "";
246 	}
247 	/// Returns: the number of elements on stack after executing the last added instruction
248 	@property uinteger elementCount(){
249 		return _stackLength;
250 	}
251 	/// adds a "bookmark" to the last element on stack, so later on, relative to current peek index, bookmark index
252 	/// can be read.
253 	/// 
254 	/// Returns: bookmark id, or -1 if stack empty
255 	integer addBookmark(){
256 		if (_stackLength == 0)
257 			return -1;
258 		integer bookmarkId;
259 		for (bookmarkId = 0; bookmarkId <= integer.max; bookmarkId ++)
260 			if (bookmarkId !in _bookmarks)
261 				break;
262 		_bookmarks[bookmarkId] = _stackLength-1;
263 		return bookmarkId;
264 	}
265 	/// removes a bookmark
266 	/// 
267 	/// Returns: true if successful, false if does not exists
268 	bool removeBookmark(uinteger id){
269 		if (id !in _bookmarks)
270 			return false;
271 		_bookmarks.remove(id);
272 		return true;
273 	}
274 	/// gets relative index from current stack index to a bookmark
275 	/// 
276 	/// Returns: relative index, or integer.max if bookmark does not exist
277 	integer bookmarkRelIndex(uinteger id){
278 		if (id !in _bookmarks)
279 			return integer.max;
280 		return _stackLength.to!integer - (_bookmarks[id]+1).to!integer;
281 	}
282 }
283 
284 /// stores an data about available instruction
285 public struct NaInstruction{
286 	bool argIsJumpPos = false; /// if the argument to this instruction is a jump position
287 	string name; /// name of instruction, in lowercase
288 	ushort code = 0x0000; /// value when read as a ubyte
289 	bool needsArg; /// if this instruction needs an argument
290 	ubyte pushCount = 0; /// number of elements it will push to stack
291 	private ubyte _popCount = 0; /// number of elements it will pop (if ==255, then the argument is the number of elements to pop)
292 	void delegate() pointer; /// pointer to the delegate behind this instruction
293 	/// Returns: number of elements it will pop
294 	ubyte popCount(NaData arg){
295 		if (_popCount < 255)
296 			return _popCount;
297 		return cast(ubyte)(arg.intVal);
298 	}
299 	/// constructor, for instruction with no arg, no push/pop
300 	this (string name, integer code, void delegate() pointer){
301 		this.name = name.lowercase;
302 		this.code = cast(ushort)code;
303 		this.pointer = pointer;
304 		this.needsArg = false;
305 		this.pushCount = 0;
306 		this._popCount = 0;
307 	}
308 	/// constructor, for instruction with no arg, but pop and push
309 	this(string name, integer code, integer popCount, integer pushCount, void delegate() pointer){
310 		this.name = name.lowercase;
311 		this.code = cast(ushort)code;
312 		this.needsArg = false;
313 		this.pushCount = cast(ubyte)pushCount;
314 		this._popCount = cast(ubyte)popCount;
315 		this.pointer = pointer;
316 	}
317 	/// full constructor but arg is not jump position
318 	this (string name, integer code, bool needsArg, integer popCount, integer pushCount, void delegate() pointer){
319 		this.name = name.lowercase;
320 		this.code = cast(ushort)code;
321 		this.needsArg = needsArg;
322 		this.argIsJumpPos = false;
323 		this.pushCount = cast(ubyte)pushCount;
324 		this._popCount = cast(ubyte)popCount;
325 		this.pointer = pointer;
326 	}
327 	/// full constructor
328 	this (string name, integer code, bool needsArg, bool argIsJumpPos, integer popCount, integer pushCount, void delegate() pointer){
329 		this.name = name.lowercase;
330 		this.code = cast(ushort)code;
331 		this.needsArg = needsArg;
332 		this.argIsJumpPos = argIsJumpPos;
333 		this.pushCount = cast(ubyte)pushCount;
334 		this._popCount = cast(ubyte)popCount;
335 		this.pointer = pointer;
336 	}
337 }
338 
339 /// Reads data from a string (which can be string, double, integer, or array of any of those types, or array of array...)
340 /// 
341 /// Does not care if elements in array are of same type or not.
342 /// 
343 /// Returns: the data in NaData
344 /// 
345 /// Throws: Exception if data is invalid
346 public NaData readData(string strData){
347 	static string readElement(string array, uinteger startIndex){
348 		if (array[startIndex] == '[')
349 			return array[startIndex .. bracketPos(cast(char[])array, startIndex)+1];
350 		// search for ] or ,
351 		uinteger i = startIndex;
352 		while (i < array.length && ! [',',']'].hasElement(array[i]))
353 			i ++;
354 		return array[startIndex .. i];
355 	}
356 	if (strData.length == 0)
357 		return NaData();
358 	if (strData.isNum(false))
359 		return NaData(to!integer(strData));
360 	if (strData.isNum(true))
361 		return NaData(to!double(strData));
362 	if (["true", "false"].hasElement(strData))
363 		return NaData(strData == "true");
364 	// now checking for arrays
365 	if (strData[0] == '['){
366 		NaData r = NaData(cast(NaData[])[]);
367 		string[] elements = [];
368 		for (uinteger i = 1, bracketEnd = bracketPos(cast(char[])strData, 0); i < bracketEnd; i ++){
369 			if (strData[i] == ' ')
370 				continue;
371 			if (strData[i] != ']'){
372 				elements ~= readElement(strData, i);
373 				i += elements[$-1].length;
374 				// skip till ','
375 				while (![',',']'].hasElement(strData[i]))
376 					i ++;
377 			}
378 		}
379 		// now convert each of those elements to NaData
380 		r.arrayVal = new NaData[elements.length];
381 		foreach (i, element; elements)
382 			r.arrayVal[i] = readData(element);
383 		return r;
384 	}
385 	if (strData[0] == '\"'){
386 		// assume the whole thing is string, no need to find string end index
387 		NaData r;
388 		r.strVal = cast(dchar[])strReplaceSpecial(strData[1 .. $-1]).to!dstring;
389 		return r;
390 	}
391 	if (strData[0] == '\''){
392 		NaData r;
393 		strData = strData.dup;
394 		strData = strReplaceSpecial(strData[1 .. $ -1]);
395 		if (strData.length > 1)
396 			throw new Exception("'' can only contain 1 character");
397 		if (strData.length < 1)
398 			throw new Exception("no character provided in ''");
399 		r.dcharVal = strData[0];
400 		return r;
401 	}
402 	throw new Exception("invalid data");
403 }
404 
405 /// Removes whitespace from a string. And the remaining whitespace is only of one type. e.g: whitespace is ' ' & '\t', 
406 /// it will replace '\t' with ' ' so less conditions are needed after this
407 /// 
408 /// Returns: the string with minimal whitespace (just enough to separate identifiers)
409 private string removeWhitespace(char[] whitespace=[' ','\t'], char comment='#')(string line){
410 	char[] r = [];
411 	bool lastWasWhite = true;
412 	for (uinteger i = 0; i < line.length; i ++){
413 		if (line[i] == comment){
414 			break;
415 		}
416 		if (line[i] == '\"'){
417 			integer endIndex = line.strEnd(i);
418 			if (endIndex == -1)
419 				throw new Exception("string not terminated");
420 			r ~= line[i .. endIndex + 1];
421 			i = endIndex;
422 			lastWasWhite = false;
423 			continue;
424 		}
425 		if (whitespace.hasElement(line[i])){
426 			if (!lastWasWhite){
427 				r ~= whitespace[0];
428 			}
429 			lastWasWhite = true;
430 		}else{
431 			lastWasWhite = false;
432 			r ~= line[i];
433 		}
434 	}
435 	if (r.length > 0 && whitespace.hasElement(r[$-1])){
436 		r = r[0 .. $-1];
437 	}
438 	return cast(string)r;
439 }
440 /// 
441 unittest{
442 	assert("potato    potato".removeWhitespace == "potato potato");
443 	assert("potato    \t\t".removeWhitespace == "potato");
444 	assert("potato  \t  \t  potato  \t#comment".removeWhitespace == "potato potato");
445 	assert ("   ".removeWhitespace == "");
446 	assert ("   \t \t".removeWhitespace == "");
447 	assert ("  # \t \t".removeWhitespace == "");
448 	assert ("#  # \t \t".removeWhitespace == "");
449 }
450 
451 /// Removes whitespace from multiple strings.
452 /// 
453 /// Returns: string[] with minimal whitespace
454 string[] removeWhitespace(char[] whitespace=[' ','\t'], char comment='#')(string[] input){
455 	input = input.dup;
456 	uinteger writeIndex = 0;
457 	for (uinteger i = 0; i < input.length; i ++){
458 		const string line = input[i].removeWhitespace;
459 		input[writeIndex] = line;
460 		writeIndex ++;
461 	}
462 	input.length = writeIndex;
463 	return input;
464 }
465 
466 /// reads each line into words (separated by tab and space)
467 /// 
468 /// Returns: the words read (stuff inside square brackets is considerd a sinle word)
469 /// 
470 /// Throws: Exception if incorrect syntax (in brackets usually)
471 private string[][] readWords(string[] input){
472 	List!(string[]) lines = new List!(string[]);
473 	List!string words = new List!string;
474 	foreach (line; input){
475 		for (uinteger i = 0, readFrom = 0; i < line.length; i++){
476 			if (line[i] == '['){
477 				if (readFrom < i){
478 					words.append(line[readFrom .. i]);
479 					readFrom = i;
480 				}
481 				i = bracketPos(cast(char[])line, i);
482 				words.append(line[readFrom .. i + 1]);
483 				i ++; // skip the bracket end, the increment done by for will skip the space too
484 				readFrom = i+1;
485 				continue;
486 			}
487 			if (line[i] == '\"' || line[i] == '\''){
488 				if (readFrom < i){
489 					words.append(line[readFrom .. i]);
490 					readFrom = i;
491 				}
492 				immutable integer endIndex = line[i] == '\"' ? strEnd(line,i) : strEnd!('\\','\'')(line, i);
493 				if (endIndex == -1)
494 					throw new Exception("string not terminated");
495 				i = endIndex;
496 				words.append(line[readFrom .. i+1]);
497 				readFrom = i + 1;
498 				continue;
499 			}
500 			if (line[i] == ' ' || line[i] == '\t'){
501 				if (readFrom <= i && removeWhitespace(line[readFrom .. i]).length > 0)
502 					words.append(line[readFrom .. i]);
503 				readFrom = i + 1;
504 			}else if (i +1 == line.length && readFrom <= i){
505 				words.append(line[readFrom .. i + 1]);
506 			}
507 		}
508 		lines.append(words.toArray);
509 		words.clear;
510 	}
511 	.destroy(words);
512 	string[][] r = lines.toArray;
513 	.destroy(lines);
514 	return r;
515 }
516 ///
517 unittest{
518 	assert(["potato potato",
519 		"potato [asdf, sdfsdf, [0, 1, 2], 2] asd",
520 		"   \t",
521 		"potato \"some String\" \'c\'"].readWords == [
522 			["potato", "potato"],
523 			["potato", "[asdf, sdfsdf, [0, 1, 2], 2]", "asd"],
524 			["potato","\"some String\"","\'c\'"]
525 		]);
526 }
527 
528 /// Returns: the index where a string ends, -1 if not terminated
529 private integer strEnd(char specialCharBegin='\\', char strTerminator='"')(string s, uinteger startIndex){
530 	uinteger i;
531 	for (i = startIndex+1; i < s.length; i ++){
532 		if (s[i] == strTerminator){
533 			return i;
534 		}
535 		if (s[i] == specialCharBegin){
536 			i ++;
537 			continue;
538 		}
539 	}
540 	return -1;
541 }
542 ///
543 unittest{
544 	assert("st\"sdfsdfsd\"0".strEnd(2) == 11);
545 }
546 
547 /// Returns: string with special characters replaced with their actual characters (i.e, \t replaced with tab, \n with newline...)
548 private string strReplaceSpecial(char specialCharBegin='\\', char[char] map = ['t' : '\t', 'n' : '\n','\\':'\\'])
549 (string s){
550 	char[] r = [];
551 	for (uinteger i = 0; i < s.length; i ++){
552 		if (s[i] == specialCharBegin && i + 1 < s.length && s[i+1] in map){
553 			r ~= map[s[i+1]];
554 			i++;
555 			continue;
556 		}
557 		r ~= s[i];
558 	}
559 	return r;
560 }
561 ///
562 unittest{
563 	assert("newline:\\ntab:\\t".strReplaceSpecial == "newline:\ntab:\t");
564 }