1 module navm.bytecode; 2 3 import navm.defs; 4 5 import utils.lists; 6 import utils.misc; 7 8 import std.conv : to; 9 10 /// Class used for storing/constructing bytecode 11 public class NaBytecode{ 12 private: 13 /// where the bytecode is actually stored 14 string[] _instructions; 15 string[] _arguments; 16 /// stores number of elements sitting on stack right now after the last added instruction would be executed 17 uinteger _stackLength; 18 /// stores max number of elements sitting on stack at any time 19 uinteger _stackLengthMax; 20 /// stores any elements that have been "bookmarked". Useful for keeping track of elements during constructing byte code 21 uinteger[uinteger] _bookmarks; 22 /// stores the instruction table 23 NaInstruction[] _instructionTable; 24 public: 25 /// constructor 26 this(NaInstruction[] instructionTable){ 27 _instructionTable = instructionTable.dup; 28 } 29 ~this(){ 30 // nothing to do 31 } 32 /// Returns: true if a NaInstruction exists in instruction table 33 bool hasInstruction(string name, ref NaInstruction instruction){ 34 foreach (inst; _instructionTable){ 35 if (name.lowercase == inst.name){ 36 instruction = inst; 37 return true; 38 } 39 } 40 return false; 41 } 42 /// ditto 43 bool hasInstruction(string name){ 44 NaInstruction dummy; 45 return hasInstruction(name, dummy); 46 } 47 /// adds an instruction to the instruction table 48 /// 49 /// Returns: true if it was added, false if not due to name or code already used 50 bool addInstructionToTable(NaInstruction instruction){ 51 foreach (inst; _instructionTable){ 52 if (inst.name == instruction.name || inst.code == instruction.code) 53 return false; 54 } 55 _instructionTable ~= instruction; 56 return true; 57 } 58 /// goes over bytecode, checks if there are any errors, and converts jump positions to indexes 59 /// i.e: makes the byte code a bit more ready for execution 60 /// 61 /// Returns: errors in a string[], or an empty array in case no errors 62 string[] resolve(){ 63 string[] errors; 64 uinteger[string] jumpPos; 65 uinteger instCount = 0; 66 for (uinteger i=0, instIndex=0; i < _instructions.length; i ++){ 67 string name = _instructions[i]; 68 if (name.length && name[$-1] == ':'){ 69 name = name[0 .. $-1]; 70 if (name.lowercase in jumpPos){ 71 errors ~= "line#"~(i+1).to!string~' '~name~" as jump postion declared multiple times"; 72 continue; 73 } 74 if (name.isNum(true)){ 75 errors ~= "line#"~(i+1).to!string~' '~name~" is an invalid jump position, cannot be digits only."; 76 continue; 77 } 78 // check if its the last line 79 bool isLastLine = true; 80 foreach (j; i .. _instructions.length){ 81 if (!_instructions[j].length || _instructions[j][$-1] == ':') 82 continue; 83 isLastLine = false; 84 } 85 if (isLastLine){ 86 errors ~= "line#"~(i+1).to!string~" jump position at last line is now allowed, follow it by an instruction"; 87 continue; 88 } 89 jumpPos[name.lowercase] = instIndex; 90 continue; 91 } 92 instIndex ++; 93 instCount = instIndex; 94 } 95 for (uinteger i=0, writeIndex=0; i < _instructions.length; i ++){ 96 string name = _instructions[i]; 97 if (name.length && name[$-1] == ':') 98 continue; 99 if (writeIndex != i){ 100 _instructions[writeIndex] = name; 101 _arguments[writeIndex] = _arguments[i]; 102 } 103 NaInstruction instInfo; 104 if (this.hasInstruction(name, instInfo)){ 105 if (instInfo.needsArg && !_arguments[i].length) 106 errors ~= "line#"~(i+1).to!string~' '~name~" needs argument"; 107 if (instInfo.argIsJumpPos){ 108 string arg = _arguments[i].lowercase; 109 if (arg.isNum(false)){ // skip it if its an integer already 110 _arguments[writeIndex] = arg; 111 }else if (jumpPos.keys.hasElement(arg)) 112 _arguments[writeIndex] = jumpPos[arg].to!string; 113 else 114 errors ~= "line#"~(i+1).to!string~' '~arg~" is not a valid jump position"; 115 } 116 }else 117 errors ~= "line#"~(i+1).to!string~" instruction does not exist"; 118 writeIndex ++; 119 } 120 _instructions.length = instCount; 121 _arguments.length = instCount; 122 return errors; 123 } 124 /// Returns: the bytecode in a readable format 125 string[] getBytecodePretty(){ 126 string[] r; 127 r.length = _instructions.length; 128 foreach (i, inst; _instructions){ 129 r[i] = _instructions[i]; 130 if (_arguments[i].length) 131 r[i] ~= "\t\t" ~ _arguments[i]; 132 } 133 return r; 134 } 135 /// Call `resolve` before this or prepare for crashes 136 /// 137 /// Returns: pointers for all instructions 138 void delegate()[] getBytecodePointers(){ 139 void delegate()[] r; 140 r.length = _instructions.length; 141 foreach (i, inst; _instructions){ 142 NaInstruction instInfo; 143 hasInstruction(inst, instInfo); 144 r[i] = instInfo.pointer; 145 } 146 return r; 147 } 148 /// Call `resolve` before this or prepare for crashes 149 /// 150 /// Returns: codes for all instructions 151 ushort[] getBytecodeCodes(){ 152 ushort[] r; 153 r.length = _instructions.length; 154 foreach (i, inst; _instructions){ 155 NaInstruction instInfo; 156 hasInstruction(inst, instInfo); 157 r[i] = instInfo.code; 158 } 159 return r; 160 } 161 /// Call `resolve` before this. 162 /// 163 /// Returns: arguments for each instruction NaData[] 164 /// 165 /// Throws: Exception in case of error in argument 166 NaData[] getArgumentsNaData(){ 167 NaData[] r; 168 r.length = _arguments.length; 169 foreach (i, arg; _arguments){ 170 try{ 171 r[i] = readData(arg); 172 }catch (Exception e){ 173 e.msg = "Line#"~(i+1).to!string~' '~e.msg; 174 throw e; 175 } 176 } 177 return r; 178 } 179 /// Reads from a string[] (follows spec/syntax.md) 180 /// 181 /// Returns: errors in a string[], or [] if no errors 182 string[] readByteCode(string[] input){ 183 string[] errors; 184 immutable string[][] words = cast(immutable string[][])input.removeWhitespace.readWords(); 185 foreach (i, lineWords; words){ 186 if (!lineWords.length) 187 continue; 188 if (lineWords[0].length){ 189 if (lineWords[0][$-1] == ':'){ 190 this.addJumpPos(lineWords[0][0..$-1]); 191 continue; 192 } 193 string error = ""; 194 if (!this.addInstruction(lineWords[0], 195 lineWords.length>1 && lineWords[1].length ? lineWords[1] : "", error)) 196 errors ~= "line#"~(i+1).to!string~':'~error; 197 } 198 } 199 return errors; 200 } 201 // functions for generating byte code 202 203 /// appends an instruction 204 /// 205 /// Returns: true if successful, false if not (writes error in `error`) 206 bool addInstruction(string instName, string argument, ref string error){ 207 import navm.bytecode : removeWhitespace; 208 NaInstruction inst; 209 if (hasInstruction(instName, inst)){ 210 if (inst.needsArg && !removeWhitespace(argument).length){ 211 error = "instruction needs an argument"; 212 return false; 213 } 214 NaData arg; 215 if (inst.needsArg && !inst.argIsJumpPos){ 216 try{ 217 arg = readData(argument); 218 }catch (Exception e){ 219 error = "invalid argument: "~e.msg; 220 return false; 221 } 222 if (_stackLength < inst.popCount(arg)){ 223 error = "stack does not have enough elements for instruction"; 224 return false; 225 } 226 } 227 _instructions ~= instName; 228 _arguments ~= argument; 229 _stackLength -= inst.popCount(arg); 230 _stackLength += inst.pushCount; 231 _stackLengthMax = _stackLength > _stackLengthMax ? _stackLength : _stackLengthMax; 232 return true; 233 } 234 error = "instruction "~instName~" does not exist"; 235 return false; 236 } 237 /// ditto 238 bool addInstruction(string instName, string argument){ 239 string dummy; 240 return addInstruction(instName, argument, dummy); 241 } 242 /// adds a jump position 243 void addJumpPos(string name){ 244 _instructions ~= name~':'; 245 _arguments ~= ""; 246 } 247 /// Returns: the number of elements on stack after executing the last added instruction 248 @property uinteger elementCount(){ 249 return _stackLength; 250 } 251 /// adds a "bookmark" to the last element on stack, so later on, relative to current peek index, bookmark index 252 /// can be read. 253 /// 254 /// Returns: bookmark id, or -1 if stack empty 255 integer addBookmark(){ 256 if (_stackLength == 0) 257 return -1; 258 integer bookmarkId; 259 for (bookmarkId = 0; bookmarkId <= integer.max; bookmarkId ++) 260 if (bookmarkId !in _bookmarks) 261 break; 262 _bookmarks[bookmarkId] = _stackLength-1; 263 return bookmarkId; 264 } 265 /// removes a bookmark 266 /// 267 /// Returns: true if successful, false if does not exists 268 bool removeBookmark(uinteger id){ 269 if (id !in _bookmarks) 270 return false; 271 _bookmarks.remove(id); 272 return true; 273 } 274 /// gets relative index from current stack index to a bookmark 275 /// 276 /// Returns: relative index, or integer.max if bookmark does not exist 277 integer bookmarkRelIndex(uinteger id){ 278 if (id !in _bookmarks) 279 return integer.max; 280 return _stackLength.to!integer - (_bookmarks[id]+1).to!integer; 281 } 282 } 283 284 /// stores an data about available instruction 285 public struct NaInstruction{ 286 bool argIsJumpPos = false; /// if the argument to this instruction is a jump position 287 string name; /// name of instruction, in lowercase 288 ushort code = 0x0000; /// value when read as a ubyte 289 bool needsArg; /// if this instruction needs an argument 290 ubyte pushCount = 0; /// number of elements it will push to stack 291 private ubyte _popCount = 0; /// number of elements it will pop (if ==255, then the argument is the number of elements to pop) 292 void delegate() pointer; /// pointer to the delegate behind this instruction 293 /// Returns: number of elements it will pop 294 ubyte popCount(NaData arg){ 295 if (_popCount < 255) 296 return _popCount; 297 return cast(ubyte)(arg.intVal); 298 } 299 /// constructor, for instruction with no arg, no push/pop 300 this (string name, integer code, void delegate() pointer){ 301 this.name = name.lowercase; 302 this.code = cast(ushort)code; 303 this.pointer = pointer; 304 this.needsArg = false; 305 this.pushCount = 0; 306 this._popCount = 0; 307 } 308 /// constructor, for instruction with no arg, but pop and push 309 this(string name, integer code, integer popCount, integer pushCount, void delegate() pointer){ 310 this.name = name.lowercase; 311 this.code = cast(ushort)code; 312 this.needsArg = false; 313 this.pushCount = cast(ubyte)pushCount; 314 this._popCount = cast(ubyte)popCount; 315 this.pointer = pointer; 316 } 317 /// full constructor but arg is not jump position 318 this (string name, integer code, bool needsArg, integer popCount, integer pushCount, void delegate() pointer){ 319 this.name = name.lowercase; 320 this.code = cast(ushort)code; 321 this.needsArg = needsArg; 322 this.argIsJumpPos = false; 323 this.pushCount = cast(ubyte)pushCount; 324 this._popCount = cast(ubyte)popCount; 325 this.pointer = pointer; 326 } 327 /// full constructor 328 this (string name, integer code, bool needsArg, bool argIsJumpPos, integer popCount, integer pushCount, void delegate() pointer){ 329 this.name = name.lowercase; 330 this.code = cast(ushort)code; 331 this.needsArg = needsArg; 332 this.argIsJumpPos = argIsJumpPos; 333 this.pushCount = cast(ubyte)pushCount; 334 this._popCount = cast(ubyte)popCount; 335 this.pointer = pointer; 336 } 337 } 338 339 /// Reads data from a string (which can be string, double, integer, or array of any of those types, or array of array...) 340 /// 341 /// Does not care if elements in array are of same type or not. 342 /// 343 /// Returns: the data in NaData 344 /// 345 /// Throws: Exception if data is invalid 346 public NaData readData(string strData){ 347 static string readElement(string array, uinteger startIndex){ 348 if (array[startIndex] == '[') 349 return array[startIndex .. bracketPos(cast(char[])array, startIndex)+1]; 350 // search for ] or , 351 uinteger i = startIndex; 352 while (i < array.length && ! [',',']'].hasElement(array[i])) 353 i ++; 354 return array[startIndex .. i]; 355 } 356 if (strData.length == 0) 357 return NaData(); 358 if (strData.isNum(false)) 359 return NaData(to!integer(strData)); 360 if (strData.isNum(true)) 361 return NaData(to!double(strData)); 362 if (["true", "false"].hasElement(strData)) 363 return NaData(strData == "true"); 364 // now checking for arrays 365 if (strData[0] == '['){ 366 NaData r = NaData(cast(NaData[])[]); 367 string[] elements = []; 368 for (uinteger i = 1, bracketEnd = bracketPos(cast(char[])strData, 0); i < bracketEnd; i ++){ 369 if (strData[i] == ' ') 370 continue; 371 if (strData[i] != ']'){ 372 elements ~= readElement(strData, i); 373 i += elements[$-1].length; 374 // skip till ',' 375 while (![',',']'].hasElement(strData[i])) 376 i ++; 377 } 378 } 379 // now convert each of those elements to NaData 380 r.arrayVal = new NaData[elements.length]; 381 foreach (i, element; elements) 382 r.arrayVal[i] = readData(element); 383 return r; 384 } 385 if (strData[0] == '\"'){ 386 // assume the whole thing is string, no need to find string end index 387 NaData r; 388 r.strVal = cast(dchar[])strReplaceSpecial(strData[1 .. $-1]).to!dstring; 389 return r; 390 } 391 if (strData[0] == '\''){ 392 NaData r; 393 strData = strData.dup; 394 strData = strReplaceSpecial(strData[1 .. $ -1]); 395 if (strData.length > 1) 396 throw new Exception("'' can only contain 1 character"); 397 if (strData.length < 1) 398 throw new Exception("no character provided in ''"); 399 r.dcharVal = strData[0]; 400 return r; 401 } 402 throw new Exception("invalid data"); 403 } 404 405 /// Removes whitespace from a string. And the remaining whitespace is only of one type. e.g: whitespace is ' ' & '\t', 406 /// it will replace '\t' with ' ' so less conditions are needed after this 407 /// 408 /// Returns: the string with minimal whitespace (just enough to separate identifiers) 409 private string removeWhitespace(char[] whitespace=[' ','\t'], char comment='#')(string line){ 410 char[] r = []; 411 bool lastWasWhite = true; 412 for (uinteger i = 0; i < line.length; i ++){ 413 if (line[i] == comment){ 414 break; 415 } 416 if (line[i] == '\"'){ 417 integer endIndex = line.strEnd(i); 418 if (endIndex == -1) 419 throw new Exception("string not terminated"); 420 r ~= line[i .. endIndex + 1]; 421 i = endIndex; 422 lastWasWhite = false; 423 continue; 424 } 425 if (whitespace.hasElement(line[i])){ 426 if (!lastWasWhite){ 427 r ~= whitespace[0]; 428 } 429 lastWasWhite = true; 430 }else{ 431 lastWasWhite = false; 432 r ~= line[i]; 433 } 434 } 435 if (r.length > 0 && whitespace.hasElement(r[$-1])){ 436 r = r[0 .. $-1]; 437 } 438 return cast(string)r; 439 } 440 /// 441 unittest{ 442 assert("potato potato".removeWhitespace == "potato potato"); 443 assert("potato \t\t".removeWhitespace == "potato"); 444 assert("potato \t \t potato \t#comment".removeWhitespace == "potato potato"); 445 assert (" ".removeWhitespace == ""); 446 assert (" \t \t".removeWhitespace == ""); 447 assert (" # \t \t".removeWhitespace == ""); 448 assert ("# # \t \t".removeWhitespace == ""); 449 } 450 451 /// Removes whitespace from multiple strings. 452 /// 453 /// Returns: string[] with minimal whitespace 454 string[] removeWhitespace(char[] whitespace=[' ','\t'], char comment='#')(string[] input){ 455 input = input.dup; 456 uinteger writeIndex = 0; 457 for (uinteger i = 0; i < input.length; i ++){ 458 const string line = input[i].removeWhitespace; 459 input[writeIndex] = line; 460 writeIndex ++; 461 } 462 input.length = writeIndex; 463 return input; 464 } 465 466 /// reads each line into words (separated by tab and space) 467 /// 468 /// Returns: the words read (stuff inside square brackets is considerd a sinle word) 469 /// 470 /// Throws: Exception if incorrect syntax (in brackets usually) 471 private string[][] readWords(string[] input){ 472 List!(string[]) lines = new List!(string[]); 473 List!string words = new List!string; 474 foreach (line; input){ 475 for (uinteger i = 0, readFrom = 0; i < line.length; i++){ 476 if (line[i] == '['){ 477 if (readFrom < i){ 478 words.append(line[readFrom .. i]); 479 readFrom = i; 480 } 481 i = bracketPos(cast(char[])line, i); 482 words.append(line[readFrom .. i + 1]); 483 i ++; // skip the bracket end, the increment done by for will skip the space too 484 readFrom = i+1; 485 continue; 486 } 487 if (line[i] == '\"' || line[i] == '\''){ 488 if (readFrom < i){ 489 words.append(line[readFrom .. i]); 490 readFrom = i; 491 } 492 immutable integer endIndex = line[i] == '\"' ? strEnd(line,i) : strEnd!('\\','\'')(line, i); 493 if (endIndex == -1) 494 throw new Exception("string not terminated"); 495 i = endIndex; 496 words.append(line[readFrom .. i+1]); 497 readFrom = i + 1; 498 continue; 499 } 500 if (line[i] == ' ' || line[i] == '\t'){ 501 if (readFrom <= i && removeWhitespace(line[readFrom .. i]).length > 0) 502 words.append(line[readFrom .. i]); 503 readFrom = i + 1; 504 }else if (i +1 == line.length && readFrom <= i){ 505 words.append(line[readFrom .. i + 1]); 506 } 507 } 508 lines.append(words.toArray); 509 words.clear; 510 } 511 .destroy(words); 512 string[][] r = lines.toArray; 513 .destroy(lines); 514 return r; 515 } 516 /// 517 unittest{ 518 assert(["potato potato", 519 "potato [asdf, sdfsdf, [0, 1, 2], 2] asd", 520 " \t", 521 "potato \"some String\" \'c\'"].readWords == [ 522 ["potato", "potato"], 523 ["potato", "[asdf, sdfsdf, [0, 1, 2], 2]", "asd"], 524 ["potato","\"some String\"","\'c\'"] 525 ]); 526 } 527 528 /// Returns: the index where a string ends, -1 if not terminated 529 private integer strEnd(char specialCharBegin='\\', char strTerminator='"')(string s, uinteger startIndex){ 530 uinteger i; 531 for (i = startIndex+1; i < s.length; i ++){ 532 if (s[i] == strTerminator){ 533 return i; 534 } 535 if (s[i] == specialCharBegin){ 536 i ++; 537 continue; 538 } 539 } 540 return -1; 541 } 542 /// 543 unittest{ 544 assert("st\"sdfsdfsd\"0".strEnd(2) == 11); 545 } 546 547 /// Returns: string with special characters replaced with their actual characters (i.e, \t replaced with tab, \n with newline...) 548 private string strReplaceSpecial(char specialCharBegin='\\', char[char] map = ['t' : '\t', 'n' : '\n','\\':'\\']) 549 (string s){ 550 char[] r = []; 551 for (uinteger i = 0; i < s.length; i ++){ 552 if (s[i] == specialCharBegin && i + 1 < s.length && s[i+1] in map){ 553 r ~= map[s[i+1]]; 554 i++; 555 continue; 556 } 557 r ~= s[i]; 558 } 559 return r; 560 } 561 /// 562 unittest{ 563 assert("newline:\\ntab:\\t".strReplaceSpecial == "newline:\ntab:\t"); 564 }