1 module navm.bytecode; 2 3 import navm.defs; 4 5 import utils.lists; 6 import utils.misc; 7 8 import std.conv : to; 9 10 /// Class used for storing/constructing bytecode 11 public class NaBytecode{ 12 private: 13 /// where the bytecode is actually stored 14 string[] _instructions; 15 string[] _arguments; 16 /// stores number of elements sitting on stack right now after the last added instruction would be executed 17 uinteger _stackLength; 18 /// stores max number of elements sitting on stack at any time 19 uinteger _stackLengthMax; 20 /// stores any elements that have been "bookmarked". Useful for keeping track of elements during constructing byte code 21 uinteger[uinteger] _bookmarks; 22 /// stores the instruction table 23 NaInstruction[] _instructionTable; 24 public: 25 /// constructor 26 this(NaInstruction[] instructionTable){ 27 _instructionTable = instructionTable.dup; 28 } 29 ~this(){ 30 // nothing to do 31 } 32 /// Returns: true if a NaInstruction exists in instruction table 33 bool hasInstruction(string name, ref NaInstruction instruction){ 34 foreach (inst; _instructionTable){ 35 if (name.lowercase == inst.name){ 36 instruction = inst; 37 return true; 38 } 39 } 40 return false; 41 } 42 /// ditto 43 bool hasInstruction(string name){ 44 NaInstruction dummy; 45 return hasInstruction(name, dummy); 46 } 47 /// adds an instruction to the instruction table 48 /// 49 /// Returns: true if it was added, false if not due to name or code already used 50 bool addInstructionToTable(NaInstruction instruction){ 51 foreach (inst; _instructionTable){ 52 if (inst.name == instruction.name || inst.code == instruction.code) 53 return false; 54 } 55 _instructionTable ~= instruction; 56 return true; 57 } 58 /// goes over bytecode, checks if there are any errors, and converts jump positions to indexes 59 /// i.e: makes the byte code a bit more ready for execution 60 /// 61 /// Returns: errors in a string[], or an empty array in case no errors 62 string[] resolve(){ 63 string[] errors; 64 uinteger[string] jumpPos; 65 uinteger instCount = 0; 66 for (uinteger i=0, instIndex=0; i < _instructions.length; i ++){ 67 string name = _instructions[i]; 68 if (name.length && name[$-1] == ':'){ 69 name = name[0 .. $-1]; 70 if (name.lowercase in jumpPos){ 71 errors ~= "line#"~(i+1).to!string~' '~name~" as jump postion declared multiple times"; 72 continue; 73 } 74 if (name.isNum(true)){ 75 errors ~= "line#"~(i+1).to!string~' '~name~" is an invalid jump position, cannot be digits only."; 76 continue; 77 } 78 jumpPos[name.lowercase] = instIndex; 79 continue; 80 } 81 instIndex ++; 82 instCount = instIndex; 83 } 84 for (uinteger i=0, writeIndex=0; i < _instructions.length; i ++){ 85 string name = _instructions[i]; 86 if (name.length && name[$-1] == ':') 87 continue; 88 if (writeIndex != i){ 89 _instructions[writeIndex] = name; 90 _arguments[writeIndex] = _arguments[i]; 91 } 92 NaInstruction instInfo; 93 if (this.hasInstruction(name, instInfo)){ 94 if (instInfo.needsArg && !_arguments[i].length) 95 errors ~= "line#"~(i+1).to!string~' '~name~" needs argument"; 96 if (instInfo.argIsJumpPos){ 97 string arg = _arguments[i].lowercase; 98 if (arg.isNum(false)){ // skip it if its an integer already 99 _arguments[writeIndex] = arg; 100 }else if (jumpPos.keys.hasElement(arg)) 101 _arguments[writeIndex] = jumpPos[arg].to!string; 102 else 103 errors ~= "line#"~(i+1).to!string~' '~arg~" is not a valid jump position"; 104 } 105 }else 106 errors ~= "line#"~(i+1).to!string~" instruction does not exist"; 107 writeIndex ++; 108 } 109 _instructions.length = instCount; 110 _arguments.length = instCount; 111 return errors; 112 } 113 /// Returns: the bytecode in a readable format 114 string[] getBytecodePretty(){ 115 string[] r; 116 r.length = _instructions.length; 117 foreach (i, inst; _instructions){ 118 r[i] = _instructions[i]; 119 if (_arguments[i].length) 120 r[i] ~= "\t\t" ~ _arguments[i]; 121 } 122 return r; 123 } 124 /// Call `resolve` before this or prepare for crashes 125 /// 126 /// Returns: pointers for all instructions 127 void delegate()[] getBytecodePointers(){ 128 void delegate()[] r; 129 r.length = _instructions.length; 130 foreach (i, inst; _instructions){ 131 NaInstruction instInfo; 132 hasInstruction(inst, instInfo); 133 r[i] = instInfo.pointer; 134 } 135 return r; 136 } 137 /// Call `resolve` before this or prepare for crashes 138 /// 139 /// Returns: codes for all instructions 140 ushort[] getBytecodeCodes(){ 141 ushort[] r; 142 r.length = _instructions.length; 143 foreach (i, inst; _instructions){ 144 NaInstruction instInfo; 145 hasInstruction(inst, instInfo); 146 r[i] = instInfo.code; 147 } 148 return r; 149 } 150 /// Call `resolve` before this. 151 /// 152 /// Returns: arguments for each instruction NaData[] 153 /// 154 /// Throws: Exception in case of error in argument 155 NaData[] getArgumentsNaData(){ 156 NaData[] r; 157 r.length = _arguments.length; 158 foreach (i, arg; _arguments){ 159 try{ 160 r[i] = readData(arg); 161 }catch (Exception e){ 162 e.msg = "Line#"~(i+1).to!string~' '~e.msg; 163 throw e; 164 } 165 } 166 return r; 167 } 168 /// Reads from a string[] (follows spec/syntax.md) 169 /// 170 /// Returns: errors in a string[], or [] if no errors 171 string[] readByteCode(string[] input){ 172 string[] errors; 173 immutable string[][] words = cast(immutable string[][])input.removeWhitespace.readWords(); 174 foreach (i, lineWords; words){ 175 if (!lineWords.length) 176 continue; 177 if (lineWords[0].length){ 178 if (lineWords[0][$-1] == ':'){ 179 this.addJumpPos(lineWords[0][0..$-1]); 180 continue; 181 } 182 string error = ""; 183 if (!this.addInstruction(lineWords[0], 184 lineWords.length>1 && lineWords[1].length ? lineWords[1] : "", error)) 185 errors ~= "line#"~(i+1).to!string~':'~error; 186 } 187 } 188 return errors; 189 } 190 // functions for generating byte code 191 192 /// appends an instruction 193 /// 194 /// Returns: true if successful, false if not (writes error in `error`) 195 bool addInstruction(string instName, string argument, ref string error){ 196 import navm.bytecode : removeWhitespace; 197 NaInstruction inst; 198 if (hasInstruction(instName, inst)){ 199 if (inst.needsArg && !removeWhitespace(argument).length){ 200 error = "instruction needs an argument"; 201 return false; 202 } 203 NaData arg; 204 if (inst.needsArg && !inst.argIsJumpPos){ 205 try{ 206 arg = readData(argument); 207 }catch (Exception e){ 208 error = "invalid argument: "~e.msg; 209 return false; 210 } 211 if (_stackLength < inst.popCount(arg)){ 212 error = "stack does not have enough elements for instruction"; 213 return false; 214 } 215 } 216 _instructions ~= instName; 217 _arguments ~= argument; 218 _stackLength -= inst.popCount(arg); 219 _stackLength += inst.pushCount; 220 _stackLengthMax = _stackLength > _stackLengthMax ? _stackLength : _stackLengthMax; 221 return true; 222 } 223 error = "instruction "~instName~" does not exist"; 224 return false; 225 } 226 /// ditto 227 bool addInstruction(string instName, string argument){ 228 string dummy; 229 return addInstruction(instName, argument, dummy); 230 } 231 /// ditto 232 bool addInstruction(string instName){ 233 string dummy; 234 return addInstruction(instName, "", dummy); 235 } 236 /// adds a jump position 237 void addJumpPos(string name){ 238 _instructions ~= name~':'; 239 _arguments ~= ""; 240 } 241 /// Returns: the number of elements on stack after executing the last added instruction 242 @property uinteger elementCount(){ 243 return _stackLength; 244 } 245 /// adds a "bookmark" to the last element on stack, so later on, relative to current peek index, bookmark index 246 /// can be read. 247 /// 248 /// Returns: bookmark id, or -1 if stack empty 249 integer addBookmark(){ 250 if (_stackLength == 0) 251 return -1; 252 integer bookmarkId; 253 for (bookmarkId = 0; bookmarkId <= integer.max; bookmarkId ++) 254 if (bookmarkId !in _bookmarks) 255 break; 256 _bookmarks[bookmarkId] = _stackLength-1; 257 return bookmarkId; 258 } 259 /// removes a bookmark 260 /// 261 /// Returns: true if successful, false if does not exists 262 bool removeBookmark(uinteger id){ 263 if (id !in _bookmarks) 264 return false; 265 _bookmarks.remove(id); 266 return true; 267 } 268 /// gets relative index from current stack index to a bookmark 269 /// 270 /// Returns: relative index, or integer.max if bookmark does not exist 271 integer bookmarkRelIndex(uinteger id){ 272 if (id !in _bookmarks) 273 return integer.max; 274 return _stackLength.to!integer - (_bookmarks[id]+1).to!integer; 275 } 276 } 277 278 /// stores an data about available instruction 279 public struct NaInstruction{ 280 bool argIsJumpPos = false; /// if the argument to this instruction is a jump position 281 string name; /// name of instruction, in lowercase 282 ushort code = 0x0000; /// value when read as a ubyte 283 bool needsArg; /// if this instruction needs an argument 284 ubyte pushCount = 0; /// number of elements it will push to stack 285 private ubyte _popCount = 0; /// number of elements it will pop (if ==255, then the argument is the number of elements to pop) 286 void delegate() pointer; /// pointer to the delegate behind this instruction 287 /// Returns: number of elements it will pop 288 ubyte popCount(NaData arg){ 289 if (_popCount < 255) 290 return _popCount; 291 return cast(ubyte)(arg.intVal); 292 } 293 /// constructor, for instruction with no arg, no push/pop 294 this (string name, integer code, void delegate() pointer){ 295 this.name = name.lowercase; 296 this.code = cast(ushort)code; 297 this.pointer = pointer; 298 this.needsArg = false; 299 this.pushCount = 0; 300 this._popCount = 0; 301 } 302 /// constructor, for instruction with no arg, but pop and push 303 this(string name, integer code, integer popCount, integer pushCount, void delegate() pointer){ 304 this.name = name.lowercase; 305 this.code = cast(ushort)code; 306 this.needsArg = false; 307 this.pushCount = cast(ubyte)pushCount; 308 this._popCount = cast(ubyte)popCount; 309 this.pointer = pointer; 310 } 311 /// full constructor but arg is not jump position 312 this (string name, integer code, bool needsArg, integer popCount, integer pushCount, void delegate() pointer){ 313 this.name = name.lowercase; 314 this.code = cast(ushort)code; 315 this.needsArg = needsArg; 316 this.argIsJumpPos = false; 317 this.pushCount = cast(ubyte)pushCount; 318 this._popCount = cast(ubyte)popCount; 319 this.pointer = pointer; 320 } 321 /// full constructor 322 this (string name, integer code, bool needsArg, bool argIsJumpPos, integer popCount, integer pushCount, void delegate() pointer){ 323 this.name = name.lowercase; 324 this.code = cast(ushort)code; 325 this.needsArg = needsArg; 326 this.argIsJumpPos = argIsJumpPos; 327 this.pushCount = cast(ubyte)pushCount; 328 this._popCount = cast(ubyte)popCount; 329 this.pointer = pointer; 330 } 331 } 332 333 /// Reads data from a string (which can be string, double, integer, or array of any of those types, or array of array...) 334 /// 335 /// Does not care if elements in array are of same type or not. 336 /// 337 /// Returns: the data in NaData 338 /// 339 /// Throws: Exception if data is invalid 340 public NaData readData(string strData){ 341 static string readElement(string array, uinteger startIndex){ 342 if (array[startIndex] == '[') 343 return array[startIndex .. bracketPos(cast(char[])array, startIndex)+1]; 344 // search for ] or , 345 uinteger i = startIndex; 346 while (i < array.length && ! [',',']'].hasElement(array[i])) 347 i ++; 348 return array[startIndex .. i]; 349 } 350 if (strData.length == 0) 351 return NaData(); 352 if (strData.isNum(false)) 353 return NaData(to!integer(strData)); 354 if (strData.isNum(true)) 355 return NaData(to!double(strData)); 356 if (["true", "false"].hasElement(strData)) 357 return NaData(strData == "true"); 358 // now checking for arrays 359 if (strData[0] == '['){ 360 NaData r = NaData(cast(NaData[])[]); 361 string[] elements = []; 362 for (uinteger i = 1, bracketEnd = bracketPos(cast(char[])strData, 0); i < bracketEnd; i ++){ 363 if (strData[i] == ' ') 364 continue; 365 if (strData[i] != ']'){ 366 elements ~= readElement(strData, i); 367 i += elements[$-1].length; 368 // skip till ',' 369 while (![',',']'].hasElement(strData[i])) 370 i ++; 371 } 372 } 373 // now convert each of those elements to NaData 374 r.arrayVal = new NaData[elements.length]; 375 foreach (i, element; elements) 376 r.arrayVal[i] = readData(element); 377 return r; 378 } 379 if (strData[0] == '\"'){ 380 // assume the whole thing is string, no need to find string end index 381 NaData r; 382 r.strVal = cast(dchar[])strReplaceSpecial(strData[1 .. $-1]).to!dstring; 383 return r; 384 } 385 if (strData[0] == '\''){ 386 NaData r; 387 strData = strData.dup; 388 strData = strReplaceSpecial(strData[1 .. $ -1]); 389 if (strData.length > 1) 390 throw new Exception("'' can only contain 1 character"); 391 if (strData.length < 1) 392 throw new Exception("no character provided in ''"); 393 r.dcharVal = strData[0]; 394 return r; 395 } 396 throw new Exception("invalid data"); 397 } 398 399 /// Removes whitespace from a string. And the remaining whitespace is only of one type. e.g: whitespace is ' ' & '\t', 400 /// it will replace '\t' with ' ' so less conditions are needed after this 401 /// 402 /// Returns: the string with minimal whitespace (just enough to separate identifiers) 403 private string removeWhitespace(char[] whitespace=[' ','\t'], char comment='#')(string line){ 404 char[] r = []; 405 bool lastWasWhite = true; 406 for (uinteger i = 0; i < line.length; i ++){ 407 if (line[i] == comment){ 408 break; 409 } 410 if (line[i] == '\"'){ 411 integer endIndex = line.strEnd(i); 412 if (endIndex == -1) 413 throw new Exception("string not terminated"); 414 r ~= line[i .. endIndex + 1]; 415 i = endIndex; 416 lastWasWhite = false; 417 continue; 418 } 419 if (whitespace.hasElement(line[i])){ 420 if (!lastWasWhite){ 421 r ~= whitespace[0]; 422 } 423 lastWasWhite = true; 424 }else{ 425 lastWasWhite = false; 426 r ~= line[i]; 427 } 428 } 429 if (r.length > 0 && whitespace.hasElement(r[$-1])){ 430 r = r[0 .. $-1]; 431 } 432 return cast(string)r; 433 } 434 /// 435 unittest{ 436 assert("potato potato".removeWhitespace == "potato potato"); 437 assert("potato \t\t".removeWhitespace == "potato"); 438 assert("potato \t \t potato \t#comment".removeWhitespace == "potato potato"); 439 assert (" ".removeWhitespace == ""); 440 assert (" \t \t".removeWhitespace == ""); 441 assert (" # \t \t".removeWhitespace == ""); 442 assert ("# # \t \t".removeWhitespace == ""); 443 } 444 445 /// Removes whitespace from multiple strings. 446 /// 447 /// Returns: string[] with minimal whitespace 448 string[] removeWhitespace(char[] whitespace=[' ','\t'], char comment='#')(string[] input){ 449 input = input.dup; 450 uinteger writeIndex = 0; 451 for (uinteger i = 0; i < input.length; i ++){ 452 const string line = input[i].removeWhitespace; 453 input[writeIndex] = line; 454 writeIndex ++; 455 } 456 input.length = writeIndex; 457 return input; 458 } 459 460 /// reads each line into words (separated by tab and space) 461 /// 462 /// Returns: the words read (stuff inside square brackets is considerd a sinle word) 463 /// 464 /// Throws: Exception if incorrect syntax (in brackets usually) 465 private string[][] readWords(string[] input){ 466 List!(string[]) lines = new List!(string[]); 467 List!string words = new List!string; 468 foreach (line; input){ 469 for (uinteger i = 0, readFrom = 0; i < line.length; i++){ 470 if (line[i] == '['){ 471 if (readFrom < i){ 472 words.append(line[readFrom .. i]); 473 readFrom = i; 474 } 475 i = bracketPos(cast(char[])line, i); 476 words.append(line[readFrom .. i + 1]); 477 i ++; // skip the bracket end, the increment done by for will skip the space too 478 readFrom = i+1; 479 continue; 480 } 481 if (line[i] == '\"' || line[i] == '\''){ 482 if (readFrom < i){ 483 words.append(line[readFrom .. i]); 484 readFrom = i; 485 } 486 immutable integer endIndex = line[i] == '\"' ? strEnd(line,i) : strEnd!('\\','\'')(line, i); 487 if (endIndex == -1) 488 throw new Exception("string not terminated"); 489 i = endIndex; 490 words.append(line[readFrom .. i+1]); 491 readFrom = i + 1; 492 continue; 493 } 494 if (line[i] == ' ' || line[i] == '\t'){ 495 if (readFrom <= i && removeWhitespace(line[readFrom .. i]).length > 0) 496 words.append(line[readFrom .. i]); 497 readFrom = i + 1; 498 }else if (i +1 == line.length && readFrom <= i){ 499 words.append(line[readFrom .. i + 1]); 500 } 501 } 502 lines.append(words.toArray); 503 words.clear; 504 } 505 .destroy(words); 506 string[][] r = lines.toArray; 507 .destroy(lines); 508 return r; 509 } 510 /// 511 unittest{ 512 assert(["potato potato", 513 "potato [asdf, sdfsdf, [0, 1, 2], 2] asd", 514 " \t", 515 "potato \"some String\" \'c\'"].readWords == [ 516 ["potato", "potato"], 517 ["potato", "[asdf, sdfsdf, [0, 1, 2], 2]", "asd"], 518 ["potato","\"some String\"","\'c\'"] 519 ]); 520 } 521 522 /// Returns: the index where a string ends, -1 if not terminated 523 private integer strEnd(char specialCharBegin='\\', char strTerminator='"')(string s, uinteger startIndex){ 524 uinteger i; 525 for (i = startIndex+1; i < s.length; i ++){ 526 if (s[i] == strTerminator){ 527 return i; 528 } 529 if (s[i] == specialCharBegin){ 530 i ++; 531 continue; 532 } 533 } 534 return -1; 535 } 536 /// 537 unittest{ 538 assert("st\"sdfsdfsd\"0".strEnd(2) == 11); 539 } 540 541 /// Returns: string with special characters replaced with their actual characters (i.e, \t replaced with tab, \n with newline...) 542 private string strReplaceSpecial(char specialCharBegin='\\', char[char] map = ['t' : '\t', 'n' : '\n','\\':'\\']) 543 (string s){ 544 char[] r = []; 545 for (uinteger i = 0; i < s.length; i ++){ 546 if (s[i] == specialCharBegin && i + 1 < s.length && s[i+1] in map){ 547 r ~= map[s[i+1]]; 548 i++; 549 continue; 550 } 551 r ~= s[i]; 552 } 553 return r; 554 } 555 /// 556 unittest{ 557 assert("newline:\\ntab:\\t".strReplaceSpecial == "newline:\ntab:\t"); 558 }