' 2009-05-02 Modified to detect when inside DAT, handle : specially (for local labels) con _clkmode = xtal1 + pll8x _xinfreq = 10_000_000 _stack = 1000 obj f: "sxfile" floatString: "FltStr" kw: "keywords" bt: "bintree" pub Open( pFilename ) if f.Open( pFilename, "R") <> 0 abort string("can't open file") InitReadLine insideDAT~ Advance ' prime the pump pub Close f.Close { bytes~ InitReadLine GetNextToken ' prime the pump t := cnt repeat case tokenType EOL_TOKEN: term.str( @tokenText ) EOF_TOKEN: quit ID_TOKEN: term.str( @tokenText ) OP_TOKEN: INT_TOKEN: term.out( "#" ) term.dec( tokenValue ) FLOAT_TOKEN: term.out( "F" ) term.str( floatString.FloatToString( tokenValue ) ) term.out( "\" ) GetNextToken term.out( 13 ) '} { repeat while SkipWhitespace term.dec( lineNo ) term.out( "," ) term.dec( cp+1 ) term.out( ":" ) if cp == lineLength term.str( string("(eol)")) else term.out( lineBuffer[cp++] ) term.out( 13 ) '} { repeat r := sdfat.pread( @buf, BUFFERSIZE ) if r < 0 quit bytes += r '} { repeat 100 r := sdfat.pgetc if r < 0 quit ++bytes term.dec(r) term.out( "," ) '} con MAXTOKENLENGTH = 32 BUFFERSIZE = 200 var byte tokenText[MAXTOKENLENGTH+1] long tokenType word tokenLineNumber word tokenCol long tokenValue byte insideDAT pub GetPText return @tokenText pub Type return tokenType pub Column return tokenCol pub LineNumber return tokenLineNumber pub Value return tokenValue pub Advance | i, radix, fp, x, hack if inStringLiteral if comma comma~ ifnot tokenValue abort string("Unterminated string") if lineBuffer[cp] <> $22 ' double-quote tokenType := kw#kCOMMA return ++cp inStringLiteral~ else comma~~ tokenType := kw#kINTLITERAL tokenValue := lineBuffer[cp++] return if SkipWhitespace if cp == lineLength tokenType := kw#kEOL bytemove( @tokenText, string("(EOL)"), 6 ) tokenLineNumber := lineNo tokenCol := cp elseif lineBuffer[cp] == $22 ' double-quote tokenLineNumber := LineNo tokenCol := cp ++cp if LineBuffer[cp] == $22 abort string("Empty string") comma~ inStringLiteral~~ Advance elseif lineBuffer[cp] == "%" or lineBuffer[cp] == "$" tokenType := kw#kINTLITERAL tokenLineNumber := lineNo tokenCol := cp if lineBuffer[cp] == "%" if lineBuffer[cp+1] == "%" radix := 4 ++cp else radix := 2 else radix := 16 ++cp ifnot IsDigit( lineBuffer[cp], radix, false ) abort string("Bad character in number") tokenValue~ repeat while IsDigit( lineBuffer[cp], radix, true ) if lineBuffer[cp] <> "_" tokenValue := tokenValue * radix + DigitValue( linebuffer[cp] ) ++cp elseif IsDigit( lineBuffer[cp], 10, false ) tokenLineNumber := lineNo tokenCol := cp i~ fp~ {0 => integer 1 => have seen . 2 => have seen e or E 3 => have seen + or - } repeat while IsDigit( lineBuffer[cp], 10, true ) { } or ( fp < 1 and lineBuffer[cp] == "." and lineBuffer[cp+1] <> "." ) { } or ( fp < 2 and (lineBuffer[cp] == "e" or lineBuffer[cp] == "E") ) { } or ( (fp == 1 or fp == 2) and (lineBuffer[cp] == "+" or lineBuffer[cp] == "-") ) if lineBuffer[cp] == "." fp := 1 if lineBuffer[cp] == "e" or lineBuffer[cp] == "E" fp := 2 if lineBuffer[cp] == "+" or lineBuffer[cp] == "-" fp := 3 if lineBuffer[cp] <> "_" if i => MAXTOKENLENGTH abort string("Token too long") tokenText[i++] := lineBuffer[cp] ++cp tokenText[i]~ if fp tokenType := kw#kFLOATLITERAL tokenValue := floatString.StringToFloat( @tokenText ) else tokenType := kw#kINTLITERAL tokenValue~ i~ repeat while tokenText[i] tokenValue := tokenValue * 10 + DigitValue( tokenText[i++] ) elseif IsAlpha( lineBuffer[cp] ) i~ tokenLineNumber := lineNo tokenCol := cp repeat while IsAlpha( lineBuffer[cp] ) or IsDigit( lineBuffer[cp], 10, true ) if i => MAXTOKENLENGTH abort string("Token too long") tokenText[i++] := ToUpper( lineBuffer[cp++] ) tokenText[i]~ x := kw.KeywordLookup( @tokenText ) if x tokenType := bt.PeekL( x ) if kw#kCON =< tokenType and tokenType =< kw#kVAR and tokenCol == 0 insideDAT := tokenType == kw#kDAT else tokenType := kw#kID else ' non-alphanumeric tokenLineNumber := lineNo tokenCol := cp i~ if lineBuffer[cp] < $20 or lineBuffer[cp] => $80 abort string("Illegal character (UNICODE?)") if insideDat and lineBuffer[cp] == ":" and IsAlpha( lineBuffer[cp+1] ) tokenText[i++] := lineBuffer[cp++] repeat while IsAlpha( lineBuffer[cp] ) or IsDigit( lineBuffer[cp], 10, true ) if i => MAXTOKENLENGTH abort string("Token too long") tokenText[i++] := ToUpper( lineBuffer[cp++] ) tokenText[i]~ tokenType := kw#kID else repeat if cp < lineLength tokenText[i++] := lineBuffer[cp++] else tokenText[i++] := " " ++cp tokenText[i]~ while x := kw.KeywordLookup( @tokenText ) if i > 1 tokenText[--i]~ ' shorten token by 1 --cp tokenType := bt.PeekL( kw.KeywordLookup( @tokenText ) ) else tokenType := kw#kUNKNOWN if IsBinary ' This next block is a total hack to see if a binary op hack := blankLine ' hack is followed by "=" making it an assignment op SkipWhitespace ' It parses "and ==" as "(and=) =" instead of "(and)(==)" if lineBuffer[cp] == "=" ' PropTool does the latter. So this is wrong, but hopefully tokenText[i++] := lineBuffer[cp++] ' not *too* wrong. tokenText[i]~ tokenType |= kw#kASSIGNMENT else blankLine := hack ' restore (this hack is necessary to reset the eol condition trigger in SkipWhitespace) else tokenType := kw#kEOF bytemove( @tokenText, string("(EOF)"), 6 ) tokenLineNumber := lineNo tokenCol := cp pri ToUpper( ch ) if "a" =< ch and ch =< "z" ch += constant( "A" - "a" ) return ch pri IsAlpha( ch ) return "a" =< ch and ch =< "z" or "A" =< ch and ch =< "Z" or ch == "_" pri IsDigit( ch, radix, u ) | upper {{ Returns non-zero if ch is an acceptable digit given radix (2, 4, 10, or 16), false otherwise. Pass u = true if the underscore ("_") is an acceptable value for ch. }} if ch == "_" return u upper := constant("0"-1) + radix if radix =< 10 return "0" =< ch and ch =< upper ' else radix == 16 return "0" =< ch and ch =< "9" or "a" =< ch and ch =< "f" or "A" =< ch and ch =< "F" pri DigitValue( ch ) if ch =< "9" return ch - "0" if ch =< "F" return ch + constant( 10 - "A" ) if ch =< "f" return ch + constant( 10 - "a" ) pub IsBinary {{ Returns non-zero (not necessarily -1) if token is a binary operator. Returns 0 otherwise. }} return ( tokenType & constant(kw#kUNARY|kw#kBINARY) ) == kw#kBINARY var byte lineBuffer[BUFFERSIZE] byte lineLength byte lineRemainder byte eolLength word lineNo byte cp byte blankLine byte inStringLiteral ' indicates we are splitting up a string literal byte comma ' flag for returning commas when splitting up a string literal pri SkipWhitespace '' Returns true on success, false on failure (eof). '' lineBuffer[cp] is the next non-whitespace character '' (comments count as whitespace). repeat repeat while cp => lineLength ifnot blankLine blankLine~~ return true ifnot ReadLine return false if lineBuffer[cp] == 9 abort string("Illegal TAB character") if lineBuffer[cp] == "'" ' quote comment? cp := lineLength ' skip to end of line elseif lineBuffer[cp] == "}" abort string("Unexpected '}'") elseif lineBuffer[cp] == "{" SkipComment elseif lineBuffer[cp] == " " ++cp else ' non-blank character blankLine~ return true pri SkipComment | depth if cp+1 < lineLength and lineBuffer[cp+1] == "{" ++cp repeat repeat while cp => lineLength ifnot ReadLine abort string("Unterminated comment") if cp+1 < lineLength and lineBuffer[cp] == "}" and lineBuffer[cp+1] == "}" cp += 2 return ++cp else ++cp depth := 1 repeat repeat while cp => lineLength ifnot ReadLine abort string("Unterminated comment") if lineBuffer[cp] == "{" ++cp ++depth elseif lineBuffer[cp] == "}" ++cp ifnot --depth return else ++cp pri InitReadLine lineLength~ lineRemainder~ eolLength~ lineNo~ cp~ blankLine~~ inStringLiteral~ { |<--------------- BUFFERSIZE --------------->| +--------------------------------------------+ | | | | | +--------------------------------------------+ |<-lineLength ->| | | eolLength ->| |<- | |<- lineRemainder ->| } pri ReadLine | bytes, i '' Reads a line into lineBuff. Returns true on success, false on failure (eof) '' The line in lineBuff is terminated with a null byte. Calling programs should '' not modify any memory in lineBuff after the null (the "remainder" section in '' the diagram above) because that's the following line(s). ' First, move remainder up to start of buffer bytemove( @lineBuffer, @lineBuffer[lineLength+eolLength], lineRemainder ) ' Fill the rest of the buffer with new data bytes := f.Read( @lineBuffer[lineRemainder], BUFFERSIZE-lineRemainder ) ' pread returns #bytes read, but after eof it returns negative numbers if bytes > 0 lineRemainder += bytes ifnot lineRemainder ++lineNo cp~ return false repeat i from 0 to (lineRemainder-2) #> 0 if lineBuffer[i] == 13 or lineBuffer[i] == 10 eolLength := 1 ' cr or lf if lineBuffer[i] == 13 and lineBuffer[i+1] == 10 ++eolLength ' cr+lf lineBuffer[i]~ ' set terminating null lineLength := i lineRemainder -= lineLength + eolLength ++lineNo ' first line of file is line 1 cp~ return true if lineRemainder < BUFFERSIZE lineLength := lineRemainder~ lineBuffer[lineLength]~ ++lineNo cp~ return true abort string("Input line too long") {{ Copyright (c) 2009 Michael Park +------------------------------------------------------------------------------------------------------------------------------+ | TERMS OF USE: MIT License | +------------------------------------------------------------------------------------------------------------------------------+ |Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation | |files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, | |modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software| |is furnished to do so, subject to the following conditions: | | | |The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.| | | |THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE | |WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR | |COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | |ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | +------------------------------------------------------------------------------------------------------------------------------+ }}