565 lines
16 KiB
Plaintext
565 lines
16 KiB
Plaintext
|
{{
|
||
|
Lex
|
||
|
|
||
|
usage: lex filename [options]
|
||
|
|
||
|
Reads filename.spn on SD card, writes tokens to filename.tok.
|
||
|
|
||
|
}}
|
||
|
con
|
||
|
|
||
|
_stack = 1000
|
||
|
|
||
|
obj
|
||
|
ios: "ios"
|
||
|
kw: "keywords"
|
||
|
str: "stringx"
|
||
|
bt: "bintree"
|
||
|
floatString: "FltStr"
|
||
|
|
||
|
var
|
||
|
|
||
|
byte parastr[64]
|
||
|
|
||
|
pub Main | err, p
|
||
|
ios.start
|
||
|
err := \Try
|
||
|
if err
|
||
|
if err > 0
|
||
|
ios.print( err )
|
||
|
ios.printnl
|
||
|
else
|
||
|
ios.print( string("Error ") )
|
||
|
ios.printdec( err )
|
||
|
ios.printchar( 13 )
|
||
|
ios.printdec( TokLineNumber )
|
||
|
ios.printchar( "," )
|
||
|
ios.printdec( TokColumn + 1 )
|
||
|
ios.printchar( " " )
|
||
|
ios.printchar( "'" )
|
||
|
ios.print( pTokenText )
|
||
|
ios.printchar( "'" )
|
||
|
ios.printchar( 13 )
|
||
|
ios.sdclose
|
||
|
bt.Stop
|
||
|
ios.stop
|
||
|
|
||
|
var
|
||
|
long pTokenText
|
||
|
|
||
|
pri Try
|
||
|
bt.Init( 0, 0 )
|
||
|
ProcessCommandLine
|
||
|
if verbosity => 1
|
||
|
ios.print( string("version 100225", 13) )
|
||
|
Start
|
||
|
|
||
|
pri ProcessCommandLine | nArgs, l
|
||
|
|
||
|
ios.parastart
|
||
|
repeat while ios.paranext(@parastr) 'parameter einlesen
|
||
|
if byte[@parastr][0] == "/" 'option?
|
||
|
case byte[@parastr][1]
|
||
|
"?": ios.print(string("help: lex /i fn.spn /o fn.tok [options]")) '/?
|
||
|
ios.printnl
|
||
|
abort
|
||
|
"i": ifnot ios.paranext(@spinFilename) '/i fn.spn
|
||
|
ios.print(string("error: input-filname"))
|
||
|
ios.printnl
|
||
|
"o": ifnot ios.paranext(@outputFilename) '/o fn.tok
|
||
|
ios.print(string("error: output-filname"))
|
||
|
ios.printnl
|
||
|
"c": compile~~
|
||
|
"l": compile~~
|
||
|
"v": if ios.paranext(@parastr) '/v n
|
||
|
verbosity := str2dec(@parastr)
|
||
|
else
|
||
|
ios.print(string("error: verbosity-level"))
|
||
|
ios.printnl
|
||
|
else
|
||
|
ios.print(string("usage: lex /i fn.spn /o fn.tok [options]"))
|
||
|
|
||
|
pri str2dec(stradr)|buffer,counter
|
||
|
|
||
|
buffer := byte[stradr]
|
||
|
counter := (strsize(stradr) <# 11)
|
||
|
repeat while(counter--)
|
||
|
result *= 10
|
||
|
result += lookdownz(byte[stradr++]: "0".."9")
|
||
|
if(buffer == "-")
|
||
|
-result
|
||
|
|
||
|
con
|
||
|
MAXFILENAMELENGTH = 8 + 1 + 3 ' 8.3
|
||
|
MAXSTRINGBUFFERLENGTH = 32
|
||
|
|
||
|
var
|
||
|
byte spinFilename[MAXFILENAMELENGTH+1] ' input .spi file
|
||
|
byte outputFilename[MAXFILENAMELENGTH+1] ' output .tok file
|
||
|
byte stringBuffer[MAXSTRINGBUFFERLENGTH+1] ' temp string buffer
|
||
|
|
||
|
dat
|
||
|
verbosity byte 0 ' set by /V option
|
||
|
compile byte 0 ' set by /C or /L: if non-zero, run compile.bin automatically.
|
||
|
type long 0
|
||
|
lineNum word 0
|
||
|
column word 0
|
||
|
|
||
|
pri Start | v
|
||
|
|
||
|
kw.Init
|
||
|
pTokenText := TokGetPText
|
||
|
|
||
|
if verbosity => 2
|
||
|
ios.print( string("Reading ") )
|
||
|
ios.print( @spinFilename )
|
||
|
ios.printchar( 13 )
|
||
|
TokOpen( @spinFilename )
|
||
|
|
||
|
if verbosity => 2
|
||
|
ios.print( string("Writing ") )
|
||
|
ios.print( @outputFilename )
|
||
|
ios.printchar( 13 )
|
||
|
if ios.sdnewfile( @outputFilename )
|
||
|
abort string("can't open o-file")
|
||
|
ios.sdopen( "W", @outputFilename )
|
||
|
|
||
|
repeat
|
||
|
type := TokType
|
||
|
lineNum := TokLineNumber
|
||
|
column := Column
|
||
|
ios.sdputblk( 8, @type ) ' long + word + word
|
||
|
|
||
|
if type == kw#kINTLITERAL or type == kw#kFLOATLITERAL
|
||
|
v := TokValue
|
||
|
ios.sdputblk( 4, @v )
|
||
|
else
|
||
|
ios.sdputblk( strsize(pTokenText) + 1, pTokenText )
|
||
|
|
||
|
if TokType == kw#kEOF
|
||
|
quit
|
||
|
TokAdvance
|
||
|
ios.sdclose
|
||
|
|
||
|
dat 'TOKENIZER
|
||
|
|
||
|
pub TokOpen( pFilename )
|
||
|
|
||
|
if ios.sdopen( "R", pFilename ) <> 0
|
||
|
abort string("can't open file")
|
||
|
|
||
|
TokInitReadLine
|
||
|
|
||
|
insideDAT~
|
||
|
|
||
|
TokAdvance ' prime the pump
|
||
|
|
||
|
pub TokClose
|
||
|
ios.sdclose
|
||
|
|
||
|
{
|
||
|
bytes~
|
||
|
InitReadLine
|
||
|
GetNextToken ' prime the pump
|
||
|
|
||
|
t := cnt
|
||
|
repeat
|
||
|
case tokenType
|
||
|
EOL_TOKEN:
|
||
|
term.str( @tokenText )
|
||
|
EOF_TOKEN:
|
||
|
quit
|
||
|
ID_TOKEN:
|
||
|
term.str( @tokenText )
|
||
|
OP_TOKEN:
|
||
|
INT_TOKEN:
|
||
|
term.out( "#" )
|
||
|
term.dec( tokenValue )
|
||
|
FLOAT_TOKEN:
|
||
|
term.out( "F" )
|
||
|
term.str( floatString.FloatToString( tokenValue ) )
|
||
|
term.out( "\" )
|
||
|
GetNextToken
|
||
|
term.out( 13 )
|
||
|
'}
|
||
|
{ repeat while SkipWhitespace
|
||
|
term.dec( lineNo )
|
||
|
term.out( "," )
|
||
|
term.dec( cp+1 )
|
||
|
term.out( ":" )
|
||
|
if cp == lineLength
|
||
|
term.str( string("(eol)"))
|
||
|
else
|
||
|
term.out( lineBuffer[cp++] )
|
||
|
term.out( 13 )
|
||
|
'}
|
||
|
{
|
||
|
repeat
|
||
|
r := sdfat.pread( @buf, BUFFERSIZE )
|
||
|
if r < 0
|
||
|
quit
|
||
|
bytes += r
|
||
|
'}
|
||
|
{
|
||
|
repeat 100
|
||
|
r := sdfat.pgetc
|
||
|
if r < 0
|
||
|
quit
|
||
|
++bytes
|
||
|
term.dec(r)
|
||
|
term.out( "," )
|
||
|
'}
|
||
|
|
||
|
con
|
||
|
MAXTOKENLENGTH = 32
|
||
|
BUFFERSIZE = 200
|
||
|
|
||
|
var
|
||
|
byte tokenText[MAXTOKENLENGTH+1]
|
||
|
long tokenType
|
||
|
word tokenLineNumber
|
||
|
word tokenCol
|
||
|
long tokenValue
|
||
|
byte insideDAT
|
||
|
|
||
|
pub TokGetPText
|
||
|
return @tokenText
|
||
|
|
||
|
pub TokType
|
||
|
return tokenType
|
||
|
|
||
|
pub TokColumn
|
||
|
return tokenCol
|
||
|
|
||
|
pub TokLineNumber
|
||
|
return tokenLineNumber
|
||
|
|
||
|
pub TokValue
|
||
|
return tokenValue
|
||
|
|
||
|
pub TokAdvance | i, radix, fp, x, hack
|
||
|
if inStringLiteral
|
||
|
if comma
|
||
|
comma~
|
||
|
ifnot tokenValue
|
||
|
abort string("Unterminated string")
|
||
|
if lineBuffer[cp] <> $22 ' double-quote
|
||
|
tokenType := kw#kCOMMA
|
||
|
return
|
||
|
++cp
|
||
|
inStringLiteral~
|
||
|
else
|
||
|
comma~~
|
||
|
tokenType := kw#kINTLITERAL
|
||
|
tokenValue := lineBuffer[cp++]
|
||
|
return
|
||
|
|
||
|
if TokSkipWhitespace
|
||
|
if cp == lineLength
|
||
|
tokenType := kw#kEOL
|
||
|
bytemove( @tokenText, string("(EOL)"), 6 )
|
||
|
tokenLineNumber := lineNo
|
||
|
tokenCol := cp
|
||
|
elseif lineBuffer[cp] == $22 ' double-quote
|
||
|
tokenLineNumber := LineNo
|
||
|
tokenCol := cp
|
||
|
++cp
|
||
|
if LineBuffer[cp] == $22
|
||
|
abort string("Empty string")
|
||
|
comma~
|
||
|
inStringLiteral~~
|
||
|
TokAdvance
|
||
|
elseif lineBuffer[cp] == "%" or lineBuffer[cp] == "$"
|
||
|
tokenType := kw#kINTLITERAL
|
||
|
tokenLineNumber := lineNo
|
||
|
tokenCol := cp
|
||
|
if lineBuffer[cp] == "%"
|
||
|
if lineBuffer[cp+1] == "%"
|
||
|
radix := 4
|
||
|
++cp
|
||
|
else
|
||
|
radix := 2
|
||
|
else
|
||
|
radix := 16
|
||
|
++cp
|
||
|
ifnot TokIsDigit( lineBuffer[cp], radix, false )
|
||
|
abort string("Bad character in number")
|
||
|
tokenValue~
|
||
|
repeat while TokIsDigit( lineBuffer[cp], radix, true )
|
||
|
if lineBuffer[cp] <> "_"
|
||
|
tokenValue := tokenValue * radix + TokDigitValue( linebuffer[cp] )
|
||
|
++cp
|
||
|
elseif TokIsDigit( lineBuffer[cp], 10, false )
|
||
|
tokenLineNumber := lineNo
|
||
|
tokenCol := cp
|
||
|
i~
|
||
|
fp~ {0 => integer
|
||
|
1 => have seen .
|
||
|
2 => have seen e or E
|
||
|
3 => have seen + or - }
|
||
|
repeat while TokIsDigit( lineBuffer[cp], 10, true ) {
|
||
|
} or ( fp < 1 and lineBuffer[cp] == "." and lineBuffer[cp+1] <> "." ) {
|
||
|
} or ( fp < 2 and (lineBuffer[cp] == "e" or lineBuffer[cp] == "E") ) {
|
||
|
} or ( (fp == 1 or fp == 2) and (lineBuffer[cp] == "+" or lineBuffer[cp] == "-") )
|
||
|
if lineBuffer[cp] == "."
|
||
|
fp := 1
|
||
|
if lineBuffer[cp] == "e" or lineBuffer[cp] == "E"
|
||
|
fp := 2
|
||
|
if lineBuffer[cp] == "+" or lineBuffer[cp] == "-"
|
||
|
fp := 3
|
||
|
if lineBuffer[cp] <> "_"
|
||
|
if i => MAXTOKENLENGTH
|
||
|
abort string("Token too long")
|
||
|
tokenText[i++] := lineBuffer[cp]
|
||
|
++cp
|
||
|
tokenText[i]~
|
||
|
if fp
|
||
|
tokenType := kw#kFLOATLITERAL
|
||
|
tokenValue := floatString.StringToFloat( @tokenText )
|
||
|
else
|
||
|
tokenType := kw#kINTLITERAL
|
||
|
tokenValue~
|
||
|
i~
|
||
|
repeat while tokenText[i]
|
||
|
tokenValue := tokenValue * 10 + TokDigitValue( tokenText[i++] )
|
||
|
elseif TokIsAlpha( lineBuffer[cp] )
|
||
|
i~
|
||
|
tokenLineNumber := lineNo
|
||
|
tokenCol := cp
|
||
|
repeat while TokIsAlpha( lineBuffer[cp] ) or TokIsDigit( lineBuffer[cp], 10, true )
|
||
|
if i => MAXTOKENLENGTH
|
||
|
abort string("Token too long")
|
||
|
tokenText[i++] := TokToUpper( lineBuffer[cp++] )
|
||
|
tokenText[i]~
|
||
|
x := kw.KeywordLookup( @tokenText )
|
||
|
if x
|
||
|
tokenType := bt.PeekL( x )
|
||
|
if kw#kCON =< tokenType and tokenType =< kw#kVAR and tokenCol == 0
|
||
|
insideDAT := tokenType == kw#kDAT
|
||
|
else
|
||
|
tokenType := kw#kID
|
||
|
else ' non-alphanumeric
|
||
|
tokenLineNumber := lineNo
|
||
|
tokenCol := cp
|
||
|
i~
|
||
|
if lineBuffer[cp] < $20 or lineBuffer[cp] => $80
|
||
|
abort string("Illegal character (UNICODE?)")
|
||
|
|
||
|
if insideDat and lineBuffer[cp] == ":" and TokIsAlpha( lineBuffer[cp+1] )
|
||
|
tokenText[i++] := lineBuffer[cp++]
|
||
|
repeat while TokIsAlpha( lineBuffer[cp] ) or TokIsDigit( lineBuffer[cp], 10, true )
|
||
|
if i => MAXTOKENLENGTH
|
||
|
abort string("Token too long")
|
||
|
tokenText[i++] := TokToUpper( lineBuffer[cp++] )
|
||
|
tokenText[i]~
|
||
|
tokenType := kw#kID
|
||
|
else
|
||
|
repeat
|
||
|
if cp < lineLength
|
||
|
tokenText[i++] := lineBuffer[cp++]
|
||
|
else
|
||
|
tokenText[i++] := " "
|
||
|
++cp
|
||
|
tokenText[i]~
|
||
|
while x := kw.KeywordLookup( @tokenText )
|
||
|
if i > 1
|
||
|
tokenText[--i]~ ' shorten token by 1
|
||
|
--cp
|
||
|
tokenType := bt.PeekL( kw.KeywordLookup( @tokenText ) )
|
||
|
else
|
||
|
tokenType := kw#kUNKNOWN
|
||
|
|
||
|
if TokIsBinary ' This next block is a total hack to see if a binary op
|
||
|
hack := blankLine ' hack is followed by "=" making it an assignment op
|
||
|
TokSkipWhitespace ' It parses "and ==" as "(and=) =" instead of "(and)(==)"
|
||
|
if lineBuffer[cp] == "=" ' PropTool does the latter. So this is wrong, but hopefully
|
||
|
tokenText[i++] := lineBuffer[cp++] ' not *too* wrong.
|
||
|
tokenText[i]~
|
||
|
tokenType |= kw#kASSIGNMENT
|
||
|
else
|
||
|
blankLine := hack ' restore (this hack is necessary to reset the eol condition trigger in SkipWhitespace)
|
||
|
else
|
||
|
tokenType := kw#kEOF
|
||
|
bytemove( @tokenText, string("(EOF)"), 6 )
|
||
|
tokenLineNumber := lineNo
|
||
|
tokenCol := cp
|
||
|
|
||
|
pri TokToUpper( ch )
|
||
|
if "a" =< ch and ch =< "z"
|
||
|
ch += constant( "A" - "a" )
|
||
|
return ch
|
||
|
|
||
|
pri TokIsAlpha( ch )
|
||
|
return "a" =< ch and ch =< "z" or "A" =< ch and ch =< "Z" or ch == "_"
|
||
|
|
||
|
pri TokIsDigit( ch, radix, u ) | upper
|
||
|
{{
|
||
|
Returns non-zero if ch is an acceptable digit given radix (2, 4, 10, or 16), false otherwise.
|
||
|
Pass u = true if the underscore ("_") is an acceptable value for ch.
|
||
|
}}
|
||
|
if ch == "_"
|
||
|
return u
|
||
|
upper := constant("0"-1) + radix
|
||
|
if radix =< 10
|
||
|
return "0" =< ch and ch =< upper
|
||
|
' else radix == 16
|
||
|
return "0" =< ch and ch =< "9" or "a" =< ch and ch =< "f" or "A" =< ch and ch =< "F"
|
||
|
|
||
|
pri TokDigitValue( ch )
|
||
|
if ch =< "9"
|
||
|
return ch - "0"
|
||
|
if ch =< "F"
|
||
|
return ch + constant( 10 - "A" )
|
||
|
if ch =< "f"
|
||
|
return ch + constant( 10 - "a" )
|
||
|
|
||
|
pub TokIsBinary
|
||
|
{{
|
||
|
Returns non-zero (not necessarily -1) if token is a binary operator.
|
||
|
Returns 0 otherwise.
|
||
|
}}
|
||
|
return ( tokenType & constant(kw#kUNARY|kw#kBINARY) ) == kw#kBINARY
|
||
|
|
||
|
var
|
||
|
byte lineBuffer[BUFFERSIZE]
|
||
|
byte lineLength
|
||
|
byte lineRemainder
|
||
|
byte eolLength
|
||
|
word lineNo
|
||
|
byte cp
|
||
|
byte blankLine
|
||
|
byte inStringLiteral ' indicates we are splitting up a string literal
|
||
|
byte comma ' flag for returning commas when splitting up a string literal
|
||
|
|
||
|
pri TokSkipWhitespace
|
||
|
'' Returns true on success, false on failure (eof).
|
||
|
'' lineBuffer[cp] is the next non-whitespace character
|
||
|
'' (comments count as whitespace).
|
||
|
repeat
|
||
|
repeat while cp => lineLength
|
||
|
ifnot blankLine
|
||
|
blankLine~~
|
||
|
return true
|
||
|
ifnot TokReadLine
|
||
|
return false
|
||
|
if lineBuffer[cp] == 9
|
||
|
abort string("Illegal TAB character")
|
||
|
if lineBuffer[cp] == "'" ' quote comment?
|
||
|
cp := lineLength ' skip to end of line
|
||
|
elseif lineBuffer[cp] == "}"
|
||
|
abort string("Unexpected '}'")
|
||
|
elseif lineBuffer[cp] == "{"
|
||
|
TokSkipComment
|
||
|
elseif lineBuffer[cp] == " "
|
||
|
++cp
|
||
|
else ' non-blank character
|
||
|
blankLine~
|
||
|
return true
|
||
|
|
||
|
pri TokSkipComment | depth
|
||
|
if cp+1 < lineLength and lineBuffer[cp+1] == "{"
|
||
|
++cp
|
||
|
repeat
|
||
|
repeat while cp => lineLength
|
||
|
ifnot TokReadLine
|
||
|
abort string("Unterminated comment")
|
||
|
if cp+1 < lineLength and lineBuffer[cp] == "}" and lineBuffer[cp+1] == "}"
|
||
|
cp += 2
|
||
|
return
|
||
|
++cp
|
||
|
else
|
||
|
++cp
|
||
|
depth := 1
|
||
|
repeat
|
||
|
repeat while cp => lineLength
|
||
|
ifnot TokReadLine
|
||
|
abort string("Unterminated comment")
|
||
|
if lineBuffer[cp] == "{"
|
||
|
++cp
|
||
|
++depth
|
||
|
elseif lineBuffer[cp] == "}"
|
||
|
++cp
|
||
|
ifnot --depth
|
||
|
return
|
||
|
else
|
||
|
++cp
|
||
|
|
||
|
pri TokInitReadLine
|
||
|
lineLength~
|
||
|
lineRemainder~
|
||
|
eolLength~
|
||
|
lineNo~
|
||
|
cp~
|
||
|
blankLine~~
|
||
|
inStringLiteral~
|
||
|
{
|
||
|
|<--------------- BUFFERSIZE --------------->|
|
||
|
+--------------------------------------------+
|
||
|
| | | | |
|
||
|
+--------------------------------------------+
|
||
|
|<-lineLength ->| | |
|
||
|
eolLength ->| |<- |
|
||
|
|<- lineRemainder ->|
|
||
|
}
|
||
|
pri TokReadLine | bytes, i
|
||
|
'' Reads a line into lineBuff. Returns true on success, false on failure (eof)
|
||
|
'' The line in lineBuff is terminated with a null byte. Calling programs should
|
||
|
'' not modify any memory in lineBuff after the null (the "remainder" section in
|
||
|
'' the diagram above) because that's the following line(s).
|
||
|
|
||
|
' First, move remainder up to start of buffer
|
||
|
bytemove( @lineBuffer, @lineBuffer[lineLength+eolLength], lineRemainder )
|
||
|
' Fill the rest of the buffer with new data
|
||
|
bytes := ios.sdgetblk( BUFFERSIZE-lineRemainder, @lineBuffer[lineRemainder] )
|
||
|
' pread returns #bytes read, but after eof it returns negative numbers
|
||
|
if bytes > 0
|
||
|
lineRemainder += bytes
|
||
|
|
||
|
ifnot lineRemainder
|
||
|
++lineNo
|
||
|
cp~
|
||
|
return false
|
||
|
|
||
|
repeat i from 0 to (lineRemainder-2) #> 0
|
||
|
if lineBuffer[i] == 13 or lineBuffer[i] == 10
|
||
|
eolLength := 1 ' cr or lf
|
||
|
if lineBuffer[i] == 13 and lineBuffer[i+1] == 10
|
||
|
++eolLength ' cr+lf
|
||
|
lineBuffer[i]~ ' set terminating null
|
||
|
lineLength := i
|
||
|
lineRemainder -= lineLength + eolLength
|
||
|
++lineNo ' first line of file is line 1
|
||
|
cp~
|
||
|
return true
|
||
|
|
||
|
if lineRemainder < BUFFERSIZE
|
||
|
lineLength := lineRemainder~
|
||
|
lineBuffer[lineLength]~
|
||
|
++lineNo
|
||
|
cp~
|
||
|
return true
|
||
|
|
||
|
abort string("Input line too long")
|
||
|
|
||
|
|
||
|
|
||
|
{{
|
||
|
Copyright (c) 2009 Michael Park
|
||
|
+------------------------------------------------------------------------------------------------------------------------------+
|
||
|
| TERMS OF USE: MIT License |
|
||
|
+------------------------------------------------------------------------------------------------------------------------------+
|
||
|
|Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation |
|
||
|
|files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, |
|
||
|
|modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software|
|
||
|
|is furnished to do so, subject to the following conditions: |
|
||
|
| |
|
||
|
|The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.|
|
||
|
| |
|
||
|
|THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE |
|
||
|
|WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR |
|
||
|
|COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
|
||
|
|ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
|
||
|
+------------------------------------------------------------------------------------------------------------------------------+
|
||
|
}}
|
||
|
|