TriOS-alt/zubehör/sphinx/hive-port/tokenizer/lex.spin

565 lines
16 KiB
Plaintext

{{
Lex
usage: lex filename [options]
Reads filename.spn on SD card, writes tokens to filename.tok.
}}
con
_stack = 1000
obj
ios: "ios"
kw: "keywords"
str: "stringx"
bt: "bintree"
floatString: "FltStr"
var
byte parastr[64]
pub Main | err, p
ios.start
err := \Try
if err
if err > 0
ios.print( err )
ios.printnl
else
ios.print( string("Error ") )
ios.printdec( err )
ios.printchar( 13 )
ios.printdec( TokLineNumber )
ios.printchar( "," )
ios.printdec( TokColumn + 1 )
ios.printchar( " " )
ios.printchar( "'" )
ios.print( pTokenText )
ios.printchar( "'" )
ios.printchar( 13 )
ios.sdclose
bt.Stop
ios.stop
var
long pTokenText
pri Try
bt.Init( 0, 0 )
ProcessCommandLine
if verbosity => 1
ios.print( string("version 100225", 13) )
Start
pri ProcessCommandLine | nArgs, l
ios.parastart
repeat while ios.paranext(@parastr) 'parameter einlesen
if byte[@parastr][0] == "/" 'option?
case byte[@parastr][1]
"?": ios.print(string("help: lex /i fn.spn /o fn.tok [options]")) '/?
ios.printnl
abort
"i": ifnot ios.paranext(@spinFilename) '/i fn.spn
ios.print(string("error: input-filname"))
ios.printnl
"o": ifnot ios.paranext(@outputFilename) '/o fn.tok
ios.print(string("error: output-filname"))
ios.printnl
"c": compile~~
"l": compile~~
"v": if ios.paranext(@parastr) '/v n
verbosity := str2dec(@parastr)
else
ios.print(string("error: verbosity-level"))
ios.printnl
else
ios.print(string("usage: lex /i fn.spn /o fn.tok [options]"))
pri str2dec(stradr)|buffer,counter
buffer := byte[stradr]
counter := (strsize(stradr) <# 11)
repeat while(counter--)
result *= 10
result += lookdownz(byte[stradr++]: "0".."9")
if(buffer == "-")
-result
con
MAXFILENAMELENGTH = 8 + 1 + 3 ' 8.3
MAXSTRINGBUFFERLENGTH = 32
var
byte spinFilename[MAXFILENAMELENGTH+1] ' input .spi file
byte outputFilename[MAXFILENAMELENGTH+1] ' output .tok file
byte stringBuffer[MAXSTRINGBUFFERLENGTH+1] ' temp string buffer
dat
verbosity byte 0 ' set by /V option
compile byte 0 ' set by /C or /L: if non-zero, run compile.bin automatically.
type long 0
lineNum word 0
column word 0
pri Start | v
kw.Init
pTokenText := TokGetPText
if verbosity => 2
ios.print( string("Reading ") )
ios.print( @spinFilename )
ios.printchar( 13 )
TokOpen( @spinFilename )
if verbosity => 2
ios.print( string("Writing ") )
ios.print( @outputFilename )
ios.printchar( 13 )
if ios.sdnewfile( @outputFilename )
abort string("can't open o-file")
ios.sdopen( "W", @outputFilename )
repeat
type := TokType
lineNum := TokLineNumber
column := Column
ios.sdputblk( 8, @type ) ' long + word + word
if type == kw#kINTLITERAL or type == kw#kFLOATLITERAL
v := TokValue
ios.sdputblk( 4, @v )
else
ios.sdputblk( strsize(pTokenText) + 1, pTokenText )
if TokType == kw#kEOF
quit
TokAdvance
ios.sdclose
dat 'TOKENIZER
pub TokOpen( pFilename )
if ios.sdopen( "R", pFilename ) <> 0
abort string("can't open file")
TokInitReadLine
insideDAT~
TokAdvance ' prime the pump
pub TokClose
ios.sdclose
{
bytes~
InitReadLine
GetNextToken ' prime the pump
t := cnt
repeat
case tokenType
EOL_TOKEN:
term.str( @tokenText )
EOF_TOKEN:
quit
ID_TOKEN:
term.str( @tokenText )
OP_TOKEN:
INT_TOKEN:
term.out( "#" )
term.dec( tokenValue )
FLOAT_TOKEN:
term.out( "F" )
term.str( floatString.FloatToString( tokenValue ) )
term.out( "\" )
GetNextToken
term.out( 13 )
'}
{ repeat while SkipWhitespace
term.dec( lineNo )
term.out( "," )
term.dec( cp+1 )
term.out( ":" )
if cp == lineLength
term.str( string("(eol)"))
else
term.out( lineBuffer[cp++] )
term.out( 13 )
'}
{
repeat
r := sdfat.pread( @buf, BUFFERSIZE )
if r < 0
quit
bytes += r
'}
{
repeat 100
r := sdfat.pgetc
if r < 0
quit
++bytes
term.dec(r)
term.out( "," )
'}
con
MAXTOKENLENGTH = 32
BUFFERSIZE = 200
var
byte tokenText[MAXTOKENLENGTH+1]
long tokenType
word tokenLineNumber
word tokenCol
long tokenValue
byte insideDAT
pub TokGetPText
return @tokenText
pub TokType
return tokenType
pub TokColumn
return tokenCol
pub TokLineNumber
return tokenLineNumber
pub TokValue
return tokenValue
pub TokAdvance | i, radix, fp, x, hack
if inStringLiteral
if comma
comma~
ifnot tokenValue
abort string("Unterminated string")
if lineBuffer[cp] <> $22 ' double-quote
tokenType := kw#kCOMMA
return
++cp
inStringLiteral~
else
comma~~
tokenType := kw#kINTLITERAL
tokenValue := lineBuffer[cp++]
return
if TokSkipWhitespace
if cp == lineLength
tokenType := kw#kEOL
bytemove( @tokenText, string("(EOL)"), 6 )
tokenLineNumber := lineNo
tokenCol := cp
elseif lineBuffer[cp] == $22 ' double-quote
tokenLineNumber := LineNo
tokenCol := cp
++cp
if LineBuffer[cp] == $22
abort string("Empty string")
comma~
inStringLiteral~~
TokAdvance
elseif lineBuffer[cp] == "%" or lineBuffer[cp] == "$"
tokenType := kw#kINTLITERAL
tokenLineNumber := lineNo
tokenCol := cp
if lineBuffer[cp] == "%"
if lineBuffer[cp+1] == "%"
radix := 4
++cp
else
radix := 2
else
radix := 16
++cp
ifnot TokIsDigit( lineBuffer[cp], radix, false )
abort string("Bad character in number")
tokenValue~
repeat while TokIsDigit( lineBuffer[cp], radix, true )
if lineBuffer[cp] <> "_"
tokenValue := tokenValue * radix + TokDigitValue( linebuffer[cp] )
++cp
elseif TokIsDigit( lineBuffer[cp], 10, false )
tokenLineNumber := lineNo
tokenCol := cp
i~
fp~ {0 => integer
1 => have seen .
2 => have seen e or E
3 => have seen + or - }
repeat while TokIsDigit( lineBuffer[cp], 10, true ) {
} or ( fp < 1 and lineBuffer[cp] == "." and lineBuffer[cp+1] <> "." ) {
} or ( fp < 2 and (lineBuffer[cp] == "e" or lineBuffer[cp] == "E") ) {
} or ( (fp == 1 or fp == 2) and (lineBuffer[cp] == "+" or lineBuffer[cp] == "-") )
if lineBuffer[cp] == "."
fp := 1
if lineBuffer[cp] == "e" or lineBuffer[cp] == "E"
fp := 2
if lineBuffer[cp] == "+" or lineBuffer[cp] == "-"
fp := 3
if lineBuffer[cp] <> "_"
if i => MAXTOKENLENGTH
abort string("Token too long")
tokenText[i++] := lineBuffer[cp]
++cp
tokenText[i]~
if fp
tokenType := kw#kFLOATLITERAL
tokenValue := floatString.StringToFloat( @tokenText )
else
tokenType := kw#kINTLITERAL
tokenValue~
i~
repeat while tokenText[i]
tokenValue := tokenValue * 10 + TokDigitValue( tokenText[i++] )
elseif TokIsAlpha( lineBuffer[cp] )
i~
tokenLineNumber := lineNo
tokenCol := cp
repeat while TokIsAlpha( lineBuffer[cp] ) or TokIsDigit( lineBuffer[cp], 10, true )
if i => MAXTOKENLENGTH
abort string("Token too long")
tokenText[i++] := TokToUpper( lineBuffer[cp++] )
tokenText[i]~
x := kw.KeywordLookup( @tokenText )
if x
tokenType := bt.PeekL( x )
if kw#kCON =< tokenType and tokenType =< kw#kVAR and tokenCol == 0
insideDAT := tokenType == kw#kDAT
else
tokenType := kw#kID
else ' non-alphanumeric
tokenLineNumber := lineNo
tokenCol := cp
i~
if lineBuffer[cp] < $20 or lineBuffer[cp] => $80
abort string("Illegal character (UNICODE?)")
if insideDat and lineBuffer[cp] == ":" and TokIsAlpha( lineBuffer[cp+1] )
tokenText[i++] := lineBuffer[cp++]
repeat while TokIsAlpha( lineBuffer[cp] ) or TokIsDigit( lineBuffer[cp], 10, true )
if i => MAXTOKENLENGTH
abort string("Token too long")
tokenText[i++] := TokToUpper( lineBuffer[cp++] )
tokenText[i]~
tokenType := kw#kID
else
repeat
if cp < lineLength
tokenText[i++] := lineBuffer[cp++]
else
tokenText[i++] := " "
++cp
tokenText[i]~
while x := kw.KeywordLookup( @tokenText )
if i > 1
tokenText[--i]~ ' shorten token by 1
--cp
tokenType := bt.PeekL( kw.KeywordLookup( @tokenText ) )
else
tokenType := kw#kUNKNOWN
if TokIsBinary ' This next block is a total hack to see if a binary op
hack := blankLine ' hack is followed by "=" making it an assignment op
TokSkipWhitespace ' It parses "and ==" as "(and=) =" instead of "(and)(==)"
if lineBuffer[cp] == "=" ' PropTool does the latter. So this is wrong, but hopefully
tokenText[i++] := lineBuffer[cp++] ' not *too* wrong.
tokenText[i]~
tokenType |= kw#kASSIGNMENT
else
blankLine := hack ' restore (this hack is necessary to reset the eol condition trigger in SkipWhitespace)
else
tokenType := kw#kEOF
bytemove( @tokenText, string("(EOF)"), 6 )
tokenLineNumber := lineNo
tokenCol := cp
pri TokToUpper( ch )
if "a" =< ch and ch =< "z"
ch += constant( "A" - "a" )
return ch
pri TokIsAlpha( ch )
return "a" =< ch and ch =< "z" or "A" =< ch and ch =< "Z" or ch == "_"
pri TokIsDigit( ch, radix, u ) | upper
{{
Returns non-zero if ch is an acceptable digit given radix (2, 4, 10, or 16), false otherwise.
Pass u = true if the underscore ("_") is an acceptable value for ch.
}}
if ch == "_"
return u
upper := constant("0"-1) + radix
if radix =< 10
return "0" =< ch and ch =< upper
' else radix == 16
return "0" =< ch and ch =< "9" or "a" =< ch and ch =< "f" or "A" =< ch and ch =< "F"
pri TokDigitValue( ch )
if ch =< "9"
return ch - "0"
if ch =< "F"
return ch + constant( 10 - "A" )
if ch =< "f"
return ch + constant( 10 - "a" )
pub TokIsBinary
{{
Returns non-zero (not necessarily -1) if token is a binary operator.
Returns 0 otherwise.
}}
return ( tokenType & constant(kw#kUNARY|kw#kBINARY) ) == kw#kBINARY
var
byte lineBuffer[BUFFERSIZE]
byte lineLength
byte lineRemainder
byte eolLength
word lineNo
byte cp
byte blankLine
byte inStringLiteral ' indicates we are splitting up a string literal
byte comma ' flag for returning commas when splitting up a string literal
pri TokSkipWhitespace
'' Returns true on success, false on failure (eof).
'' lineBuffer[cp] is the next non-whitespace character
'' (comments count as whitespace).
repeat
repeat while cp => lineLength
ifnot blankLine
blankLine~~
return true
ifnot TokReadLine
return false
if lineBuffer[cp] == 9
abort string("Illegal TAB character")
if lineBuffer[cp] == "'" ' quote comment?
cp := lineLength ' skip to end of line
elseif lineBuffer[cp] == "}"
abort string("Unexpected '}'")
elseif lineBuffer[cp] == "{"
TokSkipComment
elseif lineBuffer[cp] == " "
++cp
else ' non-blank character
blankLine~
return true
pri TokSkipComment | depth
if cp+1 < lineLength and lineBuffer[cp+1] == "{"
++cp
repeat
repeat while cp => lineLength
ifnot TokReadLine
abort string("Unterminated comment")
if cp+1 < lineLength and lineBuffer[cp] == "}" and lineBuffer[cp+1] == "}"
cp += 2
return
++cp
else
++cp
depth := 1
repeat
repeat while cp => lineLength
ifnot TokReadLine
abort string("Unterminated comment")
if lineBuffer[cp] == "{"
++cp
++depth
elseif lineBuffer[cp] == "}"
++cp
ifnot --depth
return
else
++cp
pri TokInitReadLine
lineLength~
lineRemainder~
eolLength~
lineNo~
cp~
blankLine~~
inStringLiteral~
{
|<--------------- BUFFERSIZE --------------->|
+--------------------------------------------+
| | | | |
+--------------------------------------------+
|<-lineLength ->| | |
eolLength ->| |<- |
|<- lineRemainder ->|
}
pri TokReadLine | bytes, i
'' Reads a line into lineBuff. Returns true on success, false on failure (eof)
'' The line in lineBuff is terminated with a null byte. Calling programs should
'' not modify any memory in lineBuff after the null (the "remainder" section in
'' the diagram above) because that's the following line(s).
' First, move remainder up to start of buffer
bytemove( @lineBuffer, @lineBuffer[lineLength+eolLength], lineRemainder )
' Fill the rest of the buffer with new data
bytes := ios.sdgetblk( BUFFERSIZE-lineRemainder, @lineBuffer[lineRemainder] )
' pread returns #bytes read, but after eof it returns negative numbers
if bytes > 0
lineRemainder += bytes
ifnot lineRemainder
++lineNo
cp~
return false
repeat i from 0 to (lineRemainder-2) #> 0
if lineBuffer[i] == 13 or lineBuffer[i] == 10
eolLength := 1 ' cr or lf
if lineBuffer[i] == 13 and lineBuffer[i+1] == 10
++eolLength ' cr+lf
lineBuffer[i]~ ' set terminating null
lineLength := i
lineRemainder -= lineLength + eolLength
++lineNo ' first line of file is line 1
cp~
return true
if lineRemainder < BUFFERSIZE
lineLength := lineRemainder~
lineBuffer[lineLength]~
++lineNo
cp~
return true
abort string("Input line too long")
{{
Copyright (c) 2009 Michael Park
+------------------------------------------------------------------------------------------------------------------------------+
| TERMS OF USE: MIT License |
+------------------------------------------------------------------------------------------------------------------------------+
|Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation |
|files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, |
|modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software|
|is furnished to do so, subject to the following conditions: |
| |
|The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.|
| |
|THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE |
|WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR |
|COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
|ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
+------------------------------------------------------------------------------------------------------------------------------+
}}