-- file Scanner.Mesa -- last modified by Satterthwaite, January 8, 1980 10:33 AM DIRECTORY AltoDefs: FROM "altodefs" USING [CharsPerWord, maxword, PageSize], CharIO: FROM "chario" USING [CR, TAB, PutChar, PutNumber, PutString], LiteralOps: FROM "literalops" USING [FindDescriptor, Find, FindString], P1: FROM "p1" USING [Token], ParseTable: FROM "parsetable" USING [ Handle, HashIndex, TSymbol, VocabHashEntry, EndMarker, tokenARROW, tokenCHAR, tokenDOT, tokenDOTS, tokenEQUAL, tokenGE, tokenGREATER, tokenID, tokenLE, tokenLESS, tokenLNUM, tokenLSTR, tokenMINUS, tokenNUM, tokenSTR], StreamDefs: FROM "streamdefs" USING [ StreamHandle, StreamIndex, GetIndex, ModifyIndex, NormalizeIndex, ReadBlock, SetIndex, StreamError], StringDefs: FROM "stringdefs" USING [SubStringDescriptor, AppendString], SymbolOps: FROM "symbolops" USING [EnterString], SystemDefs: FROM "systemdefs" USING [ AllocateHeapString, AllocatePages, FreeHeapString, FreePages, PruneHeap]; Scanner: PROGRAM IMPORTS CharIO, LiteralOps, StreamDefs, StringDefs, SymbolOps, SystemDefs EXPORTS P1 = BEGIN OPEN ParseTable; hashTab: POINTER TO ARRAY HashIndex OF VocabHashEntry; scanTab: POINTER TO ARRAY CHARACTER [40C..177C] OF TSymbol; vocab: STRING; vocabIndex: POINTER TO ARRAY TSymbol OF CARDINAL; NUL: CHARACTER = 0C; CR: CHARACTER = CharIO.CR; ControlZ: CHARACTER = 32C; -- Bravo escape char stream: StreamDefs.StreamHandle; -- the input stream streamOrigin: StreamDefs.StreamIndex; errorStream: StreamDefs.StreamHandle; -- the error stream TextPages: CARDINAL = 6; TextWords: CARDINAL = TextPages*AltoDefs.PageSize; TextChars: CARDINAL = TextWords*AltoDefs.CharsPerWord; tB: POINTER TO PACKED ARRAY [0..TextChars) OF CHARACTER; tI, tMax: [0..TextChars]; tOrigin, tLimit: CARDINAL; tEnded: BOOLEAN; FillTextBuffer: PROCEDURE = BEGIN words: [0..TextWords]; bytes: [0..AltoDefs.CharsPerWord); tOrigin _ tLimit; IF tEnded THEN tMax _ 0 ELSE BEGIN words _ StreamDefs.ReadBlock[stream, tB, TextWords]; bytes _ StreamDefs.GetIndex[stream].byte MOD AltoDefs.CharsPerWord; IF bytes # 0 THEN words _ words-1; tMax _ words*AltoDefs.CharsPerWord + bytes; IF tMax < TextChars THEN tEnded _ TRUE; tLimit _ tOrigin + tMax; END; IF tMax = 0 THEN BEGIN tB[0] _ NUL; tMax _ 1 END; tI _ 0; END; buffer: STRING _ NIL; -- token assembly area iMax: CARDINAL; -- iMax = buffer.maxlength desc: StringDefs.SubStringDescriptor; -- initial buffer segment nTokens: CARDINAL; -- token count nErrors: CARDINAL; -- lexical errors BufferOverflow: ERROR = CODE; ExpandBuffer: PROCEDURE = BEGIN oldBuffer: STRING _ buffer; IF oldBuffer.length > 2000 THEN ERROR BufferOverflow; buffer _ SystemDefs.AllocateHeapString[2*oldBuffer.length]; StringDefs.AppendString[buffer, oldBuffer]; iMax _ buffer.length _ buffer.maxlength; SystemDefs.FreeHeapString[oldBuffer]; desc.base _ buffer; END; char: CHARACTER; -- current (most recently scanned) character NextChar: PROCEDURE = -- also expanded inline within Atom BEGIN IF (tI_tI+1) = tMax THEN FillTextBuffer[]; char _ tB[tI]; END; Atom: PUBLIC PROCEDURE RETURNS [token: P1.Token] = BEGIN OPEN token; DO WHILE char IN [NUL..' ] DO SELECT char FROM ControlZ => UNTIL char = CR DO IF (tI_tI+1) = tMax THEN BEGIN IF tEnded THEN GO TO EndFile; FillTextBuffer[] END; char _ tB[tI]; ENDLOOP; ENDCASE; IF (tI_tI+1) = tMax THEN BEGIN IF tEnded THEN GO TO EndFile; FillTextBuffer[] END; char _ tB[tI]; ENDLOOP; index _ tOrigin + tI; value _ 0; SELECT char FROM 'a, 'b, 'c, 'd, 'e, 'f, 'g, 'h, 'i, 'j, 'k, 'l, 'm, 'n, 'o, 'p, 'q, 'r, 's, 't, 'u, 'v, 'w, 'x, 'y, 'z => BEGIN i: CARDINAL; i _ 0; DO buffer[i] _ char; IF (tI_tI+1) = tMax THEN FillTextBuffer[]; char _ tB[tI]; SELECT char FROM IN ['a..'z], IN ['A..'Z], IN ['0..'9] => IF (i _ i+1) >= iMax THEN ExpandBuffer[]; ENDCASE => EXIT; ENDLOOP; desc.length _ i+1; class _ tokenID; value _ SymbolOps.EnterString[@desc]; GO TO GotNext END; 'A, 'B, 'C, 'D, 'E, 'F, 'G, 'H, 'I, 'J, 'K, 'L, 'M, 'N, 'O, 'P, 'Q, 'R, 'S, 'T, 'U, 'V, 'W, 'X, 'Y, 'Z => BEGIN first, last: CARDINAL; uId: BOOLEAN; i, j: CARDINAL; h: HashIndex; s1, s2: CARDINAL; i _ 0; uId _ TRUE; first _ last _ char-0C; DO buffer[i] _ char; IF (tI_tI+1) = tMax THEN FillTextBuffer[]; char _ tB[tI]; SELECT char FROM IN ['A..'Z] => BEGIN last _ char-0C; IF (i _ i+1) >= iMax THEN ExpandBuffer[]; END; IN ['a..'z], IN ['0..'9] => BEGIN uId _ FALSE; IF (i _ i+1) >= iMax THEN ExpandBuffer[]; END; ENDCASE => EXIT; ENDLOOP; i _ i+1; IF uId THEN BEGIN h _ ((first*128-first) + last) MOD LAST[HashIndex] + 1; WHILE (j _ hashTab[h].symbol) # 0 DO IF vocabIndex[j]-(s2_vocabIndex[j-1]) = i THEN FOR s1 IN [0 .. i) DO IF buffer[s1] # vocab[s2] THEN EXIT; s2 _ s2+1; REPEAT FINISHED => BEGIN class _ j; GO TO GotNext END; ENDLOOP; IF (h _ hashTab[h].link) = 0 THEN EXIT; ENDLOOP; END; desc.length _ i; class _ tokenID; value _ SymbolOps.EnterString[@desc]; GO TO GotNext END; '0, '1, '2, '3, '4, '5, '6, '7, '8, '9 => BEGIN v, v10, v8: LONG INTEGER; scale: CARDINAL; valid, valid10, valid8, octal: BOOLEAN; MaxLiteral: CARDINAL = AltoDefs.maxword; vRep: ARRAY [0..SIZE[LONG INTEGER]) OF WORD; -- machine dependent v10 _ v8 _ 0; valid10 _ valid8 _ TRUE; WHILE char IN ['0..'9] DO IF valid10 THEN [v10, valid10] _ AppendDigit10[v10, char]; IF valid8 THEN [v8, valid8] _ AppendDigit8[v8, char]; NextChar[]; ENDLOOP; SELECT char FROM 'B, 'C => BEGIN class _ IF char = 'C THEN tokenCHAR ELSE tokenNUM; v _ v8; valid _ valid8; octal _ TRUE; END; ENDCASE => BEGIN class _ tokenNUM; v _ v10; valid _ valid10; octal _ FALSE; END; SELECT char FROM 'B, 'C, 'D => BEGIN NextChar[]; IF class = tokenNUM THEN BEGIN scale _ 0; WHILE char IN ['0..'9] DO scale _ 10*scale + CARDINAL[char-'0]; NextChar[]; ENDLOOP; THROUGH [1 .. scale] WHILE valid DO IF octal THEN [v, valid] _ AppendDigit8[v, '0] ELSE [v, valid] _ AppendDigit10[v, '0]; ENDLOOP; END; END; ENDCASE; vRep _ LOOPHOLE[v]; IF vRep[1] = 0 --v <= MaxLiteral-- THEN value _ LiteralOps.Find[vRep[0]] ELSE BEGIN IF class = tokenCHAR THEN valid _ FALSE; class _ tokenLNUM; value _ LiteralOps.FindDescriptor[DESCRIPTOR[vRep]]; END; IF ~valid THEN BEGIN nErrors _ nErrors + 1; ScanError[number, index]; END; GO TO GotNext END; ',, ';, ':, '_, '#, '~, '+, '*, '/, '^, '@, '!, '(, '), '[, '], '{, '} => BEGIN class _ scanTab[char]; GO TO GetNext END; '' => BEGIN NextChar[]; class _ tokenCHAR; value _ LiteralOps.Find[char-0C]; GO TO GetNext END; '" => BEGIN i: CARDINAL; i _ 0; DO IF (tI_tI+1) = tMax THEN BEGIN IF tEnded THEN GO TO EOFEnd; FillTextBuffer[] END; char _ tB[tI]; SELECT char FROM '" => BEGIN IF (tI_tI+1) = tMax THEN FillTextBuffer[]; char _ tB[tI]; IF char # '" THEN GO TO QuoteEnd; END; ENDCASE; IF i >= iMax THEN ExpandBuffer[ !BufferOverflow => BEGIN nErrors _ nErrors + 1; ScanError[string, tOrigin+tI]; i _ 0; CONTINUE END]; buffer[i] _ char; i _ i+1; REPEAT QuoteEnd => NULL; EOFEnd => BEGIN FillTextBuffer[]; char _ tB[tI] END; ENDLOOP; desc.length _ i; value _ LiteralOps.FindString[@desc]; IF char = 'L THEN BEGIN class _ tokenLSTR; GO TO GetNext END ELSE BEGIN class _ tokenSTR; GO TO GotNext END END; '- => BEGIN pChar: CHARACTER; NextChar[]; IF char # '- THEN BEGIN class _ tokenMINUS; GO TO GotNext END; char _ NUL; DO pChar _ char; IF (tI_tI+1) = tMax THEN BEGIN IF tEnded THEN GO TO EndFile; FillTextBuffer[] END; char _ tB[tI]; SELECT char FROM '- => IF pChar = '- THEN EXIT; CR => EXIT; ENDCASE; ENDLOOP; NextChar[]; END; '. => BEGIN NextChar[]; IF char = '. THEN BEGIN class _ tokenDOTS; GO TO GetNext END ELSE BEGIN class _ tokenDOT; GO TO GotNext END END; '= => BEGIN NextChar[]; IF char = '> THEN BEGIN class _ tokenARROW; GO TO GetNext END ELSE BEGIN class _ tokenEQUAL; GO TO GotNext END END; '< => BEGIN NextChar[]; IF char = '= THEN BEGIN class _ tokenLE; GO TO GetNext END ELSE BEGIN class _ tokenLESS; GO TO GotNext END END; '> => BEGIN NextChar[]; IF char = '= THEN BEGIN class _ tokenGE; GO TO GetNext END ELSE BEGIN class _ tokenGREATER; GO TO GotNext END END; ENDCASE => BEGIN class _ scanTab[char]; IF class # 0 THEN GO TO GetNext; NextChar[]; nErrors _ nErrors + 1; ScanError[char, index]; END; REPEAT GetNext => BEGIN IF (tI_tI+1) = tMax THEN FillTextBuffer[]; char _ tB[tI]; END; GotNext => NULL; EndFile => BEGIN FillTextBuffer[]; char _ tB[tI]; class _ EndMarker; index _ tOrigin; value _ 0; END; ENDLOOP; nTokens _ nTokens + 1; RETURN END; -- numerical conversion Digit: ARRAY CHARACTER ['0..'9] OF CARDINAL = [0,1,2,3,4,5,6,7,8,9]; AppendDigit10: PROCEDURE [v: LONG INTEGER, digit: CHARACTER ['0..'9]] RETURNS [newV: LONG INTEGER, valid: BOOLEAN] = BEGIN MaxV: LONG INTEGER = 429496729; -- (2**32-1)/10 MaxD: CARDINAL = 5; -- (2**32-1) MOD 10 d: [0..9] = Digit[digit]; valid _ v < MaxV OR (v = MaxV AND d <= MaxD); newV _ 10*v + d; RETURN END; AppendDigit8: PROCEDURE [v: LONG INTEGER, digit: CHARACTER ['0..'9]] RETURNS [newV: LONG INTEGER, valid: BOOLEAN] = BEGIN MaxV: LONG INTEGER = 3777777777B; -- (2**32-1)/8 MaxD: CARDINAL = 7B; -- (2**32-1) MOD 8 d: [0..9] = Digit[digit]; valid _ (d < 8) AND (v < MaxV OR (v = MaxV AND d <= MaxD)); newV _ 8*v + d; RETURN END; -- initialization/finalization ScanInit: PUBLIC PROCEDURE [ sourceStream, messageStream: StreamDefs.StreamHandle, table: ParseTable.Handle] = BEGIN hashTab _ @table.scanTable.hashTab; scanTab _ @table.scanTable.scanTab; vocab _ LOOPHOLE[@table.scanTable.vocabBody, STRING]; vocabIndex _ @table.scanTable.vocabIndex; IF buffer = NIL THEN buffer _ SystemDefs.AllocateHeapString[256]; iMax _ buffer.length _ buffer.maxlength; desc.base _ buffer; desc.offset _ 0; stream _ sourceStream; errorStream _ messageStream; streamOrigin _ StreamDefs.GetIndex[stream]; tB _ SystemDefs.AllocatePages[TextPages]; tOrigin _ tLimit _ 0; tMax _ 0; tEnded _ FALSE; FillTextBuffer[]; char _ tB[tI]; nTokens _ nErrors _ 0; END; ScanReset: PUBLIC PROCEDURE RETURNS [CARDINAL, CARDINAL] = BEGIN SystemDefs.FreePages[tB]; IF buffer # NIL THEN BEGIN SystemDefs.FreeHeapString[buffer]; buffer _ NIL END; [] _ SystemDefs.PruneHeap[]; RETURN [nTokens, nErrors] END; -- error handling StreamIndex: TYPE = StreamDefs.StreamIndex; NewLine: PROCEDURE = BEGIN CharIO.PutChar[errorStream, CR] END; PrintTextLine: PROCEDURE [origin: StreamIndex] RETURNS [start: StreamIndex] = BEGIN OPEN CharIO; lineIndex: StreamIndex; char: CHARACTER; n: [1..100]; start _ lineIndex _ origin; FOR n IN [1..100] UNTIL lineIndex = [0, 0] DO lineIndex _ StreamDefs.ModifyIndex[lineIndex, -1]; StreamDefs.SetIndex[stream, lineIndex]; IF stream.get[stream] = CR THEN EXIT; start _ lineIndex; ENDLOOP; StreamDefs.SetIndex[stream, start]; FOR n IN [1..100] UNTIL stream.endof[stream] DO char _ stream.get[stream]; SELECT char FROM CR, ControlZ => EXIT; ENDCASE => PutChar[errorStream, char]; ENDLOOP; NewLine[]; RETURN END; ResetScanIndex: PUBLIC PROCEDURE [index: CARDINAL] = BEGIN page: CARDINAL; IF index ~IN [tOrigin .. tLimit) THEN BEGIN page _ index/(AltoDefs.PageSize*AltoDefs.CharsPerWord); tOrigin _ tLimit _ page*(AltoDefs.PageSize*AltoDefs.CharsPerWord); tMax _ 0; tEnded _ FALSE; StreamDefs.SetIndex[stream, [page: streamOrigin.page+page, byte: streamOrigin.byte]]; FillTextBuffer[]; END; tI _ index - tOrigin; IF tI >= tMax THEN FillTextBuffer[]; char _ tB[tI]; END; ScanError: PROCEDURE [code: {number, string, char}, tokenIndex: CARDINAL] = BEGIN ErrorContext[ SELECT code FROM number => "Invalid Number"L, string => "String Too Long"L, char => "Invalid Character"L, ENDCASE => NIL, tokenIndex]; NewLine[]; END; ErrorContext: PUBLIC PROCEDURE [message: STRING, tokenIndex: CARDINAL] = BEGIN OPEN CharIO; saveIndex: StreamIndex = StreamDefs.GetIndex[stream]; origin: StreamIndex = StreamDefs.NormalizeIndex[ [page: streamOrigin.page, byte: streamOrigin.byte+tokenIndex]]; char: CHARACTER; StreamDefs.SetIndex[stream, PrintTextLine[origin]]; UNTIL StreamDefs.GetIndex[stream] = origin DO char _ stream.get[stream ! StreamDefs.StreamError => EXIT]; PutChar[errorStream, IF char = TAB THEN TAB ELSE ' ]; ENDLOOP; PutString[errorStream, "^ "L]; PutString[errorStream, message]; PutString[errorStream, " ["L]; PutNumber[errorStream, tokenIndex, [base:10, zerofill:FALSE, unsigned:TRUE, columns:0]]; PutChar[errorStream, ']]; NewLine[]; StreamDefs.SetIndex[stream, saveIndex]; END; END.