-- file Scanner.Mesa
-- last modified by Satterthwaite, January 8, 1980  10:33 AM

DIRECTORY
  AltoDefs: FROM "altodefs" USING [CharsPerWord, maxword, PageSize],
  CharIO: FROM "chario" USING [CR, TAB, PutChar, PutNumber, PutString],
  LiteralOps: FROM "literalops"  USING [FindDescriptor, Find, FindString],
  P1: FROM "p1" USING [Token],
  ParseTable: FROM "parsetable"
    USING [
      Handle, HashIndex, TSymbol, VocabHashEntry, EndMarker,
      tokenARROW, tokenCHAR, tokenDOT, tokenDOTS, tokenEQUAL,
      tokenGE, tokenGREATER, tokenID, tokenLE, tokenLESS,
      tokenLNUM, tokenLSTR, tokenMINUS, tokenNUM, tokenSTR],
  StreamDefs: FROM "streamdefs"
    USING [
      StreamHandle, StreamIndex,
      GetIndex, ModifyIndex, NormalizeIndex, ReadBlock, SetIndex,
      StreamError],
  StringDefs: FROM "stringdefs" USING [SubStringDescriptor, AppendString],
  SymbolOps: FROM "symbolops" USING [EnterString],
  SystemDefs: FROM "systemdefs"
    USING [
      AllocateHeapString, AllocatePages, FreeHeapString, FreePages,
      PruneHeap];

Scanner: PROGRAM
    IMPORTS CharIO, LiteralOps, StreamDefs, StringDefs, SymbolOps, SystemDefs 
    EXPORTS P1 =
  BEGIN
  OPEN ParseTable;

  hashTab: POINTER TO ARRAY HashIndex OF VocabHashEntry;
  scanTab: POINTER TO ARRAY CHARACTER [40C..177C] OF TSymbol;
  vocab: STRING;
  vocabIndex: POINTER TO ARRAY TSymbol OF CARDINAL;

  NUL: CHARACTER = 0C;
  CR: CHARACTER = CharIO.CR;
  ControlZ: CHARACTER = 32C;		-- Bravo escape char

  stream: StreamDefs.StreamHandle;		-- the input stream
  streamOrigin: StreamDefs.StreamIndex;

  errorStream: StreamDefs.StreamHandle;	-- the error stream

  TextPages: CARDINAL = 6;
  TextWords: CARDINAL = TextPages*AltoDefs.PageSize;
  TextChars: CARDINAL = TextWords*AltoDefs.CharsPerWord;

  tB: POINTER TO PACKED ARRAY [0..TextChars) OF CHARACTER;
  tI, tMax: [0..TextChars];
  tOrigin, tLimit: CARDINAL;
  tEnded: BOOLEAN;

  FillTextBuffer: PROCEDURE =
    BEGIN
    words: [0..TextWords];
    bytes: [0..AltoDefs.CharsPerWord);
    tOrigin ← tLimit;
    IF tEnded
      THEN  tMax ← 0
      ELSE
	BEGIN
	words ← StreamDefs.ReadBlock[stream, tB, TextWords];
	bytes ← StreamDefs.GetIndex[stream].byte MOD AltoDefs.CharsPerWord;
	IF bytes # 0 THEN  words ← words-1;
	tMax ← words*AltoDefs.CharsPerWord + bytes;
	IF tMax < TextChars THEN  tEnded ← TRUE;
	tLimit ← tOrigin + tMax;
	END;
    IF tMax = 0 THEN  BEGIN  tB[0] ← NUL;  tMax ← 1  END;
    tI ← 0;
    END;


  buffer: STRING ← NIL;			-- token assembly area
  iMax: CARDINAL;			-- iMax = buffer.maxlength
  desc: StringDefs.SubStringDescriptor;	-- initial buffer segment

  nTokens: CARDINAL;			-- token count
  nErrors: CARDINAL;			-- lexical errors

  BufferOverflow: ERROR = CODE;

  ExpandBuffer: PROCEDURE =
    BEGIN
    oldBuffer: STRING ← buffer;
    IF oldBuffer.length > 2000 THEN ERROR BufferOverflow;
    buffer ← SystemDefs.AllocateHeapString[2*oldBuffer.length];
    StringDefs.AppendString[buffer, oldBuffer];
    iMax ← buffer.length ← buffer.maxlength;
    SystemDefs.FreeHeapString[oldBuffer];
    desc.base ← buffer;
    END;


  char: CHARACTER;	-- current (most recently scanned) character

  NextChar: PROCEDURE =	-- also expanded inline within Atom
    BEGIN
    IF (tI←tI+1) = tMax THEN FillTextBuffer[];
    char ← tB[tI];
    END;


  Atom: PUBLIC PROCEDURE RETURNS [token: P1.Token] =
    BEGIN
    OPEN token;
      DO
      WHILE char IN [NUL..' ] 
	DO
	SELECT char FROM
	  ControlZ =>
	    UNTIL char = CR
	      DO
	      IF (tI←tI+1) = tMax THEN
		BEGIN IF tEnded THEN GO TO EndFile; FillTextBuffer[] END;
	      char ← tB[tI];
	      ENDLOOP;
	  ENDCASE;
	IF (tI←tI+1) = tMax THEN
	  BEGIN IF tEnded THEN GO TO EndFile; FillTextBuffer[] END;
	char ← tB[tI];
	ENDLOOP;
      index ← tOrigin + tI;  value ← 0;
      SELECT char FROM

	'a, 'b, 'c, 'd, 'e, 'f, 'g, 'h, 'i, 'j, 'k, 'l, 'm,
	'n, 'o, 'p, 'q, 'r, 's, 't, 'u, 'v, 'w, 'x, 'y, 'z  =>
	  BEGIN
	  i: CARDINAL;
	  i ← 0;
	    DO
	    buffer[i] ← char;
	    IF (tI←tI+1) = tMax THEN FillTextBuffer[];
	    char ← tB[tI];
	    SELECT char FROM
	      IN ['a..'z], IN ['A..'Z], IN ['0..'9] =>
		IF (i ← i+1) >= iMax THEN ExpandBuffer[];
	      ENDCASE =>  EXIT;
	    ENDLOOP;
	  desc.length ← i+1;
	  class ← tokenID;  value ← SymbolOps.EnterString[@desc];
	  GO TO GotNext
	  END;

	'A, 'B, 'C, 'D, 'E, 'F, 'G, 'H, 'I, 'J, 'K, 'L, 'M,
	'N, 'O, 'P, 'Q, 'R, 'S, 'T, 'U, 'V, 'W, 'X, 'Y, 'Z  =>
	  BEGIN
	  first, last: CARDINAL;
	  uId: BOOLEAN;
	  i, j: CARDINAL;
	  h: HashIndex;
	  s1, s2: CARDINAL;
	  i ← 0;  uId ← TRUE;  first ← last ← char-0C;
	    DO
	    buffer[i] ← char;
	    IF (tI←tI+1) = tMax THEN FillTextBuffer[];
	    char ← tB[tI];
	    SELECT char FROM
	      IN ['A..'Z] =>
		BEGIN  last ← char-0C;
		IF (i ← i+1) >= iMax THEN ExpandBuffer[];
		END;
	      IN ['a..'z], IN ['0..'9] =>
		BEGIN  uId ← FALSE;
		IF (i ← i+1) >= iMax THEN ExpandBuffer[];
		END;
	      ENDCASE =>  EXIT;
	    ENDLOOP;
	  i ← i+1;
	  IF uId
	    THEN
	      BEGIN
	      h ← ((first*128-first) + last) MOD LAST[HashIndex] + 1;
	      WHILE (j ← hashTab[h].symbol) # 0 
		DO
		IF vocabIndex[j]-(s2←vocabIndex[j-1]) = i
		  THEN
		    FOR s1 IN [0 .. i) 
		      DO
		      IF buffer[s1] # vocab[s2] THEN EXIT;
		      s2 ← s2+1;
		      REPEAT
			FINISHED => BEGIN class ← j; GO TO GotNext END;
		      ENDLOOP;
		IF (h ← hashTab[h].link) = 0 THEN EXIT;
		ENDLOOP;
	      END;
	  desc.length ← i;
	  class ← tokenID;  value ← SymbolOps.EnterString[@desc];
	  GO TO GotNext
	  END;

	'0, '1, '2, '3, '4, '5, '6, '7, '8, '9  =>
	  BEGIN
	  v, v10, v8: LONG INTEGER;
	  scale: CARDINAL;
	  valid, valid10, valid8, octal: BOOLEAN;
	  MaxLiteral: CARDINAL = AltoDefs.maxword;
	  vRep: ARRAY [0..SIZE[LONG INTEGER]) OF WORD;  -- machine dependent
	  v10 ← v8 ← 0;  valid10 ← valid8 ← TRUE;
	  WHILE char IN ['0..'9] 
	    DO
	    IF valid10 THEN [v10, valid10] ← AppendDigit10[v10, char];
	    IF valid8 THEN [v8, valid8] ← AppendDigit8[v8, char];
	    NextChar[];
	    ENDLOOP;
	  SELECT char FROM
	    'B, 'C =>
	      BEGIN
	      class ← IF char = 'C THEN tokenCHAR ELSE tokenNUM;
	      v ← v8;  valid ← valid8;  octal ← TRUE;
	      END;
	    ENDCASE =>
	      BEGIN
	      class ← tokenNUM;  v ← v10;  valid ← valid10;  octal ← FALSE;
	      END;
	  SELECT char FROM
	    'B, 'C, 'D =>
	      BEGIN 
	      NextChar[];
	      IF class = tokenNUM
		THEN
		  BEGIN  scale ← 0;
		  WHILE char IN ['0..'9] 
		    DO
		    scale ← 10*scale + CARDINAL[char-'0];
		    NextChar[];
		    ENDLOOP;
		  THROUGH [1 .. scale]  WHILE valid
		    DO
		    IF octal
		      THEN [v, valid] ← AppendDigit8[v, '0]
		      ELSE [v, valid] ← AppendDigit10[v, '0];
		    ENDLOOP;
		  END;
	      END;
	    ENDCASE;
	  vRep ← LOOPHOLE[v];
	  IF vRep[1] = 0  --v <= MaxLiteral--
	    THEN value ← LiteralOps.Find[vRep[0]]
	    ELSE
	      BEGIN
	      IF class = tokenCHAR THEN valid ← FALSE;
	      class ← tokenLNUM;
	      value ← LiteralOps.FindDescriptor[DESCRIPTOR[vRep]];
	      END;
	  IF ~valid THEN
	    BEGIN
	    nErrors ← nErrors + 1;  ScanError[number, index];
	    END;
          GO TO GotNext
          END;

	',, ';, ':, '←, '#, '~, '+, '*, '/, '↑, '@, '!,
	'(, '), '[, '], '{, '}  =>
	  BEGIN
	  class ← scanTab[char];  GO TO GetNext
	  END;

	'' =>
	  BEGIN
	  NextChar[];
	  class ← tokenCHAR;  value ← LiteralOps.Find[char-0C];
	  GO TO GetNext
	  END;

	'" =>
	  BEGIN
	  i: CARDINAL;
	  i ← 0;
	    DO 
	    IF (tI←tI+1) = tMax THEN
	      BEGIN  IF tEnded THEN GO TO EOFEnd;  FillTextBuffer[]  END;
	    char ← tB[tI];
	    SELECT char FROM
	      '" =>
		BEGIN
		IF (tI←tI+1) = tMax THEN FillTextBuffer[];
		char ← tB[tI];
		IF char # '" THEN GO TO QuoteEnd;
		END;
	      ENDCASE;
	    IF i >= iMax
	      THEN ExpandBuffer[
		!BufferOverflow =>
		  BEGIN
		  nErrors ← nErrors + 1;
		  ScanError[string, tOrigin+tI];
		  i ← 0;
		  CONTINUE
		  END];
	    buffer[i] ← char;  i ← i+1;
	    REPEAT
	      QuoteEnd => NULL;
	      EOFEnd => BEGIN  FillTextBuffer[];  char ← tB[tI]  END;
	    ENDLOOP;
	  desc.length ← i;
	  value ← LiteralOps.FindString[@desc];
	  IF char = 'L
	    THEN  BEGIN  class ← tokenLSTR;  GO TO GetNext  END
	    ELSE  BEGIN  class ← tokenSTR;   GO TO GotNext  END
	  END;

	'- =>
	  BEGIN
	  pChar: CHARACTER;
	  NextChar[];
	  IF char # '- 
	    THEN  BEGIN  class ← tokenMINUS;  GO TO GotNext  END;
	  char ← NUL;
	    DO
	    pChar ← char;
	    IF (tI←tI+1) = tMax THEN
	      BEGIN  IF tEnded THEN GO TO EndFile;  FillTextBuffer[]  END;
	    char ← tB[tI];
	    SELECT char FROM
	      '- =>  IF pChar = '- THEN EXIT;
	      CR =>  EXIT;
	      ENDCASE;
	    ENDLOOP;
	  NextChar[];
	  END;

	'. =>
	  BEGIN
	  NextChar[];
	  IF char = '. 
	    THEN  BEGIN  class ← tokenDOTS;  GO TO GetNext  END
	    ELSE  BEGIN  class ← tokenDOT;   GO TO GotNext  END
	  END;

	'= =>
	  BEGIN
	  NextChar[];
	  IF char = '> 
	    THEN  BEGIN  class ← tokenARROW;  GO TO GetNext  END
	    ELSE  BEGIN  class ← tokenEQUAL;  GO TO GotNext  END
	  END;

	'< =>
	  BEGIN
	  NextChar[];
	  IF char = '= 
	    THEN  BEGIN  class ← tokenLE;    GO TO GetNext  END
	    ELSE  BEGIN  class ← tokenLESS;  GO TO GotNext  END
	  END;

	'> =>
	  BEGIN
	  NextChar[];
	  IF char = '= 
	    THEN  BEGIN  class ← tokenGE;       GO TO GetNext  END
	    ELSE  BEGIN  class ← tokenGREATER;  GO TO GotNext  END
	  END;

	ENDCASE =>
	  BEGIN
	  class ← scanTab[char];
	  IF class # 0 THEN  GO TO GetNext;
	  NextChar[];
	  nErrors ← nErrors + 1;  ScanError[char, index];
	  END;

      REPEAT
	GetNext =>
	  BEGIN
	  IF (tI←tI+1) = tMax THEN FillTextBuffer[];
	  char ← tB[tI];
	  END;
	GotNext =>  NULL;
	EndFile =>
	  BEGIN
	  FillTextBuffer[];  char ← tB[tI];
	  class ← EndMarker;  index ← tOrigin;  value ← 0;
	  END;
      ENDLOOP;
    nTokens ← nTokens + 1;
    RETURN
    END;


 -- numerical conversion

  Digit: ARRAY CHARACTER ['0..'9] OF CARDINAL = [0,1,2,3,4,5,6,7,8,9];

  AppendDigit10: PROCEDURE [v: LONG INTEGER, digit: CHARACTER ['0..'9]]
      RETURNS [newV: LONG INTEGER, valid: BOOLEAN] =
    BEGIN
    MaxV: LONG INTEGER = 429496729;		-- (2**32-1)/10
    MaxD: CARDINAL = 5;			-- (2**32-1) MOD 10
    d: [0..9] = Digit[digit];
    valid ← v < MaxV OR (v = MaxV AND d <= MaxD);
    newV ← 10*v + d;
    RETURN
    END;

  AppendDigit8: PROCEDURE [v: LONG INTEGER, digit: CHARACTER ['0..'9]]
      RETURNS [newV: LONG INTEGER, valid: BOOLEAN] =
    BEGIN
    MaxV: LONG INTEGER = 3777777777B;		-- (2**32-1)/8
    MaxD: CARDINAL = 7B;			-- (2**32-1) MOD 8
    d: [0..9] = Digit[digit];
    valid ← (d < 8) AND (v < MaxV OR (v = MaxV AND d <= MaxD));
    newV ← 8*v + d;
    RETURN
    END;


 -- initialization/finalization

  ScanInit: PUBLIC PROCEDURE [
      sourceStream, messageStream: StreamDefs.StreamHandle,
      table: ParseTable.Handle] =
    BEGIN
    hashTab ← @table.scanTable.hashTab;
    scanTab ← @table.scanTable.scanTab;
    vocab ← LOOPHOLE[@table.scanTable.vocabBody, STRING];
    vocabIndex ← @table.scanTable.vocabIndex;
    IF buffer = NIL THEN buffer ← SystemDefs.AllocateHeapString[256];
    iMax ← buffer.length ← buffer.maxlength;
    desc.base ← buffer;  desc.offset ← 0;
    stream ← sourceStream;  errorStream ← messageStream;
    streamOrigin ← StreamDefs.GetIndex[stream];
    tB ← SystemDefs.AllocatePages[TextPages];
    tOrigin ← tLimit ← 0;  tMax ← 0;  tEnded ← FALSE;
    FillTextBuffer[];  char ← tB[tI];
    nTokens ← nErrors ← 0;
    END;

  ScanReset: PUBLIC PROCEDURE RETURNS [CARDINAL, CARDINAL] =
    BEGIN
    SystemDefs.FreePages[tB];
    IF buffer # NIL
      THEN BEGIN  SystemDefs.FreeHeapString[buffer];  buffer ← NIL END;
    [] ← SystemDefs.PruneHeap[];
    RETURN [nTokens, nErrors]
    END;


 -- error handling

  StreamIndex: TYPE = StreamDefs.StreamIndex;

  NewLine: PROCEDURE = BEGIN CharIO.PutChar[errorStream, CR] END;

  PrintTextLine: PROCEDURE [origin: StreamIndex] RETURNS [start: StreamIndex] =
    BEGIN  OPEN CharIO;
    lineIndex: StreamIndex;
    char: CHARACTER;
    n: [1..100];
    start ← lineIndex ← origin;
    FOR n IN [1..100] UNTIL lineIndex = [0, 0]
      DO
      lineIndex ← StreamDefs.ModifyIndex[lineIndex, -1];
      StreamDefs.SetIndex[stream, lineIndex];
      IF stream.get[stream] = CR THEN EXIT;
      start ← lineIndex;
      ENDLOOP;
    StreamDefs.SetIndex[stream, start];
    FOR n IN [1..100] UNTIL stream.endof[stream]
      DO
      char ← stream.get[stream];
      SELECT char FROM
	CR, ControlZ => EXIT;
	ENDCASE => PutChar[errorStream, char];
      ENDLOOP;
    NewLine[];  RETURN
    END;


  ResetScanIndex: PUBLIC PROCEDURE [index: CARDINAL] =
    BEGIN
    page: CARDINAL;
    IF index ~IN [tOrigin .. tLimit)
      THEN
	BEGIN
	page ← index/(AltoDefs.PageSize*AltoDefs.CharsPerWord);
	tOrigin ← tLimit ← page*(AltoDefs.PageSize*AltoDefs.CharsPerWord);
	tMax ← 0;  tEnded ← FALSE;
	StreamDefs.SetIndex[stream,
	   [page: streamOrigin.page+page, byte: streamOrigin.byte]];
	FillTextBuffer[];
	END;
    tI ← index - tOrigin;
    IF tI >= tMax THEN FillTextBuffer[];  char ← tB[tI];
    END;


  ScanError: PROCEDURE [code: {number, string, char}, tokenIndex: CARDINAL] =
    BEGIN
    ErrorContext[
      SELECT code FROM
	number => "Invalid Number"L,
	string => "String Too Long"L,
	char => "Invalid Character"L,
	ENDCASE => NIL,
      tokenIndex];
    NewLine[];
    END;


  ErrorContext: PUBLIC PROCEDURE [message: STRING, tokenIndex: CARDINAL] =
    BEGIN  OPEN CharIO;
    saveIndex: StreamIndex = StreamDefs.GetIndex[stream];
    origin: StreamIndex = StreamDefs.NormalizeIndex[
	[page: streamOrigin.page, byte: streamOrigin.byte+tokenIndex]];
    char: CHARACTER;
    StreamDefs.SetIndex[stream, PrintTextLine[origin]];
    UNTIL StreamDefs.GetIndex[stream] = origin
      DO
      char ← stream.get[stream ! StreamDefs.StreamError => EXIT];
      PutChar[errorStream, IF char = TAB THEN TAB ELSE ' ];
      ENDLOOP;
    PutString[errorStream, "↑ "L];  PutString[errorStream, message];
    PutString[errorStream, " ["L];
    PutNumber[errorStream, tokenIndex,
	[base:10, zerofill:FALSE, unsigned:TRUE, columns:0]];
    PutChar[errorStream, ']];   NewLine[];
    StreamDefs.SetIndex[stream, saveIndex];
    END;

  END.