about summary refs log tree commit diff
path: root/vendor/modernc.org/gc/v3/scanner.go
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/modernc.org/gc/v3/scanner.go')
-rw-r--r--vendor/modernc.org/gc/v3/scanner.go1446
1 files changed, 1446 insertions, 0 deletions
diff --git a/vendor/modernc.org/gc/v3/scanner.go b/vendor/modernc.org/gc/v3/scanner.go
new file mode 100644
index 0000000..a3815a8
--- /dev/null
+++ b/vendor/modernc.org/gc/v3/scanner.go
@@ -0,0 +1,1446 @@
+// Copyright 2022 The Gc Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gc // import "modernc.org/gc/v3"
+
+import (
+	"bytes"
+	"fmt"
+	"go/token"
+	"path/filepath"
+	"strings"
+	"unicode"
+	"unicode/utf8"
+
+	"modernc.org/mathutil"
+	mtoken "modernc.org/token"
+)
+
+var (
+	_ Node = (*Token)(nil)  // compile-time check: *Token satisfies Node
+	_ Node = (*nonode)(nil) // compile-time check: *nonode satisfies Node
+
+	keywords = map[string]token.Token{ // reserved words; identifierOrKeyword looks every scanned identifier up here
+		"break":       BREAK,
+		"case":        CASE,
+		"chan":        CHAN,
+		"const":       CONST,
+		"continue":    CONTINUE,
+		"default":     DEFAULT,
+		"defer":       DEFER,
+		"else":        ELSE,
+		"fallthrough": FALLTHROUGH,
+		"for":         FOR,
+		"func":        FUNC,
+		"go":          GO,
+		"goto":        GOTO,
+		"if":          IF,
+		"import":      IMPORT,
+		"interface":   INTERFACE,
+		"map":         MAP,
+		"package":     PACKAGE,
+		"range":       RANGE,
+		"return":      RETURN,
+		"select":      SELECT,
+		"struct":      STRUCT,
+		"switch":      SWITCH,
+		"type":        TYPE,
+		"var":         VAR,
+	}
+
+	lineCommentTag = []byte("line ") // text lineInfo requires right after the two-byte comment opener
+	znode          = &nonode{}       // package-level *nonode instance; its users are not in this part of the file
+)
+
+// nonode is a Node that carries neither a position nor any source text.
+type nonode struct{}
+
+// Position implements Node. The result is always the zero token.Position.
+func (*nonode) Position() token.Position {
+	return token.Position{}
+}
+
+// Source implements Node. The result is always the empty string, whatever
+// the value of full.
+func (*nonode) Source(full bool) string {
+	return ""
+}
+
+// Token represents a lexeme, its position and its semantic value.
+type Token struct { // 16 bytes on 64 bit arch
+	source *source // source the token was scanned from; nil for the zero Token, which IsValid reports as invalid
+
+	ch    int32 // token kind, stored as int32(token.Token); returned by Ch
+	index int32 // index of this token's entry in source.toks
+}
+
+// Ch reports the kind of token t represents.
+func (t Token) Ch() token.Token {
+	kind := token.Token(t.ch)
+	return kind
+}
+
+// Source implements Node. It returns the separator preceding t followed by
+// t's own text. When full is false the result is abbreviated: a non-empty
+// separator is replaced by a single space, and so is token text that contains
+// a newline.
+func (t Token) Source(full bool) string {
+	sep, src := t.Sep(), t.Src()
+	if full {
+		return sep + src
+	}
+
+	if sep != "" {
+		sep = " "
+	}
+	if strings.Contains(src, "\n") {
+		src = " "
+	}
+	return sep + src
+}
+
+// Position implements Node. It reports where t's text starts. The zero Token
+// yields the zero token.Position. A start offset beyond the end of the buffer
+// is clamped to the buffer length before it is resolved.
+func (t Token) Position() (r token.Position) {
+	src := t.source
+	if src == nil {
+		return r
+	}
+
+	start := src.toks[t.index].src
+	if size := int32(len(src.buf)); size < start {
+		start = size
+	}
+	return token.Position(src.file.PositionFor(mtoken.Pos(src.base+start), true))
+}
+
+// Prev returns the token that precedes t in its source, or the zero Token
+// when t is the first one.
+func (t Token) Prev() (r Token) {
+	before := t.index - 1
+	if before < 0 {
+		return r
+	}
+
+	src := t.source
+	return Token{source: src, ch: src.toks[before].ch, index: before}
+}
+
+// Next returns the token following t or a zero value if no such token exists.
+//
+// Calling Next on the zero Token is safe and returns the zero Token; without
+// the nil check the lookup below would dereference a nil source.
+func (t Token) Next() (r Token) {
+	s := t.source
+	if s == nil {
+		return r
+	}
+
+	if index := t.index + 1; index < int32(len(s.toks)) {
+		return Token{source: s, ch: s.toks[index].ch, index: index}
+	}
+
+	return r
+}
+
+// Sep returns the separator text that precedes t: the bytes between the
+// token's separator offset and the start of its text. A value installed with
+// SetSep takes precedence over the scanned bytes.
+func (t Token) Sep() string {
+	src := t.source
+	if patched, ok := src.sepPatches[t.index]; ok {
+		return patched
+	}
+
+	entry := src.toks[t.index]
+	return string(src.buf[entry.sep:entry.src])
+}
+
+// SetSep overrides the separator reported by Sep for t. The override is
+// recorded in the source's patch table, keyed by t's index; the scanned
+// buffer is left untouched.
+func (t Token) SetSep(s string) {
+	owner := t.source
+	if owner.sepPatches != nil {
+		owner.sepPatches[t.index] = s
+		return
+	}
+
+	owner.sepPatches = map[int32]string{t.index: s}
+}
+
+// Src returns the text of t. A value installed with SetSrc takes precedence.
+// Otherwise the EOF token has no text, and any other token spans from its
+// start offset up to the separator of the following token or, for the last
+// recorded token, up to the current scan offset.
+func (t Token) Src() string {
+	src := t.source
+	if patched, ok := src.srcPatches[t.index]; ok {
+		return patched
+	}
+
+	if t.ch == int32(EOF) {
+		return ""
+	}
+
+	end := src.off
+	if following := t.index + 1; following < int32(len(src.toks)) {
+		end = src.toks[following].sep
+	}
+	return string(src.buf[src.toks[t.index].src:end])
+}
+
+// SetSrc overrides the text reported by Src for t. The override is recorded
+// in the source's patch table, keyed by t's index; the scanned buffer is left
+// untouched.
+func (t Token) SetSrc(s string) {
+	owner := t.source
+	if owner.srcPatches != nil {
+		owner.srcPatches[t.index] = s
+		return
+	}
+
+	owner.srcPatches = map[int32]string{t.index: s}
+}
+
+// IsValid reports whether t is attached to a source. It is false for the
+// zero Token.
+func (t Token) IsValid() bool {
+	attached := t.source != nil
+	return attached
+}
+
+// tok is the compact per-token record kept in source.toks.
+type tok struct { // 12 bytes
+	ch  int32 // token kind, stored as int32(token.Token)
+	sep int32 // buffer offset where the separator preceding the token starts
+	src int32 // buffer offset where the token text starts
+}
+
+// token returns the kind of t as a token.Token.
+func (t *tok) token() token.Token {
+	return token.Token(t.ch)
+}
+
+// position resolves the start of t's text within s. An offset beyond the end
+// of the buffer is clamped to the buffer length first.
+func (t *tok) position(s *source) (r token.Position) {
+	start := t.src
+	if size := int32(len(s.buf)); size < start {
+		start = size
+	}
+	return token.Position(s.file.PositionFor(mtoken.Pos(s.base+start), true))
+}
+
+// source represents a single Go source file, editor text buffer etc.
+type source struct {
+	buf        []byte           // the text being scanned; owned by the source, see newSource
+	file       *mtoken.File     // line table used to turn offsets into positions
+	name       string           // name passed to newSource
+	sepPatches map[int32]string // separator overrides installed by Token.SetSep, keyed by token index
+	srcPatches map[int32]string // token text overrides installed by Token.SetSrc, keyed by token index
+	toks       []tok            // tokens recorded so far; scan appends one entry per call
+
+	base int32 // file.Base(), added to buffer offsets to form mtoken.Pos values
+	off  int32 // offset in buf of the scanner's lookahead byte
+}
+
+// newSource wraps buf in a source called name and creates the mtoken.File
+// used for position lookups. The result takes ownership of buf; callers must
+// not modify it afterwards.
+func newSource(name string, buf []byte) *source {
+	src := &source{buf: buf, name: name}
+	src.file = mtoken.NewFile(name, len(buf))
+	src.base = int32(src.file.Base())
+	return src
+}
+
+// ErrWithPosition pairs an error with the source position it refers to.
+type ErrWithPosition struct {
+	pos token.Position
+	err error
+}
+
+// String formats e as "position: error", or as just the error when the
+// position is not valid.
+func (e ErrWithPosition) String() string {
+	switch {
+	case e.pos.IsValid():
+		return fmt.Sprintf("%v: %v", e.pos, e.err)
+	default:
+		return fmt.Sprintf("%v", e.err)
+	}
+}
+
+// errList is a list of positioned errors that itself implements error.
+type errList []ErrWithPosition
+
+// Err returns e as an error, or nil when the list is empty, so that callers
+// never receive a non-nil error wrapping an empty list.
+func (e errList) Err() (r error) {
+	if len(e) == 0 {
+		return nil
+	}
+
+	return e
+}
+
+// Error implements error. It renders one line per entry and skips an entry
+// that repeats the previously rendered one, meaning it has the same offset
+// and the same message. Entries without line information are never skipped.
+//
+// The list itself is not modified.
+func (e errList) Error() string {
+	a := make([]string, 0, len(e))
+	var prev ErrWithPosition
+	for i, v := range e {
+		// The first entry is always rendered; prev is only meaningful afterwards.
+		if i > 0 && v.pos.Line != 0 && v.pos.Offset == prev.pos.Offset && v.err.Error() == prev.err.Error() {
+			continue
+		}
+
+		a = append(a, fmt.Sprint(v))
+		prev = v
+	}
+	return strings.Join(a, "\n")
+}
+
+func (e *errList) err(pos token.Position, msg string, args ...interface{}) {
+	if trcErrors {
+		trc("FAIL "+msg, args...)
+	}
+	switch {
+	case len(args) == 0:
+		*e = append(*e, ErrWithPosition{pos, fmt.Errorf("%s", msg)})
+	default:
+		*e = append(*e, ErrWithPosition{pos, fmt.Errorf(msg, args...)})
+	}
+}
+
+type scanner struct {
+	*source
+	dir  string  // directory part of the source name; lineInfo joins relative file names onto it
+	errs errList // errors reported so far
+	tok  tok     // token currently being assembled by scan and scan0
+
+	last int32 // kind of the previously scanned token; consulted and cleared by injectSemi
+
+	errBudget int // number of errors err will still record before it closes the scanner
+
+	c byte // Lookahead byte.
+
+	eof      bool // set once the lookahead has moved past the end of buf, and by close
+	isClosed bool // set by close; scan returns false from then on
+}
+
+// newScanner returns a scanner over buf, positioned at its first byte and
+// allowed to report up to 10 errors. The directory part of name is remembered
+// for resolving file names in line directives. An empty buf starts out at
+// end of file.
+func newScanner(name string, buf []byte) *scanner {
+	dir, _ := filepath.Split(name)
+	sc := &scanner{source: newSource(name, buf), errBudget: 10, dir: dir}
+	if len(buf) == 0 {
+		sc.eof = true
+		return sc
+	}
+
+	sc.c = buf[0]
+	if sc.c == '\n' {
+		// The line table must learn about a newline that is the very first byte.
+		sc.file.AddLine(int(sc.base + sc.off))
+	}
+	return sc
+}
+
+// isDigit reports whether c is an ASCII decimal digit.
+func isDigit(c byte) bool {
+	return '0' <= c && c <= '9'
+}
+
+// isHexDigit reports whether c is an ASCII hexadecimal digit of either case.
+func isHexDigit(c byte) bool {
+	switch {
+	case '0' <= c && c <= '9', 'a' <= c && c <= 'f', 'A' <= c && c <= 'F':
+		return true
+	}
+	return false
+}
+
+// isIDNext reports whether c is an ASCII byte that may continue an identifier.
+func isIDNext(c byte) bool {
+	return isDigit(c) || isIDFirst(c)
+}
+
+// isOctalDigit reports whether c is an ASCII octal digit.
+func isOctalDigit(c byte) bool {
+	return '0' <= c && c <= '7'
+}
+
+// isIDFirst reports whether c is an ASCII byte that may start an identifier:
+// a letter or the underscore.
+func isIDFirst(c byte) bool {
+	switch {
+	case c == '_', 'a' <= c && c <= 'z', 'A' <= c && c <= 'Z':
+		return true
+	}
+	return false
+}
+
+// position returns the position of the lookahead byte.
+func (s *scanner) position() token.Position {
+	return s.pos(s.off)
+}
+
+// pos resolves the buffer offset off to a position using the file's line
+// table, with line directive adjustments applied.
+func (s *scanner) pos(off int32) token.Position {
+	p := mtoken.Pos(s.base + off)
+	return token.Position(s.file.PositionFor(p, true))
+}
+
+// token returns a Token handle for the most recently recorded token.
+func (s *scanner) token() Token {
+	last := int32(len(s.toks) - 1)
+	return Token{source: s.source, ch: s.tok.ch, index: last}
+}
+
+// err records an error at buffer offset off, formatting msg with args as
+// errList.err does. Offsets past the end of the buffer are clamped to its
+// length. Once the error budget is used up nothing is recorded and the
+// scanner is closed instead.
+func (s *scanner) err(off int32, msg string, args ...interface{}) {
+	if s.errBudget <= 0 {
+		s.close()
+		return
+	}
+
+	s.errBudget--
+	limit := int32(len(s.buf))
+	if off > limit {
+		off = limit
+	}
+	s.errs.err(s.pos(off), msg, args...)
+}
+
+// close stops the scanner: the current token becomes ILLEGAL, end of file is
+// forced and scan returns false from now on. Closing twice is a no-op.
+func (s *scanner) close() {
+	if !s.isClosed {
+		s.tok.ch = int32(ILLEGAL)
+		s.eof, s.isClosed = true, true
+	}
+}
+
+// next advances the lookahead by one byte. Stepping past the last byte sets
+// the lookahead to 0 and marks end of file; after that next does nothing.
+// A newline that becomes the lookahead is registered in the line table.
+func (s *scanner) next() {
+	if s.eof {
+		return
+	}
+
+	s.off++
+	if int(s.off) == len(s.buf) {
+		s.c, s.eof = 0, true
+		return
+	}
+
+	if s.c = s.buf[s.off]; s.c == '\n' {
+		s.file.AddLine(int(s.base + s.off))
+	}
+}
+
+// nextN advances the lookahead by n bytes; rune uses it to step over a
+// multi-byte UTF-8 sequence. Stepping to or past the end of the buffer sets
+// the lookahead to 0 and marks end of file. A newline that becomes the
+// lookahead is registered in the line table.
+//
+// The offset is advanced in the end-of-file case too, exactly as next does,
+// so that a multi-byte rune ending the buffer still counts as consumed when
+// token text is sliced out of the buffer.
+func (s *scanner) nextN(n int) {
+	s.off += int32(n)
+	if size := len(s.buf); int(s.off) >= size {
+		s.off = int32(size)
+		s.c = 0
+		s.eof = true
+		return
+	}
+
+	s.c = s.buf[s.off]
+	if s.c == '\n' {
+		s.file.AddLine(int(s.base + s.off))
+	}
+}
+
+// scan reads the next token into s.tok and appends it to s.toks. It remembers
+// the kind of the previous token in s.last, marks the current offset as the
+// start of the new token's separator and then calls scan0 until that either
+// produces a token kind or returns false. The result is false once the
+// scanner is closed and for the call that records the EOF token.
+func (s *scanner) scan() (r bool) {
+	if s.isClosed {
+		return false
+	}
+
+	s.last, s.tok.sep, s.tok.ch = s.tok.ch, s.off, -1
+	for {
+		r = s.scan0()
+		if r && s.tok.ch < 0 {
+			// Only separator text was consumed; keep going.
+			continue
+		}
+
+		s.toks = append(s.toks, s.tok)
+		return r
+	}
+}
+
+// scan0 performs one scanning step at the current offset. It either
+// recognizes a token and stores its kind in s.tok.ch, or consumes a piece of
+// separator text (white space, a comment, a byte order mark at offset 0) and
+// leaves it to scan to call again. s.tok.src is set to the offset the step
+// started at. The result is false only for the step that produces EOF.
+func (s *scanner) scan0() (r bool) {
+	s.tok.src = mathutil.MinInt32(s.off, int32(len(s.buf)))
+	switch s.c {
+	case ' ', '\t', '\r', '\n':
+		// White space, formed from spaces (U+0020), horizontal tabs (U+0009), carriage
+		// returns (U+000D), and newlines (U+000A), is ignored except as it separates
+		// tokens that would otherwise combine into a single token.
+		if s.c == '\n' && s.injectSemi() {
+			return true
+		}
+
+		s.next()
+		return true
+	case '/':
+		off := s.off
+		s.next()
+		switch s.c {
+		case '=':
+			s.next()
+			s.tok.ch = int32(QUO_ASSIGN)
+		case '/':
+			// Line comments start with the character sequence // and stop at the end of
+			// the line.
+			s.next()
+			s.lineComment(off)
+			return true
+		case '*':
+			// General comments start with the character sequence /* and stop with the
+			// first subsequent character sequence */.
+			s.next()
+			s.generalComment(off)
+			return true
+		default:
+			s.tok.ch = int32(QUO)
+		}
+	case '(':
+		s.tok.ch = int32(LPAREN)
+		s.next()
+	case ')':
+		s.tok.ch = int32(RPAREN)
+		s.next()
+	case '[':
+		s.tok.ch = int32(LBRACK)
+		s.next()
+	case ']':
+		s.tok.ch = int32(RBRACK)
+		s.next()
+	case '{':
+		s.tok.ch = int32(LBRACE)
+		s.next()
+	case '}':
+		s.tok.ch = int32(RBRACE)
+		s.next()
+	case ',':
+		s.tok.ch = int32(COMMA)
+		s.next()
+	case ';':
+		s.tok.ch = int32(SEMICOLON)
+		s.next()
+	case '~':
+		s.tok.ch = int32(TILDE)
+		s.next()
+	case '"':
+		off := s.off
+		s.next()
+		s.stringLiteral(off)
+	case '\'':
+		off := s.off
+		s.next()
+		s.runeLiteral(off)
+	case '`':
+		// Raw string literal: everything up to the next back quote.
+		s.next()
+		for {
+			switch {
+			case s.c == '`':
+				s.next()
+				s.tok.ch = int32(STRING)
+				return true
+			case s.eof:
+				s.err(s.off, "raw string literal not terminated")
+				s.tok.ch = int32(STRING)
+				return true
+			case s.c == 0:
+				// Report a NUL byte like stringLiteral does and keep scanning the
+				// literal instead of panicking on malformed input.
+				s.err(s.off, "illegal character NUL")
+				s.next()
+			default:
+				s.next()
+			}
+		}
+	case '.':
+		s.next()
+		off := s.off
+		if isDigit(s.c) {
+			s.dot(false, true)
+			return true
+		}
+
+		if s.c != '.' {
+			s.tok.ch = int32(PERIOD)
+			return true
+		}
+
+		s.next()
+		if s.c != '.' {
+			// Only two dots: emit the first one as PERIOD and back up so that the
+			// second one is scanned again. The lookahead before the step above was
+			// a real '.', so the scanner was not at end of file; clear the flag
+			// that step may have set. Otherwise next can never move off the
+			// restored '.', and input ending in ".." produces ELLIPSIS tokens
+			// forever.
+			s.off = off
+			s.c = '.'
+			s.eof = false
+			s.tok.ch = int32(PERIOD)
+			return true
+		}
+
+		s.next()
+		s.tok.ch = int32(ELLIPSIS)
+		return true
+	case '%':
+		s.next()
+		switch s.c {
+		case '=':
+			s.next()
+			s.tok.ch = int32(REM_ASSIGN)
+		default:
+			s.tok.ch = int32(REM)
+		}
+	case '*':
+		s.next()
+		switch s.c {
+		case '=':
+			s.next()
+			s.tok.ch = int32(MUL_ASSIGN)
+		default:
+			s.tok.ch = int32(MUL)
+		}
+	case '^':
+		s.next()
+		switch s.c {
+		case '=':
+			s.next()
+			s.tok.ch = int32(XOR_ASSIGN)
+		default:
+			s.tok.ch = int32(XOR)
+		}
+	case '+':
+		s.next()
+		switch s.c {
+		case '+':
+			s.next()
+			s.tok.ch = int32(INC)
+		case '=':
+			s.next()
+			s.tok.ch = int32(ADD_ASSIGN)
+		default:
+			s.tok.ch = int32(ADD)
+		}
+	case '-':
+		s.next()
+		switch s.c {
+		case '-':
+			s.next()
+			s.tok.ch = int32(DEC)
+		case '=':
+			s.next()
+			s.tok.ch = int32(SUB_ASSIGN)
+		default:
+			s.tok.ch = int32(SUB)
+		}
+	case ':':
+		s.next()
+		switch {
+		case s.c == '=':
+			s.next()
+			s.tok.ch = int32(DEFINE)
+		default:
+			s.tok.ch = int32(COLON)
+		}
+	case '=':
+		s.next()
+		switch {
+		case s.c == '=':
+			s.next()
+			s.tok.ch = int32(EQL)
+		default:
+			s.tok.ch = int32(ASSIGN)
+		}
+	case '!':
+		s.next()
+		switch {
+		case s.c == '=':
+			s.next()
+			s.tok.ch = int32(NEQ)
+		default:
+			s.tok.ch = int32(NOT)
+		}
+	case '>':
+		s.next()
+		switch s.c {
+		case '=':
+			s.next()
+			s.tok.ch = int32(GEQ)
+		case '>':
+			s.next()
+			switch s.c {
+			case '=':
+				s.next()
+				s.tok.ch = int32(SHR_ASSIGN)
+			default:
+				s.tok.ch = int32(SHR)
+			}
+		default:
+			s.tok.ch = int32(GTR)
+		}
+	case '<':
+		s.next()
+		switch s.c {
+		case '=':
+			s.next()
+			s.tok.ch = int32(LEQ)
+		case '<':
+			s.next()
+			switch s.c {
+			case '=':
+				s.next()
+				s.tok.ch = int32(SHL_ASSIGN)
+			default:
+				s.tok.ch = int32(SHL)
+			}
+		case '-':
+			s.next()
+			s.tok.ch = int32(ARROW)
+		default:
+			s.tok.ch = int32(LSS)
+		}
+	case '|':
+		s.next()
+		switch s.c {
+		case '|':
+			s.next()
+			s.tok.ch = int32(LOR)
+		case '=':
+			s.next()
+			s.tok.ch = int32(OR_ASSIGN)
+		default:
+			s.tok.ch = int32(OR)
+		}
+	case '&':
+		s.next()
+		switch s.c {
+		case '&':
+			s.next()
+			s.tok.ch = int32(LAND)
+		case '^':
+			s.next()
+			switch s.c {
+			case '=':
+				s.next()
+				s.tok.ch = int32(AND_NOT_ASSIGN)
+			default:
+				s.tok.ch = int32(AND_NOT)
+			}
+		case '=':
+			s.next()
+			s.tok.ch = int32(AND_ASSIGN)
+		default:
+			s.tok.ch = int32(AND)
+		}
+	default:
+		switch {
+		case isIDFirst(s.c):
+			s.next()
+			s.identifierOrKeyword()
+		case isDigit(s.c):
+			s.numericLiteral()
+		case s.c >= 0x80:
+			// Non-ASCII: decode the rune to decide between an identifier, a byte
+			// order mark and an illegal character.
+			off := s.off
+			switch r := s.rune(); {
+			case unicode.IsLetter(r):
+				s.identifierOrKeyword()
+			case r == 0xfeff:
+				if off == 0 { // Ignore BOM, but only at buffer start.
+					return true
+				}
+
+				s.err(off, "illegal byte order mark")
+				s.tok.ch = int32(ILLEGAL)
+			default:
+				s.err(s.off, "illegal character %#U", r)
+				s.tok.ch = int32(ILLEGAL)
+			}
+		case s.eof:
+			// A final line without a newline still gets its automatic semicolon.
+			if s.injectSemi() {
+				return true
+			}
+
+			s.close()
+			s.tok.ch = int32(EOF)
+			// The offset may have been moved past the buffer end; keep sep <= src.
+			s.tok.sep = mathutil.MinInt32(s.tok.sep, s.tok.src)
+			return false
+		default:
+			s.err(s.off, "illegal character %#U", s.c)
+			s.next()
+			s.tok.ch = int32(ILLEGAL)
+		}
+	}
+	return true
+}
+
+// runeLiteral scans the remainder of a rune literal whose opening quote, at
+// offset off, has already been consumed, and sets the token kind to CHAR.
+// It counts the characters and escape sequences seen in ok and reports
+// "illegal rune literal" unless exactly one was found before the closing
+// quote. Malformed escapes, control characters, byte order marks and a
+// missing closing quote are reported as errors.
+//
+// A NUL byte inside the literal is reported through the existing
+// non-printable character diagnostic instead of causing a panic.
+func (s *scanner) runeLiteral(off int32) {
+	// Leading ' consumed.
+	ok := 0
+	s.tok.ch = int32(CHAR)
+	// expOff is the offset right after the first element of the literal, where
+	// the closing quote is expected; it is used when the literal is unterminated.
+	expOff := int32(-1)
+	if s.eof {
+		s.err(off, "rune literal not terminated")
+		return
+	}
+
+	for {
+		switch s.c {
+		case '\\':
+			ok++
+			s.next()
+			switch s.c {
+			case '\'', '\\', 'a', 'b', 'f', 'n', 'r', 't', 'v':
+				s.next()
+			case 'x', 'X':
+				s.next()
+				for i := 0; i < 2; i++ {
+					if s.c == '\'' {
+						// i is 0 or 1 here, so a quote always means the escape was
+						// cut short.
+						if i != 2 {
+							s.err(s.off, "illegal character %#U in escape sequence", s.c)
+						}
+						s.next()
+						return
+					}
+
+					if !isHexDigit(s.c) {
+						s.err(s.off, "illegal character %#U in escape sequence", s.c)
+						break
+					}
+					s.next()
+				}
+			case 'u':
+				s.u(4)
+			case 'U':
+				s.u(8)
+			default:
+				switch {
+				case s.eof:
+					// NOTE(review): err takes a buffer offset; adding s.base looks
+					// unintended but is harmless here because err clamps to len(buf).
+					s.err(s.base+s.off, "escape sequence not terminated")
+					return
+				case isOctalDigit(s.c):
+					for i := 0; i < 3; i++ {
+						s.next()
+						if s.c == '\'' {
+							if i != 2 {
+								s.err(s.off, "illegal character %#U in escape sequence", s.c)
+							}
+							s.next()
+							return
+						}
+
+						if !isOctalDigit(s.c) {
+							s.err(s.off, "illegal character %#U in escape sequence", s.c)
+							break
+						}
+					}
+				default:
+					s.err(s.off, "unknown escape sequence")
+				}
+			}
+		case '\'':
+			s.next()
+			if ok != 1 {
+				s.err(off, "illegal rune literal")
+			}
+			return
+		case '\t':
+			s.next()
+			ok++
+		default:
+			switch {
+			case s.eof:
+				switch {
+				case ok != 0:
+					s.err(expOff, "rune literal not terminated")
+				default:
+					s.err(s.base+s.off, "rune literal not terminated")
+				}
+				return
+			case s.c < ' ':
+				// Control characters, including NUL.
+				ok++
+				s.err(s.off, "non-printable character: %#U", s.c)
+				s.next()
+			case s.c >= 0x80:
+				ok++
+				off := s.off
+				if c := s.rune(); c == 0xfeff {
+					s.err(off, "illegal byte order mark")
+				}
+			default:
+				ok++
+				s.next()
+			}
+		}
+		if ok != 0 && expOff < 0 {
+			expOff = s.off
+			if s.eof {
+				expOff++
+			}
+		}
+	}
+}
+
+// stringLiteral scans the remainder of an interpreted string literal whose
+// opening quote, at offset off, has already been consumed, and sets the token
+// kind to STRING. It stops after the closing quote, or at a newline or end of
+// file, which are reported as an unterminated literal. Bad escapes, NUL bytes
+// and byte order marks are reported as errors and scanning continues.
+//
+// A malformed \x escape is reported as an error instead of causing a panic.
+func (s *scanner) stringLiteral(off int32) {
+	// Leading " consumed.
+	s.tok.ch = int32(STRING)
+	for {
+		switch {
+		case s.c == '"':
+			s.next()
+			return
+		case s.c == '\\':
+			s.next()
+			switch s.c {
+			case '"', '\\', 'a', 'b', 'f', 'n', 'r', 't', 'v':
+				s.next()
+				continue
+			case 'x', 'X':
+				s.next()
+				// Exactly two hexadecimal digits must follow.
+				for i := 0; i < 2; i++ {
+					if !isHexDigit(s.c) {
+						// The offending byte is left for the main loop, so a closing
+						// quote, a newline or end of file is still handled there; end
+						// of file is reported there as an unterminated literal.
+						if !s.eof {
+							s.err(s.off, "illegal character %#U in escape sequence", s.c)
+						}
+						break
+					}
+
+					s.next()
+				}
+				continue
+			case 'u':
+				s.u(4)
+				continue
+			case 'U':
+				s.u(8)
+				continue
+			default:
+				switch {
+				case isOctalDigit(s.c):
+					s.next()
+					if isOctalDigit(s.c) {
+						s.next()
+					}
+					if isOctalDigit(s.c) {
+						s.next()
+					}
+					continue
+				default:
+					// NOTE(review): off is the offset of the opening quote, so this
+					// reports one byte before the literal — confirm that is intended.
+					s.err(off-1, "unknown escape sequence")
+				}
+			}
+		case s.c == '\n':
+			fallthrough
+		case s.eof:
+			s.err(off, "string literal not terminated")
+			return
+		case s.c == 0:
+			s.err(s.off, "illegal character NUL")
+		}
+
+		switch {
+		case s.c >= 0x80:
+			off := s.off
+			if s.rune() == 0xfeff {
+				s.err(off, "illegal byte order mark")
+			}
+			continue
+		}
+
+		s.next()
+	}
+}
+
+// u scans the n hexadecimal digits of a \u or \U escape and returns their
+// value. On entry the lookahead is the 'u' or 'U' itself. Scanning stops at
+// the first byte that is not a hexadecimal digit, which is reported and left
+// unconsumed. A complete escape that is not a valid Unicode code point
+// (negative, above unicode.MaxRune or a surrogate) is reported as well.
+func (s *scanner) u(n int) (r rune) {
+	// Leading u/U not consumed.
+	s.next()
+	start := s.off
+	for remaining := n; remaining > 0; remaining-- {
+		if !isHexDigit(s.c) {
+			if s.eof {
+				s.err(s.base+s.off, "escape sequence not terminated")
+			} else {
+				s.err(s.off, "illegal character %#U in escape sequence", s.c)
+			}
+			return r
+		}
+
+		// s.c is a hexadecimal digit, so three ranges cover every case.
+		var digit rune
+		switch c := rune(s.c); {
+		case c <= '9':
+			digit = c - '0'
+		case c >= 'a':
+			digit = c - 'a' + 10
+		default:
+			digit = c - 'A' + 10
+		}
+		r = 16*r + digit
+		s.next()
+	}
+
+	surrogate := r >= 0xd800 && r <= 0xdfff
+	if r < 0 || r > unicode.MaxRune || surrogate {
+		s.err(start-1, "escape sequence is invalid Unicode code point")
+	}
+	return r
+}
+
+// identifierOrKeyword scans the remainder of an identifier whose first
+// character has already been consumed. It accepts ASCII letters, digits and
+// underscores as well as non-ASCII Unicode letters and digits, then sets the
+// token kind to the matching keyword or to IDENT.
+func (s *scanner) identifierOrKeyword() {
+out:
+	for {
+		switch {
+		case isIDNext(s.c):
+			s.next()
+		case s.c >= 0x80:
+			off := s.off
+			c := s.c
+			switch r := s.rune(); {
+			case unicode.IsLetter(r) || unicode.IsDigit(r):
+				// already consumed
+			default:
+				// The rune does not belong to the identifier: back up so that it
+				// is scanned again as the start of the next token. A lookahead
+				// >= 0x80 means the scanner was not at end of file before
+				// decoding, so clear the flag decoding may have set; otherwise
+				// next could never move off the restored byte.
+				s.off = off
+				s.c = c
+				s.eof = false
+				break out
+			}
+		case s.eof:
+			break out
+		case s.c == 0:
+			s.err(s.off, "illegal character NUL")
+			break out
+		default:
+			break out
+		}
+	}
+	// Keywords are looked up by the exact text scanned; no entry yields 0.
+	if s.tok.ch = int32(keywords[string(s.buf[s.tok.src:s.off])]); s.tok.ch == 0 {
+		s.tok.ch = int32(IDENT)
+	}
+}
+
+func (s *scanner) numericLiteral() {
+	// numericLiteral scans an integer, floating-point or imaginary literal and stores its kind in s.tok.ch. Leading decimal digit not consumed.
+	var hasHexMantissa, needFrac bool
+more:
+	switch s.c {
+	case '0': // a leading zero may introduce a base prefix, a legacy octal literal, or a plain 0
+		s.next()
+		switch s.c {
+		case '.':
+			// nop
+		case 'b', 'B':
+			s.next()
+			s.binaryLiteral()
+			return
+		case 'e', 'E':
+			s.exponent()
+			s.tok.ch = int32(FLOAT)
+			return
+		case 'p', 'P':
+			s.err(s.off, "'%c' exponent requires hexadecimal mantissa", s.c)
+			s.exponent()
+			s.tok.ch = int32(FLOAT)
+			return
+		case 'o', 'O':
+			s.next()
+			s.octalLiteral()
+			return
+		case 'x', 'X':
+			hasHexMantissa = true
+			needFrac = true // no mantissa digit seen yet, so digits after a '.' are mandatory
+			s.tok.ch = int32(INT)
+			s.next()
+			if s.c == '.' {
+				s.next()
+				s.dot(hasHexMantissa, needFrac)
+				return
+			}
+
+			if s.hexadecimals() == 0 {
+				s.err(s.base+s.off, "hexadecimal literal has no digits") // NOTE(review): err takes a buffer offset; adding s.base shifts the reported position — confirm intent
+				return
+			}
+
+			needFrac = false
+		case 'i':
+			s.next()
+			s.tok.ch = int32(IMAG)
+			return
+		default: // 0 followed by digits or '_': legacy octal unless it turns out to be a float or imaginary literal
+			invalidOff := int32(-1) // offset of the first digit 8 or 9, if any
+			var invalidDigit byte
+			for {
+				if s.c == '_' {
+					for n := 0; s.c == '_'; n++ {
+						if n == 1 {
+							s.err(s.off, "'_' must separate successive digits")
+						}
+						s.next()
+					}
+					if !isDigit(s.c) {
+						s.err(s.off-1, "'_' must separate successive digits")
+					}
+				}
+				if isOctalDigit(s.c) {
+					s.next()
+					continue
+				}
+
+				if isDigit(s.c) {
+					if invalidOff < 0 {
+						invalidOff = s.off
+						invalidDigit = s.c
+					}
+					s.next()
+					continue
+				}
+
+				break
+			}
+			switch s.c {
+			case '.', 'e', 'E', 'i': // not an octal integer after all; the suffix is handled by the switch after the label
+				break more
+			}
+			if isDigit(s.c) {
+				break more
+			}
+			if invalidOff > 0 {
+				s.err(invalidOff, "invalid digit '%c' in octal literal", invalidDigit)
+			}
+			s.tok.ch = int32(INT)
+			return
+		}
+	default:
+		s.decimals()
+	}
+	switch s.c { // what follows the integer part decides the token kind
+	case '.':
+		s.next()
+		s.dot(hasHexMantissa, needFrac)
+	case 'p', 'P':
+		if !hasHexMantissa {
+			s.err(s.off, "'%c' exponent requires hexadecimal mantissa", s.c)
+		}
+		fallthrough
+	case 'e', 'E':
+		s.exponent()
+		if s.c == 'i' {
+			s.next()
+			s.tok.ch = int32(IMAG)
+			return
+		}
+
+		s.tok.ch = int32(FLOAT)
+	case 'i':
+		s.next()
+		s.tok.ch = int32(IMAG)
+	default:
+		s.tok.ch = int32(INT)
+	}
+}
+
+func (s *scanner) octalLiteral() {
+	// octalLiteral scans the digits of an octal literal and sets the token kind to INT, FLOAT or IMAG. Leading 0o consumed.
+	ok := false             // set once a valid octal digit has been seen
+	invalidOff := int32(-1) // offset of the first digit 8 or 9, if any
+	var invalidDigit byte
+	s.tok.ch = int32(INT)
+	for {
+		for n := 0; s.c == '_'; n++ {
+			if n == 1 {
+				s.err(s.off, "'_' must separate successive digits")
+			}
+			s.next()
+		}
+		switch s.c {
+		case '0', '1', '2', '3', '4', '5', '6', '7':
+			s.next()
+			ok = true
+		case '8', '9': // remembered here, reported once the whole literal has been consumed
+			if invalidOff < 0 {
+				invalidOff = s.off
+				invalidDigit = s.c
+			}
+			s.next()
+		case '.':
+			s.tok.ch = int32(FLOAT)
+			s.err(s.off, "invalid radix point in octal literal")
+			s.next()
+		case 'e', 'E':
+			s.tok.ch = int32(FLOAT)
+			s.err(s.off, "'%c' exponent requires decimal mantissa", s.c)
+			s.exponent()
+		case 'p', 'P':
+			s.tok.ch = int32(FLOAT)
+			s.err(s.off, "'%c' exponent requires hexadecimal mantissa", s.c)
+			s.exponent()
+		default: // end of the literal
+			switch {
+			case !ok:
+				s.err(s.base+s.off, "octal literal has no digits") // NOTE(review): err takes a buffer offset; adding s.base shifts the reported position — confirm intent
+			case invalidOff > 0:
+				s.err(invalidOff, "invalid digit '%c' in octal literal", invalidDigit)
+			}
+			if s.c == 'i' {
+				s.next()
+				s.tok.ch = int32(IMAG)
+			}
+			return
+		}
+	}
+}
+
+func (s *scanner) binaryLiteral() {
+	// binaryLiteral scans the digits of a binary literal and sets the token kind to INT, FLOAT or IMAG. Leading 0b consumed.
+	ok := false             // set once a valid binary digit has been seen
+	invalidOff := int32(-1) // offset of the first digit other than 0 or 1, if any
+	var invalidDigit byte
+	s.tok.ch = int32(INT)
+	for {
+		for n := 0; s.c == '_'; n++ {
+			if n == 1 {
+				s.err(s.off, "'_' must separate successive digits")
+			}
+			s.next()
+		}
+		switch s.c {
+		case '0', '1':
+			s.next()
+			ok = true
+		case '.':
+			s.tok.ch = int32(FLOAT)
+			s.err(s.off, "invalid radix point in binary literal")
+			s.next()
+		case 'e', 'E':
+			s.tok.ch = int32(FLOAT)
+			s.err(s.off, "'%c' exponent requires decimal mantissa", s.c)
+			s.exponent()
+		case 'p', 'P':
+			s.tok.ch = int32(FLOAT)
+			s.err(s.off, "'%c' exponent requires hexadecimal mantissa", s.c)
+			s.exponent()
+		default:
+			if isDigit(s.c) { // digits 2-9: remembered here, reported once the whole literal has been consumed
+				if invalidOff < 0 {
+					invalidOff = s.off
+					invalidDigit = s.c
+				}
+				s.next()
+				continue
+			}
+
+			switch { // end of the literal
+			case !ok:
+				s.err(s.base+s.off, "binary literal has no digits") // NOTE(review): err takes a buffer offset; adding s.base shifts the reported position — confirm intent
+			case invalidOff > 0:
+				s.err(invalidOff, "invalid digit '%c' in binary literal", invalidDigit)
+			}
+			if s.c == 'i' {
+				s.next()
+				s.tok.ch = int32(IMAG)
+			}
+			return
+		}
+	}
+}
+
+// generalComment scans the remainder of a /* */ comment whose opener, at
+// offset off, has already been consumed. When the comment is closed, its text
+// is handed to lineInfo and, if it contained a newline, injectSemi decides
+// whether a semicolon token is produced; that decision is the result. An
+// unterminated comment is reported, sets the token kind to 0 and returns
+// true.
+//
+// A NUL byte inside the comment is reported as an error and skipped instead
+// of causing a panic.
+func (s *scanner) generalComment(off int32) (injectSemi bool) {
+	// Leading /* consumed
+	off0 := s.off - 2
+	var nl bool
+	for {
+		switch {
+		case s.c == '*':
+			s.next()
+			switch s.c {
+			case '/':
+				// s.off+1 is the offset right after the closing "*/".
+				s.lineInfo(off0, s.off+1)
+				s.next()
+				if nl {
+					return s.injectSemi()
+				}
+
+				return false
+			}
+		case s.c == '\n':
+			nl = true
+			s.next()
+		case s.eof:
+			s.tok.ch = 0
+			s.err(off, "comment not terminated")
+			return true
+		case s.c == 0:
+			s.err(s.off, "illegal character NUL")
+			s.next()
+		default:
+			s.next()
+		}
+	}
+}
+
+func (s *scanner) lineComment(off int32) (injectSemi bool) {
+	// lineComment scans the rest of a // comment that starts at offset off and reports whether a semicolon token was injected. Leading // consumed
+	off0 := s.off - 2 // offset of the comment's first '/'
+	for {
+		switch {
+		case s.c == '\n':
+			s.lineInfo(off0, s.off+1) // the range handed over includes the terminating newline
+			if s.injectSemi() {
+				return true
+			}
+
+			s.next()
+			return false
+		case s.c >= 0x80:
+			if c := s.rune(); c == 0xfeff {
+				s.err(off+2, "illegal byte order mark")
+			}
+		case s.eof: // comment ends the buffer without a newline; lineInfo is not consulted on this path
+			s.off++ // NOTE(review): moves the offset past len(buf); scan0 clamps tok.src and the EOF tok.sep accordingly
+			if s.injectSemi() {
+				return true
+			}
+
+			return false
+		case s.c == 0: // a NUL byte ends the comment without being consumed
+			return false
+		default:
+			s.next()
+		}
+	}
+}
+
+func (s *scanner) lineInfo(off, next int32) { // lineInfo applies a "line " directive found in the comment s.buf[off:next], if there is one
+	if off != 0 && s.buf[off+1] != '*' && s.buf[off-1] != '\n' && s.buf[off-1] != '\r' { // a // comment is considered only at the start of the buffer or right after a line break
+		return
+	}
+
+	str := s.buf[off:next]
+	if !bytes.HasPrefix(str[len("//"):], lineCommentTag) { // the two-byte comment opener must be followed by "line "
+		return
+	}
+
+	switch { // drop the comment terminator
+	case str[1] == '*':
+		str = str[:len(str)-len("*/")]
+	default:
+		str = str[:len(str)-len("\n")]
+	}
+	str = str[len("//"):]
+
+	str, ln, ok := s.lineInfoNum(str[len("line "):]) // the last ":number" of the directive
+	col := 0
+	if ok == liBadNum || ok == liNoNum {
+		return
+	}
+
+	hasCol := false
+	var n int
+	if str, n, ok = s.lineInfoNum(str); ok == liBadNum { // an optional second ":number" before it
+		return
+	}
+
+	if ok != liNoNum { // two numbers were given: the first is the line, the second the column
+		col = ln
+		ln = n
+		hasCol = true
+	}
+
+	fn := strings.TrimSpace(string(str))
+	switch {
+	case fn == "" && hasCol: // no file name but a column: keep the file name currently in effect
+		fn = s.pos(off).Filename
+	case fn != "":
+		fn = filepath.Clean(fn)
+		if !filepath.IsAbs(fn) {
+			fn = filepath.Join(s.dir, fn)
+		}
+	}
+	// trc("set %v %q %v %v", next, fn, ln, col)
+	s.file.AddLineColumnInfo(int(next), fn, ln, col)
+}
+
+const (
+	liNoNum = iota // lineInfoNum result: the text does not end in a digit
+	liBadNum       // lineInfoNum result: the trailing digits are not preceded by ':' or their value became negative
+	liOK           // lineInfoNum result: a trailing ":number" was parsed and removed
+)
+
+// lineInfoNum parses a trailing ":number" from str. On success it returns str
+// without that suffix, the number and liOK. It returns liNoNum when str is
+// empty or does not end in a digit, and liBadNum when the digits are not
+// preceded by ':' or their running value becomes negative. In the failure
+// cases str is returned unchanged and the number is 0.
+func (s *scanner) lineInfoNum(str []byte) (_ []byte, n, r int) {
+	if len(str) == 0 || !isDigit(str[len(str)-1]) {
+		return str, 0, liNoNum
+	}
+
+	// Accumulate the digits right to left; index 0 is never consumed because a
+	// ':' has to precede the number.
+	i := len(str) - 1
+	for scale := 1; i > 0 && isDigit(str[i]); i, scale = i-1, scale*10 {
+		if n += scale * (int(str[i]) - '0'); n < 0 {
+			return str, 0, liBadNum
+		}
+	}
+	if str[i] != ':' {
+		return str, 0, liBadNum
+	}
+
+	return str[:i], n, liOK
+}
+
+// rune decodes the UTF-8 sequence at the current offset, consumes it and
+// returns the rune. An invalid encoding is reported, consumes a single byte
+// and yields utf8.RuneError.
+func (s *scanner) rune() rune {
+	r, size := utf8.DecodeRune(s.buf[s.off:])
+	if r != utf8.RuneError || size > 1 {
+		// A well-formed sequence, including a literal U+FFFD.
+		s.nextN(size)
+		return r
+	}
+
+	if size == 0 {
+		// Nothing left to decode.
+		panic(todo("%v: %#U", s.position(), s.c))
+	}
+
+	s.err(s.off, "illegal UTF-8 encoding")
+	s.next()
+	return r
+}
+
+func (s *scanner) dot(hasHexMantissa, needFrac bool) {
+	// dot scans what follows the radix point of a floating-point literal and sets the token kind to FLOAT or IMAG. '.' already consumed
+	switch {
+	case hasHexMantissa:
+		if s.hexadecimals() == 0 && needFrac { // needFrac: no mantissa digit preceded the '.', so at least one must follow
+			s.err(s.off, "hexadecimal literal has no digits")
+		}
+		switch s.c {
+		case 'p', 'P':
+			// ok
+		default:
+			s.err(s.off, "hexadecimal mantissa requires a 'p' exponent")
+		}
+	default:
+		if s.decimals() == 0 && needFrac { // NOTE(review): in this file the only decimal caller passing needFrac has already seen a digit, so this looks unreachable — confirm
+			panic(todo("%v: %#U", s.position(), s.c))
+		}
+	}
+	switch s.c { // optional exponent and imaginary suffix
+	case 'p', 'P':
+		if !hasHexMantissa {
+			s.err(s.off, "'%c' exponent requires hexadecimal mantissa", s.c)
+		}
+		fallthrough
+	case 'e', 'E':
+		s.exponent()
+		if s.c == 'i' {
+			s.next()
+			s.tok.ch = int32(IMAG)
+			return
+		}
+
+		s.tok.ch = int32(FLOAT)
+	case 'i':
+		s.next()
+		s.tok.ch = int32(IMAG)
+	default:
+		s.tok.ch = int32(FLOAT)
+	}
+}
+
+// exponent scans an exponent: the exponent letter, which is the lookahead on
+// entry, an optional sign and the decimal digits. A missing digit sequence is
+// reported as an error.
+func (s *scanner) exponent() {
+	// Leading exponent letter not consumed.
+	s.next()
+	if s.c == '+' || s.c == '-' {
+		s.next()
+	}
+	if isDigit(s.c) {
+		s.decimals()
+		return
+	}
+
+	s.err(s.base+s.off, "exponent has no digits")
+}
+
+// decimals consumes a run of decimal digits and '_' separators and returns
+// the number of digits seen. An underscore is reported unless it sits between
+// two digits: before the first digit, doubled, or not followed by a digit.
+func (s *scanner) decimals() (r int) {
+	seenDigit := false
+	for {
+		if isDigit(s.c) {
+			seenDigit = true
+			s.next()
+			r++
+			continue
+		}
+
+		if s.c != '_' {
+			return r
+		}
+
+		for run := 0; s.c == '_'; run++ {
+			if !seenDigit || run == 1 {
+				s.err(s.off, "'_' must separate successive digits")
+			}
+			s.next()
+		}
+		if !isDigit(s.c) {
+			s.err(s.off-1, "'_' must separate successive digits")
+		}
+	}
+}
+
+// hexadecimals consumes a run of hexadecimal digits and '_' separators and
+// returns the number of digits seen. A doubled underscore and an underscore
+// not followed by a hexadecimal digit are reported as errors.
+func (s *scanner) hexadecimals() (r int) {
+	for {
+		if isHexDigit(s.c) {
+			s.next()
+			r++
+			continue
+		}
+
+		if s.c != '_' {
+			return r
+		}
+
+		for run := 0; s.c == '_'; run++ {
+			if run == 1 {
+				s.err(s.off, "'_' must separate successive digits")
+			}
+			s.next()
+		}
+		if !isHexDigit(s.c) {
+			s.err(s.off-1, "'_' must separate successive digits")
+		}
+	}
+}
+
+// injectSemi implements automatic semicolon insertion. A semicolon is
+// inserted into the token stream immediately after a line's final token if
+// that token is
+//
+//   - an identifier
+//   - an integer, floating-point, imaginary, rune, or string literal
+//   - one of the keywords break, continue, fallthrough, or return
+//   - one of the operators and punctuation ++, --, ), ], or }
+//
+// The decision is based on the kind of the previous token, which is cleared
+// in any case. When a semicolon is due, the current token becomes SEMICOLON,
+// a newline lookahead is consumed and the result is true.
+func (s *scanner) injectSemi() bool {
+	previous := token.Token(s.last)
+	s.last = 0
+	switch previous {
+	case IDENT, INT, FLOAT, IMAG, CHAR, STRING:
+	case BREAK, CONTINUE, FALLTHROUGH, RETURN:
+	case INC, DEC, RPAREN, RBRACK, RBRACE:
+	default:
+		return false
+	}
+
+	s.tok.ch = int32(SEMICOLON)
+	if s.c == '\n' {
+		s.next()
+	}
+	return true
+}