From 1a57267a17c2fc17fb6e104846fabc3e363c326c Mon Sep 17 00:00:00 2001 From: Emile Date: Fri, 16 Aug 2024 19:50:26 +0200 Subject: initial commit --- vendor/modernc.org/gc/v3/scanner.go | 1446 +++++++++++++++++++++++++++++++++++ 1 file changed, 1446 insertions(+) create mode 100644 vendor/modernc.org/gc/v3/scanner.go (limited to 'vendor/modernc.org/gc/v3/scanner.go') diff --git a/vendor/modernc.org/gc/v3/scanner.go b/vendor/modernc.org/gc/v3/scanner.go new file mode 100644 index 0000000..a3815a8 --- /dev/null +++ b/vendor/modernc.org/gc/v3/scanner.go @@ -0,0 +1,1446 @@ +// Copyright 2022 The Gc Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gc // import "modernc.org/gc/v3" + +import ( + "bytes" + "fmt" + "go/token" + "path/filepath" + "strings" + "unicode" + "unicode/utf8" + + "modernc.org/mathutil" + mtoken "modernc.org/token" +) + +var ( + _ Node = (*Token)(nil) + _ Node = (*nonode)(nil) + + keywords = map[string]token.Token{ + "break": BREAK, + "case": CASE, + "chan": CHAN, + "const": CONST, + "continue": CONTINUE, + "default": DEFAULT, + "defer": DEFER, + "else": ELSE, + "fallthrough": FALLTHROUGH, + "for": FOR, + "func": FUNC, + "go": GO, + "goto": GOTO, + "if": IF, + "import": IMPORT, + "interface": INTERFACE, + "map": MAP, + "package": PACKAGE, + "range": RANGE, + "return": RETURN, + "select": SELECT, + "struct": STRUCT, + "switch": SWITCH, + "type": TYPE, + "var": VAR, + } + + lineCommentTag = []byte("line ") + znode = &nonode{} +) + +type nonode struct{} + +func (*nonode) Position() (r token.Position) { return r } +func (*nonode) Source(full bool) string { return "" } + +// Token represents a lexeme, its position and its semantic value. +type Token struct { // 16 bytes on 64 bit arch + source *source + + ch int32 + index int32 +} + +// Ch returns which token t represents +func (t Token) Ch() token.Token { return token.Token(t.ch) } + +// Source implements Node. +func (t Token) Source(full bool) string { + // trc("%10s %v: #%v sep %v, src %v, buf %v", tokSource(t.Ch()), t.Position(), t.index, t.source.toks[t.index].sep, t.source.toks[t.index].src, len(t.source.buf)) + sep := t.Sep() + if !full && sep != "" { + sep = " " + } + src := t.Src() + if !full && strings.ContainsRune(src, '\n') { + src = " " + } + // trc("%q %q -> %q %q", t.Sep(), t.Src(), sep, src) + return sep + src +} + +// Positions implements Node. +func (t Token) Position() (r token.Position) { + if t.source == nil { + return r + } + + s := t.source + off := mathutil.MinInt32(int32(len(s.buf)), s.toks[t.index].src) + return token.Position(s.file.PositionFor(mtoken.Pos(s.base+off), true)) +} + +// Prev returns the token preceding t or a zero value if no such token exists. +func (t Token) Prev() (r Token) { + if index := t.index - 1; index >= 0 { + s := t.source + return Token{source: s, ch: s.toks[index].ch, index: index} + } + + return r +} + +// Next returns the token following t or a zero value if no such token exists. +func (t Token) Next() (r Token) { + if index := t.index + 1; index < int32(len(t.source.toks)) { + s := t.source + return Token{source: s, ch: s.toks[index].ch, index: index} + } + + return r +} + +// Sep returns any separators, combined, preceding t. +func (t Token) Sep() string { + s := t.source + if p, ok := s.sepPatches[t.index]; ok { + return p + } + + return string(s.buf[s.toks[t.index].sep:s.toks[t.index].src]) +} + +// SetSep sets t's separator. +func (t Token) SetSep(s string) { + src := t.source + if src.sepPatches == nil { + src.sepPatches = map[int32]string{} + } + src.sepPatches[t.index] = s +} + +// Src returns t's source form. +func (t Token) Src() string { + s := t.source + if p, ok := s.srcPatches[t.index]; ok { + return p + } + + if t.ch != int32(EOF) { + next := t.source.off + if t.index < int32(len(s.toks))-1 { + next = s.toks[t.index+1].sep + } + return string(s.buf[s.toks[t.index].src:next]) + } + + return "" +} + +// SetSrc sets t's source form. +func (t Token) SetSrc(s string) { + src := t.source + if src.srcPatches == nil { + src.srcPatches = map[int32]string{} + } + src.srcPatches[t.index] = s +} + +// IsValid reports t is a valid token. Zero value reports false. +func (t Token) IsValid() bool { return t.source != nil } + +type tok struct { // 12 bytes + ch int32 + sep int32 + src int32 +} + +func (t *tok) token() token.Token { return token.Token(t.ch) } + +func (t *tok) position(s *source) (r token.Position) { + off := mathutil.MinInt32(int32(len(s.buf)), t.src) + return token.Position(s.file.PositionFor(mtoken.Pos(s.base+off), true)) +} + +// source represents a single Go source file, editor text buffer etc. +type source struct { + buf []byte + file *mtoken.File + name string + sepPatches map[int32]string + srcPatches map[int32]string + toks []tok + + base int32 + off int32 +} + +// 'buf' becomes owned by the result and must not be modified afterwards. +func newSource(name string, buf []byte) *source { + file := mtoken.NewFile(name, len(buf)) + return &source{ + buf: buf, + file: file, + name: name, + base: int32(file.Base()), + } +} + +type ErrWithPosition struct { + pos token.Position + err error +} + +func (e ErrWithPosition) String() string { + switch { + case e.pos.IsValid(): + return fmt.Sprintf("%v: %v", e.pos, e.err) + default: + return fmt.Sprintf("%v", e.err) + } +} + +type errList []ErrWithPosition + +func (e errList) Err() (r error) { + if len(e) == 0 { + return nil + } + + return e +} + +func (e errList) Error() string { + w := 0 + prev := ErrWithPosition{pos: token.Position{Offset: -1}} + for _, v := range e { + if v.pos.Line == 0 || v.pos.Offset != prev.pos.Offset || v.err.Error() != prev.err.Error() { + e[w] = v + w++ + prev = v + } + } + + var a []string + for _, v := range e { + a = append(a, fmt.Sprint(v)) + } + return strings.Join(a, "\n") +} + +func (e *errList) err(pos token.Position, msg string, args ...interface{}) { + if trcErrors { + trc("FAIL "+msg, args...) + } + switch { + case len(args) == 0: + *e = append(*e, ErrWithPosition{pos, fmt.Errorf("%s", msg)}) + default: + *e = append(*e, ErrWithPosition{pos, fmt.Errorf(msg, args...)}) + } +} + +type scanner struct { + *source + dir string + errs errList + tok tok + + last int32 + + errBudget int + + c byte // Lookahead byte. + + eof bool + isClosed bool +} + +func newScanner(name string, buf []byte) *scanner { + dir, _ := filepath.Split(name) + r := &scanner{source: newSource(name, buf), errBudget: 10, dir: dir} + switch { + case len(buf) == 0: + r.eof = true + default: + r.c = buf[0] + if r.c == '\n' { + r.file.AddLine(int(r.base + r.off)) + } + } + return r +} + +func isDigit(c byte) bool { return c >= '0' && c <= '9' } +func isHexDigit(c byte) bool { return isDigit(c) || c >= 'a' && c <= 'f' || c >= 'A' && c <= 'F' } +func isIDNext(c byte) bool { return isIDFirst(c) || isDigit(c) } +func isOctalDigit(c byte) bool { return c >= '0' && c <= '7' } + +func isIDFirst(c byte) bool { + return c >= 'a' && c <= 'z' || + c >= 'A' && c <= 'Z' || + c == '_' +} + +func (s *scanner) position() token.Position { + return token.Position(s.source.file.PositionFor(mtoken.Pos(s.base+s.off), true)) +} + +func (s *scanner) pos(off int32) token.Position { + return token.Position(s.file.PositionFor(mtoken.Pos(s.base+off), true)) +} + +func (s *scanner) token() Token { + return Token{source: s.source, ch: s.tok.ch, index: int32(len(s.toks) - 1)} +} + +func (s *scanner) err(off int32, msg string, args ...interface{}) { + if s.errBudget <= 0 { + s.close() + return + } + + s.errBudget-- + if n := int32(len(s.buf)); off >= n { + off = n + } + s.errs.err(s.pos(off), msg, args...) +} + +func (s *scanner) close() { + if s.isClosed { + return + } + + s.tok.ch = int32(ILLEGAL) + s.eof = true + s.isClosed = true +} + +func (s *scanner) next() { + if s.eof { + return + } + + s.off++ + if int(s.off) == len(s.buf) { + s.c = 0 + s.eof = true + return + } + + s.c = s.buf[s.off] + if s.c == '\n' { + s.file.AddLine(int(s.base + s.off)) + } +} + +func (s *scanner) nextN(n int) { + if int(s.off) == len(s.buf)-n { + s.c = 0 + s.eof = true + return + } + + s.off += int32(n) + s.c = s.buf[s.off] + if s.c == '\n' { + s.file.AddLine(int(s.base + s.off)) + } +} + +func (s *scanner) scan() (r bool) { + if s.isClosed { + return false + } + + s.last = s.tok.ch + s.tok.sep = s.off + s.tok.ch = -1 + for { + if r = s.scan0(); !r || s.tok.ch >= 0 { + s.toks = append(s.toks, s.tok) + // trc("", dump(s.token())) + return r + } + } +} + +func (s *scanner) scan0() (r bool) { + s.tok.src = mathutil.MinInt32(s.off, int32(len(s.buf))) + switch s.c { + case ' ', '\t', '\r', '\n': + // White space, formed from spaces (U+0020), horizontal tabs (U+0009), carriage + // returns (U+000D), and newlines (U+000A), is ignored except as it separates + // tokens that would otherwise combine into a single token. + if s.c == '\n' && s.injectSemi() { + return true + } + + s.next() + return true + case '/': + off := s.off + s.next() + switch s.c { + case '=': + s.next() + s.tok.ch = int32(QUO_ASSIGN) + case '/': + // Line comments start with the character sequence // and stop at the end of + // the line. + s.next() + s.lineComment(off) + return true + case '*': + // General comments start with the character sequence /* and stop with the + // first subsequent character sequence */. + s.next() + s.generalComment(off) + return true + default: + s.tok.ch = int32(QUO) + } + case '(': + s.tok.ch = int32(LPAREN) + s.next() + case ')': + s.tok.ch = int32(RPAREN) + s.next() + case '[': + s.tok.ch = int32(LBRACK) + s.next() + case ']': + s.tok.ch = int32(RBRACK) + s.next() + case '{': + s.tok.ch = int32(LBRACE) + s.next() + case '}': + s.tok.ch = int32(RBRACE) + s.next() + case ',': + s.tok.ch = int32(COMMA) + s.next() + case ';': + s.tok.ch = int32(SEMICOLON) + s.next() + case '~': + s.tok.ch = int32(TILDE) + s.next() + case '"': + off := s.off + s.next() + s.stringLiteral(off) + case '\'': + off := s.off + s.next() + s.runeLiteral(off) + case '`': + s.next() + for { + switch { + case s.c == '`': + s.next() + s.tok.ch = int32(STRING) + return true + case s.eof: + s.err(s.off, "raw string literal not terminated") + s.tok.ch = int32(STRING) + return true + case s.c == 0: + panic(todo("%v: %#U", s.position(), s.c)) + default: + s.next() + } + } + case '.': + s.next() + off := s.off + if isDigit(s.c) { + s.dot(false, true) + return true + } + + if s.c != '.' { + s.tok.ch = int32(PERIOD) + return true + } + + s.next() + if s.c != '.' { + s.off = off + s.c = '.' + s.tok.ch = int32(PERIOD) + return true + } + + s.next() + s.tok.ch = int32(ELLIPSIS) + return true + case '%': + s.next() + switch s.c { + case '=': + s.next() + s.tok.ch = int32(REM_ASSIGN) + default: + s.tok.ch = int32(REM) + } + case '*': + s.next() + switch s.c { + case '=': + s.next() + s.tok.ch = int32(MUL_ASSIGN) + default: + s.tok.ch = int32(MUL) + } + case '^': + s.next() + switch s.c { + case '=': + s.next() + s.tok.ch = int32(XOR_ASSIGN) + default: + s.tok.ch = int32(XOR) + } + case '+': + s.next() + switch s.c { + case '+': + s.next() + s.tok.ch = int32(INC) + case '=': + s.next() + s.tok.ch = int32(ADD_ASSIGN) + default: + s.tok.ch = int32(ADD) + } + case '-': + s.next() + switch s.c { + case '-': + s.next() + s.tok.ch = int32(DEC) + case '=': + s.next() + s.tok.ch = int32(SUB_ASSIGN) + default: + s.tok.ch = int32(SUB) + } + case ':': + s.next() + switch { + case s.c == '=': + s.next() + s.tok.ch = int32(DEFINE) + default: + s.tok.ch = int32(COLON) + } + case '=': + s.next() + switch { + case s.c == '=': + s.next() + s.tok.ch = int32(EQL) + default: + s.tok.ch = int32(ASSIGN) + } + case '!': + s.next() + switch { + case s.c == '=': + s.next() + s.tok.ch = int32(NEQ) + default: + s.tok.ch = int32(NOT) + } + case '>': + s.next() + switch s.c { + case '=': + s.next() + s.tok.ch = int32(GEQ) + case '>': + s.next() + switch s.c { + case '=': + s.next() + s.tok.ch = int32(SHR_ASSIGN) + default: + s.tok.ch = int32(SHR) + } + default: + s.tok.ch = int32(GTR) + } + case '<': + s.next() + switch s.c { + case '=': + s.next() + s.tok.ch = int32(LEQ) + case '<': + s.next() + switch s.c { + case '=': + s.next() + s.tok.ch = int32(SHL_ASSIGN) + default: + s.tok.ch = int32(SHL) + } + case '-': + s.next() + s.tok.ch = int32(ARROW) + default: + s.tok.ch = int32(LSS) + } + case '|': + s.next() + switch s.c { + case '|': + s.next() + s.tok.ch = int32(LOR) + case '=': + s.next() + s.tok.ch = int32(OR_ASSIGN) + default: + s.tok.ch = int32(OR) + } + case '&': + s.next() + switch s.c { + case '&': + s.next() + s.tok.ch = int32(LAND) + case '^': + s.next() + switch s.c { + case '=': + s.next() + s.tok.ch = int32(AND_NOT_ASSIGN) + default: + s.tok.ch = int32(AND_NOT) + } + case '=': + s.next() + s.tok.ch = int32(AND_ASSIGN) + default: + s.tok.ch = int32(AND) + } + default: + switch { + case isIDFirst(s.c): + s.next() + s.identifierOrKeyword() + case isDigit(s.c): + s.numericLiteral() + case s.c >= 0x80: + off := s.off + switch r := s.rune(); { + case unicode.IsLetter(r): + s.identifierOrKeyword() + case r == 0xfeff: + if off == 0 { // Ignore BOM, but only at buffer start. + return true + } + + s.err(off, "illegal byte order mark") + s.tok.ch = int32(ILLEGAL) + default: + s.err(s.off, "illegal character %#U", r) + s.tok.ch = int32(ILLEGAL) + } + case s.eof: + if s.injectSemi() { + return true + } + + s.close() + s.tok.ch = int32(EOF) + s.tok.sep = mathutil.MinInt32(s.tok.sep, s.tok.src) + return false + // case s.c == 0: + // panic(todo("%v: %#U", s.position(), s.c)) + default: + s.err(s.off, "illegal character %#U", s.c) + s.next() + s.tok.ch = int32(ILLEGAL) + } + } + return true +} + +func (s *scanner) runeLiteral(off int32) { + // Leading ' consumed. + ok := 0 + s.tok.ch = int32(CHAR) + expOff := int32(-1) + if s.eof { + s.err(off, "rune literal not terminated") + return + } + + for { + switch s.c { + case '\\': + ok++ + s.next() + switch s.c { + case '\'', '\\', 'a', 'b', 'f', 'n', 'r', 't', 'v': + s.next() + case 'x', 'X': + s.next() + for i := 0; i < 2; i++ { + if s.c == '\'' { + if i != 2 { + s.err(s.off, "illegal character %#U in escape sequence", s.c) + } + s.next() + return + } + + if !isHexDigit(s.c) { + s.err(s.off, "illegal character %#U in escape sequence", s.c) + break + } + s.next() + } + case 'u': + s.u(4) + case 'U': + s.u(8) + default: + switch { + case s.eof: + s.err(s.base+s.off, "escape sequence not terminated") + return + case isOctalDigit(s.c): + for i := 0; i < 3; i++ { + s.next() + if s.c == '\'' { + if i != 2 { + s.err(s.off, "illegal character %#U in escape sequence", s.c) + } + s.next() + return + } + + if !isOctalDigit(s.c) { + s.err(s.off, "illegal character %#U in escape sequence", s.c) + break + } + } + default: + s.err(s.off, "unknown escape sequence") + } + } + case '\'': + s.next() + if ok != 1 { + s.err(off, "illegal rune literal") + } + return + case '\t': + s.next() + ok++ + default: + switch { + case s.eof: + switch { + case ok != 0: + s.err(expOff, "rune literal not terminated") + default: + s.err(s.base+s.off, "rune literal not terminated") + } + return + case s.c == 0: + panic(todo("%v: %#U", s.position(), s.c)) + case s.c < ' ': + ok++ + s.err(s.off, "non-printable character: %#U", s.c) + s.next() + case s.c >= 0x80: + ok++ + off := s.off + if c := s.rune(); c == 0xfeff { + s.err(off, "illegal byte order mark") + } + default: + ok++ + s.next() + } + } + if ok != 0 && expOff < 0 { + expOff = s.off + if s.eof { + expOff++ + } + } + } +} + +func (s *scanner) stringLiteral(off int32) { + // Leadind " consumed. + s.tok.ch = int32(STRING) + for { + switch { + case s.c == '"': + s.next() + return + case s.c == '\\': + s.next() + switch s.c { + case '"', '\\', 'a', 'b', 'f', 'n', 'r', 't', 'v': + s.next() + continue + case 'x', 'X': + s.next() + if !isHexDigit(s.c) { + panic(todo("%v: %#U", s.position(), s.c)) + } + + s.next() + if !isHexDigit(s.c) { + panic(todo("%v: %#U", s.position(), s.c)) + } + + s.next() + continue + case 'u': + s.u(4) + continue + case 'U': + s.u(8) + continue + default: + switch { + case isOctalDigit(s.c): + s.next() + if isOctalDigit(s.c) { + s.next() + } + if isOctalDigit(s.c) { + s.next() + } + continue + default: + s.err(off-1, "unknown escape sequence") + } + } + case s.c == '\n': + fallthrough + case s.eof: + s.err(off, "string literal not terminated") + return + case s.c == 0: + s.err(s.off, "illegal character NUL") + } + + switch { + case s.c >= 0x80: + off := s.off + if s.rune() == 0xfeff { + s.err(off, "illegal byte order mark") + } + continue + } + + s.next() + } +} + +func (s *scanner) u(n int) (r rune) { + // Leading u/U not consumed. + s.next() + off := s.off + for i := 0; i < n; i++ { + switch { + case isHexDigit(s.c): + var n rune + switch { + case s.c >= '0' && s.c <= '9': + n = rune(s.c) - '0' + case s.c >= 'a' && s.c <= 'f': + n = rune(s.c) - 'a' + 10 + case s.c >= 'A' && s.c <= 'F': + n = rune(s.c) - 'A' + 10 + } + r = 16*r + n + default: + switch { + case s.eof: + s.err(s.base+s.off, "escape sequence not terminated") + default: + s.err(s.off, "illegal character %#U in escape sequence", s.c) + } + return r + } + + s.next() + } + if r < 0 || r > unicode.MaxRune || r >= 0xd800 && r <= 0xdfff { + s.err(off-1, "escape sequence is invalid Unicode code point") + } + return r +} + +func (s *scanner) identifierOrKeyword() { +out: + for { + switch { + case isIDNext(s.c): + s.next() + case s.c >= 0x80: + off := s.off + c := s.c + switch r := s.rune(); { + case unicode.IsLetter(r) || unicode.IsDigit(r): + // already consumed + default: + s.off = off + s.c = c + break out + } + case s.eof: + break out + case s.c == 0: + s.err(s.off, "illegal character NUL") + break out + default: + break out + } + } + if s.tok.ch = int32(keywords[string(s.buf[s.tok.src:s.off])]); s.tok.ch == 0 { + s.tok.ch = int32(IDENT) + } +} + +func (s *scanner) numericLiteral() { + // Leading decimal digit not consumed. + var hasHexMantissa, needFrac bool +more: + switch s.c { + case '0': + s.next() + switch s.c { + case '.': + // nop + case 'b', 'B': + s.next() + s.binaryLiteral() + return + case 'e', 'E': + s.exponent() + s.tok.ch = int32(FLOAT) + return + case 'p', 'P': + s.err(s.off, "'%c' exponent requires hexadecimal mantissa", s.c) + s.exponent() + s.tok.ch = int32(FLOAT) + return + case 'o', 'O': + s.next() + s.octalLiteral() + return + case 'x', 'X': + hasHexMantissa = true + needFrac = true + s.tok.ch = int32(INT) + s.next() + if s.c == '.' { + s.next() + s.dot(hasHexMantissa, needFrac) + return + } + + if s.hexadecimals() == 0 { + s.err(s.base+s.off, "hexadecimal literal has no digits") + return + } + + needFrac = false + case 'i': + s.next() + s.tok.ch = int32(IMAG) + return + default: + invalidOff := int32(-1) + var invalidDigit byte + for { + if s.c == '_' { + for n := 0; s.c == '_'; n++ { + if n == 1 { + s.err(s.off, "'_' must separate successive digits") + } + s.next() + } + if !isDigit(s.c) { + s.err(s.off-1, "'_' must separate successive digits") + } + } + if isOctalDigit(s.c) { + s.next() + continue + } + + if isDigit(s.c) { + if invalidOff < 0 { + invalidOff = s.off + invalidDigit = s.c + } + s.next() + continue + } + + break + } + switch s.c { + case '.', 'e', 'E', 'i': + break more + } + if isDigit(s.c) { + break more + } + if invalidOff > 0 { + s.err(invalidOff, "invalid digit '%c' in octal literal", invalidDigit) + } + s.tok.ch = int32(INT) + return + } + default: + s.decimals() + } + switch s.c { + case '.': + s.next() + s.dot(hasHexMantissa, needFrac) + case 'p', 'P': + if !hasHexMantissa { + s.err(s.off, "'%c' exponent requires hexadecimal mantissa", s.c) + } + fallthrough + case 'e', 'E': + s.exponent() + if s.c == 'i' { + s.next() + s.tok.ch = int32(IMAG) + return + } + + s.tok.ch = int32(FLOAT) + case 'i': + s.next() + s.tok.ch = int32(IMAG) + default: + s.tok.ch = int32(INT) + } +} + +func (s *scanner) octalLiteral() { + // Leading 0o consumed. + ok := false + invalidOff := int32(-1) + var invalidDigit byte + s.tok.ch = int32(INT) + for { + for n := 0; s.c == '_'; n++ { + if n == 1 { + s.err(s.off, "'_' must separate successive digits") + } + s.next() + } + switch s.c { + case '0', '1', '2', '3', '4', '5', '6', '7': + s.next() + ok = true + case '8', '9': + if invalidOff < 0 { + invalidOff = s.off + invalidDigit = s.c + } + s.next() + case '.': + s.tok.ch = int32(FLOAT) + s.err(s.off, "invalid radix point in octal literal") + s.next() + case 'e', 'E': + s.tok.ch = int32(FLOAT) + s.err(s.off, "'%c' exponent requires decimal mantissa", s.c) + s.exponent() + case 'p', 'P': + s.tok.ch = int32(FLOAT) + s.err(s.off, "'%c' exponent requires hexadecimal mantissa", s.c) + s.exponent() + default: + switch { + case !ok: + s.err(s.base+s.off, "octal literal has no digits") + case invalidOff > 0: + s.err(invalidOff, "invalid digit '%c' in octal literal", invalidDigit) + } + if s.c == 'i' { + s.next() + s.tok.ch = int32(IMAG) + } + return + } + } +} + +func (s *scanner) binaryLiteral() { + // Leading 0b consumed. + ok := false + invalidOff := int32(-1) + var invalidDigit byte + s.tok.ch = int32(INT) + for { + for n := 0; s.c == '_'; n++ { + if n == 1 { + s.err(s.off, "'_' must separate successive digits") + } + s.next() + } + switch s.c { + case '0', '1': + s.next() + ok = true + case '.': + s.tok.ch = int32(FLOAT) + s.err(s.off, "invalid radix point in binary literal") + s.next() + case 'e', 'E': + s.tok.ch = int32(FLOAT) + s.err(s.off, "'%c' exponent requires decimal mantissa", s.c) + s.exponent() + case 'p', 'P': + s.tok.ch = int32(FLOAT) + s.err(s.off, "'%c' exponent requires hexadecimal mantissa", s.c) + s.exponent() + default: + if isDigit(s.c) { + if invalidOff < 0 { + invalidOff = s.off + invalidDigit = s.c + } + s.next() + continue + } + + switch { + case !ok: + s.err(s.base+s.off, "binary literal has no digits") + case invalidOff > 0: + s.err(invalidOff, "invalid digit '%c' in binary literal", invalidDigit) + } + if s.c == 'i' { + s.next() + s.tok.ch = int32(IMAG) + } + return + } + } +} + +func (s *scanner) generalComment(off int32) (injectSemi bool) { + // Leading /* consumed + off0 := s.off - 2 + var nl bool + for { + switch { + case s.c == '*': + s.next() + switch s.c { + case '/': + s.lineInfo(off0, s.off+1) + s.next() + if nl { + return s.injectSemi() + } + + return false + } + case s.c == '\n': + nl = true + s.next() + case s.eof: + s.tok.ch = 0 + s.err(off, "comment not terminated") + return true + case s.c == 0: + panic(todo("%v: %#U", s.position(), s.c)) + default: + s.next() + } + } +} + +func (s *scanner) lineComment(off int32) (injectSemi bool) { + // Leading // consumed + off0 := s.off - 2 + for { + switch { + case s.c == '\n': + s.lineInfo(off0, s.off+1) + if s.injectSemi() { + return true + } + + s.next() + return false + case s.c >= 0x80: + if c := s.rune(); c == 0xfeff { + s.err(off+2, "illegal byte order mark") + } + case s.eof: + s.off++ + if s.injectSemi() { + return true + } + + return false + case s.c == 0: + return false + default: + s.next() + } + } +} + +func (s *scanner) lineInfo(off, next int32) { + if off != 0 && s.buf[off+1] != '*' && s.buf[off-1] != '\n' && s.buf[off-1] != '\r' { + return + } + + str := s.buf[off:next] + if !bytes.HasPrefix(str[len("//"):], lineCommentTag) { + return + } + + switch { + case str[1] == '*': + str = str[:len(str)-len("*/")] + default: + str = str[:len(str)-len("\n")] + } + str = str[len("//"):] + + str, ln, ok := s.lineInfoNum(str[len("line "):]) + col := 0 + if ok == liBadNum || ok == liNoNum { + return + } + + hasCol := false + var n int + if str, n, ok = s.lineInfoNum(str); ok == liBadNum { + return + } + + if ok != liNoNum { + col = ln + ln = n + hasCol = true + } + + fn := strings.TrimSpace(string(str)) + switch { + case fn == "" && hasCol: + fn = s.pos(off).Filename + case fn != "": + fn = filepath.Clean(fn) + if !filepath.IsAbs(fn) { + fn = filepath.Join(s.dir, fn) + } + } + // trc("set %v %q %v %v", next, fn, ln, col) + s.file.AddLineColumnInfo(int(next), fn, ln, col) +} + +const ( + liNoNum = iota + liBadNum + liOK +) + +func (s *scanner) lineInfoNum(str []byte) (_ []byte, n, r int) { + // trc("==== %q", str) + x := len(str) - 1 + if x < 0 || !isDigit(str[x]) { + return str, 0, liNoNum + } + + mul := 1 + for x > 0 && isDigit(str[x]) { + n += mul * (int(str[x]) - '0') + mul *= 10 + x-- + if n < 0 { + return str, 0, liBadNum + } + } + if x < 0 || str[x] != ':' { + return str, 0, liBadNum + } + + // trc("---- %q %v %v", str[:x], n, liOK) + return str[:x], n, liOK +} + +func (s *scanner) rune() rune { + switch r, sz := utf8.DecodeRune(s.buf[s.off:]); { + case r == utf8.RuneError && sz == 0: + panic(todo("%v: %#U", s.position(), s.c)) + case r == utf8.RuneError && sz == 1: + s.err(s.off, "illegal UTF-8 encoding") + s.next() + return r + default: + s.nextN(sz) + return r + } +} + +func (s *scanner) dot(hasHexMantissa, needFrac bool) { + // '.' already consumed + switch { + case hasHexMantissa: + if s.hexadecimals() == 0 && needFrac { + s.err(s.off, "hexadecimal literal has no digits") + } + switch s.c { + case 'p', 'P': + // ok + default: + s.err(s.off, "hexadecimal mantissa requires a 'p' exponent") + } + default: + if s.decimals() == 0 && needFrac { + panic(todo("%v: %#U", s.position(), s.c)) + } + } + switch s.c { + case 'p', 'P': + if !hasHexMantissa { + s.err(s.off, "'%c' exponent requires hexadecimal mantissa", s.c) + } + fallthrough + case 'e', 'E': + s.exponent() + if s.c == 'i' { + s.next() + s.tok.ch = int32(IMAG) + return + } + + s.tok.ch = int32(FLOAT) + case 'i': + s.next() + s.tok.ch = int32(IMAG) + default: + s.tok.ch = int32(FLOAT) + } +} + +func (s *scanner) exponent() { + // Leanding e or E not consumed. + s.next() + switch s.c { + case '+', '-': + s.next() + } + if !isDigit(s.c) { + s.err(s.base+s.off, "exponent has no digits") + return + } + + s.decimals() +} + +func (s *scanner) decimals() (r int) { + first := true + for { + switch { + case isDigit(s.c): + first = false + s.next() + r++ + case s.c == '_': + for n := 0; s.c == '_'; n++ { + if first || n == 1 { + s.err(s.off, "'_' must separate successive digits") + } + s.next() + } + if !isDigit(s.c) { + s.err(s.off-1, "'_' must separate successive digits") + } + default: + return r + } + } +} + +func (s *scanner) hexadecimals() (r int) { + for { + switch { + case isHexDigit(s.c): + s.next() + r++ + case s.c == '_': + for n := 0; s.c == '_'; n++ { + if n == 1 { + s.err(s.off, "'_' must separate successive digits") + } + s.next() + } + if !isHexDigit(s.c) { + s.err(s.off-1, "'_' must separate successive digits") + } + default: + return r + } + } +} + +// When the input is broken into tokens, a semicolon is automatically inserted +// into the token stream immediately after a line's final token if that token +// is +// +// - an identifier +// - an integer, floating-point, imaginary, rune, or string literal +// - one of the keywords break, continue, fallthrough, or return +// - one of the operators and punctuation ++, --, ), ], or } +func (s *scanner) injectSemi() bool { + switch token.Token(s.last) { + case + IDENT, INT, FLOAT, IMAG, CHAR, STRING, + BREAK, CONTINUE, FALLTHROUGH, RETURN, + INC, DEC, RPAREN, RBRACK, RBRACE: + + s.tok.ch = int32(SEMICOLON) + s.last = 0 + if s.c == '\n' { + s.next() + } + return true + } + + s.last = 0 + return false +} -- cgit 1.4.1