// Copyright 2022 The Gc Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package gc // import "modernc.org/gc/v3" import ( "bytes" "fmt" "go/token" "path/filepath" "strings" "unicode" "unicode/utf8" "modernc.org/mathutil" mtoken "modernc.org/token" ) var ( _ Node = (*Token)(nil) _ Node = (*nonode)(nil) keywords = map[string]token.Token{ "break": BREAK, "case": CASE, "chan": CHAN, "const": CONST, "continue": CONTINUE, "default": DEFAULT, "defer": DEFER, "else": ELSE, "fallthrough": FALLTHROUGH, "for": FOR, "func": FUNC, "go": GO, "goto": GOTO, "if": IF, "import": IMPORT, "interface": INTERFACE, "map": MAP, "package": PACKAGE, "range": RANGE, "return": RETURN, "select": SELECT, "struct": STRUCT, "switch": SWITCH, "type": TYPE, "var": VAR, } lineCommentTag = []byte("line ") znode = &nonode{} ) type nonode struct{} func (*nonode) Position() (r token.Position) { return r } func (*nonode) Source(full bool) string { return "" } // Token represents a lexeme, its position and its semantic value. type Token struct { // 16 bytes on 64 bit arch source *source ch int32 index int32 } // Ch returns which token t represents func (t Token) Ch() token.Token { return token.Token(t.ch) } // Source implements Node. func (t Token) Source(full bool) string { // trc("%10s %v: #%v sep %v, src %v, buf %v", tokSource(t.Ch()), t.Position(), t.index, t.source.toks[t.index].sep, t.source.toks[t.index].src, len(t.source.buf)) sep := t.Sep() if !full && sep != "" { sep = " " } src := t.Src() if !full && strings.ContainsRune(src, '\n') { src = " " } // trc("%q %q -> %q %q", t.Sep(), t.Src(), sep, src) return sep + src } // Positions implements Node. func (t Token) Position() (r token.Position) { if t.source == nil { return r } s := t.source off := mathutil.MinInt32(int32(len(s.buf)), s.toks[t.index].src) return token.Position(s.file.PositionFor(mtoken.Pos(s.base+off), true)) } // Prev returns the token preceding t or a zero value if no such token exists. func (t Token) Prev() (r Token) { if index := t.index - 1; index >= 0 { s := t.source return Token{source: s, ch: s.toks[index].ch, index: index} } return r } // Next returns the token following t or a zero value if no such token exists. func (t Token) Next() (r Token) { if index := t.index + 1; index < int32(len(t.source.toks)) { s := t.source return Token{source: s, ch: s.toks[index].ch, index: index} } return r } // Sep returns any separators, combined, preceding t. func (t Token) Sep() string { s := t.source if p, ok := s.sepPatches[t.index]; ok { return p } return string(s.buf[s.toks[t.index].sep:s.toks[t.index].src]) } // SetSep sets t's separator. func (t Token) SetSep(s string) { src := t.source if src.sepPatches == nil { src.sepPatches = map[int32]string{} } src.sepPatches[t.index] = s } // Src returns t's source form. func (t Token) Src() string { s := t.source if p, ok := s.srcPatches[t.index]; ok { return p } if t.ch != int32(EOF) { next := t.source.off if t.index < int32(len(s.toks))-1 { next = s.toks[t.index+1].sep } return string(s.buf[s.toks[t.index].src:next]) } return "" } // SetSrc sets t's source form. func (t Token) SetSrc(s string) { src := t.source if src.srcPatches == nil { src.srcPatches = map[int32]string{} } src.srcPatches[t.index] = s } // IsValid reports t is a valid token. Zero value reports false. func (t Token) IsValid() bool { return t.source != nil } type tok struct { // 12 bytes ch int32 sep int32 src int32 } func (t *tok) token() token.Token { return token.Token(t.ch) } func (t *tok) position(s *source) (r token.Position) { off := mathutil.MinInt32(int32(len(s.buf)), t.src) return token.Position(s.file.PositionFor(mtoken.Pos(s.base+off), true)) } // source represents a single Go source file, editor text buffer etc. type source struct { buf []byte file *mtoken.File name string sepPatches map[int32]string srcPatches map[int32]string toks []tok base int32 off int32 } // 'buf' becomes owned by the result and must not be modified afterwards. func newSource(name string, buf []byte) *source { file := mtoken.NewFile(name, len(buf)) return &source{ buf: buf, file: file, name: name, base: int32(file.Base()), } } type ErrWithPosition struct { pos token.Position err error } func (e ErrWithPosition) String() string { switch { case e.pos.IsValid(): return fmt.Sprintf("%v: %v", e.pos, e.err) default: return fmt.Sprintf("%v", e.err) } } type errList []ErrWithPosition func (e errList) Err() (r error) { if len(e) == 0 { return nil } return e } func (e errList) Error() string { w := 0 prev := ErrWithPosition{pos: token.Position{Offset: -1}} for _, v := range e { if v.pos.Line == 0 || v.pos.Offset != prev.pos.Offset || v.err.Error() != prev.err.Error() { e[w] = v w++ prev = v } } var a []string for _, v := range e { a = append(a, fmt.Sprint(v)) } return strings.Join(a, "\n") } func (e *errList) err(pos token.Position, msg string, args ...interface{}) { if trcErrors { trc("FAIL "+msg, args...) } switch { case len(args) == 0: *e = append(*e, ErrWithPosition{pos, fmt.Errorf("%s", msg)}) default: *e = append(*e, ErrWithPosition{pos, fmt.Errorf(msg, args...)}) } } type scanner struct { *source dir string errs errList tok tok last int32 errBudget int c byte // Lookahead byte. eof bool isClosed bool } func newScanner(name string, buf []byte) *scanner { dir, _ := filepath.Split(name) r := &scanner{source: newSource(name, buf), errBudget: 10, dir: dir} switch { case len(buf) == 0: r.eof = true default: r.c = buf[0] if r.c == '\n' { r.file.AddLine(int(r.base + r.off)) } } return r } func isDigit(c byte) bool { return c >= '0' && c <= '9' } func isHexDigit(c byte) bool { return isDigit(c) || c >= 'a' && c <= 'f' || c >= 'A' && c <= 'F' } func isIDNext(c byte) bool { return isIDFirst(c) || isDigit(c) } func isOctalDigit(c byte) bool { return c >= '0' && c <= '7' } func isIDFirst(c byte) bool { return c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || c == '_' } func (s *scanner) position() token.Position { return token.Position(s.source.file.PositionFor(mtoken.Pos(s.base+s.off), true)) } func (s *scanner) pos(off int32) token.Position { return token.Position(s.file.PositionFor(mtoken.Pos(s.base+off), true)) } func (s *scanner) token() Token { return Token{source: s.source, ch: s.tok.ch, index: int32(len(s.toks) - 1)} } func (s *scanner) err(off int32, msg string, args ...interface{}) { if s.errBudget <= 0 { s.close() return } s.errBudget-- if n := int32(len(s.buf)); off >= n { off = n } s.errs.err(s.pos(off), msg, args...) } func (s *scanner) close() { if s.isClosed { return } s.tok.ch = int32(ILLEGAL) s.eof = true s.isClosed = true } func (s *scanner) next() { if s.eof { return } s.off++ if int(s.off) == len(s.buf) { s.c = 0 s.eof = true return } s.c = s.buf[s.off] if s.c == '\n' { s.file.AddLine(int(s.base + s.off)) } } func (s *scanner) nextN(n int) { if int(s.off) == len(s.buf)-n { s.c = 0 s.eof = true return } s.off += int32(n) s.c = s.buf[s.off] if s.c == '\n' { s.file.AddLine(int(s.base + s.off)) } } func (s *scanner) scan() (r bool) { if s.isClosed { return false } s.last = s.tok.ch s.tok.sep = s.off s.tok.ch = -1 for { if r = s.scan0(); !r || s.tok.ch >= 0 { s.toks = append(s.toks, s.tok) // trc("", dump(s.token())) return r } } } func (s *scanner) scan0() (r bool) { s.tok.src = mathutil.MinInt32(s.off, int32(len(s.buf))) switch s.c { case ' ', '\t', '\r', '\n': // White space, formed from spaces (U+0020), horizontal tabs (U+0009), carriage // returns (U+000D), and newlines (U+000A), is ignored except as it separates // tokens that would otherwise combine into a single token. if s.c == '\n' && s.injectSemi() { return true } s.next() return true case '/': off := s.off s.next() switch s.c { case '=': s.next() s.tok.ch = int32(QUO_ASSIGN) case '/': // Line comments start with the character sequence // and stop at the end of // the line. s.next() s.lineComment(off) return true case '*': // General comments start with the character sequence /* and stop with the // first subsequent character sequence */. s.next() s.generalComment(off) return true default: s.tok.ch = int32(QUO) } case '(': s.tok.ch = int32(LPAREN) s.next() case ')': s.tok.ch = int32(RPAREN) s.next() case '[': s.tok.ch = int32(LBRACK) s.next() case ']': s.tok.ch = int32(RBRACK) s.next() case '{': s.tok.ch = int32(LBRACE) s.next() case '}': s.tok.ch = int32(RBRACE) s.next() case ',': s.tok.ch = int32(COMMA) s.next() case ';': s.tok.ch = int32(SEMICOLON) s.next() case '~': s.tok.ch = int32(TILDE) s.next() case '"': off := s.off s.next() s.stringLiteral(off) case '\'': off := s.off s.next() s.runeLiteral(off) case '`': s.next() for { switch { case s.c == '`': s.next() s.tok.ch = int32(STRING) return true case s.eof: s.err(s.off, "raw string literal not terminated") s.tok.ch = int32(STRING) return true case s.c == 0: panic(todo("%v: %#U", s.position(), s.c)) default: s.next() } } case '.': s.next() off := s.off if isDigit(s.c) { s.dot(false, true) return true } if s.c != '.' { s.tok.ch = int32(PERIOD) return true } s.next() if s.c != '.' { s.off = off s.c = '.' s.tok.ch = int32(PERIOD) return true } s.next() s.tok.ch = int32(ELLIPSIS) return true case '%': s.next() switch s.c { case '=': s.next() s.tok.ch = int32(REM_ASSIGN) default: s.tok.ch = int32(REM) } case '*': s.next() switch s.c { case '=': s.next() s.tok.ch = int32(MUL_ASSIGN) default: s.tok.ch = int32(MUL) } case '^': s.next() switch s.c { case '=': s.next() s.tok.ch = int32(XOR_ASSIGN) default: s.tok.ch = int32(XOR) } case '+': s.next() switch s.c { case '+': s.next() s.tok.ch = int32(INC) case '=': s.next() s.tok.ch = int32(ADD_ASSIGN) default: s.tok.ch = int32(ADD) } case '-': s.next() switch s.c { case '-': s.next() s.tok.ch = int32(DEC) case '=': s.next() s.tok.ch = int32(SUB_ASSIGN) default: s.tok.ch = int32(SUB) } case ':': s.next() switch { case s.c == '=': s.next() s.tok.ch = int32(DEFINE) default: s.tok.ch = int32(COLON) } case '=': s.next() switch { case s.c == '=': s.next() s.tok.ch = int32(EQL) default: s.tok.ch = int32(ASSIGN) } case '!': s.next() switch { case s.c == '=': s.next() s.tok.ch = int32(NEQ) default: s.tok.ch = int32(NOT) } case '>': s.next() switch s.c { case '=': s.next() s.tok.ch = int32(GEQ) case '>': s.next() switch s.c { case '=': s.next() s.tok.ch = int32(SHR_ASSIGN) default: s.tok.ch = int32(SHR) } default: s.tok.ch = int32(GTR) } case '<': s.next() switch s.c { case '=': s.next() s.tok.ch = int32(LEQ) case '<': s.next() switch s.c { case '=': s.next() s.tok.ch = int32(SHL_ASSIGN) default: s.tok.ch = int32(SHL) } case '-': s.next() s.tok.ch = int32(ARROW) default: s.tok.ch = int32(LSS) } case '|': s.next() switch s.c { case '|': s.next() s.tok.ch = int32(LOR) case '=': s.next() s.tok.ch = int32(OR_ASSIGN) default: s.tok.ch = int32(OR) } case '&': s.next() switch s.c { case '&': s.next() s.tok.ch = int32(LAND) case '^': s.next() switch s.c { case '=': s.next() s.tok.ch = int32(AND_NOT_ASSIGN) default: s.tok.ch = int32(AND_NOT) } case '=': s.next() s.tok.ch = int32(AND_ASSIGN) default: s.tok.ch = int32(AND) } default: switch { case isIDFirst(s.c): s.next() s.identifierOrKeyword() case isDigit(s.c): s.numericLiteral() case s.c >= 0x80: off := s.off switch r := s.rune(); { case unicode.IsLetter(r): s.identifierOrKeyword() case r == 0xfeff: if off == 0 { // Ignore BOM, but only at buffer start. return true } s.err(off, "illegal byte order mark") s.tok.ch = int32(ILLEGAL) default: s.err(s.off, "illegal character %#U", r) s.tok.ch = int32(ILLEGAL) } case s.eof: if s.injectSemi() { return true } s.close() s.tok.ch = int32(EOF) s.tok.sep = mathutil.MinInt32(s.tok.sep, s.tok.src) return false // case s.c == 0: // panic(todo("%v: %#U", s.position(), s.c)) default: s.err(s.off, "illegal character %#U", s.c) s.next() s.tok.ch = int32(ILLEGAL) } } return true } func (s *scanner) runeLiteral(off int32) { // Leading ' consumed. ok := 0 s.tok.ch = int32(CHAR) expOff := int32(-1) if s.eof { s.err(off, "rune literal not terminated") return } for { switch s.c { case '\\': ok++ s.next() switch s.c { case '\'', '\\', 'a', 'b', 'f', 'n', 'r', 't', 'v': s.next() case 'x', 'X': s.next() for i := 0; i < 2; i++ { if s.c == '\'' { if i != 2 { s.err(s.off, "illegal character %#U in escape sequence", s.c) } s.next() return } if !isHexDigit(s.c) { s.err(s.off, "illegal character %#U in escape sequence", s.c) break } s.next() } case 'u': s.u(4) case 'U': s.u(8) default: switch { case s.eof: s.err(s.base+s.off, "escape sequence not terminated") return case isOctalDigit(s.c): for i := 0; i < 3; i++ { s.next() if s.c == '\'' { if i != 2 { s.err(s.off, "illegal character %#U in escape sequence", s.c) } s.next() return } if !isOctalDigit(s.c) { s.err(s.off, "illegal character %#U in escape sequence", s.c) break } } default: s.err(s.off, "unknown escape sequence") } } case '\'': s.next() if ok != 1 { s.err(off, "illegal rune literal") } return case '\t': s.next() ok++ default: switch { case s.eof: switch { case ok != 0: s.err(expOff, "rune literal not terminated") default: s.err(s.base+s.off, "rune literal not terminated") } return case s.c == 0: panic(todo("%v: %#U", s.position(), s.c)) case s.c < ' ': ok++ s.err(s.off, "non-printable character: %#U", s.c) s.next() case s.c >= 0x80: ok++ off := s.off if c := s.rune(); c == 0xfeff { s.err(off, "illegal byte order mark") } default: ok++ s.next() } } if ok != 0 && expOff < 0 { expOff = s.off if s.eof { expOff++ } } } } func (s *scanner) stringLiteral(off int32) { // Leadind " consumed. s.tok.ch = int32(STRING) for { switch { case s.c == '"': s.next() return case s.c == '\\': s.next() switch s.c { case '"', '\\', 'a', 'b', 'f', 'n', 'r', 't', 'v': s.next() continue case 'x', 'X': s.next() if !isHexDigit(s.c) { panic(todo("%v: %#U", s.position(), s.c)) } s.next() if !isHexDigit(s.c) { panic(todo("%v: %#U", s.position(), s.c)) } s.next() continue case 'u': s.u(4) continue case 'U': s.u(8) continue default: switch { case isOctalDigit(s.c): s.next() if isOctalDigit(s.c) { s.next() } if isOctalDigit(s.c) { s.next() } continue default: s.err(off-1, "unknown escape sequence") } } case s.c == '\n': fallthrough case s.eof: s.err(off, "string literal not terminated") return case s.c == 0: s.err(s.off, "illegal character NUL") } switch { case s.c >= 0x80: off := s.off if s.rune() == 0xfeff { s.err(off, "illegal byte order mark") } continue } s.next() } } func (s *scanner) u(n int) (r rune) { // Leading u/U not consumed. s.next() off := s.off for i := 0; i < n; i++ { switch { case isHexDigit(s.c): var n rune switch { case s.c >= '0' && s.c <= '9': n = rune(s.c) - '0' case s.c >= 'a' && s.c <= 'f': n = rune(s.c) - 'a' + 10 case s.c >= 'A' && s.c <= 'F': n = rune(s.c) - 'A' + 10 } r = 16*r + n default: switch { case s.eof: s.err(s.base+s.off, "escape sequence not terminated") default: s.err(s.off, "illegal character %#U in escape sequence", s.c) } return r } s.next() } if r < 0 || r > unicode.MaxRune || r >= 0xd800 && r <= 0xdfff { s.err(off-1, "escape sequence is invalid Unicode code point") } return r } func (s *scanner) identifierOrKeyword() { out: for { switch { case isIDNext(s.c): s.next() case s.c >= 0x80: off := s.off c := s.c switch r := s.rune(); { case unicode.IsLetter(r) || unicode.IsDigit(r): // already consumed default: s.off = off s.c = c break out } case s.eof: break out case s.c == 0: s.err(s.off, "illegal character NUL") break out default: break out } } if s.tok.ch = int32(keywords[string(s.buf[s.tok.src:s.off])]); s.tok.ch == 0 { s.tok.ch = int32(IDENT) } } func (s *scanner) numericLiteral() { // Leading decimal digit not consumed. var hasHexMantissa, needFrac bool more: switch s.c { case '0': s.next() switch s.c { case '.': // nop case 'b', 'B': s.next() s.binaryLiteral() return case 'e', 'E': s.exponent() s.tok.ch = int32(FLOAT) return case 'p', 'P': s.err(s.off, "'%c' exponent requires hexadecimal mantissa", s.c) s.exponent() s.tok.ch = int32(FLOAT) return case 'o', 'O': s.next() s.octalLiteral() return case 'x', 'X': hasHexMantissa = true needFrac = true s.tok.ch = int32(INT) s.next() if s.c == '.' { s.next() s.dot(hasHexMantissa, needFrac) return } if s.hexadecimals() == 0 { s.err(s.base+s.off, "hexadecimal literal has no digits") return } needFrac = false case 'i': s.next() s.tok.ch = int32(IMAG) return default: invalidOff := int32(-1) var invalidDigit byte for { if s.c == '_' { for n := 0; s.c == '_'; n++ { if n == 1 { s.err(s.off, "'_' must separate successive digits") } s.next() } if !isDigit(s.c) { s.err(s.off-1, "'_' must separate successive digits") } } if isOctalDigit(s.c) { s.next() continue } if isDigit(s.c) { if invalidOff < 0 { invalidOff = s.off invalidDigit = s.c } s.next() continue } break } switch s.c { case '.', 'e', 'E', 'i': break more } if isDigit(s.c) { break more } if invalidOff > 0 { s.err(invalidOff, "invalid digit '%c' in octal literal", invalidDigit) } s.tok.ch = int32(INT) return } default: s.decimals() } switch s.c { case '.': s.next() s.dot(hasHexMantissa, needFrac) case 'p', 'P': if !hasHexMantissa { s.err(s.off, "'%c' exponent requires hexadecimal mantissa", s.c) } fallthrough case 'e', 'E': s.exponent() if s.c == 'i' { s.next() s.tok.ch = int32(IMAG) return } s.tok.ch = int32(FLOAT) case 'i': s.next() s.tok.ch = int32(IMAG) default: s.tok.ch = int32(INT) } } func (s *scanner) octalLiteral() { // Leading 0o consumed. ok := false invalidOff := int32(-1) var invalidDigit byte s.tok.ch = int32(INT) for { for n := 0; s.c == '_'; n++ { if n == 1 { s.err(s.off, "'_' must separate successive digits") } s.next() } switch s.c { case '0', '1', '2', '3', '4', '5', '6', '7': s.next() ok = true case '8', '9': if invalidOff < 0 { invalidOff = s.off invalidDigit = s.c } s.next() case '.': s.tok.ch = int32(FLOAT) s.err(s.off, "invalid radix point in octal literal") s.next() case 'e', 'E': s.tok.ch = int32(FLOAT) s.err(s.off, "'%c' exponent requires decimal mantissa", s.c) s.exponent() case 'p', 'P': s.tok.ch = int32(FLOAT) s.err(s.off, "'%c' exponent requires hexadecimal mantissa", s.c) s.exponent() default: switch { case !ok: s.err(s.base+s.off, "octal literal has no digits") case invalidOff > 0: s.err(invalidOff, "invalid digit '%c' in octal literal", invalidDigit) } if s.c == 'i' { s.next() s.tok.ch = int32(IMAG) } return } } } func (s *scanner) binaryLiteral() { // Leading 0b consumed. ok := false invalidOff := int32(-1) var invalidDigit byte s.tok.ch = int32(INT) for { for n := 0; s.c == '_'; n++ { if n == 1 { s.err(s.off, "'_' must separate successive digits") } s.next() } switch s.c { case '0', '1': s.next() ok = true case '.': s.tok.ch = int32(FLOAT) s.err(s.off, "invalid radix point in binary literal") s.next() case 'e', 'E': s.tok.ch = int32(FLOAT) s.err(s.off, "'%c' exponent requires decimal mantissa", s.c) s.exponent() case 'p', 'P': s.tok.ch = int32(FLOAT) s.err(s.off, "'%c' exponent requires hexadecimal mantissa", s.c) s.exponent() default: if isDigit(s.c) { if invalidOff < 0 { invalidOff = s.off invalidDigit = s.c } s.next() continue } switch { case !ok: s.err(s.base+s.off, "binary literal has no digits") case invalidOff > 0: s.err(invalidOff, "invalid digit '%c' in binary literal", invalidDigit) } if s.c == 'i' { s.next() s.tok.ch = int32(IMAG) } return } } } func (s *scanner) generalComment(off int32) (injectSemi bool) { // Leading /* consumed off0 := s.off - 2 var nl bool for { switch { case s.c == '*': s.next() switch s.c { case '/': s.lineInfo(off0, s.off+1) s.next() if nl { return s.injectSemi() } return false } case s.c == '\n': nl = true s.next() case s.eof: s.tok.ch = 0 s.err(off, "comment not terminated") return true case s.c == 0: panic(todo("%v: %#U", s.position(), s.c)) default: s.next() } } } func (s *scanner) lineComment(off int32) (injectSemi bool) { // Leading // consumed off0 := s.off - 2 for { switch { case s.c == '\n': s.lineInfo(off0, s.off+1) if s.injectSemi() { return true } s.next() return false case s.c >= 0x80: if c := s.rune(); c == 0xfeff { s.err(off+2, "illegal byte order mark") } case s.eof: s.off++ if s.injectSemi() { return true } return false case s.c == 0: return false default: s.next() } } } func (s *scanner) lineInfo(off, next int32) { if off != 0 && s.buf[off+1] != '*' && s.buf[off-1] != '\n' && s.buf[off-1] != '\r' { return } str := s.buf[off:next] if !bytes.HasPrefix(str[len("//"):], lineCommentTag) { return } switch { case str[1] == '*': str = str[:len(str)-len("*/")] default: str = str[:len(str)-len("\n")] } str = str[len("//"):] str, ln, ok := s.lineInfoNum(str[len("line "):]) col := 0 if ok == liBadNum || ok == liNoNum { return } hasCol := false var n int if str, n, ok = s.lineInfoNum(str); ok == liBadNum { return } if ok != liNoNum { col = ln ln = n hasCol = true } fn := strings.TrimSpace(string(str)) switch { case fn == "" && hasCol: fn = s.pos(off).Filename case fn != "": fn = filepath.Clean(fn) if !filepath.IsAbs(fn) { fn = filepath.Join(s.dir, fn) } } // trc("set %v %q %v %v", next, fn, ln, col) s.file.AddLineColumnInfo(int(next), fn, ln, col) } const ( liNoNum = iota liBadNum liOK ) func (s *scanner) lineInfoNum(str []byte) (_ []byte, n, r int) { // trc("==== %q", str) x := len(str) - 1 if x < 0 || !isDigit(str[x]) { return str, 0, liNoNum } mul := 1 for x > 0 && isDigit(str[x]) { n += mul * (int(str[x]) - '0') mul *= 10 x-- if n < 0 { return str, 0, liBadNum } } if x < 0 || str[x] != ':' { return str, 0, liBadNum } // trc("---- %q %v %v", str[:x], n, liOK) return str[:x], n, liOK } func (s *scanner) rune() rune { switch r, sz := utf8.DecodeRune(s.buf[s.off:]); { case r == utf8.RuneError && sz == 0: panic(todo("%v: %#U", s.position(), s.c)) case r == utf8.RuneError && sz == 1: s.err(s.off, "illegal UTF-8 encoding") s.next() return r default: s.nextN(sz) return r } } func (s *scanner) dot(hasHexMantissa, needFrac bool) { // '.' already consumed switch { case hasHexMantissa: if s.hexadecimals() == 0 && needFrac { s.err(s.off, "hexadecimal literal has no digits") } switch s.c { case 'p', 'P': // ok default: s.err(s.off, "hexadecimal mantissa requires a 'p' exponent") } default: if s.decimals() == 0 && needFrac { panic(todo("%v: %#U", s.position(), s.c)) } } switch s.c { case 'p', 'P': if !hasHexMantissa { s.err(s.off, "'%c' exponent requires hexadecimal mantissa", s.c) } fallthrough case 'e', 'E': s.exponent() if s.c == 'i' { s.next() s.tok.ch = int32(IMAG) return } s.tok.ch = int32(FLOAT) case 'i': s.next() s.tok.ch = int32(IMAG) default: s.tok.ch = int32(FLOAT) } } func (s *scanner) exponent() { // Leanding e or E not consumed. s.next() switch s.c { case '+', '-': s.next() } if !isDigit(s.c) { s.err(s.base+s.off, "exponent has no digits") return } s.decimals() } func (s *scanner) decimals() (r int) { first := true for { switch { case isDigit(s.c): first = false s.next() r++ case s.c == '_': for n := 0; s.c == '_'; n++ { if first || n == 1 { s.err(s.off, "'_' must separate successive digits") } s.next() } if !isDigit(s.c) { s.err(s.off-1, "'_' must separate successive digits") } default: return r } } } func (s *scanner) hexadecimals() (r int) { for { switch { case isHexDigit(s.c): s.next() r++ case s.c == '_': for n := 0; s.c == '_'; n++ { if n == 1 { s.err(s.off, "'_' must separate successive digits") } s.next() } if !isHexDigit(s.c) { s.err(s.off-1, "'_' must separate successive digits") } default: return r } } } // When the input is broken into tokens, a semicolon is automatically inserted // into the token stream immediately after a line's final token if that token // is // // - an identifier // - an integer, floating-point, imaginary, rune, or string literal // - one of the keywords break, continue, fallthrough, or return // - one of the operators and punctuation ++, --, ), ], or } func (s *scanner) injectSemi() bool { switch token.Token(s.last) { case IDENT, INT, FLOAT, IMAG, CHAR, STRING, BREAK, CONTINUE, FALLTHROUGH, RETURN, INC, DEC, RPAREN, RBRACK, RBRACE: s.tok.ch = int32(SEMICOLON) s.last = 0 if s.c == '\n' { s.next() } return true } s.last = 0 return false }