/* AMU: Custom simple markup language Copyright (C) 2021 Arsen Musayelyan This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ // Package parser provides a parser for AMU source code package parser import ( "io" "strings" "go.arsenm.dev/amu/ast" "go.arsenm.dev/amu/internal/stack" "go.arsenm.dev/amu/scanner" ) // Parser parses tokens from scanner.Scanner into an AST type Parser struct { s *scanner.Scanner buf *buffer } // New creates a new parser using a new scanner with the // provided reader func New(r io.Reader) *Parser { return &Parser{s: scanner.New(r), buf: newBuffer()} } // NewWithScanner creates a new parser using the provided scanner func NewWithScanner(s *scanner.Scanner) *Parser { return &Parser{s: s, buf: newBuffer()} } // scan scans a single token from the underlying scanner.Scanner func (p *Parser) scan() (scanner.Token, string) { // If at least one unscan if p.buf.unscans > 0 { // Get token and literal from buffer tok, lit := p.buf.pop() // Decrement unscans p.buf.unscans-- // Return buffered token and literal return tok, lit } // Scan token and literal from scanner tok, lit := p.s.Scan() // Add token and literal to buffer p.buf.push(tok, lit) // Return token and literal return tok, lit } // unscan unscans a single token func (p *Parser) unscan() { // Increment unscans p.buf.unscans++ } // unscanMulti unscans multiple tokens func (p *Parser) unscanMulti(amt int) { // Increment unscans by amt p.buf.unscans += amt } // buffer stores tokens and literals for unreads type buffer struct { tok *stack.Stack lit *stack.Stack unscans int } // newBuffer creates a new buffer, initializing stacks func newBuffer() *buffer { return &buffer{ tok: stack.New(), lit: stack.New(), } } // push adds a token and literal to the buffer func (b *buffer) push(tok scanner.Token, lit string) { // Add token to buffer b.tok.Push(tok) // Add literal to buffer b.lit.Push(lit) } // pop removes a token from the buffer and returns it func (b *buffer) pop() (scanner.Token, string) { if b.tok.Size() > 0 { tok := b.tok.Pop() lit := b.lit.Pop() return tok.(scanner.Token), lit.(string) } return scanner.EOF, "" } // Parse parses the input into an AST func (p *Parser) Parse() (*ast.AST, error) { // Create new AST AST := &ast.AST{} parseLoop: for { // Scan token tok, lit := p.scan() switch tok { case scanner.HEADING: p.unscan() // Attempt to parse heading heading := p.parseHeading() // If successful if heading != nil { // Add heading to the AST AST.Entries = append(AST.Entries, ast.Entry{Heading: heading}) } case scanner.WS, scanner.WORD, scanner.FORMAT, scanner.PUNCT: if tok == scanner.PUNCT && lit == "!" { // Attempt to parse image img := p.parseImage() // If successful if img != nil { // Add image to AST AST.Entries = append(AST.Entries, ast.Entry{Image: img}) // Continue to next token continue } } else if tok == scanner.WORD && lit == "=list" { // Attempt to parse list list := p.parseList(tok, lit) // If successful if list != nil { // Add list to AST AST.Entries = append(AST.Entries, ast.Entry{List: list}) // Continue to next token continue } } else if tok == scanner.WORD && lit == "=code" { // Attempt to parse code code := p.parseCode(tok, lit) // If successful if code != nil { // Add code to AST AST.Entries = append(AST.Entries, ast.Entry{Code: code}) // Continue to next token continue } } else if tok == scanner.PUNCT && lit == "-" { // Scan token tok, lit = p.scan() // If token is not PUNCT or literal is not "-" if tok != scanner.PUNCT || lit != "-" { // Unscan token p.unscan() // Continue parsing next token continue } // Scan token tok, lit = p.scan() // If token is not PUNCT or literal is not "-" if tok != scanner.PUNCT || lit != "-" { // Unscan two tokens p.unscanMulti(2) // Continue parsing next token continue } // Add Hline to AST AST.Entries = append(AST.Entries, ast.Entry{Hline: &ast.Hline{}}) // Continue parsing next token continue } // Unscan token as it will be needed for parsing para p.unscan() // Attempt to parse paragraph until 2 newlines encountered para := p.parsePara(2) if para != nil { AST.Entries = append(AST.Entries, ast.Entry{Para: para}) } case scanner.EOL: // If 2 or more newlines encountered if strings.Count(lit, "\n") >= 2 { // Add break to AST AST.Entries = append(AST.Entries, ast.Entry{Break: &ast.Break{}}) } case scanner.EOF: // Stop parsing break parseLoop } } // Return filled AST return AST, nil }