amu/parser/parser.go

212 lines
5.3 KiB
Go

/*
AMU: Custom simple markup language
Copyright (C) 2021 Arsen Musayelyan
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
// Package parser provides a parser for AMU source code
package parser
import (
"io"
"strings"
"go.arsenm.dev/amu/ast"
"go.arsenm.dev/amu/internal/stack"
"go.arsenm.dev/amu/scanner"
)
// Parser parses tokens from scanner.Scanner into an AST.
// Tokens are read through scan, and already-read tokens can be
// handed back with unscan/unscanMulti, which are served from buf.
type Parser struct {
// s is the scanner that fresh tokens are read from
s *scanner.Scanner
// buf records scanned tokens and the number of pending unscans
buf *buffer
}
// New wraps the provided reader in a freshly created
// scanner.Scanner and returns a parser that reads from it.
func New(r io.Reader) *Parser {
	return NewWithScanner(scanner.New(r))
}
// NewWithScanner returns a parser that reads its tokens from the
// provided scanner. The parser starts with an empty token buffer.
func NewWithScanner(s *scanner.Scanner) *Parser {
	p := new(Parser)
	p.s = s
	p.buf = newBuffer()
	return p
}
// scan returns the next token and its literal.
//
// While unscans are pending, tokens are replayed from the buffer in
// the order in which they were originally scanned. Replayed tokens
// stay in the buffer, so they can be unscanned again later. When no
// unscans are pending, a fresh token is read from the underlying
// scanner.Scanner and recorded in the buffer.
//
// If more unscans were requested than tokens have been recorded, the
// surplus is dropped: replay starts at the oldest recorded token, and
// with an empty buffer the token is read from the scanner instead.
func (p *Parser) scan() (scanner.Token, string) {
	if p.buf.unscans > 0 {
		// Lift the pending tokens off the buffer, newest first.
		// Stop early if the buffer holds fewer tokens than requested.
		var toks []scanner.Token
		var lits []string
		for len(toks) < p.buf.unscans && p.buf.tok.Size() > 0 {
			tok, lit := p.buf.pop()
			toks = append(toks, tok)
			lits = append(lits, lit)
		}

		// Put everything back in its original order so that the
		// buffer still holds every token scanned so far.
		for i := len(toks) - 1; i >= 0; i-- {
			p.buf.push(toks[i], lits[i])
		}

		// The last token lifted is the oldest pending one, which is
		// the one that has to be replayed first.
		if oldest := len(toks) - 1; oldest >= 0 {
			// Everything newer than the replayed token is still pending
			p.buf.unscans = oldest
			return toks[oldest], lits[oldest]
		}

		// Nothing was recorded, so there is nothing to replay
		p.buf.unscans = 0
	}

	// Read a fresh token and record it for later unscans
	tok, lit := p.s.Scan()
	p.buf.push(tok, lit)
	return tok, lit
}
// unscan hands a single token back, so that the next call to scan
// takes its token from the buffer rather than from the scanner.
func (p *Parser) unscan() {
	p.unscanMulti(1)
}
// unscanMulti hands amt tokens back at once by raising the pending
// unscan count by amt. The count is not checked against the number
// of tokens held in the buffer.
func (p *Parser) unscanMulti(amt int) {
	pending := p.buf.unscans
	p.buf.unscans = pending + amt
}
// buffer stores tokens and literals for unreads.
// push and pop always operate on tok and lit together, so the two
// stacks stay the same size and their entries correspond pairwise.
type buffer struct {
// tok holds the scanned tokens
tok *stack.Stack
// lit holds the literal belonging to each token in tok
lit *stack.Stack
// unscans is the number of pending unscans; while it is above
// zero, Parser.scan takes tokens from the buffer instead of
// reading from the scanner
unscans int
}
// newBuffer returns an empty buffer whose token and literal stacks
// are ready for use and which has no pending unscans.
func newBuffer() *buffer {
	b := new(buffer)
	b.tok = stack.New()
	b.lit = stack.New()
	return b
}
// push records a token together with its literal. Both stacks
// receive exactly one entry, which keeps them in step.
func (b *buffer) push(tok scanner.Token, lit string) {
	b.lit.Push(lit)
	b.tok.Push(tok)
}
// pop removes the most recently pushed token and its literal from
// the buffer and returns them. If the token stack is empty, it
// returns scanner.EOF and an empty literal instead.
func (b *buffer) pop() (scanner.Token, string) {
	// Nothing buffered
	if b.tok.Size() < 1 {
		return scanner.EOF, ""
	}

	// Take one entry off each stack, then convert both to their
	// concrete types
	rawTok, rawLit := b.tok.Pop(), b.lit.Pop()
	return rawTok.(scanner.Token), rawLit.(string)
}
// Parse reads tokens until EOF and collects the parsed entries
// into an AST.
//
// The first token of each entry decides what is attempted:
//   - HEADING: a heading
//   - PUNCT "!": an image
//   - WORD "=list": a list
//   - WORD "=code": a code block
//   - three consecutive PUNCT "-": a horizontal line
//   - EOL whose literal contains two or more newlines: a break
//
// Any other WS, WORD, FORMAT or PUNCT token starts a paragraph, as
// does an image, list or code attempt that yields nil. The returned
// error is always nil.
func (p *Parser) Parse() (*ast.AST, error) {
	tree := &ast.AST{}

	for {
		tok, lit := p.scan()

		switch tok {
		case scanner.HEADING:
			// Hand the token back before calling parseHeading
			// (presumably parseHeading reads it again itself)
			p.unscan()
			if heading := p.parseHeading(); heading != nil {
				tree.Entries = append(tree.Entries, ast.Entry{Heading: heading})
			}

		case scanner.WS, scanner.WORD, scanner.FORMAT, scanner.PUNCT:
			// Try the special constructs first. Each one that
			// succeeds moves straight on to the next token.
			switch {
			case tok == scanner.PUNCT && lit == "!":
				if img := p.parseImage(); img != nil {
					tree.Entries = append(tree.Entries, ast.Entry{Image: img})
					continue
				}

			case tok == scanner.WORD && lit == "=list":
				if list := p.parseList(tok, lit); list != nil {
					tree.Entries = append(tree.Entries, ast.Entry{List: list})
					continue
				}

			case tok == scanner.WORD && lit == "=code":
				if code := p.parseCode(tok, lit); code != nil {
					tree.Entries = append(tree.Entries, ast.Entry{Code: code})
					continue
				}

			case tok == scanner.PUNCT && lit == "-":
				// A horizontal line needs two more dashes.
				// NOTE(review): when they do not follow, only the
				// tokens read after the leading "-" are unscanned;
				// the leading "-" itself is never handed back.
				// Confirm that dropping it is intended.
				if second, secondLit := p.scan(); !(second == scanner.PUNCT && secondLit == "-") {
					// Hand back the token that was not a dash
					p.unscan()
					continue
				}
				if third, thirdLit := p.scan(); !(third == scanner.PUNCT && thirdLit == "-") {
					// Hand back the last two tokens
					p.unscanMulti(2)
					continue
				}
				tree.Entries = append(tree.Entries, ast.Entry{Hline: &ast.Hline{}})
				continue
			}

			// Nothing special matched or the attempt yielded nil:
			// hand the token back so it becomes part of a paragraph.
			// The argument 2 is presumably the number of newlines
			// that ends the paragraph.
			p.unscan()
			if para := p.parsePara(2); para != nil {
				tree.Entries = append(tree.Entries, ast.Entry{Para: para})
			}

		case scanner.EOL:
			// Only a run of at least two newlines counts as a break
			if strings.Count(lit, "\n") > 1 {
				tree.Entries = append(tree.Entries, ast.Entry{Break: &ast.Break{}})
			}

		case scanner.EOF:
			// Input exhausted
			return tree, nil
		}
	}
}