// amu/parser/parser.go

/*
   AMU: Custom simple markup language
   Copyright (C) 2021 Arsen Musayelyan

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <https://www.gnu.org/licenses/>.
*/

// Package parser provides a parser for AMU source code
package parser

import (
	"io"
	"strings"

	"go.arsenm.dev/amu/ast"
	"go.arsenm.dev/amu/internal/stack"
	"go.arsenm.dev/amu/scanner"
)

// Parser parses tokens from scanner.Scanner into an AST.
// Tokens are read through the scan method, which records them in buf so
// that unscan and unscanMulti can make them available again
type Parser struct {
	s   *scanner.Scanner // source of tokens and literals
	buf *buffer          // scanned tokens plus the count of pending unscans
}

// New returns a Parser that reads its tokens from a freshly
// created scanner wrapping r
func New(r io.Reader) *Parser {
	p := new(Parser)
	// Scanner first, then the unscan buffer
	p.s = scanner.New(r)
	p.buf = newBuffer()
	return p
}

// NewWithScanner returns a Parser that reads its tokens from
// the scanner s, paired with an empty unscan buffer
func NewWithScanner(s *scanner.Scanner) *Parser {
	p := new(Parser)
	p.s = s
	p.buf = newBuffer()
	return p
}

// scan returns the next token and its literal.
//
// Every token read from the underlying scanner.Scanner is recorded in the
// buffer. While unscans are pending, scan replays recorded tokens instead
// of reading new ones: with n unscans pending it returns the n-th most
// recently recorded token, so replayed tokens come back in the order they
// were originally scanned. The recorded tokens are left in the buffer, so
// an unscan issued after a replay still refers to the token just returned.
//
// If more unscans are pending than tokens have been recorded, scan returns
// scanner.EOF and an empty literal for the excess
func (p *Parser) scan() (scanner.Token, string) {
	depth := p.buf.unscans
	if depth <= 0 {
		// Nothing to replay: read a fresh token and record it
		tok, lit := p.s.Scan()
		p.buf.push(tok, lit)
		return tok, lit
	}

	// This call consumes one pending unscan
	p.buf.unscans--

	// Lift the newest tokens off the buffer until the requested
	// one is reached (or the buffer runs out)
	toks := make([]scanner.Token, 0, depth)
	lits := make([]string, 0, depth)
	for len(toks) < depth && p.buf.tok.Size() > 0 {
		tok, lit := p.buf.pop()
		toks = append(toks, tok)
		lits = append(lits, lit)
	}

	// Put them back oldest first, leaving the buffer exactly as it was
	for i := len(toks) - 1; i >= 0; i-- {
		p.buf.push(toks[i], lits[i])
	}

	// Fewer tokens recorded than unscans requested
	if len(toks) < depth {
		return scanner.EOF, ""
	}

	// The last token lifted is the one depth positions back
	return toks[depth-1], lits[depth-1]
}

// unscan registers one pending unscan, so that the next call to scan
// is served from the buffer instead of the underlying scanner
func (p *Parser) unscan() {
	// Increment unscans
	p.buf.unscans++
}

// unscanMulti registers amt pending unscans at once, so that the
// next amt calls to scan are served from the buffer instead of the
// underlying scanner
func (p *Parser) unscanMulti(amt int) {
	// Increment unscans by amt
	p.buf.unscans += amt
}

// buffer stores scanned tokens and their literals so that they
// can be returned again after an unscan
type buffer struct {
	tok     *stack.Stack // scanned tokens
	lit     *stack.Stack // literals, pushed and popped together with tok
	unscans int          // number of pending unscans
}

// newBuffer returns an empty buffer whose token and literal
// stacks are ready for use and which has no pending unscans
func newBuffer() *buffer {
	b := new(buffer)
	b.tok = stack.New()
	b.lit = stack.New()
	return b
}

// push records a token and its literal in the buffer.
// Both stacks receive exactly one entry per call, which keeps
// them the same size
func (b *buffer) push(tok scanner.Token, lit string) {
	// Add token to buffer
	b.tok.Push(tok)
	// Add literal to buffer
	b.lit.Push(lit)
}

// pop removes the most recently pushed token and its literal from
// the buffer and returns them. When no tokens are stored it returns
// scanner.EOF and an empty literal, leaving the buffer untouched
func (b *buffer) pop() (scanner.Token, string) {
	// Nothing stored
	if b.tok.Size() < 1 {
		return scanner.EOF, ""
	}

	// Take one entry from each stack before converting either
	rawTok, rawLit := b.tok.Pop(), b.lit.Pop()
	return rawTok.(scanner.Token), rawLit.(string)
}

// Parse reads tokens until scanner.EOF and collects the parsed
// entries into an AST.
//
// Each scanned token is dispatched as follows:
//   - HEADING is unscanned and handed to parseHeading
//   - WS, WORD, FORMAT and PUNCT are first tried as an image ("!"),
//     a list ("=list"), code ("=code") or an Hline (three "-" tokens
//     in a row); anything else, or a failed image/list/code attempt,
//     is unscanned and parsed as a paragraph
//   - EOL adds a break when its literal holds 2 or more newlines
//   - any other token is skipped
//
// The returned error is always nil in the current implementation
func (p *Parser) Parse() (*ast.AST, error) {
	// Tree that collects the parsed entries
	tree := &ast.AST{}

	for {
		tok, lit := p.scan()

		switch tok {
		case scanner.EOF:
			// Input exhausted, hand back what was collected
			return tree, nil

		case scanner.EOL:
			// Only a run of at least 2 newlines counts as a break
			if strings.Count(lit, "\n") >= 2 {
				tree.Entries = append(tree.Entries, ast.Entry{Break: &ast.Break{}})
			}

		case scanner.HEADING:
			// Give the token back so that parseHeading sees it
			p.unscan()
			// Keep the heading only if parsing succeeded
			if heading := p.parseHeading(); heading != nil {
				tree.Entries = append(tree.Entries, ast.Entry{Heading: heading})
			}

		case scanner.WS, scanner.WORD, scanner.FORMAT, scanner.PUNCT:
			// Try the special constructs first. Each successful one
			// moves straight on to the next token; a failed image,
			// list or code attempt falls through to the paragraph
			switch {
			case tok == scanner.PUNCT && lit == "!":
				if img := p.parseImage(); img != nil {
					tree.Entries = append(tree.Entries, ast.Entry{Image: img})
					continue
				}

			case tok == scanner.WORD && lit == "=list":
				if list := p.parseList(tok, lit); list != nil {
					tree.Entries = append(tree.Entries, ast.Entry{List: list})
					continue
				}

			case tok == scanner.WORD && lit == "=code":
				if code := p.parseCode(tok, lit); code != nil {
					tree.Entries = append(tree.Entries, ast.Entry{Code: code})
					continue
				}

			case tok == scanner.PUNCT && lit == "-":
				// An Hline needs two more "-" tokens right after this one.
				// NOTE(review): on both bail-out paths below the first "-"
				// is not unscanned - confirm that dropping it is intended

				// Second token is not "-": give it back and move on
				if second, secondLit := p.scan(); second != scanner.PUNCT || secondLit != "-" {
					p.unscan()
					continue
				}

				// Third token is not "-": give back the last two tokens
				if third, thirdLit := p.scan(); third != scanner.PUNCT || thirdLit != "-" {
					p.unscanMulti(2)
					continue
				}

				// Three "-" in a row
				tree.Entries = append(tree.Entries, ast.Entry{Hline: &ast.Hline{}})
				continue
			}

			// Paragraph: the current token belongs to it, so give it back
			p.unscan()
			// Parse paragraph until 2 newlines are encountered
			if para := p.parsePara(2); para != nil {
				tree.Entries = append(tree.Entries, ast.Entry{Para: para})
			}
		}
	}
}