wright/parser.rs
//! This parser module is responsible for turning the stream of [Token]s from the [Lexer] into a tree of [AST] nodes.
//!
//! [AST]: crate::ast
//! [Token]: crate::lexer::token::Token
use error::{ParserError, ParserErrorKind};
use super::lexer::Lexer;
use crate::{
lexer::token::{Token, TokenTy},
source_tracking::fragment::Fragment,
};
use std::collections::VecDeque;
pub mod error;
mod identifier;
mod literal;
mod path;
pub mod whitespace;
/// The [Parser] struct wraps a [Lexer] and adds lookahead and functions that are useful for parsing.
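///
/// A construction sketch (assuming a `Lexer::new` constructor that takes source text;
/// the real constructor may differ, so this example is not compiled):
///
/// ```ignore
/// let lexer = Lexer::new("let x = 0;");
/// let mut parser = Parser::new(lexer);
/// ```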
#[derive(Debug)]
pub struct Parser {
lexer: Lexer,
lookahead: VecDeque<Token>,
}
impl Parser {
/// Construct a new parser around a given [Lexer].
pub fn new(lexer: Lexer) -> Self {
Parser {
lexer,
lookahead: VecDeque::new(),
}
}
/// Get the next [Token] from this [Parser]. This may be a token that's already been peeked.
/// Return an error if a [Token] with [TokenTy::Unknown] is encountered.
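    ///
    /// A usage sketch (not compiled; assumes a `parser` built as in the struct-level example):
    ///
    /// ```ignore
    /// // Drain the parser, stopping at the first error or at end of input.
    /// while let Some(token) = parser.next_token()? {
    ///     println!("{:?}", token.variant);
    /// }
    /// ```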
pub fn next_token(&mut self) -> Result<Option<Token>, ParserError> {
let token = self
.lookahead
.pop_front()
.or_else(|| self.lexer.next_token());
// Check for unknown tokens, which should always convert to an error.
if let Some(Token {
variant: TokenTy::Unknown,
fragment,
}) = token
{
Err(ParserError {
kind: ParserErrorKind::EncounteredUnknownToken,
location: fragment,
help: None,
})
} else {
Ok(token)
}
}
    /// Advance this [Parser] by `n` [Token]s, discarding them.
    ///
    /// # Panics
    /// - If `n` is greater than the number of remaining [Token]s.
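    ///
    /// A sketch of typical use (not compiled; `parser` is assumed to exist):
    ///
    /// ```ignore
    /// // Skip a single token that has already been peeked, so the
    /// // panic case cannot occur.
    /// if parser.peek().is_some() {
    ///     parser.advance(1);
    /// }
    /// ```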
pub fn advance(&mut self, n: usize) {
// Add tokens to the lookahead buffer until we have enough to split off.
while self.lookahead.len() < n {
let token = self
.lexer
.next_token()
.expect("advance: `n` <= number of remaining tokens");
self.lookahead.push_back(token);
}
        // Drop the first `n` tokens; `split_off(n)` returns everything after them,
        // which becomes the new lookahead buffer.
        self.lookahead = self.lookahead.split_off(n);
}
    /// Peek at the next [Token] without consuming it. The [Token] is pulled from the
    /// wrapped [Lexer] and cached in the lookahead queue.
pub fn peek(&mut self) -> Option<&Token> {
if self.lookahead.is_empty() {
self.lookahead.push_back(self.lexer.next_token()?);
}
self.lookahead.front()
}
/// Peek the [Fragment] of the next [Token].
pub fn peek_fragment(&mut self) -> Option<&Fragment> {
self.peek().map(|token| &token.fragment)
}
/// Get the [Lexer] that's wrapped.
pub fn lexer(&self) -> &Lexer {
&self.lexer
}
    /// Look ahead `k` [Token]s without consuming any, returning [None] if fewer than
    /// `k + 1` [Token]s remain.
    ///
    /// If `k == 0` this is equivalent to [Parser::peek].
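    ///
    /// Sketch (not compiled; `parser` is assumed to exist):
    ///
    /// ```ignore
    /// // Inspect the token after next without consuming anything.
    /// if let Some(token) = parser.lookahead(1) {
    ///     println!("second token: {:?}", token.variant);
    /// }
    /// ```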
pub fn lookahead(&mut self, k: usize) -> Option<&Token> {
while self.lookahead.len() <= k {
self.lookahead.push_back(self.lexer.next_token()?);
}
self.lookahead.get(k)
}
/// Similar to [Parser::lookahead] but instead returns a slice of `n` [Token]s, starting with the next [Token].
///
/// Returns [None] if `n` is greater than the number of remaining [Token]s for this [Parser].
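    ///
    /// Sketch (not compiled; `parser` is assumed to exist):
    ///
    /// ```ignore
    /// // View the next three tokens as one slice.
    /// if let Some(window) = parser.lookahead_window(3) {
    ///     for token in window {
    ///         println!("{:?}", token.variant);
    ///     }
    /// }
    /// ```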
pub fn lookahead_window(&mut self, n: usize) -> Option<&[Token]> {
while self.lookahead.len() < n {
self.lookahead.push_back(self.lexer.next_token()?);
}
        // Use `make_contiguous` here to get a single contiguous slice over both halves of the deque.
Some(&self.lookahead.make_contiguous()[..n])
}
/// Get the next [Token] from this parser if its [Token::variant] is the given `token_ty`.
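    ///
    /// Sketch (not compiled; `parser` is assumed to exist):
    ///
    /// ```ignore
    /// // Consume a whitespace token only if one is next; `None` means the
    /// // next token (if any) was something else, and nothing was consumed.
    /// let _maybe_whitespace: Option<Token> = parser.next_if_is(TokenTy::Whitespace);
    /// ```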
pub fn next_if_is(&mut self, token_ty: TokenTy) -> Option<Token> {
        // A successful peek guarantees the lookahead queue is non-empty here.
        (self.peek()?.variant == token_ty)
            // SAFETY: We just peeked a token to check its variant, so this unwrap is always ok.
            .then(|| unsafe { self.lookahead.pop_front().unwrap_unchecked() })
}
/// Peek at the next [Token]s of this [Parser] and determine if the [Token::variant]s match this
/// sequence of [TokenTy]s.
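    ///
    /// Sketch (not compiled; `TokenTy::Identifier` and `TokenTy::Colon` are assumed
    /// variants used purely for illustration):
    ///
    /// ```ignore
    /// // Check for an `identifier :` prefix without consuming anything,
    /// // then commit to it with `advance`.
    /// if parser.matches(&[TokenTy::Identifier, TokenTy::Colon]) {
    ///     parser.advance(2);
    /// }
    /// ```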
pub fn matches(&mut self, seq: &[TokenTy]) -> bool {
        // Use let-else to bail out early if fewer tokens remain than the sequence requires.
let Some(lookahead_window) = self.lookahead_window(seq.len()) else {
return false;
};
        // Use a zipped iterator to compare all the token variants.
        lookahead_window
            .iter()
            .zip(seq.iter())
            .all(|(token, expected)| token.variant == *expected)
}
    /// Peek at the next [Token] and consume it if it's a [TokenTy::Whitespace].
pub fn ignore_whitespace(&mut self) {
self.next_if_is(TokenTy::Whitespace);
}
}