wright/
parser.rs

1//! This parser module is responsible for turning the stream of [Token]s from the [Lexer] into a tree of [AST] nodes.
2//!
3//! [AST]: crate::ast
4//! [Token]: crate::lexer::token::Token
5
6use error::{ParserError, ParserErrorKind};
7
8use super::lexer::Lexer;
9use crate::{
10    lexer::token::{Token, TokenTy},
11    source_tracking::fragment::Fragment,
12};
13use std::collections::VecDeque;
14
15mod decl;
16pub mod error;
17mod identifier;
18mod literal;
19mod path;
20mod ty;
21pub mod whitespace;
22
/// The [Parser] struct wraps a [Lexer] and adds lookahead and functions that are useful for parsing.
#[derive(Debug)]
pub struct Parser {
    /// The wrapped [Lexer] that produces the raw [Token] stream.
    lexer: Lexer,
    /// [Token]s pulled from the lexer but not yet consumed by the parser, in order.
    /// This buffer is what enables arbitrary-depth peeking/lookahead.
    lookahead: VecDeque<Token>,
}
29
30impl Parser {
31    /// Construct a new parser around a given [Lexer].
32    pub fn new(lexer: Lexer) -> Self {
33        Parser {
34            lexer,
35            lookahead: VecDeque::new(),
36        }
37    }
38
39    /// Get the number of remaining bytes on this parser. This is potentially useful for checking
40    /// if a parser has advanced between two calls (or checking if a parser has reached end of input).
41    pub fn bytes_remaining(&self) -> usize {
42        let bytes_remaining_in_lookahead_buffer = self
43            .lookahead
44            .iter()
45            .map(|t| t.fragment.len())
46            .sum::<usize>();
47
48        let bytes_remaining_in_lexer = self.lexer.bytes_remaining();
49
50        bytes_remaining_in_lexer + bytes_remaining_in_lookahead_buffer
51    }
52
53    /// Get the next [Token] from this [Parser]. This may be a token that's already been peeked.
54    ///
55    /// Skips any non-document comments encountered via the lexer implementation.
56    ///
57    /// Return an error if a [Token] with [TokenTy::Unknown] is encountered.
58    pub fn next_token(&mut self) -> Result<Option<Token>, ParserError> {
59        let token = self
60            .lookahead
61            .pop_front()
62            .or_else(|| self.lexer.next_token());
63
64        // Check for unknown tokens, which should always convert to an error.
65        match token {
66            Some(Token {
67                variant: TokenTy::Unknown,
68                fragment,
69            }) => Err(ParserErrorKind::EncounteredUnknownToken.at(fragment)),
70            known_token_or_none => Ok(known_token_or_none),
71        }
72    }
73
74    /// Advance this [Parser] by `n` [Token]s. If this [Parser] runs out of [Token]s, panic.
75    ///
76    /// Panics
77    /// - If `n` is greater than the number of remaining tokens.
78    pub fn advance(&mut self, n: usize) {
79        // Add tokens to the lookahead buffer until we have enough to split off.
80        while self.lookahead.len() < n {
81            let token = self
82                .lexer
83                .next_token()
84                .expect("advance: `n` <= number of remaining tokens");
85
86            self.lookahead.push_back(token);
87        }
88
89        // Split them off.
90        self.lookahead = self.lookahead.split_off(n);
91    }
92
93    /// Peek at the next token from the [Lexer] (cached in the lookahead queue if peeked before).
94    pub fn peek(&mut self) -> Option<&Token> {
95        if self.lookahead.is_empty() {
96            self.lookahead.push_back(self.lexer.next_token()?);
97        }
98
99        self.lookahead.front()
100    }
101
102    /// Peek the [Fragment] of the next [Token].
103    pub fn peek_fragment(&mut self) -> Option<&Fragment> {
104        self.peek().map(|token| &token.fragment)
105    }
106
107    /// Peek the [TokenTy] of the next [Token].
108    pub fn peek_variant(&mut self) -> Option<TokenTy> {
109        self.peek().map(|token| token.variant)
110    }
111
112    /// Peek the [Fragment] of the next [Token] and clone it or return a clone of the
113    /// remainder [Fragment] of the internal [Lexer]
114    /// (which will be empty, since there wasn't a [Token] to peek).
115    ///
116    /// This is likely only useful for error reporting -- a clone of a potentially empty fragment is
117    /// rarely ever useful otherwise.
118    pub fn peek_fragment_or_rest_cloned(&mut self) -> Fragment {
119        match self.peek() {
120            Some(Token { fragment, .. }) => fragment.clone(),
121            None => {
122                let rest = self.lexer.remaining.clone();
123
124                // Assert that we're making the right assumptions about the remaining fragment.
125                // These are (unidiomatically) done using debug_assert -- perhaps that changes eventually
126                // however it should be fine for now, since this can only produce logic bugs (never memory or
127                // concurrency bugs).
128                debug_assert!(rest.is_valid());
129                debug_assert!(rest.is_empty());
130                debug_assert!(rest.is_empty_at_end_of_source());
131
132                rest
133            }
134        }
135    }
136
137    /// Get the [Lexer] that's wrapped.
138    pub fn lexer(&self) -> &Lexer {
139        &self.lexer
140    }
141
142    /// Lookahead `k` [Token]s.
143    ///
144    /// If `k == 0` then this is effectively peeking at the next [Token] from the wrapped [Lexer].
145    pub fn lookahead(&mut self, k: usize) -> Option<&Token> {
146        while self.lookahead.len() <= k {
147            self.lookahead.push_back(self.lexer.next_token()?);
148        }
149
150        self.lookahead.get(k)
151    }
152
153    /// Similar to [Parser::lookahead] but instead returns a slice of `n` [Token]s, starting with the next [Token].
154    ///
155    /// Returns [None] if `n` is greater than the number of remaining [Token]s for this [Parser].
156    pub fn lookahead_window(&mut self, n: usize) -> Option<&[Token]> {
157        while self.lookahead.len() < n {
158            self.lookahead.push_back(self.lexer.next_token()?);
159        }
160
161        // Use make contiguous here to get a unified/single slice.
162        Some(&self.lookahead.make_contiguous()[..n])
163    }
164
165    /// Get the next [Token] from this parser if its [Token::variant] is the given `token_ty`.
166    pub fn next_if_is(&mut self, token_ty: TokenTy) -> Option<Token> {
167        // Peeking successfully first means that the lookahead vec will never be empty here.
168        (self.peek()?.variant == token_ty)
169            // SAFETY: We just peeked a token to check its variant so this unwrap is always ok.
170            .then(|| unsafe { self.lookahead.pop_front().unwrap_unchecked() })
171    }
172
173    /// Peek at the next [Token]s of this [Parser] and determine if the [Token::variant]s match this
174    /// sequence of [TokenTy]s.
175    pub fn matches(&mut self, seq: &[TokenTy]) -> bool {
176        // Use the rare let-else to ensure there are at minimum, the given number of tokens remaining.
177        let Some(lookahead_window) = self.lookahead_window(seq.len()) else {
178            return false;
179        };
180
181        // Use a zipped iterator to compare all the token variants.
182        lookahead_window
183            .iter()
184            .zip(seq)
185            .all(|(token, matches)| token.variant == *matches)
186    }
187}