wright/parser.rs
//! This parser module is responsible for turning the stream of [Token]s from the [Lexer] into a tree of [AST] nodes.
//!
//! [AST]: crate::ast
//! [Token]: crate::lexer::token::Token

use error::{ParserError, ParserErrorKind};

use super::lexer::Lexer;
use crate::{
    lexer::token::{Token, TokenTy},
    source_tracking::fragment::Fragment,
};
use std::collections::VecDeque;

mod decl;
pub mod error;
mod identifier;
mod literal;
mod path;
mod ty;
pub mod whitespace;

/// The [Parser] struct wraps a [Lexer] and adds lookahead and functions that are useful for parsing.
#[derive(Debug)]
pub struct Parser {
    /// The wrapped [Lexer] that produces the [Token]s being parsed.
    lexer: Lexer,
    /// Queue of [Token]s pulled from the [Lexer] but not yet consumed.
    lookahead: VecDeque<Token>,
}

impl Parser {
    /// Construct a new parser around a given [Lexer].
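    ///
    /// # Example
    ///
    /// A minimal sketch; how the [Lexer] itself gets constructed is assumed here
    /// and may not match its real API:
    ///
    /// ```ignore
    /// let lexer = Lexer::new(source); // hypothetical constructor
    /// let mut parser = Parser::new(lexer);
    /// ```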
    pub fn new(lexer: Lexer) -> Self {
        Parser {
            lexer,
            lookahead: VecDeque::new(),
        }
    }

    /// Get the next [Token] from this [Parser]. This may be a token that was previously peeked.
    ///
    /// Any non-document comments are skipped by the underlying [Lexer] implementation.
    ///
    /// # Errors
    ///
    /// Returns an error if a [Token] with [TokenTy::Unknown] is encountered.
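    ///
    /// # Example
    ///
    /// A sketch of a typical driver loop, assumed to run inside a function that
    /// returns `Result<_, ParserError>` (the `handle` function is hypothetical):
    ///
    /// ```ignore
    /// // `?` propagates the error produced for an unknown token; the loop
    /// // ends when the lexer is exhausted.
    /// while let Some(token) = parser.next_token()? {
    ///     handle(token);
    /// }
    /// ```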
    pub fn next_token(&mut self) -> Result<Option<Token>, ParserError> {
        let token = self
            .lookahead
            .pop_front()
            .or_else(|| self.lexer.next_token());

        // Check for unknown tokens, which should always convert to an error.
        match token {
            Some(Token {
                variant: TokenTy::Unknown,
                fragment,
            }) => Err(ParserErrorKind::EncounteredUnknownToken.at(fragment)),
            known_token_or_none => Ok(known_token_or_none),
        }
    }

    /// Advance this [Parser] by `n` [Token]s.
    ///
    /// # Panics
    ///
    /// Panics if `n` is greater than the number of remaining [Token]s.
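    ///
    /// # Example
    ///
    /// A sketch of checking before advancing so the call cannot panic:
    ///
    /// ```ignore
    /// // A successful peek guarantees at least one token remains.
    /// if parser.peek().is_some() {
    ///     parser.advance(1);
    /// }
    /// ```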
    pub fn advance(&mut self, n: usize) {
        // Add tokens to the lookahead buffer until we have enough to split off.
        while self.lookahead.len() < n {
            let token = self
                .lexer
                .next_token()
                .expect("advance: `n` <= number of remaining tokens");

            self.lookahead.push_back(token);
        }

        // Split them off.
        self.lookahead = self.lookahead.split_off(n);
    }

    /// Peek at the next token from the [Lexer] (cached in the lookahead queue if peeked before).
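    ///
    /// Peeking does not consume the token, so repeated peeks see the same [Token].
    /// A sketch, assuming a parser with tokens remaining:
    ///
    /// ```ignore
    /// let first = parser.peek_variant();
    /// // The token was cached in the lookahead queue, not consumed.
    /// assert_eq!(parser.peek_variant(), first);
    /// ```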
    pub fn peek(&mut self) -> Option<&Token> {
        if self.lookahead.is_empty() {
            self.lookahead.push_back(self.lexer.next_token()?);
        }

        self.lookahead.front()
    }

    /// Peek the [Fragment] of the next [Token].
    pub fn peek_fragment(&mut self) -> Option<&Fragment> {
        self.peek().map(|token| &token.fragment)
    }

    /// Peek the [TokenTy] of the next [Token].
    pub fn peek_variant(&mut self) -> Option<TokenTy> {
        self.peek().map(|token| token.variant)
    }

    /// Peek at the next [Token] and clone its [Fragment], or return a clone of the remainder
    /// [Fragment] of the internal [Lexer] (which will be empty, since there was no [Token]
    /// left to peek).
    ///
    /// This is likely only useful for error reporting -- a clone of a potentially empty
    /// fragment is rarely useful otherwise.
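    ///
    /// # Example
    ///
    /// A sketch of attaching an error to whatever source location is available
    /// (the error variant here is hypothetical):
    ///
    /// ```ignore
    /// // Always yields a usable fragment, even at the end of input.
    /// let fragment = parser.peek_fragment_or_rest_cloned();
    /// return Err(ParserErrorKind::ExpectedIdentifier.at(fragment));
    /// ```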
    pub fn peek_fragment_or_rest_cloned(&mut self) -> Fragment {
        match self.peek() {
            Some(Token { fragment, .. }) => fragment.clone(),
            None => {
                let rest = self.lexer.remaining.clone();

                // Assert that we're making the right assumptions about the remaining fragment.
                // These checks are (unidiomatically) done with debug_assert. That may change
                // eventually, but it should be fine for now, since a failure here can only
                // indicate a logic bug (never a memory or concurrency bug).
                debug_assert!(rest.is_valid());
                debug_assert!(rest.is_empty());
                debug_assert!(rest.is_empty_at_end_of_source());

                rest
            }
        }
    }

    /// Get a reference to the wrapped [Lexer].
    pub fn lexer(&self) -> &Lexer {
        &self.lexer
    }

    /// Lookahead `k` [Token]s.
    ///
    /// If `k == 0` then this is effectively peeking at the next [Token] from the wrapped [Lexer].
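    ///
    /// A sketch, assuming a parser with tokens remaining:
    ///
    /// ```ignore
    /// // `lookahead(0)` is equivalent to peeking at the next token.
    /// let next = parser.lookahead(0).map(|token| token.variant);
    /// assert_eq!(next, parser.peek_variant());
    /// ```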
    pub fn lookahead(&mut self, k: usize) -> Option<&Token> {
        while self.lookahead.len() <= k {
            self.lookahead.push_back(self.lexer.next_token()?);
        }

        self.lookahead.get(k)
    }

    /// Similar to [Parser::lookahead] but instead returns a slice of `n` [Token]s, starting with the next [Token].
    ///
    /// Returns [None] if `n` is greater than the number of remaining [Token]s for this [Parser].
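    ///
    /// A sketch (the [TokenTy] variants checked here are hypothetical):
    ///
    /// ```ignore
    /// // Inspect the next two tokens at once without consuming them.
    /// if let Some([first, second]) = parser.lookahead_window(2) {
    ///     if first.variant == TokenTy::Identifier && second.variant == TokenTy::Eq {
    ///         // ... parse an assignment ...
    ///     }
    /// }
    /// ```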
    pub fn lookahead_window(&mut self, n: usize) -> Option<&[Token]> {
        while self.lookahead.len() < n {
            self.lookahead.push_back(self.lexer.next_token()?);
        }

        // Use `make_contiguous` to get a single unified slice of the lookahead queue.
        Some(&self.lookahead.make_contiguous()[..n])
    }

    /// Get the next [Token] from this parser if its [Token::variant] is the given `token_ty`.
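    ///
    /// A sketch of conditional consumption (the [TokenTy] variant is hypothetical):
    ///
    /// ```ignore
    /// // Consume a leading minus sign if present, leaving the parser
    /// // untouched otherwise.
    /// let is_negative = parser.next_if_is(TokenTy::Minus).is_some();
    /// ```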
    pub fn next_if_is(&mut self, token_ty: TokenTy) -> Option<Token> {
        // Peeking successfully first means that the lookahead queue will never be empty here.
        (self.peek()?.variant == token_ty)
            // SAFETY: We just peeked a token to check its variant, so this unwrap is always ok.
            .then(|| unsafe { self.lookahead.pop_front().unwrap_unchecked() })
    }

    /// Peek at the next [Token]s of this [Parser] and determine whether their [Token::variant]s
    /// match the given sequence of [TokenTy]s.
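    ///
    /// A sketch of the check-then-consume pattern (the [TokenTy] variants are hypothetical):
    ///
    /// ```ignore
    /// // Check for `name :` without consuming anything.
    /// if parser.matches(&[TokenTy::Identifier, TokenTy::Colon]) {
    ///     // `matches` succeeded, so advancing by the same count cannot panic.
    ///     parser.advance(2);
    /// }
    /// ```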
    pub fn matches(&mut self, seq: &[TokenTy]) -> bool {
        // Use let-else to ensure there are at least `seq.len()` tokens remaining.
        let Some(lookahead_window) = self.lookahead_window(seq.len()) else {
            return false;
        };

        // Use a zipped iterator to compare all the token variants.
        lookahead_window
            .iter()
            .zip(seq)
            .all(|(token, expected)| token.variant == *expected)
    }
}
173}