// wright/lexer/identifier.rs

//! Implementation related to parsing keywords and identifiers.
2
3use super::{Lexer, token::Token, token::TokenTy};
4use crate::source_tracking::fragment::Fragment;
5use std::str::Chars;
6use unicode_ident::{is_xid_continue, is_xid_start};
7
8/// Try to match a fragment recognized to be an identifier or keyword to
9/// a keyword or return [TokenTy::Identifier].
10fn identifier_or_keyword(fragment: Fragment) -> TokenTy {
11    use TokenTy::*;
12
13    match fragment.as_str() {
14        "record" => KwRecord,
15        "type" => KwType,
16        "enum" => KwEnum,
17        "union" => KwUnion,
18        "func" => KwFunc,
19        "pure" => KwPure,
20        "unsafe" => KwUnsafe,
21        "naked" => KwNaked,
22        "repr" => KwRepr,
23        "impl" => KwImpl,
24        "constrain" => KwConstrain,
25        "constraint" => KwConstraint,
26        "references" => KwReferences,
27        "trait" => KwTrait,
28        "const" => KwConst,
29        "where" => KwWhere,
30
31        "use" => KwUse,
32        "as" => KwAs,
33        "mod" => KwMod,
34        "pub" => KwPub,
35
36        "if" => KwIf,
37        "else" => KwElse,
38        "match" => KwMatch,
39
40        "for" => KwFor,
41        "in" => KwIn,
42        "while" => KwWhile,
43        "loop" => KwLoop,
44
45        "let" => KwLet,
46        "var" => KwVar,
47
48        "true" => KwTrue,
49        "false" => KwFalse,
50
51        "bool" => KwBool,
52        "u8" => KwU8,
53        "i8" => KwI8,
54        "u16" => KwU16,
55        "i16" => KwI16,
56        "u32" => KwU32,
57        "i32" => KwI32,
58        "f32" => KwF32,
59        "u64" => KwU64,
60        "i64" => KwI64,
61        "f64" => KwF64,
62        "char" => KwChar,
63
64        "_" => Underscore,
65
66        _ => Identifier,
67    }
68}
69
70/// Attempt to consume a keyword/[identifier](TokenTy::Identifier)/[underscore](TokenTy::Underscore) from the lexer.
71pub fn try_consume_keyword_or_identifier(lexer: &mut Lexer) -> Option<Token> {
72    // Get a character iterator that we can pull from.
73    let mut chars: Chars = lexer.remaining.chars();
74    // Get the next character from the iterator, consider it the first char of any potential match.
75    // Make sure it's a valid identifier start (includes start to all keywords) or is an underscore.
76    // If it does not exist or match predicates, return None.
77    let next: char = chars.next().filter(|c| is_xid_start(*c) || *c == '_')?;
78    // Store/track the number of bytes consumed so far.
79    let mut bytes_consumed: usize = next.len_utf8();
80
81    // Take remaining chars and add to sum.
82    bytes_consumed += chars
83        .take_while(|c| is_xid_continue(*c))
84        .map(char::len_utf8)
85        .sum::<usize>();
86
87    // Split the token and the new remaining fragment.
88    // VALIDITY: The character iterator should guarantee that we land on a valid character boundary within the bounds
89    // of the fragment.
90    let (token_fragment, new_remaining): (Fragment, Fragment) =
91        lexer.remaining.split_at_unchecked(bytes_consumed);
92
93    // Get the variant of token to produce.
94    let variant: TokenTy = identifier_or_keyword(token_fragment.clone());
95
96    // Update the lexer's remaining fragment.
97    lexer.remaining = new_remaining;
98
99    // Return the token.
100    Some(Token {
101        variant,
102        fragment: token_fragment,
103    })
104}
105
#[cfg(test)]
mod tests {
    use super::{Lexer, TokenTy};

    /// Lexing "const TEST" should yield a keyword, whitespace, then an identifier.
    #[test]
    fn identifiers_and_keywords() {
        let mut lexer = Lexer::new_test("const TEST");

        let expected = [TokenTy::KwConst, TokenTy::Whitespace, TokenTy::Identifier];

        for want in expected {
            assert_eq!(lexer.next_token().unwrap().variant, want);
        }
    }
}
118}