wright/lexer/
integer_literal.rs

1//! Implementation for lexing integer literals.
2
3use super::{
4    Lexer,
5    token::{Token, TokenTy},
6};
7use std::{iter::Peekable, str::Chars};
8
9/// Attempt to lex and consume an [TokenTy::IntegerLiteral] from the lexer.
10pub fn try_consume_integer_literal(lexer: &mut Lexer) -> Option<Token> {
11    // Make a peekable character iterator.
12    let mut chars: Peekable<Chars> = lexer.remaining.chars().peekable();
13    // Get the first character from the iterator. We can only continue lexing if one exists and is an ascii
14    // decimal digit.
15    let next: char = chars.next().filter(char::is_ascii_digit)?;
16    // Track the number of bytes consumed. We use the length of the parsed first char here but we could probably
17    // assume it to be 1.
18    let mut bytes_consumed: usize = next.len_utf8();
19    // Track the radix
20    let mut radix: u32 = 10;
21
22    // Change the radix if necessary
23    if next == '0' {
24        if let Some(prefix) = chars.next_if(|x| ['x', 'o', 'b', 'X', 'B'].contains(x)) {
25            // All the possible prefix chars are 1 byte ascii characters.
26            bytes_consumed += 1;
27
28            radix = match prefix {
29                'x' | 'X' => 16,
30                'b' | 'B' => 2,
31                'o' => 8,
32                _ => unreachable!("the prefix byte is checked above"),
33            };
34        }
35    }
36
37    // The first character after the optional prefix is required to be a digit, not underscore.
38    bytes_consumed += chars.next_if(|c| c.is_digit(radix))?.len_utf8();
39
40    // Add the rest of the integer literal.
41    bytes_consumed += chars
42        .take_while(|c| c.is_digit(radix) || *c == '_')
43        .map(char::len_utf8)
44        .sum::<usize>();
45
46    Some(lexer.split_token(bytes_consumed, TokenTy::IntegerLiteral))
47}
48
#[cfg(test)]
mod tests {
    use super::{Lexer, TokenTy};
    use crate::lexer::integer_literal::try_consume_integer_literal;

    /// A decimal literal with underscore separators is lexed as a single token, and lexing stops
    /// right before the trailing `.`.
    #[test]
    fn integer_literal() {
        let source = "123_456_789.";
        let mut lexer = Lexer::new_test(source);

        let lexed = lexer.next_token().unwrap();

        assert_eq!(lexed.variant, TokenTy::IntegerLiteral);
        assert_eq!(lexed.fragment.as_str(), "123_456_789");
        assert_eq!(lexer.remaining.as_str(), ".");
    }

    /// An underscore directly after a radix prefix is rejected, and the lexer's remaining input is
    /// left as it was.
    #[test]
    fn cant_start_with_underscore() {
        let source = "0x__10";
        let mut lexer = Lexer::new_test(source);

        let outcome = try_consume_integer_literal(&mut lexer);

        assert!(outcome.is_none());
        assert_eq!(lexer.remaining.as_str(), "0x__10");
    }
}
74}