wright/lexer/quoted.rs

1//! Lexing implementation for quoted literals.
2
3use super::{Lexer, token::Token, token::TokenTy};
4use std::str::Chars;
5
6/// Attempt to parse a quoted literal. This includes [TokenTy::StringLiteral], [TokenTy::CharLiteral], and
7/// [TokenTy::FormatStringLiteral].
8pub fn try_consume_quoted_literal(lexer: &mut Lexer) -> Option<Token> {
9    // Make a chars iterator to lex from.
10    let mut chars: Chars = lexer.remaining.chars();
11    // Get the first char from the character iterator.
12    // Return none if the first character doesn't exist or is not one of the quote terminating characters.
13    let first: char = chars.next().filter(|c| ['\'', '"', '`'].contains(c))?;
14    // Track number of bytes consumed.
15    let mut bytes_consumed: usize = first.len_utf8();
16    // Track whether the quoted literal is terminated.
17    let mut is_terminated: bool = false;
18
19    // Consume from the iterator while possible.
20    while let Some(consumed) = chars.next() {
21        // Update the number of bytes consumed.
22        bytes_consumed += consumed.len_utf8();
23
24        // Check if the character matches the starting char.
25        // If so, record the literal as terminated and break this loop.
26        if consumed == first {
27            is_terminated = true;
28            break;
29        }
30
31        // If the character we just consumed is a backslash.
32        // We only handle escaped terminators here, rather than parsing actual meaning.
33        // Consume the next character if there is one, regardless of what it is.
34        // This prevents an escaped terminator from ending the literal.
35        if consumed == '\\' {
36            // If there is no next char, do not add anything to the number of bytes consumed.
37            bytes_consumed += chars.next().map(char::len_utf8).unwrap_or(0);
38        }
39    }
40
41    // Return when we have either reached a terminator or run out of characters.
42    // First determine the variant to return.
43    let variant: TokenTy = match first {
44        '\'' => TokenTy::CharLiteral {
45            terminated: is_terminated,
46        },
47
48        '\"' => TokenTy::StringLiteral {
49            terminated: is_terminated,
50        },
51
52        '`' => TokenTy::FormatStringLiteral {
53            terminated: is_terminated,
54        },
55
56        _ => unreachable!("There are no other quoted literals"),
57    };
58
59    // Summing char lengths from the iterator should never give us an invalid or out of bounds index.
60    Some(lexer.split_token_unchecked(bytes_consumed, variant))
61}
62
#[cfg(test)]
mod tests {
    use super::super::{Lexer, token::TokenTy};

    #[test]
    fn string_literal() {
        // A terminated double-quoted string should lex as a single
        // StringLiteral token covering the entire source text.
        let source = r#""Test string literal""#;
        let mut lexer = Lexer::new_test(source);
        let token = lexer.next_token().unwrap();
        assert_eq!(token.variant, TokenTy::StringLiteral { terminated: true });
        assert_eq!(token.fragment.as_str(), source);
    }
}
74}