wright/lexer/quoted.rs
//! Lexing implementation for quoted literals.

use super::{Lexer, token::Token, token::TokenTy};
use std::str::Chars;
5
/// Attempt to parse a quoted literal. This includes [TokenTy::StringLiteral], [TokenTy::CharLiteral], and
/// [TokenTy::FormatStringLiteral].
pub fn try_consume_quoted_literal(lexer: &mut Lexer) -> Option<Token> {
    // Make a chars iterator to lex from.
    let mut chars: Chars = lexer.remaining.chars();
    // Get the first char from the character iterator.
    // Return None if the first character doesn't exist or is not one of the quote terminating characters.
    let first: char = chars.next().filter(|c| ['\'', '"', '`'].contains(c))?;
    // Track the number of bytes consumed.
    let mut bytes_consumed: usize = first.len_utf8();
    // Track whether the quoted literal is terminated.
    let mut is_terminated: bool = false;

    // Consume from the iterator while possible.
    while let Some(consumed) = chars.next() {
        // Update the number of bytes consumed.
        bytes_consumed += consumed.len_utf8();

        // Check if the character matches the starting char.
        // If so, record the literal as terminated and break this loop.
        if consumed == first {
            is_terminated = true;
            break;
        }

        // If the character we just consumed is a backslash, consume the next character
        // (if there is one), regardless of what it is. We only handle escaped terminators
        // here, rather than parsing the escape's actual meaning; this prevents an escaped
        // terminator from ending the literal.
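        // E.g. in the source `"a \" b"`, the backslash causes the escaped quote to be
        // skipped, so the literal is only terminated by the final quote.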
        if consumed == '\\' {
            // If there is no next char, do not add anything to the number of bytes consumed.
            bytes_consumed += chars.next().map(char::len_utf8).unwrap_or(0);
        }
    }

    // We get here when we have either reached a terminator or run out of characters.
    // First determine the variant to return.
    let variant: TokenTy = match first {
        '\'' => TokenTy::CharLiteral {
            terminated: is_terminated,
        },

        '"' => TokenTy::StringLiteral {
            terminated: is_terminated,
        },

        '`' => TokenTy::FormatStringLiteral {
            terminated: is_terminated,
        },

        _ => unreachable!("There are no other quoted literals"),
    };

    // Summing char lengths from the iterator should never give us an invalid or out-of-bounds index.
    Some(lexer.split_token_unchecked(bytes_consumed, variant))
}

#[cfg(test)]
mod tests {
    use super::super::{Lexer, token::TokenTy};

    #[test]
    fn string_literal() {
        let mut lexer = Lexer::new_test(r#""Test string literal""#);
        let token = lexer.next_token().unwrap();
        assert_eq!(token.variant, TokenTy::StringLiteral { terminated: true });
        assert_eq!(token.fragment.as_str(), "\"Test string literal\"");
    }
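
    // The tests below are additional sketches covering the char, format string,
    // escaped-quote, and unterminated cases. They assume `Lexer::new_test` and
    // `next_token` route quoted literals through `try_consume_quoted_literal`
    // exactly as in `string_literal` above.

    #[test]
    fn char_literal() {
        let mut lexer = Lexer::new_test("'a'");
        let token = lexer.next_token().unwrap();
        assert_eq!(token.variant, TokenTy::CharLiteral { terminated: true });
        assert_eq!(token.fragment.as_str(), "'a'");
    }

    #[test]
    fn format_string_literal() {
        let mut lexer = Lexer::new_test("`template`");
        let token = lexer.next_token().unwrap();
        assert_eq!(token.variant, TokenTy::FormatStringLiteral { terminated: true });
        assert_eq!(token.fragment.as_str(), "`template`");
    }

    #[test]
    fn unterminated_string_literal() {
        let mut lexer = Lexer::new_test(r#""missing close quote"#);
        let token = lexer.next_token().unwrap();
        assert_eq!(token.variant, TokenTy::StringLiteral { terminated: false });
        assert_eq!(token.fragment.as_str(), "\"missing close quote");
    }

    #[test]
    fn escaped_quote_does_not_terminate() {
        let mut lexer = Lexer::new_test(r#""before \" after""#);
        let token = lexer.next_token().unwrap();
        assert_eq!(token.variant, TokenTy::StringLiteral { terminated: true });
        assert_eq!(token.fragment.as_str(), r#""before \" after""#);
    }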
}