// wright/lexer/quoted.rs
//! Lexing implementation for quoted literals.
use super::{token::Token, token::TokenTy, Lexer};
use std::str::Chars;
/// Try to lex a quoted literal from the front of the lexer's remaining input.
///
/// The opening character selects the token type: `'` yields [TokenTy::CharLiteral], `"` yields
/// [TokenTy::StringLiteral], and a backtick yields [TokenTy::FormatStringLiteral].
///
/// Returns [None] when the remaining input is empty or does not begin with one of those three
/// characters. Otherwise the literal runs up to and including the next unescaped occurrence of the
/// opening character, or to the end of the input if no such character is found; the `terminated`
/// field of the returned variant records which of the two happened.
///
/// Escape sequences are not interpreted here. A backslash simply causes the character after it to be
/// skipped, so that an escaped quote cannot close the literal.
pub fn try_consume_quoted_literal(lexer: &mut Lexer) -> Option<Token> {
    let mut rest: Chars = lexer.remaining.chars();

    // Only the three quote characters can open a quoted literal; anything else is not ours to lex.
    let quote: char = match rest.next() {
        Some(c) if matches!(c, '\'' | '"' | '`') => c,
        _ => return None,
    };

    // Running byte length of the literal, starting with the opening quote itself.
    let mut len: usize = quote.len_utf8();
    // Becomes true only if a matching closing quote is found.
    let mut terminated: bool = false;

    loop {
        match rest.next() {
            // Ran out of input before finding the closing quote.
            None => break,

            // The closing quote is part of the literal, so count it before stopping.
            Some(c) if c == quote => {
                len += c.len_utf8();
                terminated = true;
                break;
            }

            // A backslash swallows whatever follows it (if anything), so the swallowed character is
            // never compared against the closing quote.
            Some('\\') => {
                len += '\\'.len_utf8();
                if let Some(escaped) = rest.next() {
                    len += escaped.len_utf8();
                }
            }

            // Any other character is ordinary literal content.
            Some(c) => len += c.len_utf8(),
        }
    }

    // Map the opening quote onto the token type it introduces.
    let variant: TokenTy = match quote {
        '\'' => TokenTy::CharLiteral { terminated },
        '"' => TokenTy::StringLiteral { terminated },
        '`' => TokenTy::FormatStringLiteral { terminated },
        _ => unreachable!("There are no other quoted literals"),
    };

    // SAFETY: `len` is the summed UTF-8 length of a leading run of chars yielded by
    // `lexer.remaining.chars()`, so it is within bounds of `lexer.remaining` and on a char boundary.
    Some(unsafe { lexer.split_token_unchecked(len, variant) })
}
#[cfg(test)]
mod tests {
    use super::super::{token::TokenTy, Lexer};

    /// A terminated double-quoted literal should lex as a single string literal token whose fragment
    /// is the entire input, quotes included.
    #[test]
    fn string_literal() {
        // The input is exactly one quoted literal, so the expected fragment is the input itself.
        let source: &str = r#""Test string literal""#;

        let mut lexer = Lexer::new_test(source);
        let token = lexer.next_token().unwrap();

        assert_eq!(token.variant, TokenTy::StringLiteral { terminated: true });
        assert_eq!(token.fragment.as_str(), source);
    }
}