// wright/parser/literal/integer.rs
//! Integer literal parsing implementation.
use num::{BigUint, Num};
use crate::parser::Parser;
use crate::{ast::literal::IntegerLiteral, lexer::token::TokenTy};
use crate::parser::error::{ParserError, ParserErrorKind};
impl IntegerLiteral {
    /// Parse an integer literal from the given [Parser].
    ///
    /// The next token is consumed only if it is a [TokenTy::IntegerLiteral]. A leading `0x`/`0X`,
    /// `0b`/`0B`, or `0o` selects base 16, 2, or 8 respectively and is stripped before the digits
    /// are handed to `num`; any other literal is read as base 10.
    ///
    /// # Errors
    ///
    /// Returns a [ParserError] of kind [ParserErrorKind::ExpectedIntegerLiteral] when:
    /// - the next token is not an integer literal (located at the upcoming fragment),
    /// - there is no next fragment (located at the lexer's remaining source), or
    /// - `num` rejects the digits of the literal (located at the literal itself; the token has
    ///   already been consumed at that point).
    pub fn parse(parser: &mut Parser) -> Result<Self, ParserError> {
        // Get the token containing the integer literal from the parser.
        let Some(int_lit_token) = parser.next_if_is(TokenTy::IntegerLiteral) else {
            return match parser.peek_fragment() {
                Some(frag) => Err(ParserError {
                    kind: ParserErrorKind::ExpectedIntegerLiteral,
                    location: frag.clone(),
                    help: None,
                }),

                None => Err(ParserError {
                    kind: ParserErrorKind::ExpectedIntegerLiteral,
                    location: parser.lexer.remaining.clone(),
                    help: Some("found end of source".into()),
                }),
            };
        };

        // Inspect (at most) the first two characters to detect a radix prefix. `Chars::as_str`
        // yields whatever follows the characters already taken, so no sentinel character or
        // byte-index slicing is needed to drop the prefix.
        let literal: &str = int_lit_token.fragment.as_str();
        let mut chars = literal.chars();

        let (digits, radix): (&str, u32) = match (chars.next(), chars.next()) {
            // Hexadecimal.
            (Some('0'), Some('x' | 'X')) => (chars.as_str(), 16),
            // Binary.
            (Some('0'), Some('b' | 'B')) => (chars.as_str(), 2),
            // Octal (only the lowercase prefix is recognized).
            (Some('0'), Some('o')) => (chars.as_str(), 8),
            // All other patterns are not radix prefixes.
            _ => (literal, 10),
        };

        // Pass the remainder of parsing off to num. num rejects an empty digit string and one that
        // starts with `_`, so report that as a parser error instead of panicking.
        // NOTE(review): assumes the lexer may hand over tokens such as `0x` or `0x_ff` — confirm.
        match BigUint::from_str_radix(digits, radix) {
            Ok(value) => Ok(IntegerLiteral {
                fragment: int_lit_token.fragment,
                value,
            }),

            Err(_) => Err(ParserError {
                kind: ParserErrorKind::ExpectedIntegerLiteral,
                location: int_lit_token.fragment,
                help: Some("integer literal digits could not be parsed".into()),
            }),
        }
    }
}
#[cfg(test)]
mod tests {
    use crate::{ast::literal::IntegerLiteral, lexer::Lexer, parser::Parser};
    use num::BigUint;

    /// A plain decimal literal parses to the value it spells, keeps its source fragment, and
    /// leaves nothing behind in the lexer.
    #[test]
    fn normal() {
        let source = "1000";
        let mut parser = Parser::new(Lexer::new_test(source));

        let parsed = IntegerLiteral::parse(&mut parser).unwrap();

        assert_eq!(parsed.value, BigUint::from(1000u32));
        assert_eq!(parser.lexer.remaining.as_str(), "");
        assert_eq!(parsed.fragment.as_str(), source);
    }

    // TODO: add an `ignore_underscores` test covering literals that contain `_` separators.
}