wright/lexer/token.rs
//! Token models.

use std::fmt::{self, Display};

use crate::source_tracking::fragment::Fragment;

/// A token in wright source code.
#[derive(Debug)]
pub struct Token {
    /// What type of token this is.
    pub variant: TokenTy,

    /// The matching fragment of source code -- this contains the location and length data for the token.
    pub fragment: Fragment,
}

/// The different types of tokens in wright source.
#[rustfmt::skip] // Turn off auto reformat.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
// Allow missing docs (most of these should be self-evident).
#[allow(missing_docs)]
pub enum TokenTy {
    LeftCurly, RightCurly,
    LeftBracket, RightBracket,
    LeftParen, RightParen,

    Plus, PlusEq,
    Star, StarEq,
    Div, DivEq,
    Xor, XorEq,
    Mod, ModEq,
    Bang, BangEq,

    Minus, MinusEq, SingleArrow,
    Eq, EqEq, DoubleArrow,

    Lt, LtEq, LtLt,
    Gt, GtEq, GtGt,
    And, AndEq, AndAnd,
    Or, OrEq, OrOr,
    Colon, ColonEq, ColonColon,

    At,
    Tilde,
    Semi,
    Dot,
    Comma,
    Hash,
    Question,
    Dollar,
    // Not grouped with the punctuation above, since it can appear at the start of identifiers.
    Underscore,

    Identifier,

    OuterDocComment, OuterBlockDocComment,
    InnerDocComment, InnerBlockDocComment,

    /// Indicates a block-style comment without termination.
    /// Kept separate from [TokenTy::InnerDocComment] and [TokenTy::OuterDocComment] to indicate that
    /// unterminated comments will be handled differently (they will eventually produce errors).
    UnterminatedBlockComment,

    KwRecord,
    KwType,
    KwEnum,
    KwUnion,
    KwFunc,
    KwRepr,
    KwImpl,
    KwConstraint,
    KwReferences,
    KwTrait,
    KwUse,
    KwAs,
    KwConst,
    KwMod,
    KwIf,
    KwElse,
    KwMatch,
    KwFor,
    KwIn,
    KwWhile,
    KwTrue,
    KwFalse,
    KwLoop,
    KwWhere,

    IntegerLiteral,
    StringLiteral { terminated: bool },
    FormatStringLiteral { terminated: bool },
    CharLiteral { terminated: bool },

    /// Whitespace counts as a token.
    Whitespace,

    /// Unknown character in lexer fragment.
    Unknown,
}

impl Display for Token {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // If the host terminal supports unicode, replace the newline & carriage return characters
        // with their control pictures; otherwise fall back to ASCII placeholders.
        let replacements = match crate::util::supports_unicode::supports_unicode() {
            true => &[("\n", "\u{240A}"), ("\r", "\u{240D}")],
            false => &[("\n", "[nl]"), ("\r", "[cr]")],
        };

        let mut with_replacements = self.fragment.as_str().to_owned();

        for (replace, replace_with) in replacements {
            with_replacements = with_replacements.replace(replace, replace_with);
        }

        write!(f, "\"{with_replacements}\" ({:?})", self.variant)
    }
}
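
#[cfg(test)]
mod tests {
    // A minimal sketch of the control-character replacement performed in `Display for Token`
    // above. It operates on a plain string rather than a `Token`, since constructing a
    // `Fragment` depends on source-tracking types not defined in this file; the replacement
    // pairs are copied from the unicode branch of the match in `Display::fmt`.
    #[test]
    fn newline_and_carriage_return_are_replaced() {
        let replacements = [("\n", "\u{240A}"), ("\r", "\u{240D}")];

        let mut with_replacements = String::from("let x = 1;\r\n");

        for (replace, replace_with) in replacements {
            with_replacements = with_replacements.replace(replace, replace_with);
        }

        // "\r" becomes U+240D (␍) and "\n" becomes U+240A (␊).
        assert_eq!(with_replacements, "let x = 1;\u{240D}\u{240A}");
    }
}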