// wright/lexer/token.rs
//! Token models.

use std::fmt::{self, Display};
use crate::source_tracking::fragment::Fragment;

/// A token in wright source code.
///
/// Pairs the kind of token (a [`TokenTy`]) with the [`Fragment`] of source code that it matched.
/// The [`Display`] implementation in this module prints the fragment's text in quotes, followed by
/// the variant in parentheses.
#[derive(Debug)]
pub struct Token {
    /// What type of token this is.
    pub variant: TokenTy,
    /// The matching fragment of source code -- this contains the location and length data for the token.
    pub fragment: Fragment,
}

/// The different types of tokens in wright source.
///
/// Every variant is a plain tag except the string, format-string, and character literal variants,
/// which each carry a `terminated` flag. The type is [`Copy`], so it can be passed around by value.
#[rustfmt::skip] // Turn off auto reformat so the hand-made grouping of variants below is preserved.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
// Allow missing docs (most of these should be self-evident). 
#[allow(missing_docs)]
pub enum TokenTy {
    // Paired delimiters, opening variant first.
    LeftCurly, RightCurly,
    LeftBracket, RightBracket,
    LeftParen, RightParen,

    // Operators, each listed next to its `Eq`-suffixed counterpart.
    Plus, PlusEq,
    Star, StarEq,
    Div, DivEq,
    Xor, XorEq,
    Mod, ModEq,
    Bang, BangEq,

    // Operators that also have an arrow variant.
    Minus, MinusEq, SingleArrow,
    Eq, EqEq, DoubleArrow,

    // Operators that have both an `Eq`-suffixed and a doubled variant.
    Lt, LtEq, LtLt,
    Gt, GtEq, GtGt,
    And, AndEq, AndAnd,
    Or, OrEq, OrOr,
    Colon, ColonEq, ColonColon,

    // Standalone punctuation.
    At,
    Tilde,
    Semi,
    Dot,
    Comma,
    Hash,
    Question,
    Dollar,
    
    // Kept apart from the punctuation group above, since an underscore can also be used at the start of identifiers.
    Underscore,

    Identifier,

    // Documentation comments: line-style and block-style, in outer and inner flavours.
    OuterDocComment, OuterBlockDocComment,
    InnerDocComment, InnerBlockDocComment,
    
    /// Indicates a block style comment without termination. 
    /// Separate from [TokenTy::InnerDocComment] and [TokenTy::OuterDocComment] to indicate that 
    /// unterminated comments will be handled differently (produce errors eventually). 
    // NOTE(review): the links above name the line-style doc comment variants; the block-style
    // variants may have been intended -- confirm.
    UnterminatedBlockComment,

    // Keywords (all prefixed with `Kw`).
    KwRecord,
    KwType,
    KwEnum,
    KwUnion,
    KwFunc,
    KwRepr,
    KwImpl,
    KwConstraint,
    KwReferences,
    KwTrait,
    KwUse,
    KwAs,
    KwConst,
    KwMod,
    KwIf,
    KwElse,
    KwMatch,
    KwFor,
    KwIn,
    KwWhile,
    KwTrue,
    KwFalse,
    KwLoop,
    KwWhere,

    // Literals. The `terminated` flag presumably records whether the closing delimiter was
    // found -- verify against the lexer that constructs these.
    IntegerLiteral,
    StringLiteral { terminated: bool }, 
    FormatStringLiteral { terminated: bool },
    CharLiteral { terminated: bool },

    /// Whitespace counts as a token.
    Whitespace,

    /// Unknown character in lexer fragment. 
    Unknown
}

impl Display for Token {
    /// Writes the token as its source text in double quotes, followed by its variant (in `Debug`
    /// form) in parentheses.
    ///
    /// Newlines and carriage returns in the text are swapped for visible stand-ins so that the
    /// output stays on a single line.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Use the unicode control pictures when the host terminal supports unicode, and
        // bracketed ascii names otherwise.
        let (newline_symbol, carriage_return_symbol) =
            if crate::util::supports_unicode::supports_unicode() {
                ("\u{240A}", "\u{240D}")
            } else {
                ("[nl]", "[cr]")
            };

        // Neither stand-in contains a newline or carriage return, so the two substitutions
        // cannot interfere with one another.
        let with_replacements = self
            .fragment
            .as_str()
            .replace("\n", newline_symbol)
            .replace("\r", carriage_return_symbol);

        write!(f, "\"{with_replacements}\" ({:?})", self.variant)
    }
}