wright/lexer/comments.rs
1//! Implementation of comment token lexing.
2
3use super::{Lexer, token::TokenTy};
4
/// The pattern that begins any single-line comments (including doc comments).
pub const SINGLE_LINE_COMMENT_PREFIX: &str = "//";

/// The pattern that starts any multi-line comments (including doc comments).
pub const MULTI_LINE_COMMENT_START: &str = "/*";

/// The pattern that ends any multi-line comments (including doc comments).
pub const MULTI_LINE_COMMENT_END: &str = "*/";
13
14/// Attempt to match a sinlgle line comment from the start of the [Lexer::remaining] fragment.
15/// Return a [usize] and optionally a [TokenTy]. The [usize] indicates how many bytes were in the comment.
16/// The [TokenTy] (if it's not [None]) should be either [TokenTy::InnerDocComment] or [TokenTy::OuterDocComment].
17///
18/// If the [TokenTy] is not [None], the lexer should consume the specified number of bytes (by the [usize]) and
19/// Produce a token with the [variant](super::token::Token::variant) from this function.
20///
21/// Generally I'm trying to follow the [rust comment spec] here.
22///
23/// [rust comment spec]: https://doc.rust-lang.org/reference/comments.html
24pub fn try_match_single_line_comment(lexer: &Lexer) -> (usize, Option<TokenTy>) {
25 // Fork the lexer so we can do all the parsing on the fork without worrying about modifying the original
26 // unnecessarily.
27 let mut fork: Lexer = lexer.fork();
28
29 // Try to consume the single line comment prefix from the fork.
30 if fork.consume(SINGLE_LINE_COMMENT_PREFIX) {
31 // We consumed it successfully, read through a newline or the end of the forked lexer if we get there.
32
33 // First determine if this is a doc comment of some kind.
34 let is_inner_doc: bool = fork.matches("/") && !fork.matches("//");
35 let is_outer_doc: bool = fork.matches("!");
36
37 // The consume until a newline, carraige return, or the end of the source fragment.
38 while !fork.remaining.is_empty() && !fork.matches("\r") && !fork.matches("\n") {
39 fork.consume_any();
40 }
41
42 // Determine the kind of token to produce (if any).
43 let variant: Option<TokenTy> = match (is_inner_doc, is_outer_doc) {
44 (true, false) => Some(TokenTy::InnerDocComment),
45 (false, true) => Some(TokenTy::OuterDocComment),
46 (false, false) => None,
47 (true, true) => unreachable!(
48 "It is impossible for the `remaining` fragment to start with an `!` and a `/` simultaneously."
49 ),
50 };
51
52 // Return the number of bytes consumed and the type of token to
53 // produce if any.
54 return (fork.offset_from(lexer), variant);
55 }
56
57 // If the single line comment prefix was not immediately available, there is no comment.
58 (0, None)
59}
60
/// Attempt to match a block comment from the start of the [Lexer::remaining] fragment.
/// Return a [usize] and optionally a [TokenTy]. The [usize] indicates how many bytes were in the comment.
/// The [TokenTy] (if it's not [None]) should be [TokenTy::InnerBlockDocComment], [TokenTy::OuterBlockDocComment], or
/// [TokenTy::UnterminatedBlockComment].
///
/// If the [TokenTy] is not [None], the lexer should consume the specified number of bytes (by the [usize]) and
/// produce a token with the [variant](super::token::Token::variant) from this function. A [None] variant
/// with a non-zero byte count is a plain, non-documenting comment that can be discarded.
pub fn try_match_block_comment(lexer: &Lexer) -> (usize, Option<TokenTy>) {
    // Handle corner cases here so we don't have to below.
    // `/***/` and `/**/` would otherwise look like doc-comment openers (`/**`) immediately
    // followed by their terminator; both are considered empty non-documenting comments.
    if lexer.matches("/***/") {
        return (5, None);
    }

    if lexer.matches("/**/") {
        return (4, None);
    }

    // Make a fork of the lexer to avoid modifying this lexer if we fail to parse.
    let mut fork: Lexer = lexer.fork();

    // Try to parse the start of a multi-line comment.
    if fork.consume(MULTI_LINE_COMMENT_START) {
        // Check if this is a doc comment: `/*!` opens an outer block doc comment.
        let is_outer_doc: bool = fork.matches("!");
        // `/**` opens an inner block doc comment, but more than one following asterisk
        // (`/***...`) is not a doc comment.
        let is_inner_doc: bool = fork.matches("*") && !fork.matches("**");

        // Consume until we see the end of the doc comment. If we run out of characters, consider the
        // comment unterminated.
        while !fork.matches(MULTI_LINE_COMMENT_END) {
            // Handle nested comments here: a nested `/* ... */` must be skipped as a unit so
            // that its own `*/` does not terminate the outer comment.
            if fork.matches(MULTI_LINE_COMMENT_START) {
                // Discard the variant output -- we don't care about doc comments inside other comments.
                // (An unterminated nested comment consumes to the end of input, so the loop will
                // then fall through to the unterminated case below.)
                let (nested_comment_bytes, _) = try_match_block_comment(&fork);

                // SAFETY: the return from this function should never be on a char boundary or out of bounds.
                // This is because the return value is always either 0 or calculated using `offset_from`.
                unsafe { fork.advance_unchecked(nested_comment_bytes) };

                // Restart the loop to keep consuming this comment.
                continue;
            }

            // Handle unterminated comments here.
            if fork.remaining.is_empty() {
                // If we have not hit a "*/" before the end of the input, return an unterminated block comment.
                let bytes_consumed: usize = fork.offset_from(lexer);
                return (bytes_consumed, Some(TokenTy::UnterminatedBlockComment));
            }

            // If there's still input, and not a nested comment, consume it.
            fork.consume_any();
        }

        // If we get here, the comment was terminated. Consume the terminating characters, and return.
        // Use debug assert here to make sure that the comment is actually terminated (the loop above
        // only exits when `fork` matches the terminator, so this consume cannot fail).
        let consumed_comment_terminator: bool = fork.consume(MULTI_LINE_COMMENT_END);
        debug_assert!(consumed_comment_terminator, "comment is actually terminated");

        // Determine the kind of token to produce (if any).
        let variant: Option<TokenTy> = match (is_inner_doc, is_outer_doc) {
            (true, false) => Some(TokenTy::InnerBlockDocComment),
            (false, true) => Some(TokenTy::OuterBlockDocComment),
            (false, false) => None,
            // The fragment cannot start with both `!` and `*` at the same time.
            (true, true) => {
                unreachable!("Lexer should not match multiple comment types at once.")
            }
        };

        return (fork.offset_from(lexer), variant);
    }

    // No `/*` at the start of the fragment means no block comment at all.
    (0, None)
}
136
#[cfg(test)]
mod tests {
    use super::Lexer;

    /// A plain (non-doc) single line comment should produce no token, and the
    /// whole input should be consumed.
    #[test]
    fn ignored_single_line_comment() {
        let mut lexer = Lexer::new_test("// test comment ");
        assert!(lexer.next_token().is_none());
        // Use `is_empty` (rather than `len() == 0`) for idiom and for
        // consistency with the checks in the lexing functions above.
        assert!(lexer.remaining.is_empty());
    }
}
147}