1use anstyle_parse::state::state_change;
2use anstyle_parse::state::Action;
3use anstyle_parse::state::State;
4
5#[inline]
22pub fn strip_str(data: &str) -> StrippedStr<'_> {
23 StrippedStr::new(data)
24}
25
/// Iterator over the printable segments of a `&str`; see [`strip_str`].
#[derive(Default, Clone, Debug, PartialEq, Eq)]
pub struct StrippedStr<'s> {
    /// Input bytes that have not been processed yet.
    bytes: &'s [u8],
    /// Escape-sequence parser state carried between calls to `next`.
    state: State,
}
32
33impl<'s> StrippedStr<'s> {
34 #[inline]
35 fn new(data: &'s str) -> Self {
36 Self {
37 bytes: data.as_bytes(),
38 state: State::Ground,
39 }
40 }
41
42 #[inline]
44 #[allow(clippy::inherent_to_string_shadow_display)] pub fn to_string(&self) -> String {
46 use std::fmt::Write as _;
47 let mut stripped = String::with_capacity(self.bytes.len());
48 let _ = write!(&mut stripped, "{self}");
49 stripped
50 }
51}
52
53impl<'s> std::fmt::Display for StrippedStr<'s> {
54 #[inline]
56 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
57 let iter = Self {
58 bytes: self.bytes,
59 state: self.state,
60 };
61 for printable in iter {
62 printable.fmt(f)?;
63 }
64 Ok(())
65 }
66}
67
68impl<'s> Iterator for StrippedStr<'s> {
69 type Item = &'s str;
70
71 #[inline]
72 fn next(&mut self) -> Option<Self::Item> {
73 next_str(&mut self.bytes, &mut self.state)
74 }
75}
76
/// Incrementally strip ANSI escapes from text supplied as separate `&str` segments.
#[derive(Default, Clone, Debug, PartialEq, Eq)]
pub struct StripStr {
    /// Escape-sequence parser state shared by successive [`StripStr::strip_next`] calls.
    state: State,
}
82
83impl StripStr {
84 pub fn new() -> Self {
86 Default::default()
87 }
88
89 pub fn strip_next<'s>(&'s mut self, data: &'s str) -> StripStrIter<'s> {
91 StripStrIter {
92 bytes: data.as_bytes(),
93 state: &mut self.state,
94 }
95 }
96}
97
/// Iterator over the printable segments of one input segment; see [`StripStr`].
#[derive(Debug, PartialEq, Eq)]
pub struct StripStrIter<'s> {
    /// Bytes of the current segment that have not been processed yet.
    bytes: &'s [u8],
    /// Parser state borrowed from the owning [`StripStr`].
    state: &'s mut State,
}
104
105impl<'s> Iterator for StripStrIter<'s> {
106 type Item = &'s str;
107
108 #[inline]
109 fn next(&mut self) -> Option<Self::Item> {
110 next_str(&mut self.bytes, self.state)
111 }
112}
113
114#[inline]
115fn next_str<'s>(bytes: &mut &'s [u8], state: &mut State) -> Option<&'s str> {
116 let offset = bytes.iter().copied().position(|b| {
117 let (next_state, action) = state_change(*state, b);
118 if next_state != State::Anywhere {
119 *state = next_state;
120 }
121 is_printable_bytes(action, b)
122 });
123 let (_, next) = bytes.split_at(offset.unwrap_or(bytes.len()));
124 *bytes = next;
125 *state = State::Ground;
126
127 let offset = bytes.iter().copied().position(|b| {
128 let (_next_state, action) = state_change(State::Ground, b);
129 !(is_printable_bytes(action, b) || is_utf8_continuation(b))
130 });
131 let (printable, next) = bytes.split_at(offset.unwrap_or(bytes.len()));
132 *bytes = next;
133 if printable.is_empty() {
134 None
135 } else {
136 let printable = unsafe {
137 from_utf8_unchecked(
138 printable,
139 "`bytes` was validated as UTF-8, the parser preserves UTF-8 continuations",
140 )
141 };
142 Some(printable)
143 }
144}
145
/// Reinterpret `bytes` as a `&str`, validating only when `debug_assertions` are enabled.
///
/// With `debug_assertions` the bytes are checked and a failure panics with
/// `safety_justification` as the message. Without them no validation is performed.
///
/// # Safety
///
/// `bytes` must be valid UTF-8.
#[inline]
unsafe fn from_utf8_unchecked<'b>(bytes: &'b [u8], safety_justification: &'static str) -> &'b str {
    if cfg!(debug_assertions) {
        return std::str::from_utf8(bytes).expect(safety_justification);
    }
    // SAFETY: the caller guarantees that `bytes` is valid UTF-8.
    unsafe { std::str::from_utf8_unchecked(bytes) }
}
157
/// Whether `b` is a UTF-8 continuation byte, i.e. has the bit pattern `10xxxxxx`
/// (`0x80..=0xbf`).
#[inline]
fn is_utf8_continuation(b: u8) -> bool {
    const TOP_TWO_BITS: u8 = 0b1100_0000;
    const CONTINUATION_TAG: u8 = 0b1000_0000;

    (b & TOP_TWO_BITS) == CONTINUATION_TAG
}
162
163#[inline]
178pub fn strip_bytes(data: &[u8]) -> StrippedBytes<'_> {
179 StrippedBytes::new(data)
180}
181
/// Iterator over the printable slices of a byte buffer; see [`strip_bytes`].
#[derive(Default, Clone, Debug, PartialEq, Eq)]
pub struct StrippedBytes<'s> {
    /// Input bytes that have not been processed yet.
    bytes: &'s [u8],
    /// Escape-sequence parser state carried between calls to `next`.
    state: State,
    /// Tracks progress through a multi-byte UTF-8 sequence.
    utf8parser: Utf8Parser,
}
189
190impl<'s> StrippedBytes<'s> {
191 #[inline]
193 pub fn new(bytes: &'s [u8]) -> Self {
194 Self {
195 bytes,
196 state: State::Ground,
197 utf8parser: Default::default(),
198 }
199 }
200
201 #[inline]
209 pub fn extend(&mut self, bytes: &'s [u8]) {
210 debug_assert!(
211 self.is_empty(),
212 "current bytes must be processed to ensure we end at the right state"
213 );
214 self.bytes = bytes;
215 }
216
217 #[inline]
219 pub fn is_empty(&self) -> bool {
220 self.bytes.is_empty()
221 }
222
223 #[inline]
225 pub fn into_vec(self) -> Vec<u8> {
226 let mut stripped = Vec::with_capacity(self.bytes.len());
227 for printable in self {
228 stripped.extend(printable);
229 }
230 stripped
231 }
232}
233
234impl<'s> Iterator for StrippedBytes<'s> {
235 type Item = &'s [u8];
236
237 #[inline]
238 fn next(&mut self) -> Option<Self::Item> {
239 next_bytes(&mut self.bytes, &mut self.state, &mut self.utf8parser)
240 }
241}
242
/// Incrementally strip ANSI escapes from bytes supplied as separate slices.
#[derive(Default, Clone, Debug, PartialEq, Eq)]
pub struct StripBytes {
    /// Escape-sequence parser state shared by successive [`StripBytes::strip_next`] calls.
    state: State,
    /// UTF-8 decoding progress shared by successive [`StripBytes::strip_next`] calls.
    utf8parser: Utf8Parser,
}
249
250impl StripBytes {
251 pub fn new() -> Self {
253 Default::default()
254 }
255
256 pub fn strip_next<'s>(&'s mut self, bytes: &'s [u8]) -> StripBytesIter<'s> {
258 StripBytesIter {
259 bytes,
260 state: &mut self.state,
261 utf8parser: &mut self.utf8parser,
262 }
263 }
264}
265
/// Iterator over the printable slices of one input segment; see [`StripBytes`].
#[derive(Debug, PartialEq, Eq)]
pub struct StripBytesIter<'s> {
    /// Bytes of the current segment that have not been processed yet.
    bytes: &'s [u8],
    /// Parser state borrowed from the owning [`StripBytes`].
    state: &'s mut State,
    /// UTF-8 decoding progress borrowed from the owning [`StripBytes`].
    utf8parser: &'s mut Utf8Parser,
}
273
274impl<'s> Iterator for StripBytesIter<'s> {
275 type Item = &'s [u8];
276
277 #[inline]
278 fn next(&mut self) -> Option<Self::Item> {
279 next_bytes(&mut self.bytes, self.state, self.utf8parser)
280 }
281}
282
/// Advance `bytes` past the next run of printable bytes and return that run.
///
/// Works in two passes over the front of `bytes`:
/// 1. skip non-printable bytes while updating `state`;
/// 2. measure the printable run that follows, using `utf8parser` to keep multi-byte UTF-8
///    sequences together.
///
/// Both `state` and `utf8parser` are left as the passes updated them, so a later call (possibly
/// with a different slice) continues from there. Returns `None` when no printable run was found.
#[inline]
fn next_bytes<'s>(
    bytes: &mut &'s [u8],
    state: &mut State,
    utf8parser: &mut Utf8Parser,
) -> Option<&'s [u8]> {
    // Pass 1: find the first byte that belongs to printable content.
    let offset = bytes.iter().copied().position(|b| {
        if *state == State::Utf8 {
            // Already inside a multi-byte character (presumably begun in an earlier call), so
            // the very first byte is part of printable content.
            true
        } else {
            let (next_state, action) = state_change(*state, b);
            // `State::Anywhere` is treated as "stay in the current state".
            if next_state != State::Anywhere {
                *state = next_state;
            }
            is_printable_bytes(action, b)
        }
    });
    // Drop the skipped prefix (or everything, if nothing printable was found).
    let (_, next) = bytes.split_at(offset.unwrap_or(bytes.len()));
    *bytes = next;

    // Pass 2: find the first byte that ends the printable run.
    let offset = bytes.iter().copied().position(|b| {
        if *state == State::Utf8 {
            // Feed the byte to the UTF-8 parser; `add` returns `true` once a code point was
            // completed or the sequence was reported invalid, which ends the UTF-8 state.
            if utf8parser.add(b) {
                *state = State::Ground;
            }
            // Bytes consumed while in the UTF-8 state never end the run.
            false
        } else {
            // Judge the byte as if the parser were in the ground state.
            let (next_state, action) = state_change(State::Ground, b);
            if next_state != State::Anywhere {
                *state = next_state;
            }
            if *state == State::Utf8 {
                // This byte starts a multi-byte character: hand it to the UTF-8 parser and keep
                // it in the run.
                utf8parser.add(b);
                false
            } else {
                !is_printable_bytes(action, b)
            }
        }
    });
    let (printable, next) = bytes.split_at(offset.unwrap_or(bytes.len()));
    *bytes = next;
    if printable.is_empty() {
        None
    } else {
        Some(printable)
    }
}
330
/// Thin wrapper around [`utf8parse::Parser`] that reports when a UTF-8 sequence has ended.
#[derive(Default, Clone, Debug, PartialEq, Eq)]
pub(crate) struct Utf8Parser {
    /// The underlying byte-at-a-time UTF-8 decoder.
    utf8_parser: utf8parse::Parser,
}
335
336impl Utf8Parser {
337 fn add(&mut self, byte: u8) -> bool {
338 let mut b = false;
339 let mut receiver = VtUtf8Receiver(&mut b);
340 self.utf8_parser.advance(&mut receiver, byte);
341 b
342 }
343}
344
/// [`utf8parse::Receiver`] that sets the borrowed flag to `true` whenever the decoder reports a
/// code point or an invalid sequence.
struct VtUtf8Receiver<'a>(&'a mut bool);
346
347impl<'a> utf8parse::Receiver for VtUtf8Receiver<'a> {
348 fn codepoint(&mut self, _: char) {
349 *self.0 = true;
350 }
351
352 fn invalid_sequence(&mut self) {
353 *self.0 = true;
354 }
355}
356
357#[inline]
358fn is_printable_bytes(action: Action, byte: u8) -> bool {
359 const DEL: u8 = 0x7f;
362
363 (action == Action::Print && byte != DEL)
365 || action == Action::BeginUtf8
366 || (action == Action::Execute && byte.is_ascii_whitespace())
367}
368
#[cfg(test)]
mod test {
    use super::*;
    use proptest::prelude::*;

    /// Reference implementation: run the full `anstyle_parse` parser and keep what it prints,
    /// plus any executed ASCII whitespace.
    fn parser_strip(bytes: &[u8]) -> String {
        #[derive(Default)]
        struct Strip(String);
        impl Strip {
            fn with_capacity(capacity: usize) -> Self {
                Self(String::with_capacity(capacity))
            }
        }
        impl anstyle_parse::Perform for Strip {
            fn print(&mut self, c: char) {
                self.0.push(c);
            }

            fn execute(&mut self, byte: u8) {
                if byte.is_ascii_whitespace() {
                    self.0.push(byte as char);
                }
            }
        }

        let mut stripped = Strip::with_capacity(bytes.len());
        let mut parser = anstyle_parse::Parser::<anstyle_parse::DefaultCharAccumulator>::new();
        for byte in bytes {
            parser.advance(&mut stripped, *byte);
        }
        stripped.0
    }

    /// Strip `s` one `char` at a time through a single `StripStr`.
    fn strip_char(mut s: &str) -> String {
        let mut result = String::new();
        let mut state = StripStr::new();
        while !s.is_empty() {
            // The index of the second character is the end of the first one.
            let mut indices = s.char_indices();
            indices.next();
            let offset = indices.next().map(|(i, _)| i).unwrap_or(s.len());
            let (current, remainder) = s.split_at(offset);
            for printable in state.strip_next(current) {
                result.push_str(printable);
            }
            s = remainder;
        }
        result
    }

    /// Strip `s` one byte at a time through a single `StripBytes`.
    fn strip_byte(s: &[u8]) -> Vec<u8> {
        let mut result = Vec::new();
        let mut state = StripBytes::default();
        for current in s.chunks(1) {
            for printable in state.strip_next(current) {
                result.extend(printable);
            }
        }
        result
    }

    #[test]
    fn test_strip_bytes_multibyte() {
        let bytes = [240, 145, 141, 139];
        let expected = parser_strip(&bytes);
        let actual = String::from_utf8(strip_bytes(&bytes).into_vec()).unwrap();
        assert_eq!(expected, actual);
    }

    #[test]
    fn test_strip_byte_multibyte() {
        let bytes = [240, 145, 141, 139];
        let expected = parser_strip(&bytes);
        let actual = String::from_utf8(strip_byte(&bytes)).unwrap();
        assert_eq!(expected, actual);
    }

    #[test]
    fn test_strip_str_del() {
        let input = std::str::from_utf8(&[0x7f]).unwrap();
        let expected = "";
        let actual = strip_str(input).to_string();
        assert_eq!(expected, actual);
    }

    #[test]
    fn test_strip_byte_del() {
        let bytes = [0x7f];
        let expected = "";
        let actual = String::from_utf8(strip_byte(&bytes)).unwrap();
        assert_eq!(expected, actual);
    }

    #[test]
    fn test_strip_str_handles_broken_sequence() {
        // The emoji and the `h` directly after the lone ESC are consumed with it.
        let s = "ö\x1b😀hello😀goodbye";
        let mut it = strip_str(s);
        assert_eq!("ö", it.next().unwrap());
        assert_eq!("ello😀goodbye", it.next().unwrap());
        assert_eq!(None, it.next());
    }

    proptest! {
        #[test]
        #[cfg_attr(miri, ignore)]
        fn strip_str_no_escapes(s in "\\PC*") {
            let expected = parser_strip(s.as_bytes());
            let actual = strip_str(&s).to_string();
            assert_eq!(expected, actual);
        }

        #[test]
        #[cfg_attr(miri, ignore)]
        fn strip_char_no_escapes(s in "\\PC*") {
            let expected = parser_strip(s.as_bytes());
            let actual = strip_char(&s);
            assert_eq!(expected, actual);
        }

        #[test]
        #[cfg_attr(miri, ignore)]
        fn strip_bytes_no_escapes(s in "\\PC*") {
            dbg!(&s);
            dbg!(s.as_bytes());
            let expected = parser_strip(s.as_bytes());
            let actual = String::from_utf8(strip_bytes(s.as_bytes()).into_vec()).unwrap();
            assert_eq!(expected, actual);
        }

        #[test]
        #[cfg_attr(miri, ignore)]
        fn strip_byte_no_escapes(s in "\\PC*") {
            dbg!(&s);
            dbg!(s.as_bytes());
            let expected = parser_strip(s.as_bytes());
            let actual = String::from_utf8(strip_byte(s.as_bytes())).unwrap();
            assert_eq!(expected, actual);
        }
    }
}