Skip to content

Commit 6cb1896

Browse files
committed
Optimize Rfc2822 parsing
1 parent 6d264d5 commit 6cb1896

4 files changed

Lines changed: 120 additions & 149 deletions

File tree

time/src/parsing/combinator/mod.rs

Lines changed: 0 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -27,34 +27,6 @@ pub(crate) const fn sign(input: &[u8]) -> Option<ParsedItem<'_, Sign>> {
2727
}
2828
}
2929

30-
/// Consume the first matching item, returning its associated value.
///
/// `options` is a sequence of `(expected, value)` pairs. The returned parser checks whether its
/// input starts with `expected` and, for the first pair that does, yields the remaining input
/// together with `value`. When `case_sensitive` is `false` the prefix comparison ignores ASCII
/// case.
///
/// The iterator built from `options` is moved into the returned closure and advanced by each
/// call, so pairs already consumed by one call are not revisited by a later call.
31-
#[inline]
32-
pub(crate) fn first_match<'a, T, I>(
33-
options: I,
34-
case_sensitive: bool,
35-
) -> impl for<'b> FnMut(&'b [u8]) -> Option<ParsedItem<'b, T>>
36-
where
37-
I: IntoIterator<Item = (&'a [u8], T)>,
38-
{
39-
let mut options = options.into_iter();
40-
move |input| {
41-
if case_sensitive {
42-
// Exact prefix match: `strip_prefix` yields the tail only when `expected` leads `input`.
options.find_map(|(expected, t)| Some(ParsedItem(input.strip_prefix(expected)?, t)))
43-
} else {
44-
options.find_map(|(expected, t)| {
45-
let n = expected.len();
46-
// Guard the split: `split_at` panics when `n` exceeds the input length.
if n <= input.len() {
47-
let (head, tail) = input.split_at(n);
48-
if head.eq_ignore_ascii_case(expected) {
49-
return Some(ParsedItem(tail, t));
50-
}
51-
}
52-
None
53-
})
54-
}
55-
}
56-
}
57-
5830
/// Consume zero or more instances of the provided parser. The parser must return the unit value.
5931
#[inline]
6032
pub(crate) fn zero_or_more<P>(parser: P) -> impl for<'a> FnMut(&'a [u8]) -> ParsedItem<'a, ()>

time/src/parsing/combinator/rfc/rfc2822.rs

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
//!
33
//! [RFC 2822]: https://datatracker.ietf.org/doc/html/rfc2822
44
5+
use num_conv::prelude::*;
6+
57
use crate::parsing::ParsedItem;
68
use crate::parsing::combinator::rfc::rfc2234::wsp;
79
use crate::parsing::combinator::{ascii_char, one_or_more, zero_or_more};
@@ -117,3 +119,70 @@ fn text<'a>(input: &'a [u8]) -> ParsedItem<'a, ()> {
117119

118120
new_text(input).unwrap_or_else(|| obs_text(input))
119121
}
122+
123+
/// Consume an old zone literal, returning the offset in hours.
124+
#[inline]
125+
pub(crate) fn zone_literal(input: &[u8]) -> Option<ParsedItem<'_, i8>> {
126+
let [first, second, third, rest @ ..] = input else {
127+
const UT_VARIANTS: [u16; 4] = [
128+
u16::from_ne_bytes([b'u', b't']),
129+
u16::from_ne_bytes([b'u', b'T']),
130+
u16::from_ne_bytes([b'U', b't']),
131+
u16::from_ne_bytes([b'U', b'T']),
132+
];
133+
134+
let [first, rest @ ..] = input else {
135+
return None;
136+
};
137+
if let [second, rest @ ..] = rest
138+
&& UT_VARIANTS.contains(&u16::from_ne_bytes([*first, *second]))
139+
{
140+
return Some(ParsedItem(rest, 0));
141+
}
142+
return (*first != b'j' && *first != b'J' && first.is_ascii_alphabetic())
143+
.then_some(ParsedItem(rest, 0));
144+
};
145+
let byte = u32::from_ne_bytes([
146+
0,
147+
first.to_ascii_lowercase(),
148+
second.to_ascii_lowercase(),
149+
third.to_ascii_lowercase(),
150+
]);
151+
const ZONES: [u32; 8] = [
152+
u32::from_ne_bytes([0, b'e', b's', b't']),
153+
u32::from_ne_bytes([0, b'e', b'd', b't']),
154+
u32::from_ne_bytes([0, b'c', b's', b't']),
155+
u32::from_ne_bytes([0, b'c', b'd', b't']),
156+
u32::from_ne_bytes([0, b'm', b's', b't']),
157+
u32::from_ne_bytes([0, b'm', b'd', b't']),
158+
u32::from_ne_bytes([0, b'p', b's', b't']),
159+
u32::from_ne_bytes([0, b'p', b'd', b't']),
160+
];
161+
162+
let eq = [
163+
if ZONES[0] == byte { i32::MAX } else { 0 },
164+
if ZONES[1] == byte { i32::MAX } else { 0 },
165+
if ZONES[2] == byte { i32::MAX } else { 0 },
166+
if ZONES[3] == byte { i32::MAX } else { 0 },
167+
if ZONES[4] == byte { i32::MAX } else { 0 },
168+
if ZONES[5] == byte { i32::MAX } else { 0 },
169+
if ZONES[6] == byte { i32::MAX } else { 0 },
170+
if ZONES[7] == byte { i32::MAX } else { 0 },
171+
];
172+
if eq == [0; 8] && byte != const { u32::from_ne_bytes([0, b'g', b'm', b't']) } {
173+
return None;
174+
}
175+
176+
let nonzero_zones = [
177+
eq[0] & -5,
178+
eq[1] & -4,
179+
eq[2] & -6,
180+
eq[3] & -5,
181+
eq[4] & -7,
182+
eq[5] & -6,
183+
eq[6] & -8,
184+
eq[7] & -7,
185+
];
186+
let zone = nonzero_zones.iter().sum::<i32>().truncate();
187+
Some(ParsedItem(rest, zone))
188+
}

time/src/parsing/mod.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,13 @@ impl<'a, T> ParsedItem<'a, T> {
3535
Some(self.0)
3636
}
3737

38+
/// Discard the stored value, returning the remaining input.
///
/// Intended for call sites that only need the input position after a successfully parsed
/// item and have no use for the parsed value itself.
39+
#[must_use = "this returns the remaining input"]
40+
#[inline]
41+
pub(crate) fn discard_value(self) -> &'a [u8] {
42+
self.0
43+
}
44+
3845
/// Filter the value with the provided function. If the function returns `false`, the value
3946
/// is discarded and `None` is returned. Otherwise, the value is preserved and `Some(self)` is
4047
/// returned.

time/src/parsing/parsable.rs

Lines changed: 44 additions & 121 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ use crate::format_description::well_known::{Iso8601, Rfc2822, Rfc3339};
1414
use crate::internal_macros::bug;
1515
use crate::parsing::combinator::{Sign, one_or_two_digits};
1616
use crate::parsing::{Parsed, ParsedItem};
17-
use crate::{Date, Month, OffsetDateTime, Time, UtcOffset, Weekday, error};
17+
use crate::{Date, Month, OffsetDateTime, Time, UtcOffset, error};
1818

1919
/// A type that can be parsed.
2020
#[cfg_attr(docsrs, doc(notable_trait))]
@@ -172,25 +172,23 @@ impl sealed::Sealed for Rfc2822 {
172172
parsed: &mut Parsed,
173173
) -> Result<&'a [u8], error::Parse> {
174174
use crate::error::ParseFromDescription::{InvalidComponent, InvalidLiteral};
175-
use crate::parsing::combinator::rfc::rfc2822::{cfws, fws};
176-
use crate::parsing::combinator::{ExactlyNDigits, ascii_char, first_match, opt, sign};
175+
use crate::format_description::modifier;
176+
use crate::parsing::combinator::rfc::rfc2822::{cfws, fws, zone_literal};
177+
use crate::parsing::combinator::{ExactlyNDigits, ascii_char, opt, sign};
178+
use crate::parsing::component;
177179

178180
let colon = ascii_char::<b':'>;
179181
let comma = ascii_char::<b','>;
180182

181183
let input = opt(cfws)(input).into_inner();
182-
let weekday = first_match(
183-
[
184-
(b"Mon".as_slice(), Weekday::Monday),
185-
(b"Tue".as_slice(), Weekday::Tuesday),
186-
(b"Wed".as_slice(), Weekday::Wednesday),
187-
(b"Thu".as_slice(), Weekday::Thursday),
188-
(b"Fri".as_slice(), Weekday::Friday),
189-
(b"Sat".as_slice(), Weekday::Saturday),
190-
(b"Sun".as_slice(), Weekday::Sunday),
191-
],
192-
false,
193-
)(input);
184+
let weekday = component::parse_weekday(
185+
input,
186+
modifier::Weekday {
187+
repr: modifier::WeekdayRepr::Short,
188+
one_indexed: false,
189+
case_sensitive: false,
190+
},
191+
);
194192
let input = if let Some(item) = weekday {
195193
let input = item
196194
.consume_value(|value| parsed.set_weekday(value))
@@ -204,23 +202,14 @@ impl sealed::Sealed for Rfc2822 {
204202
.and_then(|item| item.consume_value(|value| parsed.set_day(NonZero::new(value)?)))
205203
.ok_or(InvalidComponent("day"))?;
206204
let input = cfws(input).ok_or(InvalidLiteral)?.into_inner();
207-
let input = first_match(
208-
[
209-
(b"Jan".as_slice(), Month::January),
210-
(b"Feb".as_slice(), Month::February),
211-
(b"Mar".as_slice(), Month::March),
212-
(b"Apr".as_slice(), Month::April),
213-
(b"May".as_slice(), Month::May),
214-
(b"Jun".as_slice(), Month::June),
215-
(b"Jul".as_slice(), Month::July),
216-
(b"Aug".as_slice(), Month::August),
217-
(b"Sep".as_slice(), Month::September),
218-
(b"Oct".as_slice(), Month::October),
219-
(b"Nov".as_slice(), Month::November),
220-
(b"Dec".as_slice(), Month::December),
221-
],
222-
false,
223-
)(input)
205+
let input = component::parse_month(
206+
input,
207+
modifier::Month {
208+
padding: modifier::Padding::None,
209+
repr: modifier::MonthRepr::Short,
210+
case_sensitive: false,
211+
},
212+
)
224213
.and_then(|item| item.consume_value(|value| parsed.set_month(value)))
225214
.ok_or(InvalidComponent("month"))?;
226215
let input = cfws(input).ok_or(InvalidLiteral)?.into_inner();
@@ -271,33 +260,7 @@ impl sealed::Sealed for Rfc2822 {
271260
// The RFC explicitly allows leap seconds.
272261
parsed.leap_second_allowed = true;
273262

274-
#[expect(
275-
clippy::unnecessary_lazy_evaluations,
276-
reason = "rust-lang/rust-clippy#8522"
277-
)]
278-
let zone_literal = first_match(
279-
[
280-
(b"UT".as_slice(), 0),
281-
(b"GMT".as_slice(), 0),
282-
(b"EST".as_slice(), -5),
283-
(b"EDT".as_slice(), -4),
284-
(b"CST".as_slice(), -6),
285-
(b"CDT".as_slice(), -5),
286-
(b"MST".as_slice(), -7),
287-
(b"MDT".as_slice(), -6),
288-
(b"PST".as_slice(), -8),
289-
(b"PDT".as_slice(), -7),
290-
],
291-
false,
292-
)(input)
293-
.or_else(|| match input {
294-
[
295-
b'a'..=b'i' | b'k'..=b'z' | b'A'..=b'I' | b'K'..=b'Z',
296-
rest @ ..,
297-
] => Some(ParsedItem(rest, 0)),
298-
_ => None,
299-
});
300-
if let Some(zone_literal) = zone_literal {
263+
if let Some(zone_literal) = zone_literal(input) {
301264
let input = zone_literal
302265
.consume_value(|value| parsed.set_offset_hour(value))
303266
.ok_or(InvalidComponent("offset hour"))?;
@@ -333,53 +296,40 @@ impl sealed::Sealed for Rfc2822 {
333296

334297
fn parse_offset_date_time(&self, input: &[u8]) -> Result<OffsetDateTime, error::Parse> {
335298
use crate::error::ParseFromDescription::{InvalidComponent, InvalidLiteral};
336-
use crate::parsing::combinator::rfc::rfc2822::{cfws, fws};
337-
use crate::parsing::combinator::{ExactlyNDigits, ascii_char, first_match, opt, sign};
299+
use crate::format_description::modifier;
300+
use crate::parsing::combinator::rfc::rfc2822::{cfws, fws, zone_literal};
301+
use crate::parsing::combinator::{ExactlyNDigits, ascii_char, opt, sign};
302+
use crate::parsing::component;
338303

339304
let colon = ascii_char::<b':'>;
340305
let comma = ascii_char::<b','>;
341306

342307
let input = opt(cfws)(input).into_inner();
343-
// This parses the weekday, but we don't actually use the value anywhere. Because of this,
344-
// just return `()` to avoid unnecessary generated code.
345-
let weekday = first_match(
346-
[
347-
(b"Mon".as_slice(), ()),
348-
(b"Tue".as_slice(), ()),
349-
(b"Wed".as_slice(), ()),
350-
(b"Thu".as_slice(), ()),
351-
(b"Fri".as_slice(), ()),
352-
(b"Sat".as_slice(), ()),
353-
(b"Sun".as_slice(), ()),
354-
],
355-
false,
356-
)(input);
308+
let weekday = component::parse_weekday(
309+
input,
310+
modifier::Weekday {
311+
repr: modifier::WeekdayRepr::Short,
312+
one_indexed: false,
313+
case_sensitive: false,
314+
},
315+
);
357316
let input = if let Some(item) = weekday {
358-
let input = item.into_inner();
317+
let input = item.discard_value();
359318
let input = comma(input).ok_or(InvalidLiteral)?.into_inner();
360319
opt(cfws)(input).into_inner()
361320
} else {
362321
input
363322
};
364323
let ParsedItem(input, day) = one_or_two_digits(input).ok_or(InvalidComponent("day"))?;
365324
let input = cfws(input).ok_or(InvalidLiteral)?.into_inner();
366-
let ParsedItem(input, month) = first_match(
367-
[
368-
(b"Jan".as_slice(), Month::January),
369-
(b"Feb".as_slice(), Month::February),
370-
(b"Mar".as_slice(), Month::March),
371-
(b"Apr".as_slice(), Month::April),
372-
(b"May".as_slice(), Month::May),
373-
(b"Jun".as_slice(), Month::June),
374-
(b"Jul".as_slice(), Month::July),
375-
(b"Aug".as_slice(), Month::August),
376-
(b"Sep".as_slice(), Month::September),
377-
(b"Oct".as_slice(), Month::October),
378-
(b"Nov".as_slice(), Month::November),
379-
(b"Dec".as_slice(), Month::December),
380-
],
381-
false,
382-
)(input)
325+
let ParsedItem(input, month) = component::parse_month(
326+
input,
327+
modifier::Month {
328+
padding: modifier::Padding::None,
329+
repr: modifier::MonthRepr::Short,
330+
case_sensitive: false,
331+
},
332+
)
383333
.ok_or(InvalidComponent("month"))?;
384334
let input = cfws(input).ok_or(InvalidLiteral)?.into_inner();
385335
let (input, year) = match ExactlyNDigits::<4>::parse(input) {
@@ -421,34 +371,7 @@ impl sealed::Sealed for Rfc2822 {
421371
(cfws(input).ok_or(InvalidLiteral)?.into_inner(), 0)
422372
};
423373

424-
#[expect(
425-
clippy::unnecessary_lazy_evaluations,
426-
reason = "rust-lang/rust-clippy#8522"
427-
)]
428-
let zone_literal = first_match(
429-
[
430-
(b"UT".as_slice(), 0),
431-
(b"GMT".as_slice(), 0),
432-
(b"EST".as_slice(), -5),
433-
(b"EDT".as_slice(), -4),
434-
(b"CST".as_slice(), -6),
435-
(b"CDT".as_slice(), -5),
436-
(b"MST".as_slice(), -7),
437-
(b"MDT".as_slice(), -6),
438-
(b"PST".as_slice(), -8),
439-
(b"PDT".as_slice(), -7),
440-
],
441-
false,
442-
)(input)
443-
.or_else(|| match input {
444-
[
445-
b'a'..=b'i' | b'k'..=b'z' | b'A'..=b'I' | b'K'..=b'Z',
446-
rest @ ..,
447-
] => Some(ParsedItem(rest, 0)),
448-
_ => None,
449-
});
450-
451-
let (input, offset_hour, offset_minute) = if let Some(zone_literal) = zone_literal {
374+
let (input, offset_hour, offset_minute) = if let Some(zone_literal) = zone_literal(input) {
452375
let ParsedItem(input, offset_hour) = zone_literal;
453376
(input, offset_hour, 0)
454377
} else {

0 commit comments

Comments
 (0)