diff options
| author | u <@> | 2026-03-11 06:19:08 +0200 |
|---|---|---|
| committer | u <@> | 2026-03-11 08:24:39 +0200 |
| commit | 9bdd9eb5442bbdeefa08ea60fd966c8f9a83c543 (patch) | |
| tree | 31ca975794ad966935ac44fe8a59765e9df9b227 /src/dateparse.rs | |
| parent | bfc2cabc3b0d6af8c7ca0496b7bd815fdbb658f6 (diff) | |
drop regex and handroll parser
The rustflags change, the leaner parser, and dropping the regex crate
together yield a 4x decrease in binary size; I also see it run around
2.5x faster.
Diffstat (limited to 'src/dateparse.rs')
| -rw-r--r-- | src/dateparse.rs | 81 |
1 files changed, 81 insertions, 0 deletions
diff --git a/src/dateparse.rs b/src/dateparse.rs new file mode 100644 index 0000000..ac5dab1 --- /dev/null +++ b/src/dateparse.rs @@ -0,0 +1,81 @@ +use std::simd::{ + cmp::{SimdPartialEq, SimdPartialOrd}, + simd_swizzle, u8x16, +}; + +#[must_use] +pub fn dateparse(s: &str) -> Option<(usize, usize, usize)> { + // inf and sup for indiv. values + const LO: u8x16 = u8x16::from_array([ + b'0', b'0', b'/', b'0', b'0', b'/', b'0', b'0', // + 0, 0, 0, 0, 0, 0, 0, 0, // load_or_default isnt const + ]); + const HI: u8x16 = u8x16::from_array([ + b'1', b'9', b'/', b'3', b'9', b'/', b'9', b'9', // + 0xFF, 0xFF, 0xFF, 0xFF, // + 0xFF, 0xFF, 0xFF, 0xFF, // + ]); + const INTMSK: u8x16 = u8x16::from_array([b'0'; 16]); + const SLHMSK: u8x16 = u8x16::from_array([b'/'; 16]); + const NUMMSK: u8x16 = u8x16::from_array([ + 10, 1, 0, // [m_10, m_1] -> m_10 * 10 + m_1 (0 is slash) + 10, 1, 0, // dd .. + 10, 1, 0, // yy .. + 0, 0, 0, 0, 0, 0, 0, + ]); + let b = s.as_bytes(); + let v = u8x16::load_or_default(b); + let mut valid = true; + // index 16 for swizzles with INTMSK is b'0' + // index 15 is 0 + // this will compile to a shuffle, but a relaxed_laneselect would be cool + let mut v = match (b.len(), v.simd_eq(SLHMSK).to_bitmask()) { + // m/d/yy + (6, 0b1010) => simd_swizzle!( + v, + INTMSK, + [16, 0, 1, 16, 2, 3, 4, 5, 15, 15, 15, 15, 15, 15, 15, 15] + ), + // m/dd/yy + (7, 0b10010) => simd_swizzle!( + v, + INTMSK, + [16, 0, 1, 2, 3, 4, 5, 6, 15, 15, 15, 15, 15, 15, 15, 15] + ), + // mm/d/yy + (7, 0b10100) => simd_swizzle!( + v, + INTMSK, + [0, 1, 2, 16, 3, 4, 5, 6, 15, 15, 15, 15, 15, 15, 15, 15] + ), + // mm/dd/yy + (8, 0b100100) => v, + _ => { + valid = false; + v + } + }; + + // est-ce numerique? + valid &= (v.simd_ge(LO) & v.simd_le(HI)).all(); + + // parse + // '0' '3' '/' '3' '1' '/' '2' '6' + // - '0' '0' '0' '0' '0' '0' '0' '0' + // --------------------------------- + // 0 3 . 3 1 . 
2 6 + // * 10 1 0 10 1 0 10 1 + // --------------------------------- + // 0 3 0 30 1 0 20 6 + // + 3 0 30 1 0 20 6<<<< + // --------------------------------- + // 3 . . 31 . . 26 + v -= INTMSK; + v *= NUMMSK; + v += v.rotate_elements_left::<1>(); + let n = v.to_array(); + let (m, d, y) = (n[0], n[3], n[6]); + + (valid & (1..=12).contains(&m) & (1..=31).contains(&d)) + .then(|| (m.into(), d.into(), y.into())) +} |
