summaryrefslogtreecommitdiff
path: root/src/dateparse.rs
diff options
context:
space:
mode:
authoru <@>2026-03-11 06:19:08 +0200
committeru <@>2026-03-11 08:24:39 +0200
commit9bdd9eb5442bbdeefa08ea60fd966c8f9a83c543 (patch)
tree31ca975794ad966935ac44fe8a59765e9df9b227 /src/dateparse.rs
parentbfc2cabc3b0d6af8c7ca0496b7bd815fdbb658f6 (diff)
drop regex and handroll parser
The rustflags change, the leaner parser, and dropping the regex crate yield a 4x binary size decrease; I can also see it runs around 2.5x faster.
Diffstat (limited to 'src/dateparse.rs')
-rw-r--r--src/dateparse.rs81
1 files changed, 81 insertions, 0 deletions
diff --git a/src/dateparse.rs b/src/dateparse.rs
new file mode 100644
index 0000000..ac5dab1
--- /dev/null
+++ b/src/dateparse.rs
@@ -0,0 +1,81 @@
+use std::simd::{
+ cmp::{SimdPartialEq, SimdPartialOrd},
+ simd_swizzle, u8x16,
+};
+
+#[must_use]
+pub fn dateparse(s: &str) -> Option<(usize, usize, usize)> {
+ /* Parses a date written as `m/d/yy`, `m/dd/yy`, `mm/d/yy` or `mm/dd/yy`.
+  *
+  * Returns `Some((month, day, year))`, where `year` is the two-digit value
+  * exactly as written (0..=99). Returns `None` when the byte length and slash
+  * positions match none of the four layouts, when a digit position holds a
+  * byte outside its LO..=HI bound, or when the month is outside 1..=12 or the
+  * day is outside 1..=31. The day is NOT checked against the month's length,
+  * so e.g. `02/31/26` is accepted.
+  *
+  * LO / HI: inclusive per-lane lower / upper bounds for the normalized
+  * `mm/dd/yy` layout. The two slash lanes only admit b'/'; lanes 8..16 admit
+  * any byte. */
+ const LO: u8x16 = u8x16::from_array([
+ b'0', b'0', b'/', b'0', b'0', b'/', b'0', b'0', // m m / d d / y y
+ 0, 0, 0, 0, 0, 0, 0, 0, // spelled out by hand: load_or_default is not const
+ ]);
+ const HI: u8x16 = u8x16::from_array([
+ b'1', b'9', b'/', b'3', b'9', b'/', b'9', b'9', // tens digit capped at 1 (month) and 3 (day)
+ 0xFF, 0xFF, 0xFF, 0xFF, //
+ 0xFF, 0xFF, 0xFF, 0xFF, //
+ ]);
+ const INTMSK: u8x16 = u8x16::from_array([b'0'; 16]);
+ const SLHMSK: u8x16 = u8x16::from_array([b'/'; 16]);
+ const NUMMSK: u8x16 = u8x16::from_array([
+ 10, 1, 0, // mm: [m_10, m_1] -> m_10 * 10 + m_1 (the 0 weight erases the slash lane)
+ 10, 1, 0, // dd: same weights for the day
+ 10, 1, 0, // yy: same weights for the year (third lane is padding, not a slash)
+ 0, 0, 0, 0, 0, 0, 0,
+ ]);
+ let b = s.as_bytes();
+ let v = u8x16::load_or_default(b); // up to 16 leading bytes of s; lanes past the end are 0
+ let mut valid = true;
+ // swizzle indices 0..16 pick from v, 16..32 pick from INTMSK, so index 16 is b'0'
+ // index 15 is v[15], which is 0 in every arm below because the input is at most 8 bytes
+ // this will compile to a shuffle, but a relaxed_laneselect would be cool
+ let mut v = match (b.len(), v.simd_eq(SLHMSK).to_bitmask()) {
+ // m/d/yy: slashes at bytes 1 and 3; insert a leading '0' before both month and day
+ (6, 0b1010) => simd_swizzle!(
+ v,
+ INTMSK,
+ [16, 0, 1, 16, 2, 3, 4, 5, 15, 15, 15, 15, 15, 15, 15, 15]
+ ),
+ // m/dd/yy: slashes at bytes 1 and 4; insert a leading '0' before the month
+ (7, 0b10010) => simd_swizzle!(
+ v,
+ INTMSK,
+ [16, 0, 1, 2, 3, 4, 5, 6, 15, 15, 15, 15, 15, 15, 15, 15]
+ ),
+ // mm/d/yy: slashes at bytes 2 and 4; insert a leading '0' before the day
+ (7, 0b10100) => simd_swizzle!(
+ v,
+ INTMSK,
+ [0, 1, 2, 16, 3, 4, 5, 6, 15, 15, 15, 15, 15, 15, 15, 15]
+ ),
+ // mm/dd/yy: slashes at bytes 2 and 5; already in the normalized layout
+ (8, 0b100100) => v,
+ _ => {
+ valid = false;
+ v
+ }
+ };
+
+ // is every lane within its bounds (digits where digits belong, '/' where slashes belong)?
+ valid &= (v.simd_ge(LO) & v.simd_le(HI)).all();
+
+ // parse (lanes 0..8 shown; '.' marks a lane whose value does not matter)
+ //     '0' '3' '/' '3' '1' '/' '2' '6'
+ //   - '0' '0' '0' '0' '0' '0' '0' '0'
+ //   ---------------------------------
+ //      0   3   .   3   1   .   2   6
+ //   *  10  1   0   10  1   0   10  1
+ //   ---------------------------------
+ //      0   3   0   30  1   0   20  6
+ //   +  3   0   30  1   0   20  6      <<<< (same row rotated left by one lane)
+ //   ---------------------------------
+ //      3   .   .   31  .   .   26
+ v -= INTMSK; // ASCII digit -> numeric value; non-digit lanes are junk until the multiply
+ v *= NUMMSK; // weight tens by 10, ones by 1, everything else by 0
+ v += v.rotate_elements_left::<1>(); // lane i += lane i + 1, so lanes 0, 3, 6 hold tens * 10 + ones
+ let n = v.to_array();
+ let (m, d, y) = (n[0], n[3], n[6]);
+
+ (valid & (1..=12).contains(&m) & (1..=31).contains(&d)) // range check only; no days-per-month check
+ .then(|| (m.into(), d.into(), y.into()))
+}