From 9bdd9eb5442bbdeefa08ea60fd966c8f9a83c543 Mon Sep 17 00:00:00 2001 From: u <@> Date: Wed, 11 Mar 2026 06:19:08 +0200 Subject: drop regex and handroll parser the rustflags change, leaner parser, and dropping the regex crate yields a 4x binary size decrease, i can also see it runs around 2.5x faster --- src/dateparse.rs | 81 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 18 ++++++------- src/solar.rs | 44 +++++++++++++++--------------- 3 files changed, 113 insertions(+), 30 deletions(-) create mode 100644 src/dateparse.rs (limited to 'src') diff --git a/src/dateparse.rs b/src/dateparse.rs new file mode 100644 index 0000000..ac5dab1 --- /dev/null +++ b/src/dateparse.rs @@ -0,0 +1,81 @@ +use std::simd::{ + cmp::{SimdPartialEq, SimdPartialOrd}, + simd_swizzle, u8x16, +}; + +#[must_use] +pub fn dateparse(s: &str) -> Option<(usize, usize, usize)> { + // inclusive per-lane byte bounds (LO = inf, HI = sup) for the canonical mm/dd/yy layout; they admit months up to 19 and days up to 39, the exact 1..=12 and 1..=31 checks happen at the end; lanes 8..16 are padding and accept any byte + const LO: u8x16 = u8x16::from_array([ + b'0', b'0', b'/', b'0', b'0', b'/', b'0', b'0', // + 0, 0, 0, 0, 0, 0, 0, 0, // padding lanes (load_or_default isn't const) + ]); + const HI: u8x16 = u8x16::from_array([ + b'1', b'9', b'/', b'3', b'9', b'/', b'9', b'9', // + 0xFF, 0xFF, 0xFF, 0xFF, // + 0xFF, 0xFF, 0xFF, 0xFF, // + ]); + const INTMSK: u8x16 = u8x16::from_array([b'0'; 16]); + const SLHMSK: u8x16 = u8x16::from_array([b'/'; 16]); + const NUMMSK: u8x16 = u8x16::from_array([ + 10, 1, 0, // per-lane weights: [m_10, m_1] -> m_10 * 10 + m_1 (the 0 weight zeroes the slash lane) + 10, 1, 0, // dd: same weights for the day digits + 10, 1, 0, // yy: same weights for the year digits
+ 0, 0, 0, 0, 0, 0, 0, + ]); + let b = s.as_bytes(); + let v = u8x16::load_or_default(b); + let mut valid = true; + // swizzle index 16 picks lane 0 of INTMSK, which is b'0'; it pads a one-digit month/day + // index 15 picks v[15], which is 0 for these short inputs (load_or_default fills missing lanes with the default) + // this will compile to a shuffle, but a relaxed_laneselect would be cool + let mut v = match (b.len(), v.simd_eq(SLHMSK).to_bitmask()) { + // m/d/yy: slashes at bytes 1 and 3; pad month and day with a leading b'0' + (6, 0b1010) => simd_swizzle!( + v, + INTMSK, + [16, 0, 1, 16, 2, 3, 4, 5, 15, 15, 15, 15, 15, 15, 15, 15] + ), + // m/dd/yy: slashes at bytes 1 and 4; pad the month with a leading b'0' + (7, 0b10010) => simd_swizzle!( + v, + INTMSK, + [16, 0, 1, 2, 3, 4, 5, 6, 15, 15, 15, 15, 15, 15, 15, 15] + ), + // mm/d/yy: slashes at bytes 2 and 4; pad the day with a leading b'0' + (7, 0b10100) => simd_swizzle!( + v, + INTMSK, + [0, 1, 2, 16, 3, 4, 5, 6, 15, 15, 15, 15, 15, 15, 15, 15] + ), + // mm/dd/yy: slashes at bytes 2 and 5; already in canonical form + (8, 0b100100) => v, + _ => { + valid = false; + v + } + }; + + // is it numeric? every lane must lie within [LO, HI] + valid &= (v.simd_ge(LO) & v.simd_le(HI)).all(); + + // parse (worked example for 03/31/26) + // '0' '3' '/' '3' '1' '/' '2' '6' + // - '0' '0' '0' '0' '0' '0' '0' '0' + // --------------------------------- + // 0 3 . 3 1 . 2 6 + // * 10 1 0 10 1 0 10 1 + // --------------------------------- + // 0 3 0 30 1 0 20 6 + // + 3 0 30 1 0 20 6<<<< + // --------------------------------- + // 3 . . 31 . . 
26 + v -= INTMSK; + v *= NUMMSK; + v += v.rotate_elements_left::<1>(); + let n = v.to_array(); + let (m, d, y) = (n[0], n[3], n[6]); + + (valid & (1..=12).contains(&m) & (1..=31).contains(&d)) + .then(|| (m.into(), d.into(), y.into())) +} diff --git a/src/lib.rs b/src/lib.rs index 6ac8784..52169b4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,15 +1,19 @@ #![feature(negative_impls)] #![feature(freeze)] #![feature(freeze_impls)] +#![feature(portable_simd)] #![allow(internal_features)] #![allow(mutable_transmutes)] use maud::{html, Markup, PreEscaped, DOCTYPE}; -use regex::{Match, Regex}; use std::mem::{take, transmute}; use worker::*; +pub mod dateparse; pub mod solar; -use crate::solar::{dow, solar}; +use crate::{ + dateparse::dateparse, + solar::{dow, solar}, +}; async fn text(r: &mut Response) -> Result { match r.body() { @@ -33,9 +37,6 @@ async fn fetch(req: Request, env: Env, _ctx: Context) -> Result { let a = ctx.env.assets("ASSETS")?; let mut a = a.fetch(u.join("_/diary")?, None).await?; let a = text(&mut a).await?; - let re = Regex::new( - r"(0[1-9]|[1-9]|1[0-2])\/(0[1-9]|[1-9]|1\d|2\d|3[01])\/(\d{2})$", - ).unwrap(); bone("shitpit", "look in the sky! it's a bird! it's a plane! no it's superego!", html! { p { "The numbering system for people (#1, #2, ...) is arbitrary and " @@ -51,12 +52,11 @@ async fn fetch(req: Request, env: Env, _ctx: Context) -> Result { } pre { @for x in a.split('\n').map(|l| { - re.captures(l) + dateparse(l) .map_or_else(|| html! 
{ (PreEscaped(l)) "\n" }, |c| { - let (m, d, y) = (c.get(1), c.get(2), c.get(3)); - let f = |x: Option| -> usize { x.unwrap().as_str().parse().unwrap() }; - let (y, m, d) = (f(y) + 2000, f(m), f(d)); + let (m, d, y) = c; + let y = y + 2000; let s = solar(y, m, d); let (dowc, dowl) = dow(y, m, d); let (y, m, d, t) = (s.year, s.month, s.day, s.term); diff --git a/src/solar.rs b/src/solar.rs index 4bff33a..1266a23 100644 --- a/src/solar.rs +++ b/src/solar.rs @@ -8,7 +8,7 @@ use num::cast; use std::{ marker::Freeze, mem::{transmute, MaybeUninit}, - ops::{Div, Rem}, + ops::{Div, Index, IndexMut, Rem}, sync::LazyLock, }; @@ -192,34 +192,40 @@ where align(6 * g - 5 * z, 60, 0) } +// this wrapper is needed because +// 1. UnsafeCell doesn't implement Sync +// 2. mutating a slice directly will put it in .rodata on the normal targets +// where I test this struct Cell { val: T, } -impl Cell { - pub fn get(&self) -> &mut T { - unsafe { transmute(&self.val) } - } -} +// SAFETY(lol): wasm isolates are single-core. 
unsafe impl Sync for Cell {} // technically i dont need this because wasm mutable globals are enabled, // and Freeze is only needed for targets with a separate .rodata // i dont really care either way but this is how UnsafeCell does it +// // lang = "unsafe_cell" (apparently an extremely integral part of rust) // does additional stuff but this is sufficient for the compiler to not -// put it on .rodata for normal targets +// put it in .rodata for normal targets impl !Freeze for Cell {} +impl + Index> Cell { + pub fn get(&self, i: usize) -> &mut T::Output { + // SAFETY: see above + let a: &mut T = unsafe { transmute(&self.val) }; + &mut a[i] + } +} fn stday(i: usize, y: usize) -> f64 { const YEARS: usize = 200; const ARRLEN: usize = YEARS * TERMS.len(); - static STDAYS: Cell<[f64; ARRLEN]> = Cell { - val: [0.; ARRLEN], - }; // rust doesnt have (*a)[n], | @ least ill have2 use a cr8 4 it // also lazy_static doesnt work with mutables (im not using mutex) - assert!((UNIT_YR..UNIT_YR + YEARS).contains(&y)); + static STDAYS: Cell<[f64; ARRLEN]> = Cell { val: [0.; ARRLEN] }; let idx = y - UNIT_YR; - let ret = &mut STDAYS.get()[idx * TERMS.len() + i]; + assert!((0..YEARS).contains(&idx)); + let ret = STDAYS.get(idx * TERMS.len() + i); (if int(*ret) != 0 { *ret } else { @@ -239,15 +245,10 @@ pub fn ganzhi(i: usize) -> &'static str { '子', '丑', '寅', '卯', '辰', '巳', '午', '未', '申', '酉', '戌', '亥', ]; - let mut tmp: Vec = Vec::with_capacity(60); - (0..60).for_each(|i| { - // looks so much worse than using format!() - // cant be bothered to benchmark - let mut s = gan[i % 10].to_string(); - s.push(zhi[i % 12]); - tmp.push(s); - }); - tmp + (0..60).fold(Vec::with_capacity(60), |mut v, i| { + v.push([gan[i % 10], zhi[i % 12]].iter().collect()); + v + }) }); &GANZHIS[i] } @@ -271,6 +272,7 @@ pub fn solar(y: usize, m: usize, d: usize) -> SexagenaryDate { let div = int(a.div(15.).floor()); let mut dz = align(div.div_ceil(2), 12, 9); let mut termb = rem > 14.; + // SAFETY: guarded by 
termb let mut term: usize = unsafe { MaybeUninit::uninit().assume_init() }; if termb { term = align(div, 24, 18); -- cgit v1.2.3