summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authoru <@>2026-03-11 06:19:08 +0200
committeru <@>2026-03-11 08:24:39 +0200
commit9bdd9eb5442bbdeefa08ea60fd966c8f9a83c543 (patch)
tree31ca975794ad966935ac44fe8a59765e9df9b227 /src
parentbfc2cabc3b0d6af8c7ca0496b7bd815fdbb658f6 (diff)
drop regex and handroll parser
The rustflags change, the leaner parser, and dropping the regex crate together yield a 4x decrease in binary size; it also appears to run around 2.5x faster.
Diffstat (limited to 'src')
-rw-r--r--src/dateparse.rs81
-rw-r--r--src/lib.rs18
-rw-r--r--src/solar.rs44
3 files changed, 113 insertions, 30 deletions
diff --git a/src/dateparse.rs b/src/dateparse.rs
new file mode 100644
index 0000000..ac5dab1
--- /dev/null
+++ b/src/dateparse.rs
@@ -0,0 +1,81 @@
+use std::simd::{
+ cmp::{SimdPartialEq, SimdPartialOrd},
+ simd_swizzle, u8x16,
+};
+
+#[must_use]
+pub fn dateparse(s: &str) -> Option<(usize, usize, usize)> {
+ // parses a whole-string m/d/yy date into (month, day, two-digit year), None otherwise; LO/HI are the inclusive per-lane byte bounds
+ const LO: u8x16 = u8x16::from_array([
+ b'0', b'0', b'/', b'0', b'0', b'/', b'0', b'0', //
+ 0, 0, 0, 0, 0, 0, 0, 0, // padding lanes spelled out because load_or_default isn't const
+ ]);
+ const HI: u8x16 = u8x16::from_array([
+ b'1', b'9', b'/', b'3', b'9', b'/', b'9', b'9', //
+ 0xFF, 0xFF, 0xFF, 0xFF, //
+ 0xFF, 0xFF, 0xFF, 0xFF, //
+ ]);
+ const INTMSK: u8x16 = u8x16::from_array([b'0'; 16]);
+ const SLHMSK: u8x16 = u8x16::from_array([b'/'; 16]);
+ const NUMMSK: u8x16 = u8x16::from_array([
+ 10, 1, 0, // mm: [m_10, m_1] -> m_10 * 10 + m_1 (the 0 zeroes the slash lane)
+ 10, 1, 0, // dd: same weighting
+ 10, 1, 0, // yy: same weighting (the 0 lands on a padding lane)
+ 0, 0, 0, 0, 0, 0, 0,
+ ]);
+ let b = s.as_bytes();
+ let v = u8x16::load_or_default(b);
+ let mut valid = true;
+ // in the two-vector swizzles below, index 16 is lane 0 of INTMSK, i.e. b'0'
+ // index 15 is the last lane of v, which is zero padding for these lengths
+ // this will compile to a shuffle, but a relaxed_laneselect would be cool
+ let mut v = match (b.len(), v.simd_eq(SLHMSK).to_bitmask()) {
+ // m/d/yy: slashes at bytes 1 and 3; left-pad month and day with '0'
+ (6, 0b1010) => simd_swizzle!(
+ v,
+ INTMSK,
+ [16, 0, 1, 16, 2, 3, 4, 5, 15, 15, 15, 15, 15, 15, 15, 15]
+ ),
+ // m/dd/yy: slashes at bytes 1 and 4; left-pad month with '0'
+ (7, 0b10010) => simd_swizzle!(
+ v,
+ INTMSK,
+ [16, 0, 1, 2, 3, 4, 5, 6, 15, 15, 15, 15, 15, 15, 15, 15]
+ ),
+ // mm/d/yy: slashes at bytes 2 and 4; left-pad day with '0'
+ (7, 0b10100) => simd_swizzle!(
+ v,
+ INTMSK,
+ [0, 1, 2, 16, 3, 4, 5, 6, 15, 15, 15, 15, 15, 15, 15, 15]
+ ),
+ // mm/dd/yy: slashes at bytes 2 and 5; already in canonical layout
+ (8, 0b100100) => v,
+ _ => {
+ valid = false;
+ v
+ }
+ };
+
+ // is it numeric? (every lane must lie within [LO, HI])
+ valid &= (v.simd_ge(LO) & v.simd_le(HI)).all();
+
+ // parse, worked example for "03/31/26" (slash lanes end up zeroed by NUMMSK)
+ //     '0' '3' '/' '3' '1' '/' '2' '6'
+ //  -  '0' '0' '0' '0' '0' '0' '0' '0'
+ //  ----------------------------------
+ //      0   3   .   3   1   .   2   6
+ //  *  10   1   0  10   1   0  10   1
+ //  ----------------------------------
+ //      0   3   0  30   1   0  20   6
+ //  +   3   0  30   1   0  20   6  <<<< (rotated left by one lane)
+ //  ----------------------------------
+ //      3   .   .  31   .   .  26
+ v -= INTMSK;
+ v *= NUMMSK;
+ v += v.rotate_elements_left::<1>();
+ let n = v.to_array();
+ let (m, d, y) = (n[0], n[3], n[6]);
+
+ (valid & (1..=12).contains(&m) & (1..=31).contains(&d))
+ .then(|| (m.into(), d.into(), y.into()))
+}
diff --git a/src/lib.rs b/src/lib.rs
index 6ac8784..52169b4 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,15 +1,19 @@
#![feature(negative_impls)]
#![feature(freeze)]
#![feature(freeze_impls)]
+#![feature(portable_simd)]
#![allow(internal_features)]
#![allow(mutable_transmutes)]
use maud::{html, Markup, PreEscaped, DOCTYPE};
-use regex::{Match, Regex};
use std::mem::{take, transmute};
use worker::*;
+pub mod dateparse;
pub mod solar;
-use crate::solar::{dow, solar};
+use crate::{
+ dateparse::dateparse,
+ solar::{dow, solar},
+};
async fn text(r: &mut Response) -> Result<String> {
match r.body() {
@@ -33,9 +37,6 @@ async fn fetch(req: Request, env: Env, _ctx: Context) -> Result<Response> {
let a = ctx.env.assets("ASSETS")?;
let mut a = a.fetch(u.join("_/diary")?, None).await?;
let a = text(&mut a).await?;
- let re = Regex::new(
- r"(0[1-9]|[1-9]|1[0-2])\/(0[1-9]|[1-9]|1\d|2\d|3[01])\/(\d{2})$",
- ).unwrap();
bone("shitpit", "look in the sky! it's a bird! it's a plane! no it's superego!", html! {
p {
"The numbering system for people (#1, #2, ...) is arbitrary and "
@@ -51,12 +52,11 @@ async fn fetch(req: Request, env: Env, _ctx: Context) -> Result<Response> {
}
pre {
@for x in a.split('\n').map(|l| {
- re.captures(l)
+ dateparse(l)
.map_or_else(|| html! { (PreEscaped(l)) "\n" },
|c| {
- let (m, d, y) = (c.get(1), c.get(2), c.get(3));
- let f = |x: Option<Match>| -> usize { x.unwrap().as_str().parse().unwrap() };
- let (y, m, d) = (f(y) + 2000, f(m), f(d));
+ let (m, d, y) = c;
+ let y = y + 2000;
let s = solar(y, m, d);
let (dowc, dowl) = dow(y, m, d);
let (y, m, d, t) = (s.year, s.month, s.day, s.term);
diff --git a/src/solar.rs b/src/solar.rs
index 4bff33a..1266a23 100644
--- a/src/solar.rs
+++ b/src/solar.rs
@@ -8,7 +8,7 @@ use num::cast;
use std::{
marker::Freeze,
mem::{transmute, MaybeUninit},
- ops::{Div, Rem},
+ ops::{Div, Index, IndexMut, Rem},
sync::LazyLock,
};
@@ -192,34 +192,40 @@ where
align(6 * g - 5 * z, 60, 0)
}
+// hand-rolled interior-mutability wrapper; need to do this because
+// 1. UnsafeCell doesn't implement Sync
+// 2. mutating a slice directly will put it in .rodata on the normal
+// (non-wasm) targets this gets tested on
struct Cell<T: ?Sized> {
val: T,
}
-impl<T: ?Sized> Cell<T> {
- pub fn get(&self) -> &mut T {
- unsafe { transmute(&self.val) }
- }
-}
+// SAFETY(lol): wasm isolates are single-core.
unsafe impl<T: ?Sized + Send> Sync for Cell<T> {}
// technically i dont need this because wasm mutable globals are enabled,
// and Freeze is only needed for targets with a separate .rodata
// i dont really care either way but this is how UnsafeCell does it
+//
// lang = "unsafe_cell" (apparently an extremely integral part of rust)
// does additional stuff but this is sufficient for the compiler to not
-// put it on .rodata for normal targets
+// put it in .rodata for normal targets
impl<T: ?Sized> !Freeze for Cell<T> {}
+impl<T: ?Sized + IndexMut<usize> + Index<usize>> Cell<T> {
+ pub fn get(&self, i: usize) -> &mut T::Output {
+ // SAFETY: leans on the single-core wasm assumption noted at the Sync impl above. NOTE(review): &T -> &mut T transmute is what the mutable_transmutes lint flags as UB -- confirm this is acceptable
+ let a: &mut T = unsafe { transmute(&self.val) };
+ &mut a[i]
+ }
+}
fn stday(i: usize, y: usize) -> f64 {
const YEARS: usize = 200;
const ARRLEN: usize = YEARS * TERMS.len();
- static STDAYS: Cell<[f64; ARRLEN]> = Cell {
- val: [0.; ARRLEN],
- };
// rust doesnt have (*a)[n], | @ least ill have2 use a cr8 4 it
// also lazy_static doesnt work with mutables (im not using mutex)
- assert!((UNIT_YR..UNIT_YR + YEARS).contains(&y));
+ static STDAYS: Cell<[f64; ARRLEN]> = Cell { val: [0.; ARRLEN] };
let idx = y - UNIT_YR;
- let ret = &mut STDAYS.get()[idx * TERMS.len() + i];
+ assert!((0..YEARS).contains(&idx));
+ let ret = STDAYS.get(idx * TERMS.len() + i);
(if int(*ret) != 0 {
*ret
} else {
@@ -239,15 +245,10 @@ pub fn ganzhi(i: usize) -> &'static str {
'子', '丑', '寅', '卯', '辰', '巳', '午', '未', '申', '酉', '戌',
'亥',
];
- let mut tmp: Vec<String> = Vec::with_capacity(60);
- (0..60).for_each(|i| {
- // looks so much worse than using format!()
- // cant be bothered to benchmark
- let mut s = gan[i % 10].to_string();
- s.push(zhi[i % 12]);
- tmp.push(s);
- });
- tmp
+ (0..60).fold(Vec::with_capacity(60), |mut v, i| {
+ v.push([gan[i % 10], zhi[i % 12]].iter().collect());
+ v
+ })
});
&GANZHIS[i]
}
@@ -271,6 +272,7 @@ pub fn solar(y: usize, m: usize, d: usize) -> SexagenaryDate {
let div = int(a.div(15.).floor());
let mut dz = align(div.div_ceil(2), 12, 9);
let mut termb = rem > 14.;
+ // SAFETY: guarded by termb
let mut term: usize = unsafe { MaybeUninit::uninit().assume_init() };
if termb {
term = align(div, 24, 18);