use super::*; impl From for F8 { fn from(v: u8) -> Self { if v == 0 { return Self(0); } let e = v.ilog2() as u8; let off = e as i8 - M_BITS as i8; let m = if off >= 0 { v >> off } else { v << -off }; if e > E_MAX { return Self(0xff); } Self::merge(m & M_STORAGE_MAX, e + E_BIAS) } } impl From for u8 { fn from(value: F8) -> Self { if value.0 == 0 { return 0; } let (m, e) = value.split_unbias(); match e { 0.. => m << e, -7..0 => m >> -e, ..-7 => 0, } } } impl From for f32 { fn from(value: F8) -> Self { if value.0 == 0 { return 0.0; } let (m, e) = value.split_unbias(); (m as f32) * (e as f32).exp2() } } impl F8 { /// Split self into the mantissa and exponent, as stored. pub(crate) const fn split(self) -> (u8, u8) { (self.0 & M_MASK, self.0 >> M_BITS) } /// Split self into integers (m, e) such that `self == m * 2.pow(e)`. pub(crate) const fn split_unbias(self) -> (u8, i8) { let (m, e) = self.split(); (m | M_BIAS, e as i8 - (E_BIAS + M_BITS) as i8) } pub(crate) const fn merge(m: u8, e: u8) -> Self { assert!(m <= M_STORAGE_MAX); assert!(e <= E_STORAGE_MAX); Self((e << M_BITS) | m) } pub(crate) const fn merge_unbias(in_m: u32, in_e: i32) -> Self { if in_m == 0 { return Self(0); } let base_e = in_m.ilog2() as u8; let off = base_e as i8 - M_BITS as i8; let m = if off >= 0 { in_m >> off } else { in_m << -off }; let e = (base_e as i32) + in_e + (E_BIAS as i32); if e < 0 { return Self(0); } if e > E_STORAGE_MAX as i32 { return Self(0xff); } Self::merge(m as u8 & M_STORAGE_MAX, e as u8) } /// Reinterpret `bits` as an [`F8`]. Equivalent to [`std::mem::transmute`] but safe. pub const fn from_bits(bits: u8) -> Self { Self(bits) } /// Reinterpret `self` as an [`u8`]. Equivalent to [`std::mem::transmute`] but safe. pub const fn to_bits(self) -> u8 { self.0 } /// Calculate `base * 2.pow(scale)`, preserving as much precision as possible. pub fn ldexp(base: u32, scale: i32) -> Self { Self::merge_unbias(base, scale) } /// Split `self` into `(base, scale)` such that `self = base * 2.pow(scale)`. /// /// Guarantees: /// * `ldexp(val.frexp()) == val`. /// * `base` will have at most [M_BITS]+1 low-order bits set. /// * `scale.abs()` will have at most [E_BITS]+1 low-order bits set. /// /// # Example /// ``` /// # use f8::F8; /// # let val = F8::from_bits(42); /// let (base, scale) = val.frexp(); /// let val2 = F8::ldexp(base, scale); /// assert_eq!(val, val2); /// ``` pub fn frexp(self) -> (u32, i32) { let (base, scale) = self.split_unbias(); (base.into(), scale.into()) } } #[cfg(test)] mod tests { use super::*; #[test] fn test_int_conv() { assert_eq!(u8::from(F8(0)), 0); assert_eq!(u8::from(F8::merge(0, 1)), 0); assert_eq!(u8::from(F8::merge(0, E_BIAS - 1)), 0); assert_eq!(u8::from(F8::merge(0, E_BIAS)), 1); assert_eq!(u8::from(F8::merge(0, E_BIAS + 1)), 2); assert_eq!(u8::from(F8::merge(0, E_STORAGE_MAX)), 1 << E_MAX); for k in 0..=EXACT_INT_MAX { assert_eq!(u8::from(F8::from(k)), k); } } #[test] fn test_float_conv() { assert_eq!(f32::from(F8(0)), 0.0); assert_eq!(f32::from(F8::merge(0, E_BIAS)), 1.0); assert_eq!(f32::from(F8::merge(0, E_BIAS - 1)), 0.5); assert_eq!(f32::from(F8::merge(0, E_BIAS + 1)), 2.0); assert_eq!(f32::from(F8::merge(1 << (M_BITS - 1), E_BIAS)), 1.5); assert_eq!(f32::from(F8::merge(1 << (M_BITS - 1), E_BIAS - 1)), 0.75); assert_eq!( f32::from(F8::merge(0, E_STORAGE_MAX)), (E_MAX as f32).exp2() ); for k in 0..=EXACT_INT_MAX { assert_eq!(f32::from(F8::from(k)), k as f32); } } #[test] fn test_merge() { assert_eq!(f32::from(F8::merge_unbias(0, 0)), 0.0); assert_eq!(f32::from(F8::merge_unbias(1, 0)), 1.0); assert_eq!(f32::from(F8::merge_unbias(1, 1)), 2.0); assert_eq!(f32::from(F8::merge_unbias(1, -1)), 0.5); assert_eq!(f32::from(F8::merge_unbias(3, 0)), 3.0); assert_eq!(f32::from(F8::merge_unbias(3, 1)), 6.0); assert_eq!(f32::from(F8::merge_unbias(3, -1)), 1.5); assert_eq!( f32::from(F8::merge_unbias(EXACT_INT_MAX.into(), 0)), EXACT_INT_MAX as f32 ); } }