f8/src/conv.rs

use super::*;

impl From<u8> for F8 {
	/// Encode an unsigned byte as an [`F8`].
	///
	/// Zero maps to the all-zero bit pattern. For any other input the index of
	/// the highest set bit is used as the exponent, and the input is shifted so
	/// that this bit sits at position `M_BITS`; low-order bits shifted out on
	/// the way are dropped (truncated, not rounded). The shifted value is masked
	/// with `M_STORAGE_MAX` before being stored. Inputs whose exponent exceeds
	/// `E_MAX` produce the bit pattern `0xff`.
	fn from(v: u8) -> Self {
		// `checked_ilog2` is `None` only for zero, which has no leading bit.
		let Some(log) = v.checked_ilog2() else {
			return Self(0);
		};
		let exp = log as u8;
		if exp > E_MAX {
			return Self(0xff);
		}
		// Signed distance between the leading bit and the mantissa's top position.
		let shift = exp as i8 - M_BITS as i8;
		let aligned = if shift < 0 { v << -shift } else { v >> shift };
		Self::merge(aligned & M_STORAGE_MAX, exp + E_BIAS)
	}
}

impl From<F8> for u8 {
	/// Convert an [`F8`] to an integer, discarding any fractional bits.
	///
	/// The all-zero bit pattern converts to 0. Otherwise the unbiased mantissa
	/// is shifted left for a non-negative exponent and right for a negative one;
	/// exponents below -7 yield 0 without shifting.
	fn from(value: F8) -> Self {
		if value.0 == 0 {
			return 0;
		}
		let (mantissa, exp) = value.split_unbias();
		if exp >= 0 {
			mantissa << exp
		} else if exp >= -7 {
			// The right shift drops the bits below the binary point.
			mantissa >> -exp
		} else {
			// A shift of 8 or more is out of range for `u8`, and every bit
			// would be shifted out anyway.
			0
		}
	}
}

impl From<F8> for f32 {
	/// Convert an [`F8`] to `f32` by evaluating `m * 2^e` on its unbiased parts.
	///
	/// The all-zero bit pattern is handled separately and converts to `0.0`.
	fn from(value: F8) -> Self {
		match value.0 {
			0 => 0.0,
			_ => {
				let (mantissa, exp) = value.split_unbias();
				// Both widenings are lossless: every `u8`/`i8` is exactly representable.
				f32::from(mantissa) * f32::from(exp).exp2()
			}
		}
	}
}

impl F8 {
	/// Split self into the mantissa and exponent, as stored.
	///
	/// Returns `(m, e)` where `m` is the bits selected by `M_MASK` and `e` is
	/// the remaining bits shifted down by `M_BITS`. Neither part is unbiased.
	pub(crate) const fn split(self) -> (u8, u8) {
		(self.0 & M_MASK, self.0 >> M_BITS)
	}

	/// Split self into integers (m, e) such that `self == m * 2.pow(e)`.
	///
	/// `M_BIAS` is unconditionally OR-ed into the mantissa, so the identity does
	/// NOT hold for the all-zero bit pattern, which the conversions in this file
	/// treat as zero. Callers must check for zero before relying on the result.
	pub(crate) const fn split_unbias(self) -> (u8, i8) {
		let (m, e) = self.split();
		(m | M_BIAS, e as i8 - (E_BIAS + M_BITS) as i8)
	}

	/// Assemble an [`F8`] from a stored mantissa `m` and a stored (biased) exponent `e`.
	///
	/// # Panics
	/// Panics if `m > M_STORAGE_MAX` or `e > E_STORAGE_MAX`.
	pub(crate) const fn merge(m: u8, e: u8) -> Self {
		assert!(m <= M_STORAGE_MAX);
		assert!(e <= E_STORAGE_MAX);
		Self((e << M_BITS) | m)
	}

	/// Build the [`F8`] for `in_m * 2.pow(in_e)` from an unnormalised mantissa
	/// and an unbiased exponent.
	///
	/// * `in_m == 0` yields the all-zero bit pattern.
	/// * Low-order bits of `in_m` that do not fit the mantissa are truncated.
	/// * A biased exponent below zero flushes to the all-zero bit pattern.
	/// * A biased exponent above `E_STORAGE_MAX` saturates to the bit pattern `0xff`.
	///
	/// Never panics, whatever the value of `in_e`.
	pub(crate) const fn merge_unbias(in_m: u32, in_e: i32) -> Self {
		if in_m == 0 {
			return Self(0);
		}
		// Position of the leading set bit; this is the exponent of `in_m` itself.
		let base_e = in_m.ilog2() as u8;
		// Shift so that the leading bit lands at bit `M_BITS`.
		let off = base_e as i8 - M_BITS as i8;
		let m = if off >= 0 { in_m >> off } else { in_m << -off };
		// `in_e` may be anywhere in the i32 range. A plain `+` would panic in
		// debug builds and wrap to a negative value in release builds for very
		// large scales, turning an overflow into zero. Saturating keeps the sum
		// on the correct side of the range checks below.
		let e = in_e.saturating_add(base_e as i32 + E_BIAS as i32);
		if e < 0 {
			return Self(0);
		}
		if e > E_STORAGE_MAX as i32 {
			return Self(0xff);
		}
		Self::merge(m as u8 & M_STORAGE_MAX, e as u8)
	}

	/// Reinterpret `bits` as an [`F8`]. Equivalent to [`std::mem::transmute`] but safe.
	pub const fn from_bits(bits: u8) -> Self {
		Self(bits)
	}

	/// Reinterpret `self` as a [`u8`]. Equivalent to [`std::mem::transmute`] but safe.
	pub const fn to_bits(self) -> u8 {
		self.0
	}

	/// Calculate `base * 2.pow(scale)`, preserving as much precision as possible.
	///
	/// Bits of `base` that do not fit the mantissa are truncated. Results too
	/// small to represent become zero and results too large saturate to the bit
	/// pattern `0xff`; this holds for every `scale`, including `i32::MIN` and
	/// `i32::MAX`.
	pub fn ldexp(base: u32, scale: i32) -> Self {
		Self::merge_unbias(base, scale)
	}

	/// Split `self` into `(base, scale)` such that `self = base * 2.pow(scale)`.
	///
	/// Guarantees:
	/// * `ldexp(val.frexp()) == val`.
	/// * `base` will have at most [M_BITS]+1 low-order bits set.
	/// * `scale.abs()` will have at most [E_BITS]+1 low-order bits set.
	/// * Zero (the all-zero bit pattern) yields `(0, 0)`.
	///
	/// # Example
	/// ```
	/// # use f8::F8;
	/// # let val = F8::from_bits(42);
	/// let (base, scale) = val.frexp();
	/// let val2 = F8::ldexp(base, scale);
	/// assert_eq!(val, val2);
	/// assert_eq!(F8::from_bits(0).frexp(), (0, 0));
	/// ```
	pub fn frexp(self) -> (u32, i32) {
		// `split_unbias` always ORs in `M_BIAS`, so it would report a non-zero
		// base for zero and break the `base * 2.pow(scale)` contract.
		if self.0 == 0 {
			return (0, 0);
		}
		let (base, scale) = self.split_unbias();
		(base.into(), scale.into())
	}
}

#[cfg(test)]
mod tests {
	use super::*;

	/// `F8` -> `u8` on hand-picked encodings, then a round trip over every
	/// exactly representable integer.
	#[test]
	fn test_int_conv() {
		let cases: [(F8, u8); 6] = [
			(F8(0), 0),
			(F8::merge(0, 1), 0),
			(F8::merge(0, E_BIAS - 1), 0),
			(F8::merge(0, E_BIAS), 1),
			(F8::merge(0, E_BIAS + 1), 2),
			(F8::merge(0, E_STORAGE_MAX), 1 << E_MAX),
		];
		for (input, expected) in cases {
			assert_eq!(u8::from(input), expected);
		}
		(0..=EXACT_INT_MAX).for_each(|k| assert_eq!(u8::from(F8::from(k)), k));
	}

	/// `F8` -> `f32` on hand-picked encodings, then every exactly representable
	/// integer converted via `F8`.
	#[test]
	fn test_float_conv() {
		// Stored mantissa with only its top bit set.
		let top_mantissa_bit: u8 = 1 << (M_BITS - 1);
		let cases: [(F8, f32); 7] = [
			(F8(0), 0.0),
			(F8::merge(0, E_BIAS), 1.0),
			(F8::merge(0, E_BIAS - 1), 0.5),
			(F8::merge(0, E_BIAS + 1), 2.0),
			(F8::merge(top_mantissa_bit, E_BIAS), 1.5),
			(F8::merge(top_mantissa_bit, E_BIAS - 1), 0.75),
			(F8::merge(0, E_STORAGE_MAX), (E_MAX as f32).exp2()),
		];
		for (input, expected) in cases {
			assert_eq!(f32::from(input), expected);
		}
		(0..=EXACT_INT_MAX).for_each(|k| assert_eq!(f32::from(F8::from(k)), k as f32));
	}

	/// `merge_unbias(m, e)` must evaluate to `m * 2^e` for small exact inputs.
	#[test]
	fn test_merge() {
		let cases: [(u32, i32, f32); 8] = [
			(0, 0, 0.0),
			(1, 0, 1.0),
			(1, 1, 2.0),
			(1, -1, 0.5),
			(3, 0, 3.0),
			(3, 1, 6.0),
			(3, -1, 1.5),
			(EXACT_INT_MAX.into(), 0, EXACT_INT_MAX as f32),
		];
		for (mantissa, exponent, expected) in cases {
			assert_eq!(f32::from(F8::merge_unbias(mantissa, exponent)), expected);
		}
	}
}