f8/src/conv.rs
2026-02-17 01:43:37 +03:00

162 lines
4.1 KiB
Rust

use super::*;
/// Lossy conversion from an unsigned byte.
///
/// Zero maps to the all-zero bit pattern. Inputs whose leading one bit lies
/// above `E_MAX` saturate to the bit pattern `0xff`. For everything else the
/// low-order bits that do not fit into the stored mantissa are truncated.
impl From<u8> for F8 {
    fn from(v: u8) -> Self {
        // Zero has no leading one bit, so it maps straight to the all-zero encoding.
        let Some(top_bit) = v.checked_ilog2() else {
            return Self(0);
        };
        let exponent = top_bit as u8;

        // Too large to encode: saturate.
        if exponent > E_MAX {
            return Self(0xff);
        }

        // Move the leading one bit to bit position `M_BITS`. Bits shifted out
        // on the right are discarded.
        let aligned = if exponent >= M_BITS {
            v >> (exponent - M_BITS)
        } else {
            v << (M_BITS - exponent)
        };

        // Only the bits covered by `M_STORAGE_MAX` are stored alongside the
        // biased exponent.
        Self::merge(aligned & M_STORAGE_MAX, exponent + E_BIAS)
    }
}
/// Lossy conversion to an unsigned byte; any fractional part is truncated.
impl From<F8> for u8 {
    fn from(value: F8) -> Self {
        // The all-zero bit pattern encodes zero.
        if value.0 == 0 {
            return 0;
        }

        let (mantissa, exponent) = value.split_unbias();
        if exponent >= 0 {
            // Integral magnitude: scale the mantissa up.
            mantissa << exponent
        } else if exponent >= -7 {
            // Partly fractional: shifting right drops the fractional bits.
            mantissa >> exponent.unsigned_abs()
        } else {
            // Shifting an 8-bit mantissa right by eight or more leaves nothing.
            0
        }
    }
}
/// Widening conversion to `f32`, computed as `mantissa * 2^exponent`.
impl From<F8> for f32 {
    fn from(value: F8) -> Self {
        match value.0 {
            // The all-zero bit pattern encodes zero.
            0 => 0.0,
            _ => {
                let (mantissa, exponent) = value.split_unbias();
                // Both integer-to-float widenings are lossless.
                let scale = f32::from(exponent).exp2();
                f32::from(mantissa) * scale
            }
        }
    }
}
impl F8 {
    /// Split self into the mantissa and exponent, as stored.
    ///
    /// Returns `(mantissa_field, exponent_field)`: the bits selected by
    /// `M_MASK`, and the remaining bits above the low `M_BITS` bits.
    pub(crate) const fn split(self) -> (u8, u8) {
        (self.0 & M_MASK, self.0 >> M_BITS)
    }

    /// Split self into integers (m, e) such that `self == m * 2.pow(e)`.
    ///
    /// The all-zero bit pattern is *not* special-cased here; callers that
    /// treat it as zero must check for it before calling.
    pub(crate) const fn split_unbias(self) -> (u8, i8) {
        let (m, e) = self.split();
        // `M_BIAS` is ORed into the stored mantissa (presumably the implicit
        // leading one bit), and the exponent is additionally lowered by
        // `M_BITS` so that `m` can be read as a plain integer.
        (m | M_BIAS, e as i8 - (E_BIAS + M_BITS) as i8)
    }

    /// Assemble an [`F8`] from a stored mantissa field and a stored (biased)
    /// exponent field.
    ///
    /// # Panics
    /// Panics if `m > M_STORAGE_MAX` or `e > E_STORAGE_MAX`.
    pub(crate) const fn merge(m: u8, e: u8) -> Self {
        assert!(m <= M_STORAGE_MAX);
        assert!(e <= E_STORAGE_MAX);
        Self((e << M_BITS) | m)
    }

    /// Build the [`F8`] for `in_m * 2.pow(in_e)`.
    ///
    /// Mantissa bits that do not fit are truncated. Returns the zero bit
    /// pattern when `in_m == 0` or the exponent underflows, and the bit
    /// pattern `0xff` when the exponent overflows. Never panics, whatever the
    /// value of `in_e`.
    pub(crate) const fn merge_unbias(in_m: u32, in_e: i32) -> Self {
        if in_m == 0 {
            return Self(0);
        }
        // Position of the leading one bit; the mantissa is normalised so that
        // this bit lands on bit `M_BITS`.
        let base_e = in_m.ilog2() as u8;
        let off = base_e as i8 - M_BITS as i8;
        let m = if off >= 0 { in_m >> off } else { in_m << -off };
        // `in_e` is caller-controlled (it is reachable through the public
        // `ldexp`), so plain `+` could overflow `i32` for huge scales: a panic
        // in debug builds, and a wrap-around to a negative exponent (reported
        // as zero) in release builds. Saturating keeps the sum on the correct
        // side of the range checks below.
        let e = (base_e as i32)
            .saturating_add(in_e)
            .saturating_add(E_BIAS as i32);
        if e < 0 {
            return Self(0);
        }
        if e > E_STORAGE_MAX as i32 {
            return Self(0xff);
        }
        Self::merge(m as u8 & M_STORAGE_MAX, e as u8)
    }

    /// Reinterpret `bits` as an [`F8`]. Equivalent to [`std::mem::transmute`] but safe.
    pub const fn from_bits(bits: u8) -> Self {
        Self(bits)
    }

    /// Reinterpret `self` as a [`u8`]. Equivalent to [`std::mem::transmute`] but safe.
    pub const fn to_bits(self) -> u8 {
        self.0
    }

    /// Calculate `base * 2.pow(scale)`, preserving as much precision as possible.
    ///
    /// Excess low-order bits of `base` are truncated. Results too small to
    /// represent become zero; results too large saturate to the bit pattern
    /// `0xff`. Any `scale` is accepted without panicking.
    pub fn ldexp(base: u32, scale: i32) -> Self {
        Self::merge_unbias(base, scale)
    }

    /// Split `self` into `(base, scale)` such that `self = base * 2.pow(scale)`.
    ///
    /// Zero yields `(0, 0)`.
    ///
    /// Guarantees:
    /// * `F8::ldexp(base, scale) == val` for `(base, scale) = val.frexp()`.
    /// * `base` will have at most [M_BITS]+1 low-order bits set.
    /// * `scale.abs()` will have at most [E_BITS]+1 low-order bits set.
    ///
    /// # Example
    /// ```
    /// # use f8::F8;
    /// # let val = F8::from_bits(42);
    /// let (base, scale) = val.frexp();
    /// let val2 = F8::ldexp(base, scale);
    /// assert_eq!(val, val2);
    /// ```
    pub fn frexp(self) -> (u32, i32) {
        // `split_unbias` always ORs `M_BIAS` into the mantissa, which would
        // report a non-zero base for the zero encoding and break the
        // `self = base * 2.pow(scale)` contract.
        if self.0 == 0 {
            return (0, 0);
        }
        let (base, scale) = self.split_unbias();
        (base.into(), scale.into())
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// `F8 -> u8` on hand-built encodings, then a `u8 -> F8 -> u8` round trip
    /// over every integer up to `EXACT_INT_MAX`.
    #[test]
    fn test_int_conv() {
        let cases: [(F8, u8); 6] = [
            (F8(0), 0),
            (F8::merge(0, 1), 0),
            (F8::merge(0, E_BIAS - 1), 0),
            (F8::merge(0, E_BIAS), 1),
            (F8::merge(0, E_BIAS + 1), 2),
            (F8::merge(0, E_STORAGE_MAX), 1 << E_MAX),
        ];
        for (idx, (encoded, expected)) in cases.into_iter().enumerate() {
            assert_eq!(u8::from(encoded), expected, "case #{idx}");
        }

        (0..=EXACT_INT_MAX).for_each(|k| assert_eq!(u8::from(F8::from(k)), k));
    }

    /// `F8 -> f32` on hand-built encodings, then a `u8 -> F8 -> f32` round
    /// trip over every integer up to `EXACT_INT_MAX`.
    #[test]
    fn test_float_conv() {
        // Mantissa field with only its top stored bit set.
        let top_mantissa_bit: u8 = 1 << (M_BITS - 1);
        let cases: [(F8, f32); 7] = [
            (F8(0), 0.0),
            (F8::merge(0, E_BIAS), 1.0),
            (F8::merge(0, E_BIAS - 1), 0.5),
            (F8::merge(0, E_BIAS + 1), 2.0),
            (F8::merge(top_mantissa_bit, E_BIAS), 1.5),
            (F8::merge(top_mantissa_bit, E_BIAS - 1), 0.75),
            (F8::merge(0, E_STORAGE_MAX), (E_MAX as f32).exp2()),
        ];
        for (idx, (encoded, expected)) in cases.into_iter().enumerate() {
            assert_eq!(f32::from(encoded), expected, "case #{idx}");
        }

        (0..=EXACT_INT_MAX).for_each(|k| assert_eq!(f32::from(F8::from(k)), k as f32));
    }

    /// `merge_unbias(m, e)` must encode `m * 2^e`.
    #[test]
    fn test_merge() {
        let cases: [((u32, i32), f32); 8] = [
            ((0, 0), 0.0),
            ((1, 0), 1.0),
            ((1, 1), 2.0),
            ((1, -1), 0.5),
            ((3, 0), 3.0),
            ((3, 1), 6.0),
            ((3, -1), 1.5),
            ((u32::from(EXACT_INT_MAX), 0), EXACT_INT_MAX as f32),
        ];
        for (idx, ((mantissa, exponent), expected)) in cases.into_iter().enumerate() {
            assert_eq!(
                f32::from(F8::merge_unbias(mantissa, exponent)),
                expected,
                "case #{idx}"
            );
        }
    }
}