162 lines
4.1 KiB
Rust
162 lines
4.1 KiB
Rust
use super::*;
|
|
|
|
impl From<u8> for F8 {
|
|
fn from(v: u8) -> Self {
|
|
if v == 0 {
|
|
return Self(0);
|
|
}
|
|
let e = v.ilog2() as u8;
|
|
let off = e as i8 - M_BITS as i8;
|
|
let m = if off >= 0 { v >> off } else { v << -off };
|
|
if e > E_MAX {
|
|
return Self(0xff);
|
|
}
|
|
Self::merge(m & M_STORAGE_MAX, e + E_BIAS)
|
|
}
|
|
}
|
|
|
|
impl From<F8> for u8 {
    /// Decode an [`F8`] into an unsigned byte.
    ///
    /// The all-zero bit pattern decodes to `0`. For every other pattern the
    /// `(mantissa, exponent)` pair from `split_unbias` is applied as a shift:
    ///
    /// * exponent `>= 0`: the mantissa is shifted left by the exponent;
    /// * exponent in `-7..=-1`: the mantissa is shifted right by the
    ///   exponent's magnitude, dropping the bits that fall off;
    /// * exponent `< -7`: the result is `0`.
    fn from(value: F8) -> Self {
        if value.0 == 0 {
            return 0;
        }

        let (mantissa, exponent) = value.split_unbias();
        if exponent >= 0 {
            mantissa << exponent
        } else if exponent >= -7 {
            mantissa >> -exponent
        } else {
            0
        }
    }
}
|
|
|
|
impl From<F8> for f32 {
    /// Decode an [`F8`] into an `f32`.
    ///
    /// The all-zero bit pattern decodes to `0.0`. Any other pattern decodes to
    /// `mantissa * 2^exponent`, using the pair returned by `split_unbias`.
    fn from(value: F8) -> Self {
        match value.0 {
            0 => 0.0,
            _ => {
                let (mantissa, exponent) = value.split_unbias();
                // `u8 -> f32` and `i8 -> f32` are both lossless conversions.
                let scale = f32::from(exponent).exp2();
                f32::from(mantissa) * scale
            }
        }
    }
}
|
|
|
|
impl F8 {
|
|
/// Split self into the mantissa and exponent, as stored.
|
|
pub(crate) const fn split(self) -> (u8, u8) {
|
|
(self.0 & M_MASK, self.0 >> M_BITS)
|
|
}
|
|
|
|
/// Split self into integers (m, e) such that `self == m * 2.pow(e)`.
|
|
pub(crate) const fn split_unbias(self) -> (u8, i8) {
|
|
let (m, e) = self.split();
|
|
(m | M_BIAS, e as i8 - (E_BIAS + M_BITS) as i8)
|
|
}
|
|
|
|
pub(crate) const fn merge(m: u8, e: u8) -> Self {
|
|
assert!(m <= M_STORAGE_MAX);
|
|
assert!(e <= E_STORAGE_MAX);
|
|
Self((e << M_BITS) | m)
|
|
}
|
|
|
|
pub(crate) const fn merge_unbias(in_m: u32, in_e: i32) -> Self {
|
|
if in_m == 0 {
|
|
return Self(0);
|
|
}
|
|
let base_e = in_m.ilog2() as u8;
|
|
let off = base_e as i8 - M_BITS as i8;
|
|
let m = if off >= 0 { in_m >> off } else { in_m << -off };
|
|
let e = (base_e as i32) + in_e + (E_BIAS as i32);
|
|
if e < 0 {
|
|
return Self(0);
|
|
}
|
|
if e > E_STORAGE_MAX as i32 {
|
|
return Self(0xff);
|
|
}
|
|
Self::merge(m as u8 & M_STORAGE_MAX, e as u8)
|
|
}
|
|
|
|
/// Reinterpret `bits` as an [`F8`]. Equivalent to [`std::mem::transmute`] but safe.
|
|
pub const fn from_bits(bits: u8) -> Self {
|
|
Self(bits)
|
|
}
|
|
|
|
/// Reinterpret `self` as an [`u8`]. Equivalent to [`std::mem::transmute`] but safe.
|
|
pub const fn to_bits(self) -> u8 {
|
|
self.0
|
|
}
|
|
|
|
/// Calculate `base * 2.pow(scale)`, preserving as much precision as possible.
|
|
pub fn ldexp(base: u32, scale: i32) -> Self {
|
|
Self::merge_unbias(base, scale)
|
|
}
|
|
|
|
/// Split `self` into `(base, scale)` such that `self = base * 2.pow(scale)`.
|
|
///
|
|
/// Guarantees:
|
|
/// * `ldexp(val.frexp()) == val`.
|
|
/// * `base` will have at most [M_BITS]+1 low-order bits set.
|
|
/// * `scale.abs()` will have at most [E_BITS]+1 low-order bits set.
|
|
///
|
|
/// # Example
|
|
/// ```
|
|
/// # use f8::F8;
|
|
/// # let val = F8::from_bits(42);
|
|
/// let (base, scale) = val.frexp();
|
|
/// let val2 = F8::ldexp(base, scale);
|
|
/// assert_eq!(val, val2);
|
|
/// ```
|
|
pub fn frexp(self) -> (u32, i32) {
|
|
let (base, scale) = self.split_unbias();
|
|
(base.into(), scale.into())
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Decoding to `u8`: hand-picked stored exponents with a zero stored
    /// mantissa, then a round trip over every integer up to `EXACT_INT_MAX`.
    #[test]
    fn test_int_conv() {
        assert_eq!(u8::from(F8(0)), 0);

        // (stored exponent, expected integer); the stored mantissa is 0.
        let cases: [(u8, u8); 5] = [
            (1, 0),
            (E_BIAS - 1, 0),
            (E_BIAS, 1),
            (E_BIAS + 1, 2),
            (E_STORAGE_MAX, 1 << E_MAX),
        ];
        for (stored_e, expected) in cases {
            assert_eq!(u8::from(F8::merge(0, stored_e)), expected);
        }

        for n in 0..=EXACT_INT_MAX {
            let encoded = F8::from(n);
            assert_eq!(u8::from(encoded), n);
        }
    }

    /// Decoding to `f32`: powers of two, values with the top stored mantissa
    /// bit set, the largest stored exponent, and every integer up to
    /// `EXACT_INT_MAX`.
    #[test]
    fn test_float_conv() {
        assert_eq!(f32::from(F8(0)), 0.0);

        // Stored mantissa with only its highest bit set.
        let top_bit: u8 = 1 << (M_BITS - 1);
        // (stored mantissa, stored exponent, expected value)
        let cases: [(u8, u8, f32); 6] = [
            (0, E_BIAS, 1.0),
            (0, E_BIAS - 1, 0.5),
            (0, E_BIAS + 1, 2.0),
            (top_bit, E_BIAS, 1.5),
            (top_bit, E_BIAS - 1, 0.75),
            (0, E_STORAGE_MAX, (E_MAX as f32).exp2()),
        ];
        for (stored_m, stored_e, expected) in cases {
            assert_eq!(f32::from(F8::merge(stored_m, stored_e)), expected);
        }

        for n in 0..=EXACT_INT_MAX {
            let encoded = F8::from(n);
            assert_eq!(f32::from(encoded), n as f32);
        }
    }

    /// `merge_unbias(m, e)` must decode back to `m * 2^e` for small inputs.
    #[test]
    fn test_merge() {
        // (integer mantissa, power-of-two exponent, expected value)
        let cases: [(u32, i32, f32); 8] = [
            (0, 0, 0.0),
            (1, 0, 1.0),
            (1, 1, 2.0),
            (1, -1, 0.5),
            (3, 0, 3.0),
            (3, 1, 6.0),
            (3, -1, 1.5),
            (EXACT_INT_MAX.into(), 0, EXACT_INT_MAX as f32),
        ];
        for (m, e, expected) in cases {
            assert_eq!(f32::from(F8::merge_unbias(m, e)), expected);
        }
    }
}
|