Compare commits

...

3 Commits

Author SHA1 Message Date
3b126e7fa6 document type properties 2026-02-17 05:33:53 +03:00
40ba11f33c add basic constants 2026-02-17 05:33:53 +03:00
f0468502d8 add readme 2026-02-17 05:33:53 +03:00
2 changed files with 26 additions and 2 deletions

9
README.md Normal file
View File

@ -0,0 +1,9 @@
# F8: 8-bit floats
> ⚠️ Not for production use!
F8 is a toy software floating-point math library.
It provides an 8-bit floating-point type `F8`, with 5 mantissa bits, 3 exponent bits, and no sign bit.
The format resembles the [IEEE 754] binary formats, but is stripped down to the bare necessities: the only special value supported is zero (there are no subnormals, infinities, or NaNs).
[IEEE 754]: https://en.wikipedia.org/wiki/IEEE_754

View File

@ -31,7 +31,22 @@ const E_BIAS: u8 = E_STORAGE_MAX - E_MAX;
/// Bit mask for the mantissa field; identical to [`M_STORAGE_MAX`].
// NOTE(review): presumably the all-ones value of the low `M_BITS` bits — confirm at the definition of `M_STORAGE_MAX`.
const M_MASK: u8 = M_STORAGE_MAX;
/// Bit mask for the exponent field: [`E_STORAGE_MAX`] shifted left by [`M_BITS`],
/// which places it in the bits directly above the mantissa field.
const E_MASK: u8 = E_STORAGE_MAX << M_BITS;
/// 8-bit unsigned binary floating-point type.
///
/// # Properties
///
/// * Mantissa width: 5 bits ([`M_BITS`])
/// * Exponent width: 3 bits ([`E_BITS`])
/// * Negative values: not supported
/// * Zero: special-cased
/// * Subnormals: not supported
/// * Infinity: not supported
/// * NaN: not supported
///
/// # Representation
///
/// The value is stored in a single `u8`, and `#[repr(transparent)]` gives `F8`
/// the same layout as that byte. Equality and ordering are derived, so they
/// compare the stored byte. The derived [`Default`] is the all-zero byte, the
/// same bit pattern as [`F8::ZERO`].
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Default)]
#[repr(transparent)]
pub struct F8(u8);
impl F8 {
pub const ZERO: Self = Self(0);
pub const ONE: Self = Self::merge(0, E_BIAS);
}