Add public API and docs #1
|
|
@ -74,10 +74,12 @@ impl F8 {
|
||||||
Self::merge(m as u8 & M_STORAGE_MAX, e as u8)
|
Self::merge(m as u8 & M_STORAGE_MAX, e as u8)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Reinterpret `bits` as an [`F8`]. Equivalent to [`std::mem::transmute`] but safe.
|
||||||
pub const fn from_bits(bits: u8) -> Self {
|
pub const fn from_bits(bits: u8) -> Self {
|
||||||
Self(bits)
|
Self(bits)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Reinterpret `self` as a [`u8`]. Equivalent to [`std::mem::transmute`] but safe.
|
||||||
pub const fn to_bits(self) -> u8 {
|
pub const fn to_bits(self) -> u8 {
|
||||||
self.0
|
self.0
|
||||||
}
|
}
|
||||||
|
|
@ -89,6 +91,12 @@ impl F8 {
|
||||||
|
|
||||||
/// Split `self` into `(base, scale)` such that `self = base * 2.pow(scale)`.
|
/// Split `self` into `(base, scale)` such that `self = base * 2.pow(scale)`.
|
||||||
///
|
///
|
||||||
|
/// Guarantees:
|
||||||
|
/// * `ldexp(val.frexp()) == val`.
|
||||||
|
/// * `base` will have at most [`M_BITS`]+1 low-order bits set.
|
||||||
|
/// * `scale.abs()` will have at most [`E_BITS`]+1 low-order bits set.
|
||||||
|
///
|
||||||
|
/// # Example
|
||||||
/// ```
|
/// ```
|
||||||
/// # use f8::F8;
|
/// # use f8::F8;
|
||||||
/// # let val = F8::from_bits(42);
|
/// # let val = F8::from_bits(42);
|
||||||
|
|
|
||||||
11
src/lib.rs
11
src/lib.rs
|
|
@ -1,14 +1,24 @@
|
||||||
|
//! Software 8-bit floating-point math library. Not for production use.
|
||||||
|
|
||||||
mod conv;
|
mod conv;
|
||||||
mod fmt;
|
mod fmt;
|
||||||
mod ops;
|
mod ops;
|
||||||
|
|
||||||
|
/// Mantissa width of [`F8`].
|
||||||
pub const M_BITS: u8 = 5;
|
pub const M_BITS: u8 = 5;
|
||||||
|
|
||||||
|
/// Exponent width of [`F8`].
|
||||||
pub const E_BITS: u8 = 3;
|
pub const E_BITS: u8 = 3;
|
||||||
|
|
||||||
|
/// The value such that `2.pow(E_CAP)` is just over the [`F8`] limit.
|
||||||
const E_CAP: u8 = 4;
|
const E_CAP: u8 = 4;
|
||||||
|
|
||||||
|
/// Largest exponent value of [`F8`].
|
||||||
pub const E_MAX: u8 = E_CAP - 1;
|
pub const E_MAX: u8 = E_CAP - 1;
|
||||||
|
|
||||||
static_assertions::const_assert_eq!(M_BITS + E_BITS, 8);
|
static_assertions::const_assert_eq!(M_BITS + E_BITS, 8);
|
||||||
|
|
||||||
|
/// The largest integer `n` such that every integer up to and including `n` is exactly representable.
|
||||||
pub const EXACT_INT_MAX: u8 = if E_MAX > M_BITS {
|
pub const EXACT_INT_MAX: u8 = if E_MAX > M_BITS {
|
||||||
2 << M_BITS
|
2 << M_BITS
|
||||||
} else {
|
} else {
|
||||||
|
|
@ -21,6 +31,7 @@ const E_BIAS: u8 = E_STORAGE_MAX - E_MAX;
|
||||||
const M_MASK: u8 = M_STORAGE_MAX;
|
const M_MASK: u8 = M_STORAGE_MAX;
|
||||||
const E_MASK: u8 = E_STORAGE_MAX << M_BITS;
|
const E_MASK: u8 = E_STORAGE_MAX << M_BITS;
|
||||||
|
|
||||||
|
/// 8-bit unsigned binary floating-point type, with [`M_BITS`] mantissa bits and [`E_BITS`] exponent bits.
|
||||||
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
|
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
|
||||||
#[repr(transparent)]
|
#[repr(transparent)]
|
||||||
pub struct F8(u8);
|
pub struct F8(u8);
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user