Add public API and docs #1
9
README.md
Normal file
9
README.md
Normal file
|
|
@ -0,0 +1,9 @@
|
||||||
|
# F8: 8-bit floats
|
||||||
|
|
||||||
|
> ⚠️ Not for production use!
|
||||||
|
|
||||||
|
F8 is a toy software floating-point math library.
|
||||||
|
It provides an 8-bit floating-point type `F8`, with 5 mantissa bits, 3 exponent bits, and no sign bit.
|
||||||
|
The format used resembles [IEEE 754] binary formats but stripped down to the bare necessities: the only special value supported is zero.
|
||||||
|
|
||||||
|
[IEEE 754]: https://en.wikipedia.org/wiki/IEEE_754
|
||||||
43
src/conv.rs
43
src/conv.rs
|
|
@ -41,23 +41,23 @@ impl From<F8> for f32 {
|
||||||
|
|
||||||
impl F8 {
|
impl F8 {
|
||||||
/// Split self into the mantissa and exponent, as stored.
|
/// Split self into the mantissa and exponent, as stored.
|
||||||
pub(crate) fn split(self) -> (u8, u8) {
|
pub(crate) const fn split(self) -> (u8, u8) {
|
||||||
(self.0 & M_MASK, self.0 >> M_BITS)
|
(self.0 & M_MASK, self.0 >> M_BITS)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Split self into integers (m, e) such that `self == m * 2.pow(e)`.
|
/// Split self into integers (m, e) such that `self == m * 2.pow(e)`.
|
||||||
pub(crate) fn split_unbias(self) -> (u8, i8) {
|
pub(crate) const fn split_unbias(self) -> (u8, i8) {
|
||||||
let (m, e) = self.split();
|
let (m, e) = self.split();
|
||||||
(m | M_BIAS, e as i8 - (E_BIAS + M_BITS) as i8)
|
(m | M_BIAS, e as i8 - (E_BIAS + M_BITS) as i8)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn merge(m: u8, e: u8) -> Self {
|
pub(crate) const fn merge(m: u8, e: u8) -> Self {
|
||||||
assert!(m <= M_STORAGE_MAX);
|
assert!(m <= M_STORAGE_MAX);
|
||||||
assert!(e <= E_STORAGE_MAX);
|
assert!(e <= E_STORAGE_MAX);
|
||||||
Self((e << M_BITS) | m)
|
Self((e << M_BITS) | m)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn merge_unbias(in_m: u32, in_e: i32) -> Self {
|
pub(crate) const fn merge_unbias(in_m: u32, in_e: i32) -> Self {
|
||||||
if in_m == 0 {
|
if in_m == 0 {
|
||||||
return Self(0);
|
return Self(0);
|
||||||
}
|
}
|
||||||
|
|
@ -73,6 +73,41 @@ impl F8 {
|
||||||
}
|
}
|
||||||
Self::merge(m as u8 & M_STORAGE_MAX, e as u8)
|
Self::merge(m as u8 & M_STORAGE_MAX, e as u8)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Reinterpret `bits` as an [`F8`]. Equivalent to [`std::mem::transmute`] but safe.
|
||||||
|
pub const fn from_bits(bits: u8) -> Self {
|
||||||
|
Self(bits)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Reinterpret `self` as an [`u8`]. Equivalent to [`std::mem::transmute`] but safe.
|
||||||
|
pub const fn to_bits(self) -> u8 {
|
||||||
|
self.0
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Calculate `base * 2.pow(scale)`, preserving as much precision as possible.
|
||||||
|
pub fn ldexp(base: u32, scale: i32) -> Self {
|
||||||
|
Self::merge_unbias(base, scale)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Split `self` into `(base, scale)` such that `self = base * 2.pow(scale)`.
|
||||||
|
///
|
||||||
|
/// Guarantees:
|
||||||
|
/// * `ldexp(val.frexp()) == val`.
|
||||||
|
/// * `base` will have at most [M_BITS]+1 low-order bits set.
|
||||||
|
/// * `scale.abs()` will have at most [E_BITS]+1 low-order bits set.
|
||||||
|
///
|
||||||
|
/// # Example
|
||||||
|
/// ```
|
||||||
|
/// # use f8::F8;
|
||||||
|
/// # let val = F8::from_bits(42);
|
||||||
|
/// let (base, scale) = val.frexp();
|
||||||
|
/// let val2 = F8::ldexp(base, scale);
|
||||||
|
/// assert_eq!(val, val2);
|
||||||
|
/// ```
|
||||||
|
pub fn frexp(self) -> (u32, i32) {
|
||||||
|
let (base, scale) = self.split_unbias();
|
||||||
|
(base.into(), scale.into())
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
|
|
|
||||||
41
src/fmt.rs
Normal file
41
src/fmt.rs
Normal file
|
|
@ -0,0 +1,41 @@
|
||||||
|
use crate::{E_BIAS, F8};
|
||||||
|
|
||||||
|
impl std::fmt::Binary for F8 {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
if f.alternate() {
|
||||||
|
f.write_str("0b")?;
|
||||||
|
}
|
||||||
|
if self.0 == 0 {
|
||||||
|
f.write_str("0.00000p0")?;
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
let (m, e) = self.split();
|
||||||
|
write!(f, "1.{m:05b}p{e}", e = e as i8 - E_BIAS as i8)?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl std::fmt::Debug for F8 {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
write!(f, "{self:#b}f8")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks the `{:b}` rendering of zero and of several values built from
    /// raw (mantissa, exponent) storage fields.
    #[test]
    fn test_display() {
        // (expected text, stored mantissa, stored exponent)
        let cases = [
            ("0.00000p0", 0, 0),
            ("1.00000p0", 0, E_BIAS),
            ("1.00000p1", 0, E_BIAS + 1),
            ("1.00000p-1", 0, E_BIAS - 1),
            ("1.00001p0", 1, E_BIAS),
            ("1.11111p0", 0b11111, E_BIAS),
        ];
        for (expected, m, e) in cases {
            let rendered = format!("{:b}", F8::merge(m, e));
            assert_eq!(expected, rendered);
        }
    }
}
|
||||||
67
src/lib.rs
67
src/lib.rs
|
|
@ -1,13 +1,24 @@
|
||||||
|
//! Software 8-bit floating-point math library. Not for production use.
|
||||||
|
|
||||||
mod conv;
|
mod conv;
|
||||||
|
mod fmt;
|
||||||
mod ops;
|
mod ops;
|
||||||
|
|
||||||
|
/// Mantissa width of [`F8`].
|
||||||
pub const M_BITS: u8 = 5;
|
pub const M_BITS: u8 = 5;
|
||||||
|
|
||||||
|
/// Exponent width of [`F8`].
|
||||||
pub const E_BITS: u8 = 3;
|
pub const E_BITS: u8 = 3;
|
||||||
|
|
||||||
|
/// The value such that `2.pow(E_CAP)` is just over the [`F8`] limit.
|
||||||
const E_CAP: u8 = 4;
|
const E_CAP: u8 = 4;
|
||||||
|
|
||||||
|
/// Largest exponent value of [`F8`].
|
||||||
|
pub const E_MAX: u8 = E_CAP - 1;
|
||||||
|
|
||||||
static_assertions::const_assert_eq!(M_BITS + E_BITS, 8);
|
static_assertions::const_assert_eq!(M_BITS + E_BITS, 8);
|
||||||
|
|
||||||
const E_MAX: u8 = E_CAP - 1;
|
/// The largest integer up to and including which all integers are representable exactly.
|
||||||
pub const EXACT_INT_MAX: u8 = if E_MAX > M_BITS {
|
pub const EXACT_INT_MAX: u8 = if E_MAX > M_BITS {
|
||||||
2 << M_BITS
|
2 << M_BITS
|
||||||
} else {
|
} else {
|
||||||
|
|
@ -20,46 +31,22 @@ const E_BIAS: u8 = E_STORAGE_MAX - E_MAX;
|
||||||
const M_MASK: u8 = M_STORAGE_MAX;
|
const M_MASK: u8 = M_STORAGE_MAX;
|
||||||
const E_MASK: u8 = E_STORAGE_MAX << M_BITS;
|
const E_MASK: u8 = E_STORAGE_MAX << M_BITS;
|
||||||
|
|
||||||
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
|
/// 8-bit unsigned binary floating-point type.
|
||||||
|
///
|
||||||
|
/// # Properties
|
||||||
|
///
|
||||||
|
/// * Mantissa width: 5 bits ([`M_BITS`])
|
||||||
|
/// * Exponent width: 3 bits ([`E_BITS`])
|
||||||
|
/// * Negative values: not supported
|
||||||
|
/// * Zero: special-cased
|
||||||
|
/// * Subnormals: not supported
|
||||||
|
/// * Infinity: not supported
|
||||||
|
/// * NaN: not supported
|
||||||
|
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Default)]
|
||||||
#[repr(transparent)]
|
#[repr(transparent)]
|
||||||
pub struct F8(u8);
|
pub struct F8(u8);
|
||||||
|
|
||||||
impl std::fmt::Binary for F8 {
|
impl F8 {
|
||||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
pub const ZERO: Self = Self(0);
|
||||||
if f.alternate() {
|
pub const ONE: Self = Self::merge(0, E_BIAS);
|
||||||
f.write_str("0b")?;
|
|
||||||
}
|
|
||||||
if self.0 == 0 {
|
|
||||||
f.write_str("0.00000p0")?;
|
|
||||||
return Ok(());
|
|
||||||
}
|
|
||||||
let (m, e) = self.split();
|
|
||||||
write!(f, "1.{m:05b}p{e}", e = e as i8 - E_BIAS as i8)?;
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl std::fmt::Debug for F8 {
|
|
||||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
||||||
write!(f, "{self:#b}f8")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(test)]
|
|
||||||
mod tests {
|
|
||||||
use super::*;
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_display() {
|
|
||||||
fn fmt_split(m: u8, e: u8) -> String {
|
|
||||||
let v = F8::merge(m, e);
|
|
||||||
format!("{v:b}")
|
|
||||||
}
|
|
||||||
assert_eq!("0.00000p0", fmt_split(0, 0));
|
|
||||||
assert_eq!("1.00000p0", fmt_split(0, E_BIAS));
|
|
||||||
assert_eq!("1.00000p1", fmt_split(0, E_BIAS + 1));
|
|
||||||
assert_eq!("1.00000p-1", fmt_split(0, E_BIAS - 1));
|
|
||||||
assert_eq!("1.00001p0", fmt_split(1, E_BIAS));
|
|
||||||
assert_eq!("1.11111p0", fmt_split(0b11111, E_BIAS));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user