use constant sizes for the identifiers instead of dynamic Vectors

Jeremy Wall 2022-08-01 21:23:50 -04:00
parent ee13675963
commit 58fcee69f5
3 changed files with 48 additions and 23 deletions

@@ -12,6 +12,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+use std::hash::Hasher;
+
 /// Utility Trait to specify that payloads must be serializable into bytes.
 pub trait ByteEncoder {
     fn bytes(&self) -> Vec<u8>;
@@ -20,12 +22,26 @@ pub trait ByteEncoder {
 /// Utility Trait to specify the hashing algorithm and provide a common
 /// interface for that algorithm to provide. This interface is expected to
 /// be stateful.
-pub trait HashWriter: Default {
+pub trait HashWriter<const LEN: usize>: Default {
     /// Record bytes from an iterator into our hash algorithm.
     fn record<I: Iterator<Item = u8>>(&mut self, bs: I);
 
     /// Provide the current hash value based on the bytes that have so far been recorded.
     /// It is expected that you can call this method multiple times while recording the
     /// the bytes for input into the hash.
-    fn hash(&self) -> Vec<u8>;
+    fn hash(&self) -> [u8; LEN];
+}
+
+impl<H> HashWriter<8> for H
+where
+    H: Hasher + Default,
+{
+    fn record<I: Iterator<Item = u8>>(&mut self, iter: I) {
+        let bytes = iter.collect::<Vec<u8>>();
+        self.write(bytes.as_slice());
+    }
+
+    fn hash(&self) -> [u8; 8] {
+        self.finish().to_le_bytes()
+    }
 }
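For reference, a minimal usage sketch (not part of the commit), assuming the HashWriter trait and the blanket impl above are in scope. Because std's DefaultHasher implements Hasher + Default, it now satisfies HashWriter<8> automatically and yields a fixed-size identifier:

// Sketch only: any std Hasher that is also Default picks up HashWriter<8>
// via the blanket impl above.
use std::collections::hash_map::DefaultHasher;

fn eight_byte_id(payload: &str) -> [u8; 8] {
    let mut hw = DefaultHasher::default();
    // record() feeds the payload bytes into Hasher::write().
    hw.record(payload.bytes());
    // hash() now returns a fixed-size array: the little-endian bytes of finish().
    hw.hash()
}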

@@ -34,19 +34,19 @@ pub enum EdgeError {
 ///
 /// A merkle DAG instance is tied to a specific implementation of the HashWriter interface to ensure
 /// that all hash identifiers are of the same hash algorithm.
-pub struct DAG<N, HW>
+pub struct DAG<N, HW, const HASH_LEN: usize>
 where
     N: ByteEncoder,
-    HW: HashWriter,
+    HW: HashWriter<HASH_LEN>,
 {
-    roots: BTreeSet<Vec<u8>>,
-    nodes: BTreeMap<Vec<u8>, Node<N, HW>>,
+    roots: BTreeSet<[u8; HASH_LEN]>,
+    nodes: BTreeMap<[u8; HASH_LEN], Node<N, HW, HASH_LEN>>,
 }
 
-impl<N, HW> DAG<N, HW>
+impl<N, HW, const HASH_LEN: usize> DAG<N, HW, HASH_LEN>
 where
     N: ByteEncoder,
-    HW: HashWriter,
+    HW: HashWriter<HASH_LEN>,
 {
     /// Construct a new empty DAG. The empty DAG is also the default for a DAG.
     pub fn new() -> Self {
@@ -59,9 +59,9 @@ where
     pub fn add_node(
         &mut self,
         item: N,
-        dependency_ids: BTreeSet<Vec<u8>>,
+        dependency_ids: BTreeSet<[u8; HASH_LEN]>,
     ) -> Result<(), EdgeError> {
-        let node = Node::<N, HW>::new(item, dependency_ids.clone());
+        let node = Node::<N, HW, HASH_LEN>::new(item, dependency_ids.clone());
         let id = node.id();
         if self.roots.contains(id) {
             // We've already added this node so there is nothing left to do.
@@ -76,15 +76,17 @@ where
     }
 
     /// Get a node from the DAG by it's hash identifier if it exists.
-    pub fn get_node_by_id(&self, id: &Vec<u8>) -> Option<&Node<N, HW>> {
+    pub fn get_node_by_id(&self, id: &[u8; HASH_LEN]) -> Option<&Node<N, HW, HASH_LEN>> {
         self.nodes.get(id)
     }
+
+    // TODO(jwall): How to specify a partial ordering for nodes in a graph?
 }
 
-impl<N, HW> Default for DAG<N, HW>
+impl<N, HW, const HASH_LEN: usize> Default for DAG<N, HW, HASH_LEN>
 where
     N: ByteEncoder,
-    HW: HashWriter,
+    HW: HashWriter<HASH_LEN>,
 {
     fn default() -> Self {
         Self {
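A hypothetical construction sketch, assuming DAG, EdgeError, and ByteEncoder from this crate are in scope; the Payload type and its ByteEncoder impl are invented here for illustration. The new third type parameter is the HASH_LEN const generic, and 8 matches the blanket HashWriter<8> impl for std Hashers:

use std::collections::hash_map::DefaultHasher;
use std::collections::BTreeSet;

// Hypothetical payload type for the sketch.
struct Payload(String);

impl ByteEncoder for Payload {
    fn bytes(&self) -> Vec<u8> {
        self.0.as_bytes().to_vec()
    }
}

fn build() -> Result<(), EdgeError> {
    // Node identifiers are now [u8; 8] keys rather than Vec<u8>.
    let mut dag: DAG<Payload, DefaultHasher, 8> = DAG::new();
    dag.add_node(Payload("root".to_string()), BTreeSet::new())?;
    Ok(())
}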

@@ -25,21 +25,25 @@ use crate::hash::{ByteEncoder, HashWriter};
 /// Nodes are tied to a specific implementation of the HashWriter trait which is itself tied
 /// to the DAG they are stored in guaranteeing that the same Hashing implementation is used
 /// for each node in the DAG.
-pub struct Node<N, HW> {
-    id: Vec<u8>,
+pub struct Node<N, HW, const HASH_LEN: usize>
+where
+    N: ByteEncoder,
+    HW: HashWriter<HASH_LEN>,
+{
+    id: [u8; HASH_LEN],
     item: N,
-    item_id: Vec<u8>,
-    dependency_ids: BTreeSet<Vec<u8>>,
+    item_id: [u8; HASH_LEN],
+    dependency_ids: BTreeSet<[u8; HASH_LEN]>,
     _phantom: PhantomData<HW>,
 }
 
-impl<N, HW> Node<N, HW>
+impl<N, HW, const HASH_LEN: usize> Node<N, HW, HASH_LEN>
 where
     N: ByteEncoder,
-    HW: HashWriter,
+    HW: HashWriter<HASH_LEN>,
 {
     /// Construct a new node with a payload and a set of dependency_ids.
-    pub fn new(item: N, dependency_ids: BTreeSet<Vec<u8>>) -> Self {
+    pub fn new(item: N, dependency_ids: BTreeSet<[u8; HASH_LEN]>) -> Self {
         let mut hw = HW::default();
 
         // NOTE(jwall): The order here is important. Our reliable id creation must be stable
@@ -48,7 +52,10 @@ where
         hw.record(item.bytes().into_iter());
         let item_id = hw.hash();
         // 2. Sort the dependency ids before recording them into our node id hash.
-        let mut dependency_list = dependency_ids.iter().cloned().collect::<Vec<Vec<u8>>>();
+        let mut dependency_list = dependency_ids
+            .iter()
+            .cloned()
+            .collect::<Vec<[u8; HASH_LEN]>>();
         dependency_list.sort();
         // 3. record the dependency ids into our node id hash in the sorted order.
         for d in dependency_list.iter() {
@@ -71,11 +78,11 @@ where
         &self.item
     }
 
-    pub fn item_id(&self) -> &Vec<u8> {
+    pub fn item_id(&self) -> &[u8; HASH_LEN] {
         &self.item_id
     }
 
-    pub fn dependency_ids(&self) -> &BTreeSet<Vec<u8>> {
+    pub fn dependency_ids(&self) -> &BTreeSet<[u8; HASH_LEN]> {
         &self.dependency_ids
     }
 }
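Finally, a sketch of why the sorted dependency recording matters, reusing the hypothetical Payload type from the earlier sketch and assuming Node is in scope: two nodes built from the same payload and the same dependency set end up with identical fixed-size ids, regardless of the order the dependencies were supplied in.

use std::collections::hash_map::DefaultHasher;
use std::collections::BTreeSet;

fn ids_are_stable(dep_a: [u8; 8], dep_b: [u8; 8]) {
    let deps_one: BTreeSet<[u8; 8]> = [dep_a, dep_b].into_iter().collect();
    let deps_two: BTreeSet<[u8; 8]> = [dep_b, dep_a].into_iter().collect();
    let n1 = Node::<Payload, DefaultHasher, 8>::new(Payload("x".into()), deps_one);
    let n2 = Node::<Payload, DefaultHasher, 8>::new(Payload("x".into()), deps_two);
    // The dependency ids are sorted before being recorded into the node id hash,
    // so both nodes hash to the same [u8; 8] identifier.
    assert_eq!(n1.id(), n2.id());
}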