From 58fcee69f5fbc3ff967aaf104d35a8173bce92df Mon Sep 17 00:00:00 2001
From: Jeremy Wall <jeremy@marzhillstudios.com>
Date: Mon, 1 Aug 2022 21:23:50 -0400
Subject: [PATCH] Use fixed-size arrays for hash identifiers instead of
 dynamic Vecs

---
 src/hash.rs | 20 ++++++++++++++++++--
 src/lib.rs  | 24 +++++++++++++-----------
 src/node.rs | 27 +++++++++++++++++----------
 3 files changed, 48 insertions(+), 23 deletions(-)
diff --git a/src/hash.rs b/src/hash.rs
index 2388b5e..7eae4f1 100644
--- a/src/hash.rs
+++ b/src/hash.rs
@@ -12,6 +12,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+use std::hash::Hasher;
+
 /// Utility Trait to specify that payloads must be serializable into bytes.
 pub trait ByteEncoder {
     fn bytes(&self) -> Vec<u8>;
@@ -20,12 +22,26 @@ pub trait ByteEncoder {
 /// Utility Trait to specify the hashing algorithm and provide a common
 /// interface for that algorithm to provide. This interface is expected to
 /// be stateful.
-pub trait HashWriter: Default {
+pub trait HashWriter<const LEN: usize>: Default {
     /// Record bytes from an iterator into our hash algorithm.
     fn record<I: Iterator<Item = u8>>(&mut self, bs: I);
 
     /// Provide the current hash value based on the bytes that have so far been recorded.
     /// It is expected that you can call this method multiple times while recording the
     /// the bytes for input into the hash.
-    fn hash(&self) -> Vec<u8>;
+    fn hash(&self) -> [u8; LEN];
+}
+
+impl<H> HashWriter<8> for H
+where
+    H: Hasher + Default,
+{
+    fn record<I: Iterator<Item = u8>>(&mut self, iter: I) {
+        let bytes = iter.collect::<Vec<u8>>();
+        self.write(bytes.as_slice());
+    }
+
+    fn hash(&self) -> [u8; 8] {
+        self.finish().to_le_bytes()
+    }
 }
diff --git a/src/lib.rs b/src/lib.rs
index c5ad4fb..5f81c61 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -34,19 +34,19 @@ pub enum EdgeError {
 ///
 /// A merkle DAG instance is tied to a specific implementation of the HashWriter interface to ensure
 /// that all hash identifiers are of the same hash algorithm.
-pub struct DAG<N, HW>
+pub struct DAG<N, HW, const HASH_LEN: usize>
 where
     N: ByteEncoder,
-    HW: HashWriter,
+    HW: HashWriter<HASH_LEN>,
 {
-    roots: BTreeSet<Vec<u8>>,
-    nodes: BTreeMap<Vec<u8>, Node<N, HW>>,
+    roots: BTreeSet<[u8; HASH_LEN]>,
+    nodes: BTreeMap<[u8; HASH_LEN], Node<N, HW, HASH_LEN>>,
 }
 
-impl<N, HW> DAG<N, HW>
+impl<N, HW, const HASH_LEN: usize> DAG<N, HW, HASH_LEN>
 where
     N: ByteEncoder,
-    HW: HashWriter,
+    HW: HashWriter<HASH_LEN>,
 {
     /// Construct a new empty DAG. The empty DAG is also the default for a DAG.
     pub fn new() -> Self {
@@ -59,9 +59,9 @@ where
     pub fn add_node(
         &mut self,
         item: N,
-        dependency_ids: BTreeSet<Vec<u8>>,
+        dependency_ids: BTreeSet<[u8; HASH_LEN]>,
     ) -> Result<(), EdgeError> {
-        let node = Node::<N, HW>::new(item, dependency_ids.clone());
+        let node = Node::<N, HW, HASH_LEN>::new(item, dependency_ids.clone());
         let id = node.id();
         if self.roots.contains(id) {
             // We've already added this node so there is nothing left to do.
@@ -76,15 +76,17 @@ where
     }
 
     /// Get a node from the DAG by it's hash identifier if it exists.
-    pub fn get_node_by_id(&self, id: &Vec<u8>) -> Option<&Node<N, HW>> {
+    pub fn get_node_by_id(&self, id: &[u8; HASH_LEN]) -> Option<&Node<N, HW, HASH_LEN>> {
         self.nodes.get(id)
     }
+
+    // TODO(jwall): How to specify a partial ordering for nodes in a graph?
 }
 
-impl<N, HW> Default for DAG<N, HW>
+impl<N, HW, const HASH_LEN: usize> Default for DAG<N, HW, HASH_LEN>
 where
     N: ByteEncoder,
-    HW: HashWriter,
+    HW: HashWriter<HASH_LEN>,
 {
     fn default() -> Self {
         Self {
diff --git a/src/node.rs b/src/node.rs
index 6ac7a29..dc9f60e 100644
--- a/src/node.rs
+++ b/src/node.rs
@@ -25,21 +25,25 @@ use crate::hash::{ByteEncoder, HashWriter};
 /// Nodes are tied to a specific implementation of the HashWriter trait which is itself tied
 /// to the DAG they are stored in guaranteeing that the same Hashing implementation is used
 /// for each node in the DAG.
-pub struct Node<N, HW> {
-    id: Vec<u8>,
+pub struct Node<N, HW, const HASH_LEN: usize>
+where
+    N: ByteEncoder,
+    HW: HashWriter<HASH_LEN>,
+{
+    id: [u8; HASH_LEN],
     item: N,
-    item_id: Vec<u8>,
-    dependency_ids: BTreeSet<Vec<u8>>,
+    item_id: [u8; HASH_LEN],
+    dependency_ids: BTreeSet<[u8; HASH_LEN]>,
     _phantom: PhantomData<HW>,
 }
 
-impl<N, HW> Node<N, HW>
+impl<N, HW, const HASH_LEN: usize> Node<N, HW, HASH_LEN>
 where
     N: ByteEncoder,
-    HW: HashWriter,
+    HW: HashWriter<HASH_LEN>,
 {
     /// Construct a new node with a payload and a set of dependency_ids.
-    pub fn new(item: N, dependency_ids: BTreeSet<Vec<u8>>) -> Self {
+    pub fn new(item: N, dependency_ids: BTreeSet<[u8; HASH_LEN]>) -> Self {
         let mut hw = HW::default();
 
         // NOTE(jwall): The order here is important. Our reliable id creation must be stable
@@ -48,7 +52,10 @@ where
         hw.record(item.bytes().into_iter());
         let item_id = hw.hash();
         // 2. Sort the dependency ids before recording them into our node id hash.
-        let mut dependency_list = dependency_ids.iter().cloned().collect::<Vec<Vec<u8>>>();
+        let mut dependency_list = dependency_ids
+            .iter()
+            .cloned()
+            .collect::<Vec<[u8; HASH_LEN]>>();
         dependency_list.sort();
         // 3. record the dependency ids into our node id hash in the sorted order.
         for d in dependency_list.iter() {
@@ -71,11 +78,11 @@ where
         &self.item
     }
 
-    pub fn item_id(&self) -> &Vec<u8> {
+    pub fn item_id(&self) -> &[u8; HASH_LEN] {
         &self.item_id
     }
 
-    pub fn dependency_ids(&self) -> &BTreeSet<Vec<u8>> {
+    pub fn dependency_ids(&self) -> &BTreeSet<[u8; HASH_LEN]> {
         &self.dependency_ids
     }
 }