Add Documentation and also fix some naming and types for clarity.

2025-07-23 11:09:51 -04:00 · 2022-08-01 20:52:31 -04:00 · 2022-08-01 20:52:31 -04:00 · ee13675963
commit ee13675963
parent 1bd833785d
3 changed files with 63 additions and 9 deletions
--- a/src/hash.rs
+++ b/src/hash.rs
@ -12,11 +12,20 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 /// Utility trait specifying that a payload can be serialized into bytes.
 ///
 /// `Node::new` feeds the bytes returned by `bytes` into its `HashWriter`, so they
 /// contribute to both the payload id and the node id.
 ///
 /// NOTE(review): implementations presumably need to return the same bytes for the same
 /// value on every call for ids to stay stable -- confirm against implementors.
 pub trait ByteEncoder {
    /// Return the byte representation of this value as an owned `Vec<u8>`.
    fn bytes(&self) -> Vec<u8>;
 }
 /// Utility trait that specifies the hashing algorithm and provides a common
 /// interface to it. Implementations are expected to be stateful: bytes are
 /// accumulated through `record` and the digest is read back through `hash`.
 ///
 /// `Default` is required because `Node::new` obtains a fresh writer with `HW::default()`.
 pub trait HashWriter: Default {
    /// Record bytes from an iterator into our hash algorithm.
    fn record<I: Iterator<Item = u8>>(&mut self, bs: I);
    /// Provide the current hash value based on the bytes that have been recorded so far.
    /// It is expected that you can call this method multiple times while still recording
    /// bytes for input into the hash.
    fn hash(&self) -> Vec<u8>;
 }
--- a/src/lib.rs
+++ b/src/lib.rs
@ -23,6 +23,17 @@ pub enum EdgeError {
    NoSuchDependents,
 }
 /// A Merkle-DAG implementation. This is a modification on the standard Merkle Tree data structure
 /// but instead of a tree it is a DAG and as a result can have multiple roots. A merkle-dag specifies
 /// a partial ordering on all the nodes and utilizes the api to ensure that this ordering is
 /// preserved during construction.
 ///
 /// The merkle dag consists of a set of pointers to the current known roots as well as the total set
 /// of nodes in the dag. Node payload items must be of a single type and implement the `ByteEncoder`
 /// trait.
 ///
 /// A merkle DAG instance is tied to a specific implementation of the HashWriter interface to ensure
 /// that all hash identifiers are of the same hash algorithm.
 pub struct DAG<N, HW>
 where
    N: ByteEncoder,
@ -37,11 +48,19 @@ where
    N: ByteEncoder,
    HW: HashWriter,
 {
    /// Construct a new empty DAG.
    ///
    /// Equivalent to `Self::default()`; the empty DAG is also the default for a DAG.
    pub fn new() -> Self {
        // Delegate to the `Default` implementation so "empty" is defined in one place.
        Self::default()
    }
-    pub fn add_node(&mut self, item: N, dependency_ids: Vec<Vec<u8>>) -> Result<(), EdgeError> {
+    /// Add a new payload with a required set of dependency_ids. This method will construct a new node
    /// and add it to the DAG with the given payload item and dependency id set. It is idempotent for any
    /// given set of inputs.
    pub fn add_node(
        &mut self,
        item: N,
        dependency_ids: BTreeSet<Vec<u8>>,
    ) -> Result<(), EdgeError> {
        let node = Node::<N, HW>::new(item, dependency_ids.clone());
        let id = node.id();
        if self.roots.contains(id) {
@ -56,7 +75,8 @@ where
        Ok(())
    }
-    pub fn get_node(&self, id: &Vec<u8>) -> Option<&Node<N, HW>> {
+    /// Get a node from the DAG by its hash identifier if it exists.
    pub fn get_node_by_id(&self, id: &Vec<u8>) -> Option<&Node<N, HW>> {
        self.nodes.get(id)
    }
 }
--- a/src/node.rs
+++ b/src/node.rs
@ -1,5 +1,3 @@
 use std::marker::PhantomData;
 // Copyright 2022 Jeremy Wall (Jeremy@marzhilsltudios.com)
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
@ -13,12 +11,25 @@ use std::marker::PhantomData;
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 use std::{collections::BTreeSet, marker::PhantomData};
 use crate::hash::{ByteEncoder, HashWriter};
 /// A node in a merkle DAG. Nodes are composed of a payload item and a set of dependency_ids.
 /// They provide a unique identifier that is formed from the bytes of the payload as well
 /// as the bytes of the dependency_ids. The same payload and dependency ids always produce
 /// the same id, which makes Nodes content-addressable.
 ///
 /// Nodes also expose the unique content address of the item payload alone as a convenience.
 ///
 /// Nodes are tied to a specific implementation of the HashWriter trait which is itself tied
 /// to the DAG they are stored in, guaranteeing that the same hashing implementation is used
 /// for each node in the DAG.
 pub struct Node<N, HW> {
    /// Hash taken in `Node::new` after recording the payload bytes and then the dependency ids.
    id: Vec<u8>,
    /// The payload stored in this node.
    item: N,
-    dependency_ids: Vec<Vec<u8>>,
    /// Hash taken in `Node::new` after recording only the payload bytes.
+    item_id: Vec<u8>,
    /// The set of dependency ids supplied at construction.
    dependency_ids: BTreeSet<Vec<u8>>,
    /// Zero-sized marker tying the node to its `HashWriter` type `HW` without storing one.
    _phantom: PhantomData<HW>,
 }
@ -27,16 +38,26 @@ where
    N: ByteEncoder,
    HW: HashWriter,
 {
-    pub fn new(item: N, mut dependency_ids: Vec<Vec<u8>>) -> Self {
+    /// Construct a new node with a payload and a set of dependency_ids.
    pub fn new(item: N, dependency_ids: BTreeSet<Vec<u8>>) -> Self {
        let mut hw = HW::default();
-        dependency_ids.sort();
+
        // NOTE(jwall): The order here is important. Our reliable id creation must be stable
        // for multiple calls to this constructor. This means that we must *always*
        // 1. Record the `item_id` hash first.
        hw.record(item.bytes().into_iter());
-        for d in dependency_ids.iter() {
+        let item_id = hw.hash();
        // 2. Sort the dependency ids before recording them into our node id hash.
        let mut dependency_list = dependency_ids.iter().cloned().collect::<Vec<Vec<u8>>>();
        dependency_list.sort();
        // 3. record the dependency ids into our node id hash in the sorted order.
        for d in dependency_list.iter() {
            hw.record(d.iter().cloned());
        }
        Self {
            id: hw.hash(),
            item,
            item_id,
            dependency_ids,
            _phantom: PhantomData,
        }
@ -50,7 +71,11 @@ where
        &self.item
    }
-    pub fn dependency_ids(&self) -> &Vec<Vec<u8>> {
+    /// Borrow the content address of the payload alone: the hash `Node::new` captured after
    /// recording only the payload bytes, before any dependency ids were recorded.
    pub fn item_id(&self) -> &Vec<u8> {
        &self.item_id
    }
    /// Borrow the set of dependency ids this node was constructed with.
    pub fn dependency_ids(&self) -> &BTreeSet<Vec<u8>> {
        &self.dependency_ids
    }
 }