From ee13675963036168d9d60c5c30267e86706a8f09 Mon Sep 17 00:00:00 2001 From: Jeremy Wall Date: Mon, 1 Aug 2022 20:52:31 -0400 Subject: [PATCH] Add Documentation and also fix some naming and types for clarity. --- src/hash.rs | 9 +++++++++ src/lib.rs | 24 ++++++++++++++++++++++-- src/node.rs | 39 ++++++++++++++++++++++++++++++++------- 3 files changed, 63 insertions(+), 9 deletions(-) diff --git a/src/hash.rs b/src/hash.rs index a87729f..2388b5e 100644 --- a/src/hash.rs +++ b/src/hash.rs @@ -12,11 +12,20 @@ // See the License for the specific language governing permissions and // limitations under the License. +/// Utility Trait to specify that payloads must be serializable into bytes. pub trait ByteEncoder { fn bytes(&self) -> Vec; } +/// Utility Trait to specify the hashing algorithm and provide a common +/// interface for that algorithm to implement. This interface is expected to +/// be stateful. pub trait HashWriter: Default { + /// Record bytes from an iterator into our hash algorithm. fn record>(&mut self, bs: I); + + /// Provide the current hash value based on the bytes that have so far been recorded. + /// It is expected that you can call this method multiple times while recording the + /// bytes for input into the hash. fn hash(&self) -> Vec; } diff --git a/src/lib.rs b/src/lib.rs index 5e66753..c5ad4fb 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -23,6 +23,17 @@ pub enum EdgeError { NoSuchDependents, } +/// A Merkle-DAG implementation. This is a modification of the standard Merkle Tree data structure +/// but instead of a tree it is a DAG and as a result can have multiple roots. A merkle-dag specifies +/// a partial ordering on all the nodes and utilizes the api to ensure that this ordering is +/// preserved during construction. +/// +/// The merkle dag consists of a set of pointers to the current known roots as well as the total set +/// of nodes in the dag. Node payload items must be of a single type and implement the `ByteEncoder` +/// trait. 
+/// +/// A merkle DAG instance is tied to a specific implementation of the HashWriter interface to ensure +/// that all hash identifiers are of the same hash algorithm. pub struct DAG where N: ByteEncoder, @@ -37,11 +48,19 @@ where N: ByteEncoder, HW: HashWriter, { + /// Construct a new empty DAG. The empty DAG is also the default for a DAG. pub fn new() -> Self { Self::default() } - pub fn add_node(&mut self, item: N, dependency_ids: Vec>) -> Result<(), EdgeError> { + /// Add a new payload with a required set of dependency_ids. This method will construct a new node + /// and add it to the DAG with the given payload item and dependency id set. It is idempotent for any + /// given set of inputs. + pub fn add_node( + &mut self, + item: N, + dependency_ids: BTreeSet>, + ) -> Result<(), EdgeError> { let node = Node::::new(item, dependency_ids.clone()); let id = node.id(); if self.roots.contains(id) { @@ -56,7 +75,8 @@ where Ok(()) } - pub fn get_node(&self, id: &Vec) -> Option<&Node> { + /// Get a node from the DAG by its hash identifier if it exists. + pub fn get_node_by_id(&self, id: &Vec) -> Option<&Node> { self.nodes.get(id) } } diff --git a/src/node.rs b/src/node.rs index 1b983fd..6ac7a29 100644 --- a/src/node.rs +++ b/src/node.rs @@ -1,5 +1,3 @@ -use std::marker::PhantomData; - // Copyright 2022 Jeremy Wall (Jeremy@marzhilsltudios.com) // // Licensed under the Apache License, Version 2.0 (the "License"); @@ -13,12 +11,25 @@ use std::marker::PhantomData; // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. +use std::{collections::BTreeSet, marker::PhantomData}; + use crate::hash::{ByteEncoder, HashWriter}; +/// A node in a merkle DAG. Nodes are composed of a payload item and a set of dependency_ids. +/// They provide a unique identifier that is formed from the bytes of the payload as well +/// as the bytes of the dependency_ids. 
This is guaranteed to be the same id for the same payload +/// and dependency ids every time, making Nodes content-addressable. +/// +/// Nodes also expose the unique content address of the item payload alone as a convenience. +/// +/// Nodes are tied to a specific implementation of the HashWriter trait which is itself tied +/// to the DAG they are stored in, guaranteeing that the same Hashing implementation is used +/// for each node in the DAG. pub struct Node { id: Vec, item: N, - dependency_ids: Vec>, + item_id: Vec, + dependency_ids: BTreeSet>, _phantom: PhantomData, } @@ -27,16 +38,26 @@ where N: ByteEncoder, HW: HashWriter, { - pub fn new(item: N, mut dependency_ids: Vec>) -> Self { + /// Construct a new node with a payload and a set of dependency_ids. + pub fn new(item: N, dependency_ids: BTreeSet>) -> Self { let mut hw = HW::default(); - dependency_ids.sort(); + + // NOTE(jwall): The order here is important. Our reliable id creation must be stable + // for multiple calls to this constructor. This means that we must *always*: + // 1. Record the `item_id` hash first. hw.record(item.bytes().into_iter()); - for d in dependency_ids.iter() { + let item_id = hw.hash(); + // 2. Sort the dependency ids before recording them into our node id hash. + let mut dependency_list = dependency_ids.iter().cloned().collect::>>(); + dependency_list.sort(); + // 3. Record the dependency ids into our node id hash in the sorted order. + for d in dependency_list.iter() { hw.record(d.iter().cloned()); } Self { id: hw.hash(), item, + item_id, dependency_ids, _phantom: PhantomData, } @@ -50,7 +71,11 @@ where &self.item } - pub fn dependency_ids(&self) -> &Vec> { + pub fn item_id(&self) -> &Vec { + &self.item_id + } + + pub fn dependency_ids(&self) -> &BTreeSet> { &self.dependency_ids } }