diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..f97ed39 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "rust-analyzer.checkOnSave.features": "all" +} \ No newline at end of file diff --git a/src/dag.rs b/src/dag.rs new file mode 100644 index 0000000..81c0a95 --- /dev/null +++ b/src/dag.rs @@ -0,0 +1,180 @@ +// Copyright 2022 Jeremy Wall (Jeremy@marzhilsltudios.com) +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::collections::{BTreeMap, BTreeSet}; + +use crate::hash::{ByteEncoder, HashWriter}; +use crate::node::Node; + +/// Node comparison values. In a given Merkle DAG a Node can come `After`, `Before`, be `Equivalent`, or `Uncomparable`. +/// If the two nodes have the same id they are eqivalent. If two nodes are not part of the same sub graph within the DAG +/// then they are Uncomparable. If one node is an ancestor of another DAG then that node comes before the other. If the +/// reverse is true then that node comes after the other. +#[derive(PartialEq, Debug)] +pub enum NodeCompare { + After, + Before, + Equivalent, + Uncomparable, +} + +#[derive(Debug)] +pub enum EdgeError { + NoSuchDependents, +} + +/// A Merkle-DAG implementation. This is a modification on the standard Merkle Tree data structure +/// but instead of a tree it is a DAG and as a result can have multiple roots. A merkle-dag specifies +/// a partial ordering on all the nodes and utilizes the api to ensure that this ordering is +/// preserved during construction. +/// +/// The merkle dag consists of a set of pointers to the current known roots as well as the total set +/// of nodes in the dag. Node payload items must be of a single type and implement the `ByteEncoder` +/// trait. +/// +/// A merkle DAG instance is tied to a specific implementation of the HashWriter interface to ensure +/// that all hash identifiers are of the same hash algorithm. +#[derive(Clone, Debug)] +pub struct Merkle +where + N: ByteEncoder, + HW: HashWriter, +{ + roots: BTreeSet<[u8; HASH_LEN]>, + nodes: BTreeMap<[u8; HASH_LEN], Node>, +} + +impl Merkle +where + N: ByteEncoder, + HW: HashWriter, +{ + /// Construct a new empty DAG. The empty DAG is also the default for a DAG. + pub fn new() -> Self { + Self::default() + } + + /// Add a new payload with a required set of dependency_ids. This method will construct a new node + /// and add it to the DAG with the given payload item and dependency id set. It is idempotent for any + /// given set of inputs. + /// + /// One result of not constructing/adding nodes in this way is that we ensure that we always satisfy + /// the implementation rule in the merkel-crdt's whitepaper. + pub fn add_node<'a>( + &'a mut self, + item: N, + dependency_ids: BTreeSet<[u8; HASH_LEN]>, + ) -> Result<[u8; HASH_LEN], EdgeError> { + let node = Node::::new(item, dependency_ids.clone()); + let id = node.id().clone(); + if self.nodes.contains_key(&id) { + // We've already added this node so there is nothing left to do. + return Ok(id); + } + for dep_id in dependency_ids.iter() { + if !self.nodes.contains_key(dep_id) { + return Err(EdgeError::NoSuchDependents); + } + // If any of our dependencies is in the roots pointer list then + // it is time to remove it from there. + if self.roots.contains(dep_id) { + self.roots.remove(dep_id); + } + } + self.roots.insert(id.clone()); + self.nodes.insert(id.clone(), node); + Ok(id) + } + + /// Check if we already have a copy of a node. + pub fn check_for_node(&self, id: &[u8; HASH_LEN]) -> bool { + return self.nodes.contains_key(id); + } + + /// Get a node from the DAG by it's hash identifier if it exists. + pub fn get_node_by_id(&self, id: &[u8; HASH_LEN]) -> Option<&Node> { + self.nodes.get(id) + } + + /// Get the set of root node ids. + pub fn get_roots(&self) -> &BTreeSet<[u8; HASH_LEN]> { + &self.roots + } + + /// Get the map of all nodes in the DAG. + pub fn get_nodes(&self) -> &BTreeMap<[u8; HASH_LEN], Node> { + &self.nodes + } + + /// Compare two nodes by id in the graph. If the left id is an ancestor of the right node + /// then `returns `NodeCompare::Before`. If the right id is an ancestor of the left node + /// then returns `NodeCompare::After`. If both id's are equal then the returns + /// `NodeCompare::Equivalent`. If neither id are parts of the same subgraph then returns + /// `NodeCompare::Uncomparable`. + pub fn compare(&self, left: &[u8; HASH_LEN], right: &[u8; HASH_LEN]) -> NodeCompare { + if left == right { + NodeCompare::Equivalent + } else { + // Is left node an ancestor of right node? + if self.search_graph(right, left) { + NodeCompare::Before + // is right node an ancestor of left node? + } else if self.search_graph(left, right) { + NodeCompare::After + } else { + NodeCompare::Uncomparable + } + } + } + + fn search_graph(&self, root_id: &[u8; HASH_LEN], search_id: &[u8; HASH_LEN]) -> bool { + if root_id == search_id { + return true; + } + let root_node = match self.get_node_by_id(root_id) { + Some(n) => n, + None => { + return false; + } + }; + let mut stack = vec![root_node]; + while !stack.is_empty() { + let node = stack.pop().unwrap(); + let deps = node.dependency_ids(); + for dep in deps { + if search_id == dep { + return true; + } + stack.push(match self.get_node_by_id(dep) { + Some(n) => n, + None => panic!("Invalid DAG STATE encountered"), + }) + } + } + return false; + } +} + +impl Default for Merkle +where + N: ByteEncoder, + HW: HashWriter, +{ + fn default() -> Self { + Self { + roots: BTreeSet::new(), + nodes: BTreeMap::new(), + } + } +} diff --git a/src/lib.rs b/src/lib.rs index 4d08ea6..b21026f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -11,170 +11,10 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. -use std::collections::{BTreeMap, BTreeSet}; - -use hash::{ByteEncoder, HashWriter}; -use node::Node; - -mod hash; -mod node; - -/// Node comparison values. In a given Merkle DAG a Node can come `After`, `Before`, be `Equivalent`, or `Uncomparable`. -/// If the two nodes have the same id they are eqivalent. If two nodes are not part of the same sub graph within the DAG -/// then they are Uncomparable. If one node is an ancestor of another DAG then that node comes before the other. If the -/// reverse is true then that node comes after the other. -#[derive(PartialEq, Debug)] -pub enum NodeCompare { - After, - Before, - Equivalent, - Uncomparable, -} - -#[derive(Debug)] -pub enum EdgeError { - NoSuchDependents, -} - -/// A Merkle-DAG implementation. This is a modification on the standard Merkle Tree data structure -/// but instead of a tree it is a DAG and as a result can have multiple roots. A merkle-dag specifies -/// a partial ordering on all the nodes and utilizes the api to ensure that this ordering is -/// preserved during construction. -/// -/// The merkle dag consists of a set of pointers to the current known roots as well as the total set -/// of nodes in the dag. Node payload items must be of a single type and implement the `ByteEncoder` -/// trait. -/// -/// A merkle DAG instance is tied to a specific implementation of the HashWriter interface to ensure -/// that all hash identifiers are of the same hash algorithm. -#[derive(Clone, Debug)] -pub struct DAG -where - N: ByteEncoder, - HW: HashWriter, -{ - roots: BTreeSet<[u8; HASH_LEN]>, - nodes: BTreeMap<[u8; HASH_LEN], Node>, -} - -impl DAG -where - N: ByteEncoder, - HW: HashWriter, -{ - /// Construct a new empty DAG. The empty DAG is also the default for a DAG. - pub fn new() -> Self { - Self::default() - } - - /// Add a new payload with a required set of dependency_ids. This method will construct a new node - /// and add it to the DAG with the given payload item and dependency id set. It is idempotent for any - /// given set of inputs. - /// - /// One result of not constructing/adding nodes in this way is that we ensure that we always satisfy - /// the implementation rule in the merkel-crdt's whitepaper. - pub fn add_node<'a>( - &'a mut self, - item: N, - dependency_ids: BTreeSet<[u8; HASH_LEN]>, - ) -> Result<[u8; HASH_LEN], EdgeError> { - let node = Node::::new(item, dependency_ids.clone()); - let id = node.id().clone(); - if self.nodes.contains_key(&id) { - // We've already added this node so there is nothing left to do. - return Ok(id); - } - for dep_id in dependency_ids.iter() { - if !self.nodes.contains_key(dep_id) { - return Err(EdgeError::NoSuchDependents); - } - // If any of our dependencies is in the roots pointer list then - // it is time to remove it from there. - if self.roots.contains(dep_id) { - self.roots.remove(dep_id); - } - } - self.roots.insert(id.clone()); - self.nodes.insert(id.clone(), node); - Ok(id) - } - - /// Get a node from the DAG by it's hash identifier if it exists. - pub fn get_node_by_id(&self, id: &[u8; HASH_LEN]) -> Option<&Node> { - self.nodes.get(id) - } - - /// Get the set of root node ids. - pub fn get_roots(&self) -> &BTreeSet<[u8; HASH_LEN]> { - &self.roots - } - - /// Get the map of all nodes in the DAG. - pub fn get_nodes(&self) -> &BTreeMap<[u8; HASH_LEN], Node> { - &self.nodes - } - - /// Compare two nodes by id in the graph. If the left id is an ancestor of the right node - /// then `returns `NodeCompare::Before`. If the right id is an ancestor of the left node - /// then returns `NodeCompare::After`. If both id's are equal then the returns - /// `NodeCompare::Equivalent`. If neither id are parts of the same subgraph then returns - /// `NodeCompare::Uncomparable`. - pub fn compare(&self, left: &[u8; HASH_LEN], right: &[u8; HASH_LEN]) -> NodeCompare { - if left == right { - NodeCompare::Equivalent - } else { - // Is left node an ancestor of right node? - if self.search_graph(right, left) { - NodeCompare::Before - // is right node an ancestor of left node? - } else if self.search_graph(left, right) { - NodeCompare::After - } else { - NodeCompare::Uncomparable - } - } - } - - fn search_graph(&self, root_id: &[u8; HASH_LEN], search_id: &[u8; HASH_LEN]) -> bool { - if root_id == search_id { - return true; - } - let root_node = match self.get_node_by_id(root_id) { - Some(n) => n, - None => { - return false; - } - }; - let mut stack = vec![root_node]; - while !stack.is_empty() { - let node = stack.pop().unwrap(); - let deps = node.dependency_ids(); - for dep in deps { - if search_id == dep { - return true; - } - stack.push(match self.get_node_by_id(dep) { - Some(n) => n, - None => panic!("Invalid DAG STATE encountered"), - }) - } - } - return false; - } -} - -impl Default for DAG -where - N: ByteEncoder, - HW: HashWriter, -{ - fn default() -> Self { - Self { - roots: BTreeSet::new(), - nodes: BTreeMap::new(), - } - } -} +pub mod dag; +pub mod hash; +pub mod node; +pub mod prelude; #[cfg(test)] mod test; diff --git a/src/prelude.rs b/src/prelude.rs new file mode 100644 index 0000000..eb546d4 --- /dev/null +++ b/src/prelude.rs @@ -0,0 +1,17 @@ +// Copyright 2022 Jeremy Wall (Jeremy@marzhilsltudios.com) +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +pub use crate::dag::*; +pub use crate::hash::*; +pub use crate::node::*; diff --git a/src/proptest.rs b/src/proptest.rs deleted file mode 100644 index 9ac3264..0000000 --- a/src/proptest.rs +++ /dev/null @@ -1,130 +0,0 @@ -// Copyright 2022 Jeremy Wall (Jeremy@marzhilsltudios.com) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -use proptest::prelude::*; -use std::collections::{hash_map::DefaultHasher, BTreeMap, BTreeSet}; - -use crate::{NodeCompare, DAG}; - -fn simple_edge_strategy( - nodes_count: usize, -) -> impl Strategy, BTreeSet)> { - prop::collection::vec(".*", 4..nodes_count).prop_flat_map(|payloads| { - let nodes_len = payloads.len(); - ( - // our total list of nodes. - Just(payloads), - // our list of roots. - prop::collection::btree_set(1..nodes_len, 1..(nodes_len / 2)), - ) - }) -} - -fn complex_dag_strategy( - nodes_count: usize, - depth: usize, - branch: usize, -) -> impl Strategy> { - prop::collection::vec(".*", depth..nodes_count).prop_flat_map(move |payloads| { - let nodes_len = payloads.len(); - let mut dag = DAG::::new(); - // partition the payloads into depth pieces - let mut id_stack: Vec<[u8; 8]> = Vec::new(); - for chunk in payloads.chunks(nodes_len / depth) { - // loop through the partions adding each partions nodes to the dag. - let dep_sets: Vec> = if id_stack.is_empty() { - vec![BTreeSet::new()] - } else { - let mut dep_sets = Vec::new(); - for id_chunk in id_stack.chunks(branch) { - let id_set = id_chunk.iter().fold(BTreeSet::new(), |mut acc, item| { - acc.insert(item.clone()); - acc - }); - dep_sets.push(id_set); - } - dep_sets - }; - let dep_set_len = dep_sets.len(); - for (idx, p) in chunk.iter().enumerate() { - let dep_idx = idx % dep_set_len; - let dep_set = dep_sets[dep_idx].clone(); - id_stack.push(dag.add_node(p.clone(), dep_set).unwrap().clone()); - } - } - Just(dag) - }) -} - -proptest! { - #[test] - fn test_dag_add_node_properties((nodes, parent_idxs) in simple_edge_strategy(100)) { - // TODO implement the tests now - let mut dag = DAG::::new(); - let parent_count = parent_idxs.len(); - let mut dependents = BTreeMap::new(); - let mut node_set = BTreeSet::new(); - for (idx, n) in nodes.iter().cloned().enumerate() { - if !parent_idxs.contains(&idx) { - let node_id = dag.add_node(n, BTreeSet::new()).unwrap(); - node_set.insert(node_id.clone()); - let parent = idx % parent_count; - if dependents.contains_key(&parent) { - dependents.get_mut(&parent).map(|v: &mut BTreeSet<[u8; 8]>| v.insert(node_id)); - } else { - dependents.insert(parent, BTreeSet::from([node_id])); - } - } - } - for (pidx, dep_ids) in dependents { - let node_id = dag.add_node(nodes[pidx].clone(), dep_ids).unwrap(); - node_set.insert(node_id.clone()); - } - assert!(dag.get_roots().len() <= parent_count); - assert!(dag.get_nodes().len() == node_set.len()); - } -} - -proptest! { - #[test] - fn test_complex_dag_node_properties(dag in complex_dag_strategy(100, 10, 3)) { - // TODO(jwall): We can assert much more about the DAG if we get more clever in what we return. - let nodes = dag.get_nodes(); - assert!(nodes.len() <= 100); - - let roots = dag.get_roots(); - assert!(roots.len() < dag.get_nodes().len()); - - for node_id in nodes.keys() { - let mut is_descendant = false; - if roots.contains(node_id) { - continue; - } - for root in roots.iter() { - if let NodeCompare::After = dag.compare(root, node_id) { - // success - is_descendant = true; - } - } - assert!(is_descendant); - } - // Check that every root node is uncomparable. - for left_root in roots.iter() { - for right_root in roots.iter() { - if left_root != right_root { - assert_eq!(dag.compare(left_root, right_root), NodeCompare::Uncomparable); - } - } - } - } -} diff --git a/src/store.rs b/src/store.rs new file mode 100644 index 0000000..84d3f80 --- /dev/null +++ b/src/store.rs @@ -0,0 +1,27 @@ +// Copyright 2022 Jeremy Wall (Jeremy@marzhilsltudios.com) +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use crate::hash::{ByteEncoder, HashWriter}; +use crate::node::Node; + +pub enum StoreError {} + +pub trait Store +where + N: ByteEncoder, + HW: HashWriter, +{ + fn get(&self, id: &[u8; HASH_LEN]) -> &Node; + fn store(&mut self, node: Node) -> Result<(), StoreError>; +} diff --git a/src/test.rs b/src/test.rs index ef01c6c..17636b2 100644 --- a/src/test.rs +++ b/src/test.rs @@ -12,12 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. use std::collections::hash_map::DefaultHasher; +use std::collections::BTreeSet; -use super::*; +use crate::prelude::*; #[test] fn test_root_pointer_hygiene() { - let mut dag = DAG::<&str, DefaultHasher, 8>::new(); + let mut dag = Merkle::<&str, DefaultHasher, 8>::new(); let quax_node_id = dag.add_node("quax", BTreeSet::new()).unwrap(); assert_eq!( quax_node_id, @@ -38,7 +39,7 @@ fn test_root_pointer_hygiene() { #[test] fn test_insert_no_such_dependents_error() { let missing_dependent = Node::<&str, DefaultHasher, 8>::new("missing", BTreeSet::new()); - let mut dag = DAG::<&str, DefaultHasher, 8>::new(); + let mut dag = Merkle::<&str, DefaultHasher, 8>::new(); let mut dep_set = BTreeSet::new(); dep_set.insert(*missing_dependent.id()); assert!(dag.add_node("foo", dep_set).is_err()); @@ -48,7 +49,7 @@ fn test_insert_no_such_dependents_error() { #[test] fn test_adding_nodes_is_idempotent() { - let mut dag = DAG::<&str, DefaultHasher, 8>::new(); + let mut dag = Merkle::<&str, DefaultHasher, 8>::new(); let quax_node_id = dag.add_node("quax", BTreeSet::new()).unwrap(); assert_eq!( quax_node_id, @@ -64,7 +65,7 @@ fn test_adding_nodes_is_idempotent() { #[test] fn test_adding_nodes_is_idempotent_regardless_of_dep_order() { - let mut dag = DAG::<&str, DefaultHasher, 8>::new(); + let mut dag = Merkle::<&str, DefaultHasher, 8>::new(); let quake_node_id = dag.add_node("quake", BTreeSet::new()).unwrap(); let qualm_node_id = dag.add_node("qualm", BTreeSet::new()).unwrap(); let quell_node_id = dag.add_node("quell", BTreeSet::new()).unwrap(); @@ -86,7 +87,7 @@ fn test_adding_nodes_is_idempotent_regardless_of_dep_order() { #[test] fn test_node_comparison_equivalent() { - let mut dag = DAG::<&str, DefaultHasher, 8>::new(); + let mut dag = Merkle::<&str, DefaultHasher, 8>::new(); let quake_node_id = dag.add_node("quake", BTreeSet::new()).unwrap(); assert_eq!( dag.compare(&quake_node_id, &quake_node_id), @@ -96,7 +97,7 @@ fn test_node_comparison_equivalent() { #[test] fn test_node_comparison_before() { - let mut dag = DAG::<&str, DefaultHasher, 8>::new(); + let mut dag = Merkle::<&str, DefaultHasher, 8>::new(); let quake_node_id = dag.add_node("quake", BTreeSet::new()).unwrap(); let qualm_node_id = dag .add_node("qualm", BTreeSet::from([quake_node_id.clone()])) @@ -116,7 +117,7 @@ fn test_node_comparison_before() { #[test] fn test_node_comparison_after() { - let mut dag = DAG::<&str, DefaultHasher, 8>::new(); + let mut dag = Merkle::<&str, DefaultHasher, 8>::new(); let quake_node_id = dag.add_node("quake", BTreeSet::new()).unwrap(); let qualm_node_id = dag .add_node("qualm", BTreeSet::from([quake_node_id.clone()])) @@ -136,7 +137,7 @@ fn test_node_comparison_after() { #[test] fn test_node_comparison_no_shared_graph() { - let mut dag = DAG::<&str, DefaultHasher, 8>::new(); + let mut dag = Merkle::<&str, DefaultHasher, 8>::new(); let quake_node_id = dag.add_node("quake", BTreeSet::new()).unwrap(); let qualm_node_id = dag.add_node("qualm", BTreeSet::new()).unwrap(); let quell_node_id = dag.add_node("quell", BTreeSet::new()).unwrap();