From 3fbeba535c1e0d99ae73355b0e0fc2323f2e1b1f Mon Sep 17 00:00:00 2001 From: Jeremy Wall Date: Wed, 2 Jul 2025 16:20:51 -0500 Subject: [PATCH] fix: a more efficient get_dependents query --- offline-web-model/src/test.rs | 12 ++-- offline-web-storage/src/lib.rs | 101 +++++++++++++++++++++++---------- 2 files changed, 77 insertions(+), 36 deletions(-) diff --git a/offline-web-model/src/test.rs b/offline-web-model/src/test.rs index 37cd93d..2506ad7 100644 --- a/offline-web-model/src/test.rs +++ b/offline-web-model/src/test.rs @@ -10,7 +10,7 @@ fn get_deterministic_candidate(graph: &Graph) -> Arc { .refs .values() .filter(|r| r.name != graph.root.name && r.is_leaf()) - .map(|r| r.clone()) + .cloned() .collect(); // Sort by name to ensure deterministic ordering @@ -57,7 +57,7 @@ fn test_root_not_a_dependency() { let root_name = graph.root.name.clone(); // Check all references to ensure none have the root as a dependent - for (_, reference) in graph.refs.as_ref() { + for reference in graph.refs.as_ref().values() { for dep in &reference.dependents { assert_ne!( dep.name, root_name, @@ -88,7 +88,7 @@ fn test_all_nodes_connected_to_root() { collect_reachable(&graph.root, &mut reachable); // Check that all nodes in the graph are reachable from the root - for (name, _) in graph.refs.as_ref() { + for name in graph.refs.as_ref().keys() { assert!( reachable.contains(name), "All nodes should be reachable from the root: {}", @@ -273,12 +273,12 @@ fn test_reference_ids_stable_regardless_of_dependency_order() { let base_ref = Reference::new(Some(String::from("base_content")), String::from("/base")); // Test multiple different orders of adding the same dependencies - let orders = vec![ + let orders = [ vec![dep_a.clone(), dep_b.clone(), dep_c.clone(), dep_d.clone()], // alphabetical vec![dep_d.clone(), dep_c.clone(), dep_b.clone(), dep_a.clone()], // reverse alphabetical vec![dep_b.clone(), dep_d.clone(), dep_a.clone(), dep_c.clone()], // random order 1 vec![dep_c.clone(), dep_a.clone(), dep_d.clone(), dep_b.clone()], // random order 2 - vec![dep_d.clone(), dep_a.clone(), dep_b.clone(), dep_c.clone()], // random order 3 + vec![dep_d.clone(), dep_a.clone(), dep_b.clone(), dep_c.clone()], ]; let mut all_ids = Vec::new(); @@ -325,7 +325,7 @@ fn test_dependencies_lexicographically_ordered() { let graph = create_test_graph(); // Check all references to ensure their dependents are lexicographically ordered - for (_, reference) in graph.refs.as_ref() { + for reference in graph.refs.as_ref().values() { if reference.dependents.len() > 1 { // Check that dependents are ordered by name for i in 0..reference.dependents.len() - 1 { diff --git a/offline-web-storage/src/lib.rs b/offline-web-storage/src/lib.rs index c3c6ff1..caf4a5b 100644 --- a/offline-web-storage/src/lib.rs +++ b/offline-web-storage/src/lib.rs @@ -145,44 +145,85 @@ impl ReferenceStore { })) } - fn get_dependents<'a>( - &'a self, - parent_id: &'a str, - ) -> std::pin::Pin>>> + Send + 'a>> - { - Box::pin(async move { - let rows = sqlx::query( - r#" - SELECT r.id, r.content_address, r.name + async fn get_dependents(&self, parent_id: &str) -> Result>> { + // Get all dependents at once using a recursive CTE (Common Table Expression) + let rows = sqlx::query( + r#" + WITH RECURSIVE dependent_tree AS ( + -- Base case: direct dependents + SELECT r.id, r.content_address, r.name, rd.parent_id, 0 as level FROM refs r JOIN reference_dependencies rd ON r.id = rd.dependent_id WHERE rd.parent_id = ? - ORDER BY r.name - "#, + + UNION ALL + + -- Recursive case: dependents of dependents + SELECT r.id, r.content_address, r.name, rd.parent_id, dt.level + 1 + FROM refs r + JOIN reference_dependencies rd ON r.id = rd.dependent_id + JOIN dependent_tree dt ON rd.parent_id = dt.id ) - .bind(parent_id) - .fetch_all(&self.pool) - .await?; + SELECT id, content_address, name, parent_id, level + FROM dependent_tree + ORDER BY level, name + "#, + ) + .bind(parent_id) + .fetch_all(&self.pool) + .await?; - let mut dependents = Vec::new(); - for row in rows { - let id: String = row.get("id"); - let content_address: Option = row.get("content_address"); - let name: String = row.get("name"); + // Build the tree structure from the flattened results + let mut refs_map: std::collections::HashMap> = std::collections::HashMap::new(); + let mut children_map: std::collections::HashMap> = std::collections::HashMap::new(); - // Recursively get dependents for each dependent - let nested_dependents = self.get_dependents(&id).await?; + // First pass: create all references and track relationships + for row in &rows { + let id: String = row.get("id"); + let content_address: Option = row.get("content_address"); + let name: String = row.get("name"); + let parent_id: String = row.get("parent_id"); - dependents.push(Arc::new(Reference { - id, - content_address, - name, - dependents: nested_dependents, - })); + // Create reference without dependents first + let reference = Arc::new(Reference { + id: id.clone(), + content_address, + name, + dependents: Vec::new(), + }); + + refs_map.insert(id.clone(), reference); + children_map.entry(parent_id).or_default().push(id); + } + + // Second pass: build the tree by adding dependents to each reference + fn build_dependents( + ref_id: &str, + refs_map: &std::collections::HashMap>, + children_map: &std::collections::HashMap>, + ) -> Vec> { + if let Some(child_ids) = children_map.get(ref_id) { + let mut dependents = Vec::new(); + for child_id in child_ids { + if let Some(child_ref) = refs_map.get(child_id) { + let nested_dependents = build_dependents(child_id, refs_map, children_map); + let updated_child = Arc::new(Reference { + id: child_ref.id.clone(), + content_address: child_ref.content_address.clone(), + name: child_ref.name.clone(), + dependents: nested_dependents, + }); + dependents.push(updated_child); + } + } + dependents.sort_by(|a, b| a.name.cmp(&b.name)); + dependents + } else { + Vec::new() } + } - Ok(dependents) - }) + Ok(build_dependents(parent_id, &refs_map, &children_map)) } pub async fn get_references_by_name(&self, name: &str) -> Result> { @@ -253,7 +294,7 @@ impl ReferenceStore { ) -> Result<()> { let mut tx = self.pool.begin().await?; - for (_, reference) in updated_references { + for reference in updated_references.values() { // Update the reference sqlx::query( r#"