fix: a more efficient get_dependents query

This commit is contained in:
Jeremy Wall 2025-07-02 16:20:51 -05:00
parent 5f3fa32fa8
commit 3fbeba535c
2 changed files with 77 additions and 36 deletions

View File

@ -10,7 +10,7 @@ fn get_deterministic_candidate(graph: &Graph) -> Arc<Reference> {
.refs
.values()
.filter(|r| r.name != graph.root.name && r.is_leaf())
.map(|r| r.clone())
.cloned()
.collect();
// Sort by name to ensure deterministic ordering
@ -57,7 +57,7 @@ fn test_root_not_a_dependency() {
let root_name = graph.root.name.clone();
// Check all references to ensure none have the root as a dependent
for (_, reference) in graph.refs.as_ref() {
for reference in graph.refs.as_ref().values() {
for dep in &reference.dependents {
assert_ne!(
dep.name, root_name,
@ -88,7 +88,7 @@ fn test_all_nodes_connected_to_root() {
collect_reachable(&graph.root, &mut reachable);
// Check that all nodes in the graph are reachable from the root
for (name, _) in graph.refs.as_ref() {
for name in graph.refs.as_ref().keys() {
assert!(
reachable.contains(name),
"All nodes should be reachable from the root: {}",
@ -273,12 +273,12 @@ fn test_reference_ids_stable_regardless_of_dependency_order() {
let base_ref = Reference::new(Some(String::from("base_content")), String::from("/base"));
// Test multiple different orders of adding the same dependencies
let orders = vec![
let orders = [
vec![dep_a.clone(), dep_b.clone(), dep_c.clone(), dep_d.clone()], // alphabetical
vec![dep_d.clone(), dep_c.clone(), dep_b.clone(), dep_a.clone()], // reverse alphabetical
vec![dep_b.clone(), dep_d.clone(), dep_a.clone(), dep_c.clone()], // random order 1
vec![dep_c.clone(), dep_a.clone(), dep_d.clone(), dep_b.clone()], // random order 2
vec![dep_d.clone(), dep_a.clone(), dep_b.clone(), dep_c.clone()], // random order 3
vec![dep_d.clone(), dep_a.clone(), dep_b.clone(), dep_c.clone()],
];
let mut all_ids = Vec::new();
@ -325,7 +325,7 @@ fn test_dependencies_lexicographically_ordered() {
let graph = create_test_graph();
// Check all references to ensure their dependents are lexicographically ordered
for (_, reference) in graph.refs.as_ref() {
for reference in graph.refs.as_ref().values() {
if reference.dependents.len() > 1 {
// Check that dependents are ordered by name
for i in 0..reference.dependents.len() - 1 {

View File

@ -145,44 +145,85 @@ impl ReferenceStore {
}))
}
fn get_dependents<'a>(
&'a self,
parent_id: &'a str,
) -> std::pin::Pin<Box<dyn std::future::Future<Output = Result<Vec<Arc<Reference>>>> + Send + 'a>>
{
Box::pin(async move {
let rows = sqlx::query(
r#"
SELECT r.id, r.content_address, r.name
async fn get_dependents(&self, parent_id: &str) -> Result<Vec<Arc<Reference>>> {
// Get all dependents at once using a recursive CTE (Common Table Expression)
let rows = sqlx::query(
r#"
WITH RECURSIVE dependent_tree AS (
-- Base case: direct dependents
SELECT r.id, r.content_address, r.name, rd.parent_id, 0 as level
FROM refs r
JOIN reference_dependencies rd ON r.id = rd.dependent_id
WHERE rd.parent_id = ?
ORDER BY r.name
"#,
UNION ALL
-- Recursive case: dependents of dependents
SELECT r.id, r.content_address, r.name, rd.parent_id, dt.level + 1
FROM refs r
JOIN reference_dependencies rd ON r.id = rd.dependent_id
JOIN dependent_tree dt ON rd.parent_id = dt.id
)
.bind(parent_id)
.fetch_all(&self.pool)
.await?;
SELECT id, content_address, name, parent_id, level
FROM dependent_tree
ORDER BY level, name
"#,
)
.bind(parent_id)
.fetch_all(&self.pool)
.await?;
let mut dependents = Vec::new();
for row in rows {
let id: String = row.get("id");
let content_address: Option<String> = row.get("content_address");
let name: String = row.get("name");
// Build the tree structure from the flattened results
let mut refs_map: std::collections::HashMap<String, Arc<Reference>> = std::collections::HashMap::new();
let mut children_map: std::collections::HashMap<String, Vec<String>> = std::collections::HashMap::new();
// Recursively get dependents for each dependent
let nested_dependents = self.get_dependents(&id).await?;
// First pass: create all references and track relationships
for row in &rows {
let id: String = row.get("id");
let content_address: Option<String> = row.get("content_address");
let name: String = row.get("name");
let parent_id: String = row.get("parent_id");
dependents.push(Arc::new(Reference {
id,
content_address,
name,
dependents: nested_dependents,
}));
// Create reference without dependents first
let reference = Arc::new(Reference {
id: id.clone(),
content_address,
name,
dependents: Vec::new(),
});
refs_map.insert(id.clone(), reference);
children_map.entry(parent_id).or_default().push(id);
}
// Second pass: build the tree by adding dependents to each reference
fn build_dependents(
ref_id: &str,
refs_map: &std::collections::HashMap<String, Arc<Reference>>,
children_map: &std::collections::HashMap<String, Vec<String>>,
) -> Vec<Arc<Reference>> {
if let Some(child_ids) = children_map.get(ref_id) {
let mut dependents = Vec::new();
for child_id in child_ids {
if let Some(child_ref) = refs_map.get(child_id) {
let nested_dependents = build_dependents(child_id, refs_map, children_map);
let updated_child = Arc::new(Reference {
id: child_ref.id.clone(),
content_address: child_ref.content_address.clone(),
name: child_ref.name.clone(),
dependents: nested_dependents,
});
dependents.push(updated_child);
}
}
dependents.sort_by(|a, b| a.name.cmp(&b.name));
dependents
} else {
Vec::new()
}
}
Ok(dependents)
})
Ok(build_dependents(parent_id, &refs_map, &children_map))
}
pub async fn get_references_by_name(&self, name: &str) -> Result<Vec<Reference>> {
@ -253,7 +294,7 @@ impl ReferenceStore {
) -> Result<()> {
let mut tx = self.pool.begin().await?;
for (_, reference) in updated_references {
for reference in updated_references.values() {
// Update the reference
sqlx::query(
r#"