From 79cbe07f96ac8ba60a5eca100c7016fbe59edb80 Mon Sep 17 00:00:00 2001 From: Chris Olszewski Date: Wed, 22 Jan 2025 12:04:36 -0500 Subject: [PATCH 01/11] feat: display cycle graph in error message (wip) --- crates/turborepo-graph-utils/src/lib.rs | 147 +++++++++++++++++- .../src/package_graph/mod.rs | 2 +- 2 files changed, 142 insertions(+), 7 deletions(-) diff --git a/crates/turborepo-graph-utils/src/lib.rs b/crates/turborepo-graph-utils/src/lib.rs index 2304e08fe277c..71f870aa23238 100644 --- a/crates/turborepo-graph-utils/src/lib.rs +++ b/crates/turborepo-graph-utils/src/lib.rs @@ -11,8 +11,11 @@ use thiserror::Error; #[derive(Debug, Error)] pub enum Error { - #[error("Cyclic dependency detected:\n{0}")] - CyclicDependencies(String), + #[error("Cyclic dependency detected:\n{cycle_lines}\nDot file saved to {dot:?}")] + CyclicDependencies { + cycle_lines: String, + dot: Option, + }, #[error("{0} depends on itself")] SelfDependency(String), } @@ -42,9 +45,88 @@ pub fn transitive_closure(graph: &Graph) -> Result<(), Error> { +pub fn some_smart_name(graph: &Graph) -> Option>> { + let sccs = petgraph::algo::tarjan_scc(graph) + .into_iter() + .filter(|cycle| cycle.len() > 1) + .collect::>(); + if sccs.is_empty() { + return None; + } + let subgraphs = sccs + .iter() + .map(|scc| { + let mut subgraph = graph.clone(); + subgraph.retain_nodes(|_, node| scc.contains(&node)); + subgraph + }) + .collect::>(); + + None +} + +/// Given a graph that has a cycle, return all minimal sets of edges that result +/// in a cycle no longer being present. +/// Minimal here means that if the cycle can be broken by only removing n edges, +/// then only sets containing n edges will be returned. +fn edges_to_break_cycle(graph: &Graph) -> Vec> { + let edges = graph.edge_indices().collect::>(); + let edge_sets = edges.iter().copied().powerset(); + let mut breaking_edge_sets = Vec::new(); + + let mut minimal_break_point = usize::MAX; + for edge_set in edge_sets { + let set_size = edge_set.len(); + if set_size > minimal_break_point { + break; + } + let mut trimmed_graph = graph.clone(); + for edge in &edge_set { + trimmed_graph.remove_edge(*edge); + } + + let is_cyclic = petgraph::algo::tarjan_scc(&trimmed_graph) + .into_iter() + .filter(|scc| scc.len() > 1) + .count() + > 0; + if !is_cyclic { + minimal_break_point = set_size; + breaking_edge_sets.push(edge_set.into_iter().collect()); + } + } + + breaking_edge_sets +} + +pub fn validate_graph( + graph: &Graph, +) -> Result<(), Error> { // This is equivalent to AcyclicGraph.Cycles from Go's dag library - let cycles_lines = petgraph::algo::tarjan_scc(&graph) + let sccs = petgraph::algo::tarjan_scc(graph); + let all_nodes_in_cycle = sccs + .iter() + .filter(|cycle| cycle.len() > 1) + .flatten() + .copied() + .collect::>(); + let dot = (!all_nodes_in_cycle.is_empty()) + .then(|| { + let mut subgraph = graph.clone(); + subgraph.retain_nodes(|_, node| all_nodes_in_cycle.contains(&node)); + let dot = format!( + "{:?}", + petgraph::dot::Dot::with_config(&subgraph, &[petgraph::dot::Config::EdgeNoLabel]) + ); + let tmpdir = std::env::temp_dir(); + let dot_path = tmpdir.join("cycle.dot"); + std::fs::write(&dot_path, dot).ok()?; + let lossy_path = dot_path.to_string_lossy(); + Some(lossy_path.into_owned()) + }) + .flatten(); + + let cycle_lines = sccs .into_iter() .filter(|cycle| cycle.len() > 1) .map(|cycle| { @@ -53,8 +135,8 @@ pub fn validate_graph(graph: &Graph) -> Result<(), Error> { }) .join("\n"); - if !cycles_lines.is_empty() { - return Err(Error::CyclicDependencies(cycles_lines)); + if !cycle_lines.is_empty() { + return Err(Error::CyclicDependencies { cycle_lines, dot }); } for edge in graph.edge_references() { @@ -107,4 +189,57 @@ mod test { d, c, b, a "###); } + + #[test] + fn test_basic_cycle_break() { + // Simple cycle where any edge would break the cycle + let mut g = Graph::new(); + let a = g.add_node("a"); + let b = g.add_node("b"); + let c = g.add_node("c"); + + let a_b = g.add_edge(a, b, ()); + let b_c = g.add_edge(b, c, ()); + let c_a = g.add_edge(c, a, ()); + + let breaks = edges_to_break_cycle(&g); + assert_eq!(breaks.len(), 3, "{:?}", breaks); + let mut edges_that_break = HashSet::new(); + for brk in breaks { + assert_eq!(brk.len(), 1); + edges_that_break.extend(brk.into_iter()); + } + assert_eq!( + edges_that_break, + [a_b, b_c, c_a].iter().copied().collect::>() + ); + } + + #[test] + fn test_double_cycle_break() { + // 2 cycles where only one edge would break the cycle + let mut g = Graph::new(); + let a = g.add_node("a"); + let b = g.add_node("b"); + let c = g.add_node("c"); + let d = g.add_node("d"); + + // Cycle 1 + let a_b = g.add_edge(a, b, ()); + let b_c = g.add_edge(b, c, ()); + let c_a = g.add_edge(c, a, ()); + + // Cycle 2 + let b_d = g.add_edge(b, d, ()); + let d_a = g.add_edge(d, a, ()); + + let breaks = edges_to_break_cycle(&g); + assert_eq!(breaks.len(), 1, "{:?}", breaks); + assert_eq!(breaks.into_iter().flatten().exactly_one().unwrap(), a_b); + } + + #[test] + fn test_cycle_break_two_edges() { + // cycle where only 2 edges required to break the cycle + } } diff --git a/crates/turborepo-repository/src/package_graph/mod.rs b/crates/turborepo-repository/src/package_graph/mod.rs index 94615ddffa747..aae4acd04e514 100644 --- a/crates/turborepo-repository/src/package_graph/mod.rs +++ b/crates/turborepo-repository/src/package_graph/mod.rs @@ -906,7 +906,7 @@ mod test { assert_matches!( pkg_graph.validate(), Err(builder::Error::InvalidPackageGraph( - graph::Error::CyclicDependencies(_) + graph::Error::CyclicDependencies { .. } )) ); } From 5687c5a84ba8f815234ee1eb581c6ddf1f5fc3c1 Mon Sep 17 00:00:00 2001 From: Chris Olszewski Date: Fri, 24 Jan 2025 11:22:07 -0500 Subject: [PATCH 02/11] chore(graph): suggest cuts to remove cycle --- crates/turborepo-graph-utils/src/lib.rs | 166 +++++++++++------- .../visitor/.command.rs.pending-snap | 5 - .../src/.lib.rs.pending-snap | 3 - .../src/package_graph/builder.rs | 2 +- 4 files changed, 102 insertions(+), 74 deletions(-) delete mode 100644 crates/turborepo-lib/src/task_graph/visitor/.command.rs.pending-snap delete mode 100644 crates/turborepo-microfrontends/src/.lib.rs.pending-snap diff --git a/crates/turborepo-graph-utils/src/lib.rs b/crates/turborepo-graph-utils/src/lib.rs index 71f870aa23238..6ed61942bb21d 100644 --- a/crates/turborepo-graph-utils/src/lib.rs +++ b/crates/turborepo-graph-utils/src/lib.rs @@ -11,11 +11,8 @@ use thiserror::Error; #[derive(Debug, Error)] pub enum Error { - #[error("Cyclic dependency detected:\n{cycle_lines}\nDot file saved to {dot:?}")] - CyclicDependencies { - cycle_lines: String, - dot: Option, - }, + #[error("Cyclic dependency detected:\n{cycle_lines}")] + CyclicDependencies { cycle_lines: String }, #[error("{0} depends on itself")] SelfDependency(String), } @@ -45,31 +42,42 @@ pub fn transitive_closure(graph: &Graph) -> Option>> { +pub struct Cycle { + pub nodes: Vec, + pub cuts: Vec>, +} + +/// Given a graph will look for cycles and edge sets that will remove the cycle. +/// +/// If any cycles are found they will be returned alongside a list of sets of +/// edges that if they are cut will result in the graph no longer having a +/// cycle. +/// We return (N, N) tuples instead of edge indexes as indexes are not stable +/// across removals so the indexes in the subgraph do not match the full graph. +pub fn cycles_and_cut_candidates( + graph: &Graph, +) -> Vec> { let sccs = petgraph::algo::tarjan_scc(graph) .into_iter() .filter(|cycle| cycle.len() > 1) .collect::>(); - if sccs.is_empty() { - return None; - } - let subgraphs = sccs - .iter() - .map(|scc| { + sccs.into_iter() + .map(|nodes| { let mut subgraph = graph.clone(); - subgraph.retain_nodes(|_, node| scc.contains(&node)); - subgraph + subgraph.retain_nodes(|_, node| nodes.contains(&node)); + let cuts = edges_to_break_cycle(&subgraph); + Cycle { nodes, cuts } }) - .collect::>(); - - None + .collect() } /// Given a graph that has a cycle, return all minimal sets of edges that result /// in a cycle no longer being present. /// Minimal here means that if the cycle can be broken by only removing n edges, /// then only sets containing n edges will be returned. -fn edges_to_break_cycle(graph: &Graph) -> Vec> { +fn edges_to_break_cycle( + graph: &Graph, +) -> Vec> { let edges = graph.edge_indices().collect::>(); let edge_sets = edges.iter().copied().powerset(); let mut breaking_edge_sets = Vec::new(); @@ -81,9 +89,7 @@ fn edges_to_break_cycle(graph: &Graph) -> Vec(graph: &Graph) -> Vec 0; if !is_cyclic { minimal_break_point = set_size; - breaking_edge_sets.push(edge_set.into_iter().collect()); + breaking_edge_sets.push( + edge_set + .into_iter() + .map(|edge| { + let (src, dst) = graph.edge_endpoints(edge).unwrap(); + ( + graph.node_weight(src).unwrap().clone(), + graph.node_weight(dst).unwrap().clone(), + ) + }) + .collect(), + ); } } breaking_edge_sets } -pub fn validate_graph( - graph: &Graph, -) -> Result<(), Error> { - // This is equivalent to AcyclicGraph.Cycles from Go's dag library - let sccs = petgraph::algo::tarjan_scc(graph); - let all_nodes_in_cycle = sccs - .iter() - .filter(|cycle| cycle.len() > 1) - .flatten() - .copied() - .collect::>(); - let dot = (!all_nodes_in_cycle.is_empty()) - .then(|| { - let mut subgraph = graph.clone(); - subgraph.retain_nodes(|_, node| all_nodes_in_cycle.contains(&node)); - let dot = format!( - "{:?}", - petgraph::dot::Dot::with_config(&subgraph, &[petgraph::dot::Config::EdgeNoLabel]) - ); - let tmpdir = std::env::temp_dir(); - let dot_path = tmpdir.join("cycle.dot"); - std::fs::write(&dot_path, dot).ok()?; - let lossy_path = dot_path.to_string_lossy(); - Some(lossy_path.into_owned()) - }) - .flatten(); +pub fn validate_graph(graph: &Graph) -> Result<(), Error> { + let cycles = cycles_and_cut_candidates(graph); - let cycle_lines = sccs + let cycle_lines = cycles .into_iter() - .filter(|cycle| cycle.len() > 1) - .map(|cycle| { - let workspaces = cycle.into_iter().map(|id| graph.node_weight(id).unwrap()); - format!("\t{}", workspaces.format(", ")) + .map(|Cycle { nodes, cuts }| { + let workspaces = nodes.into_iter().map(|id| graph.node_weight(id).unwrap()); + let cuts = cuts.into_iter().map(format_cut).format("\n\t"); + format!( + "\t{}\n\tThe cycle can be broken by removing any of these sets of \ + dependencies:\n\t{cuts}", + workspaces.format(", ") + ) }) .join("\n"); if !cycle_lines.is_empty() { - return Err(Error::CyclicDependencies { cycle_lines, dot }); + return Err(Error::CyclicDependencies { cycle_lines }); } for edge in graph.edge_references() { @@ -151,6 +148,16 @@ pub fn validate_graph( Ok(()) } +fn format_cut(edges: impl IntoIterator) -> String { + let edges = edges + .into_iter() + .map(|(src, dst)| format!("{src} -> {dst}")) + .sorted() + .format(", "); + + format!("{{{edges}}}") +} + pub use walker::{WalkMessage, Walker}; #[cfg(test)] @@ -187,6 +194,8 @@ mod test { assert_snapshot!(err.to_string(), @r###" Cyclic dependency detected: d, c, b, a + The cycle can be broken by removing any of these sets of dependencies: + {b -> c} "###); } @@ -198,9 +207,9 @@ mod test { let b = g.add_node("b"); let c = g.add_node("c"); - let a_b = g.add_edge(a, b, ()); - let b_c = g.add_edge(b, c, ()); - let c_a = g.add_edge(c, a, ()); + g.add_edge(a, b, ()); + g.add_edge(b, c, ()); + g.add_edge(c, a, ()); let breaks = edges_to_break_cycle(&g); assert_eq!(breaks.len(), 3, "{:?}", breaks); @@ -211,7 +220,10 @@ mod test { } assert_eq!( edges_that_break, - [a_b, b_c, c_a].iter().copied().collect::>() + [("a", "b"), ("b", "c"), ("c", "a")] + .iter() + .copied() + .collect::>() ); } @@ -225,21 +237,45 @@ mod test { let d = g.add_node("d"); // Cycle 1 - let a_b = g.add_edge(a, b, ()); - let b_c = g.add_edge(b, c, ()); - let c_a = g.add_edge(c, a, ()); + g.add_edge(a, b, ()); + g.add_edge(b, c, ()); + g.add_edge(c, a, ()); // Cycle 2 - let b_d = g.add_edge(b, d, ()); - let d_a = g.add_edge(d, a, ()); + g.add_edge(b, d, ()); + g.add_edge(d, a, ()); let breaks = edges_to_break_cycle(&g); assert_eq!(breaks.len(), 1, "{:?}", breaks); - assert_eq!(breaks.into_iter().flatten().exactly_one().unwrap(), a_b); + assert_eq!( + breaks.into_iter().flatten().exactly_one().unwrap(), + ("a", "b") + ); } #[test] fn test_cycle_break_two_edges() { - // cycle where only 2 edges required to break the cycle + // cycle where multiple edges required to break the cycle + // a,b,c form a cycle with + // a <-> c and b <-> c + let mut g = Graph::new(); + let a = g.add_node("a"); + let b = g.add_node("b"); + let c = g.add_node("c"); + + g.add_edge(a, b, ()); + g.add_edge(b, c, ()); + g.add_edge(c, a, ()); + g.add_edge(a, c, ()); + g.add_edge(c, b, ()); + + let breaks = edges_to_break_cycle(&g); + assert_eq!(breaks.len(), 3, "{:?}", breaks); + let expected_1: HashSet<_> = [("b", "c"), ("a", "c")].iter().copied().collect(); + let expected_2: HashSet<_> = [("b", "c"), ("c", "a")].iter().copied().collect(); + let expected_3: HashSet<_> = [("c", "b"), ("c", "a")].iter().copied().collect(); + assert!(breaks.contains(&expected_1), "{breaks:?}"); + assert!(breaks.contains(&expected_2), "{breaks:?}"); + assert!(breaks.contains(&expected_3), "{breaks:?}"); } } diff --git a/crates/turborepo-lib/src/task_graph/visitor/.command.rs.pending-snap b/crates/turborepo-lib/src/task_graph/visitor/.command.rs.pending-snap deleted file mode 100644 index 367ad859f8f98..0000000000000 --- a/crates/turborepo-lib/src/task_graph/visitor/.command.rs.pending-snap +++ /dev/null @@ -1,5 +0,0 @@ -{"run_id":"1737179308-428315000","line":309,"new":{"module_name":"turborepo_lib__task_graph__visitor__command__test","snapshot_name":"error_short_circuits_factory","metadata":{"source":"crates/turborepo-lib/src/task_graph/visitor/command.rs","assertion_line":309,"expression":"cmd.to_string()"},"snapshot":"Internal errors encountered: oops!"},"old":{"module_name":"turborepo_lib__task_graph__visitor__command__test","metadata":{},"snapshot":"internal errors encountered: oops!"}} -{"run_id":"1737179538-131428000","line":309,"new":null,"old":null} -{"run_id":"1737179603-300467000","line":309,"new":null,"old":null} -{"run_id":"1737179676-323346000","line":309,"new":null,"old":null} -{"run_id":"1737179741-977012000","line":309,"new":null,"old":null} diff --git a/crates/turborepo-microfrontends/src/.lib.rs.pending-snap b/crates/turborepo-microfrontends/src/.lib.rs.pending-snap deleted file mode 100644 index 883186b1f2fd9..0000000000000 --- a/crates/turborepo-microfrontends/src/.lib.rs.pending-snap +++ /dev/null @@ -1,3 +0,0 @@ -{"run_id":"1737179605-821842000","line":208,"new":{"module_name":"turborepo_microfrontends__test","snapshot_name":"unsupported_version","metadata":{"source":"crates/turborepo-microfrontends/src/lib.rs","assertion_line":208,"expression":"err"},"snapshot":"Unsupported microfrontends configuration version: yolo. Supported versions: [\"1\", \"2\"]"},"old":{"module_name":"turborepo_microfrontends__test","metadata":{},"snapshot":"Unsupported micro-frontends configuration version: yolo. Supported versions: [\"1\", \"2\"]"}} -{"run_id":"1737179677-885648000","line":208,"new":{"module_name":"turborepo_microfrontends__test","snapshot_name":"unsupported_version","metadata":{"source":"crates/turborepo-microfrontends/src/lib.rs","assertion_line":208,"expression":"err"},"snapshot":"Unsupported microfrontends configuration version: yolo. Supported versions: [\"1\", \"2\"]"},"old":{"module_name":"turborepo_microfrontends__test","metadata":{},"snapshot":"Unsupported micro-frontends configuration version: yolo. Supported versions: [\"1\", \"2\"]"}} -{"run_id":"1737179744-116940000","line":208,"new":null,"old":null} diff --git a/crates/turborepo-repository/src/package_graph/builder.rs b/crates/turborepo-repository/src/package_graph/builder.rs index d3ed7bf35c053..526c49fe9e318 100644 --- a/crates/turborepo-repository/src/package_graph/builder.rs +++ b/crates/turborepo-repository/src/package_graph/builder.rs @@ -51,7 +51,7 @@ pub enum Error { PackageJson(#[from] crate::package_json::Error), #[error("package.json must have a name field:\n{0}")] PackageJsonMissingName(AbsoluteSystemPathBuf), - #[error("Invalid package dependency graph: {0}")] + #[error("Invalid package dependency graph:")] InvalidPackageGraph(#[source] graph::Error), #[error(transparent)] Lockfile(#[from] turborepo_lockfiles::Error), From 00549fb6248647df64c682fe6fdf136263ab3a45 Mon Sep 17 00:00:00 2001 From: Chris Olszewski Date: Fri, 24 Jan 2025 14:39:39 -0500 Subject: [PATCH 03/11] chore: better error formatting --- crates/turborepo-graph-utils/src/lib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/turborepo-graph-utils/src/lib.rs b/crates/turborepo-graph-utils/src/lib.rs index 6ed61942bb21d..6ca6cd1710924 100644 --- a/crates/turborepo-graph-utils/src/lib.rs +++ b/crates/turborepo-graph-utils/src/lib.rs @@ -125,7 +125,7 @@ pub fn validate_graph(graph: &Graph) -> R let workspaces = nodes.into_iter().map(|id| graph.node_weight(id).unwrap()); let cuts = cuts.into_iter().map(format_cut).format("\n\t"); format!( - "\t{}\n\tThe cycle can be broken by removing any of these sets of \ + "\t{}\nThe cycle can be broken by removing any of these sets of \ dependencies:\n\t{cuts}", workspaces.format(", ") ) @@ -194,7 +194,7 @@ mod test { assert_snapshot!(err.to_string(), @r###" Cyclic dependency detected: d, c, b, a - The cycle can be broken by removing any of these sets of dependencies: + The cycle can be broken by removing any of these sets of dependencies: {b -> c} "###); } From cbc723a54d2213fcadffad60a9fdbae99c3d96b3 Mon Sep 17 00:00:00 2001 From: Chris Olszewski Date: Fri, 24 Jan 2025 19:24:22 -0500 Subject: [PATCH 04/11] chore: add bench for cycle breaking --- .../examples/find_cuts.rs | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 crates/turborepo-graph-utils/examples/find_cuts.rs diff --git a/crates/turborepo-graph-utils/examples/find_cuts.rs b/crates/turborepo-graph-utils/examples/find_cuts.rs new file mode 100644 index 0000000000000..ec99882f5f242 --- /dev/null +++ b/crates/turborepo-graph-utils/examples/find_cuts.rs @@ -0,0 +1,30 @@ +use itertools::Itertools as _; +use petgraph::Graph; +use turborepo_graph_utils::cycles_and_cut_candidates; + +fn main() { + let size: usize = std::env::args().nth(1).unwrap().parse().unwrap(); + let g = generate_graph(size); + let cycles = cycles_and_cut_candidates(&g); + println!("found {} cycles", cycles.len()); + for (i, mut cycle) in cycles.into_iter().enumerate() { + let cut_size = cycle.cuts.pop().unwrap_or_default().len(); + println!("cycle {i} needs {cut_size} cuts to be removed"); + } +} + +// Generates a fully connected graph +fn generate_graph(size: usize) -> Graph { + let mut g = Graph::new(); + let nodes = (0..size) + .map(|i| g.add_node(i.to_string())) + .collect::>(); + + for (s, d) in nodes.iter().cartesian_product(nodes.iter()) { + if s != d { + g.add_edge(*s, *d, ()); + } + } + + g +} From aadb4b1ff395d1c055b48e8ba9cc0980a28492d2 Mon Sep 17 00:00:00 2001 From: Chris Olszewski Date: Fri, 24 Jan 2025 19:35:40 -0500 Subject: [PATCH 05/11] perf(graph): detect cycles slightly faster --- crates/turborepo-graph-utils/src/lib.rs | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/crates/turborepo-graph-utils/src/lib.rs b/crates/turborepo-graph-utils/src/lib.rs index 6ca6cd1710924..757b97046b267 100644 --- a/crates/turborepo-graph-utils/src/lib.rs +++ b/crates/turborepo-graph-utils/src/lib.rs @@ -5,7 +5,7 @@ use std::{collections::HashSet, fmt::Display, hash::Hash}; use itertools::Itertools; use petgraph::{ prelude::*, - visit::{depth_first_search, Reversed}, + visit::{depth_first_search, IntoNeighbors, Reversed, Visitable}, }; use thiserror::Error; @@ -91,11 +91,7 @@ fn edges_to_break_cycle( let mut trimmed_graph = graph.clone(); trimmed_graph.retain_edges(|_, edge| !edge_set.contains(&edge)); - let is_cyclic = petgraph::algo::tarjan_scc(&trimmed_graph) - .into_iter() - .filter(|scc| scc.len() > 1) - .count() - > 0; + let is_cyclic = has_cycle(&trimmed_graph, trimmed_graph.node_indices()); if !is_cyclic { minimal_break_point = set_size; breaking_edge_sets.push( @@ -158,6 +154,23 @@ fn format_cut(edges: impl IntoIterator) -> String { format!("{{{edges}}}") } +// A fast failing DFS approach to detecting if there is a cycle left in the +// graph +fn has_cycle(graph: G, starts: I) -> bool +where + G: IntoNeighbors + Visitable, + I: IntoIterator, +{ + let result = petgraph::visit::depth_first_search(graph, starts, |event| match event { + petgraph::visit::DfsEvent::BackEdge(_, _) => Err(()), + _ => Ok(()), + }); + match result { + Ok(()) => false, + Err(()) => true, + } +} + pub use walker::{WalkMessage, Walker}; #[cfg(test)] From 467c1ffdc5c3f3c68ef6696be2fcbfd6ce5d17dc Mon Sep 17 00:00:00 2001 From: Chris Olszewski Date: Sat, 25 Jan 2025 10:35:27 -0500 Subject: [PATCH 06/11] perf(graph): filter graph instead of cloning --- crates/turborepo-graph-utils/src/lib.rs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/crates/turborepo-graph-utils/src/lib.rs b/crates/turborepo-graph-utils/src/lib.rs index 757b97046b267..68b7a846159f8 100644 --- a/crates/turborepo-graph-utils/src/lib.rs +++ b/crates/turborepo-graph-utils/src/lib.rs @@ -5,7 +5,7 @@ use std::{collections::HashSet, fmt::Display, hash::Hash}; use itertools::Itertools; use petgraph::{ prelude::*, - visit::{depth_first_search, IntoNeighbors, Reversed, Visitable}, + visit::{depth_first_search, EdgeFiltered, IntoNeighbors, Reversed, Visitable}, }; use thiserror::Error; @@ -88,10 +88,9 @@ fn edges_to_break_cycle( if set_size > minimal_break_point { break; } - let mut trimmed_graph = graph.clone(); - trimmed_graph.retain_edges(|_, edge| !edge_set.contains(&edge)); + let trimmed_graph = EdgeFiltered::from_fn(graph, |edge| !edge_set.contains(&edge.id())); - let is_cyclic = has_cycle(&trimmed_graph, trimmed_graph.node_indices()); + let is_cyclic = has_cycle(&trimmed_graph, trimmed_graph.0.node_indices()); if !is_cyclic { minimal_break_point = set_size; breaking_edge_sets.push( From 63989775162258b682544fed194d145037eb3dfe Mon Sep 17 00:00:00 2001 From: Chris Olszewski Date: Sat, 25 Jan 2025 11:42:34 -0500 Subject: [PATCH 07/11] chore: remove temp var --- crates/turborepo-graph-utils/src/lib.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/crates/turborepo-graph-utils/src/lib.rs b/crates/turborepo-graph-utils/src/lib.rs index 68b7a846159f8..23658a21cad74 100644 --- a/crates/turborepo-graph-utils/src/lib.rs +++ b/crates/turborepo-graph-utils/src/lib.rs @@ -57,11 +57,9 @@ pub struct Cycle { pub fn cycles_and_cut_candidates( graph: &Graph, ) -> Vec> { - let sccs = petgraph::algo::tarjan_scc(graph) + petgraph::algo::tarjan_scc(graph) .into_iter() .filter(|cycle| cycle.len() > 1) - .collect::>(); - sccs.into_iter() .map(|nodes| { let mut subgraph = graph.clone(); subgraph.retain_nodes(|_, node| nodes.contains(&node)); From c109403404c2e5dc94df0ce1e02f69f263076381 Mon Sep 17 00:00:00 2001 From: Chris Olszewski Date: Sat, 25 Jan 2025 13:17:15 -0500 Subject: [PATCH 08/11] perf(graph): avoid allocating new visit map for each dfs --- Cargo.lock | 1 + crates/turborepo-graph-utils/Cargo.toml | 1 + .../examples/find_cuts.rs | 6 +- crates/turborepo-graph-utils/src/lib.rs | 72 ++++++++++++++----- 4 files changed, 63 insertions(+), 17 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8dd037cedb45f..73949e834b4cd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6361,6 +6361,7 @@ dependencies = [ name = "turborepo-graph-utils" version = "0.1.0" dependencies = [ + "fixedbitset", "futures", "insta", "itertools 0.10.5", diff --git a/crates/turborepo-graph-utils/Cargo.toml b/crates/turborepo-graph-utils/Cargo.toml index cfc319d9e786e..1cac6e930785e 100644 --- a/crates/turborepo-graph-utils/Cargo.toml +++ b/crates/turborepo-graph-utils/Cargo.toml @@ -8,6 +8,7 @@ license = "MIT" workspace = true [dependencies] +fixedbitset = "0.4.2" futures = "0.3.26" itertools = { workspace = true } log = "0.4.20" diff --git a/crates/turborepo-graph-utils/examples/find_cuts.rs b/crates/turborepo-graph-utils/examples/find_cuts.rs index ec99882f5f242..9ab548d8e5592 100644 --- a/crates/turborepo-graph-utils/examples/find_cuts.rs +++ b/crates/turborepo-graph-utils/examples/find_cuts.rs @@ -3,7 +3,7 @@ use petgraph::Graph; use turborepo_graph_utils::cycles_and_cut_candidates; fn main() { - let size: usize = std::env::args().nth(1).unwrap().parse().unwrap(); + let size: usize = cli_size().unwrap_or(6); let g = generate_graph(size); let cycles = cycles_and_cut_candidates(&g); println!("found {} cycles", cycles.len()); @@ -13,6 +13,10 @@ fn main() { } } +fn cli_size() -> Option { + std::env::args().nth(1)?.parse().ok() +} + // Generates a fully connected graph fn generate_graph(size: usize) -> Graph { let mut g = Graph::new(); diff --git a/crates/turborepo-graph-utils/src/lib.rs b/crates/turborepo-graph-utils/src/lib.rs index 23658a21cad74..99401d09e04d9 100644 --- a/crates/turborepo-graph-utils/src/lib.rs +++ b/crates/turborepo-graph-utils/src/lib.rs @@ -2,10 +2,11 @@ mod walker; use std::{collections::HashSet, fmt::Display, hash::Hash}; +use fixedbitset::FixedBitSet; use itertools::Itertools; use petgraph::{ prelude::*, - visit::{depth_first_search, EdgeFiltered, IntoNeighbors, Reversed, Visitable}, + visit::{depth_first_search, EdgeFiltered, IntoNeighbors, Reversed, VisitMap, Visitable}, }; use thiserror::Error; @@ -80,6 +81,9 @@ fn edges_to_break_cycle( let edge_sets = edges.iter().copied().powerset(); let mut breaking_edge_sets = Vec::new(); + // For each DFS + let mut cycle_detector = CycleDetector::new(graph); + let mut minimal_break_point = usize::MAX; for edge_set in edge_sets { let set_size = edge_set.len(); @@ -88,7 +92,7 @@ fn edges_to_break_cycle( } let trimmed_graph = EdgeFiltered::from_fn(graph, |edge| !edge_set.contains(&edge.id())); - let is_cyclic = has_cycle(&trimmed_graph, trimmed_graph.0.node_indices()); + let is_cyclic = cycle_detector.has_cycle(&trimmed_graph, trimmed_graph.0.node_indices()); if !is_cyclic { minimal_break_point = set_size; breaking_edge_sets.push( @@ -151,20 +155,56 @@ fn format_cut(edges: impl IntoIterator) -> String { format!("{{{edges}}}") } -// A fast failing DFS approach to detecting if there is a cycle left in the -// graph -fn has_cycle(graph: G, starts: I) -> bool -where - G: IntoNeighbors + Visitable, - I: IntoIterator, -{ - let result = petgraph::visit::depth_first_search(graph, starts, |event| match event { - petgraph::visit::DfsEvent::BackEdge(_, _) => Err(()), - _ => Ok(()), - }); - match result { - Ok(()) => false, - Err(()) => true, +struct CycleDetector { + visited: FixedBitSet, + finished: FixedBitSet, +} + +impl CycleDetector { + fn new(graph: &Graph) -> CycleDetector { + let visited = graph.visit_map(); + let finished = graph.visit_map(); + Self { visited, finished } + } + + // A fast failing DFS approach to detecting if there is a cycle left in the + // graph + // Used over `petgraph::visit::depth_first_search` as it allows us to reuse + // visit maps. + fn has_cycle(&mut self, graph: G, starts: I) -> bool + where + G: IntoNeighbors + Visitable, + I: IntoIterator, + { + self.visited.clear(); + self.finished.clear(); + for start in starts { + if Self::dfs(graph, start, &mut self.visited, &mut self.finished) { + return true; + } + } + return false; + } + + fn dfs(graph: G, u: G::NodeId, visited: &mut G::Map, finished: &mut G::Map) -> bool + where + G: IntoNeighbors + Visitable, + { + // We have already completed a DFS from this node + if finished.is_visited(&u) { + return false; + } + // If not the first visit we have a cycle + if !visited.visit(u) { + return true; + } + for v in graph.neighbors(u) { + if Self::dfs(graph, v, visited, finished) { + return true; + } + } + finished.visit(u); + return false; } } From 7e270d659fbb9ac47a7947e78d89314ba14580d3 Mon Sep 17 00:00:00 2001 From: Chris Olszewski Date: Mon, 27 Jan 2025 08:46:16 -0500 Subject: [PATCH 09/11] chore(graph): remove temp var --- crates/turborepo-graph-utils/src/lib.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/crates/turborepo-graph-utils/src/lib.rs b/crates/turborepo-graph-utils/src/lib.rs index 99401d09e04d9..79745a543af6c 100644 --- a/crates/turborepo-graph-utils/src/lib.rs +++ b/crates/turborepo-graph-utils/src/lib.rs @@ -77,8 +77,7 @@ pub fn cycles_and_cut_candidates( fn edges_to_break_cycle( graph: &Graph, ) -> Vec> { - let edges = graph.edge_indices().collect::>(); - let edge_sets = edges.iter().copied().powerset(); + let edge_sets = graph.edge_indices().powerset(); let mut breaking_edge_sets = Vec::new(); // For each DFS From eb37d8c72f26c87d634936b2ffcde08302a67934 Mon Sep 17 00:00:00 2001 From: Chris Olszewski Date: Tue, 28 Jan 2025 08:17:24 -0500 Subject: [PATCH 10/11] chore(graph): fix lint --- crates/turborepo-graph-utils/src/lib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/turborepo-graph-utils/src/lib.rs b/crates/turborepo-graph-utils/src/lib.rs index 79745a543af6c..0e3d9c1770913 100644 --- a/crates/turborepo-graph-utils/src/lib.rs +++ b/crates/turborepo-graph-utils/src/lib.rs @@ -182,7 +182,7 @@ impl CycleDetector { return true; } } - return false; + false } fn dfs(graph: G, u: G::NodeId, visited: &mut G::Map, finished: &mut G::Map) -> bool @@ -203,7 +203,7 @@ impl CycleDetector { } } finished.visit(u); - return false; + false } } From a2da65ab76cb10230b98cfcdf32bcae3ee2d7a88 Mon Sep 17 00:00:00 2001 From: Chris Olszewski Date: Tue, 28 Jan 2025 08:25:13 -0500 Subject: [PATCH 11/11] chore: comments about error formatting --- crates/turborepo-graph-utils/src/lib.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/crates/turborepo-graph-utils/src/lib.rs b/crates/turborepo-graph-utils/src/lib.rs index 0e3d9c1770913..02f3ecd957136 100644 --- a/crates/turborepo-graph-utils/src/lib.rs +++ b/crates/turborepo-graph-utils/src/lib.rs @@ -121,7 +121,7 @@ pub fn validate_graph(graph: &Graph) -> R let workspaces = nodes.into_iter().map(|id| graph.node_weight(id).unwrap()); let cuts = cuts.into_iter().map(format_cut).format("\n\t"); format!( - "\t{}\nThe cycle can be broken by removing any of these sets of \ + "\t{}\n\nThe cycle can be broken by removing any of these sets of \ dependencies:\n\t{cuts}", workspaces.format(", ") ) @@ -151,7 +151,7 @@ fn format_cut(edges: impl IntoIterator) -> String { .sorted() .format(", "); - format!("{{{edges}}}") + format!("{{ {edges} }}") } struct CycleDetector { @@ -243,8 +243,9 @@ mod test { assert_snapshot!(err.to_string(), @r###" Cyclic dependency detected: d, c, b, a + The cycle can be broken by removing any of these sets of dependencies: - {b -> c} + { b -> c } "###); }