diff --git a/Cargo.lock b/Cargo.lock index 8dd037cedb45f..73949e834b4cd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6361,6 +6361,7 @@ dependencies = [ name = "turborepo-graph-utils" version = "0.1.0" dependencies = [ + "fixedbitset", "futures", "insta", "itertools 0.10.5", diff --git a/crates/turborepo-graph-utils/Cargo.toml b/crates/turborepo-graph-utils/Cargo.toml index cfc319d9e786e..1cac6e930785e 100644 --- a/crates/turborepo-graph-utils/Cargo.toml +++ b/crates/turborepo-graph-utils/Cargo.toml @@ -8,6 +8,7 @@ license = "MIT" workspace = true [dependencies] +fixedbitset = "0.4.2" futures = "0.3.26" itertools = { workspace = true } log = "0.4.20" diff --git a/crates/turborepo-graph-utils/examples/find_cuts.rs b/crates/turborepo-graph-utils/examples/find_cuts.rs new file mode 100644 index 0000000000000..9ab548d8e5592 --- /dev/null +++ b/crates/turborepo-graph-utils/examples/find_cuts.rs @@ -0,0 +1,34 @@ +use itertools::Itertools as _; +use petgraph::Graph; +use turborepo_graph_utils::cycles_and_cut_candidates; + +fn main() { + let size: usize = cli_size().unwrap_or(6); + let g = generate_graph(size); + let cycles = cycles_and_cut_candidates(&g); + println!("found {} cycles", cycles.len()); + for (i, mut cycle) in cycles.into_iter().enumerate() { + let cut_size = cycle.cuts.pop().unwrap_or_default().len(); + println!("cycle {i} needs {cut_size} cuts to be removed"); + } +} + +fn cli_size() -> Option { + std::env::args().nth(1)?.parse().ok() +} + +// Generates a fully connected graph +fn generate_graph(size: usize) -> Graph { + let mut g = Graph::new(); + let nodes = (0..size) + .map(|i| g.add_node(i.to_string())) + .collect::>(); + + for (s, d) in nodes.iter().cartesian_product(nodes.iter()) { + if s != d { + g.add_edge(*s, *d, ()); + } + } + + g +} diff --git a/crates/turborepo-graph-utils/src/lib.rs b/crates/turborepo-graph-utils/src/lib.rs index 2304e08fe277c..02f3ecd957136 100644 --- a/crates/turborepo-graph-utils/src/lib.rs +++ b/crates/turborepo-graph-utils/src/lib.rs @@ -2,17 +2,18 @@ mod walker; use std::{collections::HashSet, fmt::Display, hash::Hash}; +use fixedbitset::FixedBitSet; use itertools::Itertools; use petgraph::{ prelude::*, - visit::{depth_first_search, Reversed}, + visit::{depth_first_search, EdgeFiltered, IntoNeighbors, Reversed, VisitMap, Visitable}, }; use thiserror::Error; #[derive(Debug, Error)] pub enum Error { - #[error("Cyclic dependency detected:\n{0}")] - CyclicDependencies(String), + #[error("Cyclic dependency detected:\n{cycle_lines}")] + CyclicDependencies { cycle_lines: String }, #[error("{0} depends on itself")] SelfDependency(String), } @@ -42,19 +43,93 @@ pub fn transitive_closure(graph: &Graph) -> Result<(), Error> { - // This is equivalent to AcyclicGraph.Cycles from Go's dag library - let cycles_lines = petgraph::algo::tarjan_scc(&graph) +pub struct Cycle { + pub nodes: Vec, + pub cuts: Vec>, +} + +/// Given a graph will look for cycles and edge sets that will remove the cycle. +/// +/// If any cycles are found they will be returned alongside a list of sets of +/// edges that if they are cut will result in the graph no longer having a +/// cycle. +/// We return (N, N) tuples instead of edge indexes as indexes are not stable +/// across removals so the indexes in the subgraph do not match the full graph. +pub fn cycles_and_cut_candidates( + graph: &Graph, +) -> Vec> { + petgraph::algo::tarjan_scc(graph) .into_iter() .filter(|cycle| cycle.len() > 1) - .map(|cycle| { - let workspaces = cycle.into_iter().map(|id| graph.node_weight(id).unwrap()); - format!("\t{}", workspaces.format(", ")) + .map(|nodes| { + let mut subgraph = graph.clone(); + subgraph.retain_nodes(|_, node| nodes.contains(&node)); + let cuts = edges_to_break_cycle(&subgraph); + Cycle { nodes, cuts } + }) + .collect() +} + +/// Given a graph that has a cycle, return all minimal sets of edges that result +/// in a cycle no longer being present. +/// Minimal here means that if the cycle can be broken by only removing n edges, +/// then only sets containing n edges will be returned. +fn edges_to_break_cycle( + graph: &Graph, +) -> Vec> { + let edge_sets = graph.edge_indices().powerset(); + let mut breaking_edge_sets = Vec::new(); + + // For each DFS + let mut cycle_detector = CycleDetector::new(graph); + + let mut minimal_break_point = usize::MAX; + for edge_set in edge_sets { + let set_size = edge_set.len(); + if set_size > minimal_break_point { + break; + } + let trimmed_graph = EdgeFiltered::from_fn(graph, |edge| !edge_set.contains(&edge.id())); + + let is_cyclic = cycle_detector.has_cycle(&trimmed_graph, trimmed_graph.0.node_indices()); + if !is_cyclic { + minimal_break_point = set_size; + breaking_edge_sets.push( + edge_set + .into_iter() + .map(|edge| { + let (src, dst) = graph.edge_endpoints(edge).unwrap(); + ( + graph.node_weight(src).unwrap().clone(), + graph.node_weight(dst).unwrap().clone(), + ) + }) + .collect(), + ); + } + } + + breaking_edge_sets +} + +pub fn validate_graph(graph: &Graph) -> Result<(), Error> { + let cycles = cycles_and_cut_candidates(graph); + + let cycle_lines = cycles + .into_iter() + .map(|Cycle { nodes, cuts }| { + let workspaces = nodes.into_iter().map(|id| graph.node_weight(id).unwrap()); + let cuts = cuts.into_iter().map(format_cut).format("\n\t"); + format!( + "\t{}\n\nThe cycle can be broken by removing any of these sets of \ + dependencies:\n\t{cuts}", + workspaces.format(", ") + ) }) .join("\n"); - if !cycles_lines.is_empty() { - return Err(Error::CyclicDependencies(cycles_lines)); + if !cycle_lines.is_empty() { + return Err(Error::CyclicDependencies { cycle_lines }); } for edge in graph.edge_references() { @@ -69,6 +144,69 @@ pub fn validate_graph(graph: &Graph) -> Result<(), Error> { Ok(()) } +fn format_cut(edges: impl IntoIterator) -> String { + let edges = edges + .into_iter() + .map(|(src, dst)| format!("{src} -> {dst}")) + .sorted() + .format(", "); + + format!("{{ {edges} }}") +} + +struct CycleDetector { + visited: FixedBitSet, + finished: FixedBitSet, +} + +impl CycleDetector { + fn new(graph: &Graph) -> CycleDetector { + let visited = graph.visit_map(); + let finished = graph.visit_map(); + Self { visited, finished } + } + + // A fast failing DFS approach to detecting if there is a cycle left in the + // graph + // Used over `petgraph::visit::depth_first_search` as it allows us to reuse + // visit maps. + fn has_cycle(&mut self, graph: G, starts: I) -> bool + where + G: IntoNeighbors + Visitable, + I: IntoIterator, + { + self.visited.clear(); + self.finished.clear(); + for start in starts { + if Self::dfs(graph, start, &mut self.visited, &mut self.finished) { + return true; + } + } + false + } + + fn dfs(graph: G, u: G::NodeId, visited: &mut G::Map, finished: &mut G::Map) -> bool + where + G: IntoNeighbors + Visitable, + { + // We have already completed a DFS from this node + if finished.is_visited(&u) { + return false; + } + // If not the first visit we have a cycle + if !visited.visit(u) { + return true; + } + for v in graph.neighbors(u) { + if Self::dfs(graph, v, visited, finished) { + return true; + } + } + finished.visit(u); + false + } +} + pub use walker::{WalkMessage, Walker}; #[cfg(test)] @@ -105,6 +243,89 @@ mod test { assert_snapshot!(err.to_string(), @r###" Cyclic dependency detected: d, c, b, a + + The cycle can be broken by removing any of these sets of dependencies: + { b -> c } "###); } + + #[test] + fn test_basic_cycle_break() { + // Simple cycle where any edge would break the cycle + let mut g = Graph::new(); + let a = g.add_node("a"); + let b = g.add_node("b"); + let c = g.add_node("c"); + + g.add_edge(a, b, ()); + g.add_edge(b, c, ()); + g.add_edge(c, a, ()); + + let breaks = edges_to_break_cycle(&g); + assert_eq!(breaks.len(), 3, "{:?}", breaks); + let mut edges_that_break = HashSet::new(); + for brk in breaks { + assert_eq!(brk.len(), 1); + edges_that_break.extend(brk.into_iter()); + } + assert_eq!( + edges_that_break, + [("a", "b"), ("b", "c"), ("c", "a")] + .iter() + .copied() + .collect::>() + ); + } + + #[test] + fn test_double_cycle_break() { + // 2 cycles where only one edge would break the cycle + let mut g = Graph::new(); + let a = g.add_node("a"); + let b = g.add_node("b"); + let c = g.add_node("c"); + let d = g.add_node("d"); + + // Cycle 1 + g.add_edge(a, b, ()); + g.add_edge(b, c, ()); + g.add_edge(c, a, ()); + + // Cycle 2 + g.add_edge(b, d, ()); + g.add_edge(d, a, ()); + + let breaks = edges_to_break_cycle(&g); + assert_eq!(breaks.len(), 1, "{:?}", breaks); + assert_eq!( + breaks.into_iter().flatten().exactly_one().unwrap(), + ("a", "b") + ); + } + + #[test] + fn test_cycle_break_two_edges() { + // cycle where multiple edges required to break the cycle + // a,b,c form a cycle with + // a <-> c and b <-> c + let mut g = Graph::new(); + let a = g.add_node("a"); + let b = g.add_node("b"); + let c = g.add_node("c"); + + g.add_edge(a, b, ()); + g.add_edge(b, c, ()); + g.add_edge(c, a, ()); + g.add_edge(a, c, ()); + g.add_edge(c, b, ()); + + let breaks = edges_to_break_cycle(&g); + assert_eq!(breaks.len(), 3, "{:?}", breaks); + let expected_1: HashSet<_> = [("b", "c"), ("a", "c")].iter().copied().collect(); + let expected_2: HashSet<_> = [("b", "c"), ("c", "a")].iter().copied().collect(); + let expected_3: HashSet<_> = [("c", "b"), ("c", "a")].iter().copied().collect(); + assert!(breaks.contains(&expected_1), "{breaks:?}"); + assert!(breaks.contains(&expected_2), "{breaks:?}"); + assert!(breaks.contains(&expected_3), "{breaks:?}"); + } } diff --git a/crates/turborepo-lib/src/task_graph/visitor/.command.rs.pending-snap b/crates/turborepo-lib/src/task_graph/visitor/.command.rs.pending-snap deleted file mode 100644 index 367ad859f8f98..0000000000000 --- a/crates/turborepo-lib/src/task_graph/visitor/.command.rs.pending-snap +++ /dev/null @@ -1,5 +0,0 @@ -{"run_id":"1737179308-428315000","line":309,"new":{"module_name":"turborepo_lib__task_graph__visitor__command__test","snapshot_name":"error_short_circuits_factory","metadata":{"source":"crates/turborepo-lib/src/task_graph/visitor/command.rs","assertion_line":309,"expression":"cmd.to_string()"},"snapshot":"Internal errors encountered: oops!"},"old":{"module_name":"turborepo_lib__task_graph__visitor__command__test","metadata":{},"snapshot":"internal errors encountered: oops!"}} -{"run_id":"1737179538-131428000","line":309,"new":null,"old":null} -{"run_id":"1737179603-300467000","line":309,"new":null,"old":null} -{"run_id":"1737179676-323346000","line":309,"new":null,"old":null} -{"run_id":"1737179741-977012000","line":309,"new":null,"old":null} diff --git a/crates/turborepo-microfrontends/src/.lib.rs.pending-snap b/crates/turborepo-microfrontends/src/.lib.rs.pending-snap deleted file mode 100644 index 883186b1f2fd9..0000000000000 --- a/crates/turborepo-microfrontends/src/.lib.rs.pending-snap +++ /dev/null @@ -1,3 +0,0 @@ -{"run_id":"1737179605-821842000","line":208,"new":{"module_name":"turborepo_microfrontends__test","snapshot_name":"unsupported_version","metadata":{"source":"crates/turborepo-microfrontends/src/lib.rs","assertion_line":208,"expression":"err"},"snapshot":"Unsupported microfrontends configuration version: yolo. Supported versions: [\"1\", \"2\"]"},"old":{"module_name":"turborepo_microfrontends__test","metadata":{},"snapshot":"Unsupported micro-frontends configuration version: yolo. Supported versions: [\"1\", \"2\"]"}} -{"run_id":"1737179677-885648000","line":208,"new":{"module_name":"turborepo_microfrontends__test","snapshot_name":"unsupported_version","metadata":{"source":"crates/turborepo-microfrontends/src/lib.rs","assertion_line":208,"expression":"err"},"snapshot":"Unsupported microfrontends configuration version: yolo. Supported versions: [\"1\", \"2\"]"},"old":{"module_name":"turborepo_microfrontends__test","metadata":{},"snapshot":"Unsupported micro-frontends configuration version: yolo. Supported versions: [\"1\", \"2\"]"}} -{"run_id":"1737179744-116940000","line":208,"new":null,"old":null} diff --git a/crates/turborepo-repository/src/package_graph/builder.rs b/crates/turborepo-repository/src/package_graph/builder.rs index d3ed7bf35c053..526c49fe9e318 100644 --- a/crates/turborepo-repository/src/package_graph/builder.rs +++ b/crates/turborepo-repository/src/package_graph/builder.rs @@ -51,7 +51,7 @@ pub enum Error { PackageJson(#[from] crate::package_json::Error), #[error("package.json must have a name field:\n{0}")] PackageJsonMissingName(AbsoluteSystemPathBuf), - #[error("Invalid package dependency graph: {0}")] + #[error("Invalid package dependency graph:")] InvalidPackageGraph(#[source] graph::Error), #[error(transparent)] Lockfile(#[from] turborepo_lockfiles::Error), diff --git a/crates/turborepo-repository/src/package_graph/mod.rs b/crates/turborepo-repository/src/package_graph/mod.rs index 94615ddffa747..aae4acd04e514 100644 --- a/crates/turborepo-repository/src/package_graph/mod.rs +++ b/crates/turborepo-repository/src/package_graph/mod.rs @@ -906,7 +906,7 @@ mod test { assert_matches!( pkg_graph.validate(), Err(builder::Error::InvalidPackageGraph( - graph::Error::CyclicDependencies(_) + graph::Error::CyclicDependencies { .. } )) ); }