From 860c6b90d79954e58396c40ef29fec028b725602 Mon Sep 17 00:00:00 2001 From: nicholaslyang Date: Wed, 29 Jan 2025 10:16:41 -0500 Subject: [PATCH 1/5] Nested workspace filtering --- crates/turborepo-globwalk/src/lib.rs | 58 ++++++++++++++++++++++++-- crates/turborepo-lib/src/boundaries.rs | 4 +- 2 files changed, 57 insertions(+), 5 deletions(-) diff --git a/crates/turborepo-globwalk/src/lib.rs b/crates/turborepo-globwalk/src/lib.rs index 9854ff793cba8..44ca4dfa930ec 100644 --- a/crates/turborepo-globwalk/src/lib.rs +++ b/crates/turborepo-globwalk/src/lib.rs @@ -27,7 +27,7 @@ pub enum WalkType { } pub use walkdir::Error as WalkDirError; -use wax::walk::Entry; +use wax::walk::{Entry, EntryResidue}; #[derive(Debug, thiserror::Error)] pub enum WalkError { @@ -282,6 +282,19 @@ pub struct GlobError { reason: String, } +#[derive(Debug, Default, Copy, Clone)] +pub struct Settings { + /// Don't recurse into a directory if it contains a `package.json` file + ignore_nested_workspaces: bool, +} + +impl Settings { + pub fn ignore_nested_workspaces(mut self) -> Self { + self.ignore_nested_workspaces = true; + self + } +} + /// ValidatedGlob. /// /// Represents an input string that we have either validated or @@ -340,6 +353,18 @@ impl FromStr for ValidatedGlob { } } +pub fn globwalk_with_settings( + base_path: &AbsoluteSystemPath, + include: &[ValidatedGlob], + exclude: &[ValidatedGlob], + walk_type: WalkType, + settings: Settings, +) -> Result, WalkError> { + let include = include.iter().map(|i| i.inner.clone()).collect::>(); + let exclude = exclude.iter().map(|e| e.inner.clone()).collect::>(); + globwalk_internal(base_path, &include, &exclude, walk_type, settings) +} + pub fn globwalk( base_path: &AbsoluteSystemPath, include: &[ValidatedGlob], @@ -348,7 +373,7 @@ pub fn globwalk( ) -> Result, WalkError> { let include = include.iter().map(|i| i.inner.clone()).collect::>(); let exclude = exclude.iter().map(|e| e.inner.clone()).collect::>(); - globwalk_internal(base_path, &include, &exclude, walk_type) + globwalk_internal(base_path, &include, &exclude, walk_type, Default::default()) } #[tracing::instrument] @@ -357,6 +382,7 @@ pub fn globwalk_internal( include: &[String], exclude: &[String], walk_type: WalkType, + settings: Settings, ) -> Result, WalkError> { let (base_path_new, include_paths, exclude_paths) = preprocess_paths_and_globs(base_path, include, exclude)?; @@ -376,7 +402,15 @@ pub fn globwalk_internal( // Use flat_map_iter as we only want parallelism for walking the globs and not iterating // over the results. // See https://docs.rs/rayon/latest/rayon/iter/trait.ParallelIterator.html#method.flat_map_iter - .flat_map_iter(|glob| walk_glob(walk_type, &base_path_new, ex_patterns.clone(), glob)) + .flat_map_iter(|glob| { + walk_glob( + walk_type, + &base_path_new, + ex_patterns.clone(), + glob, + settings, + ) + }) .collect() } @@ -386,16 +420,32 @@ fn walk_glob( base_path_new: &Path, ex_patterns: Vec, glob: Glob, + settings: Settings, ) -> Vec> { - glob.walk(base_path_new) + let iter = glob + .walk(base_path_new) .not(ex_patterns) .unwrap_or_else(|e| { // Per docs, only fails if exclusion list is too large, since we're using // pre-compiled globs panic!("Failed to compile exclusion globs: {}", e,) + }); + + if settings.ignore_nested_workspaces { + iter.filter_entry(|entry| { + let path = entry.path(); + if path.is_dir() && path != base_path_new && path.join("package.json").exists() { + return Some(EntryResidue::Tree); + } + + None }) .filter_map(|entry| visit_file(walk_type, entry)) .collect::>() + } else { + iter.filter_map(|entry| visit_file(walk_type, entry)) + .collect::>() + } } #[tracing::instrument] diff --git a/crates/turborepo-lib/src/boundaries.rs b/crates/turborepo-lib/src/boundaries.rs index d4ba29470d93e..5c95905b5e1b1 100644 --- a/crates/turborepo-lib/src/boundaries.rs +++ b/crates/turborepo-lib/src/boundaries.rs @@ -4,6 +4,7 @@ use std::{ }; use git2::Repository; +use globwalk::Settings; use itertools::Itertools; use miette::{Diagnostic, NamedSource, Report, SourceSpan}; use oxc_resolver::{ResolveError, Resolver}; @@ -189,7 +190,7 @@ impl Run { unresolved_external_dependencies: Option<&BTreeMap>, source_map: &SourceMap, ) -> Result<(usize, Vec), Error> { - let files = globwalk::globwalk( + let files = globwalk::globwalk_with_settings( package_root, &[ "**/*.js".parse().unwrap(), @@ -201,6 +202,7 @@ impl Run { ], &["**/node_modules/**".parse().unwrap()], globwalk::WalkType::Files, + Settings::default().ignore_nested_workspaces(), )?; let files_checked = files.len(); From 590b59ec5348fbc1bc381496a81f43d4cef19c06 Mon Sep 17 00:00:00 2001 From: nicholaslyang Date: Wed, 29 Jan 2025 10:39:19 -0500 Subject: [PATCH 2/5] Added test --- crates/turborepo-globwalk/src/lib.rs | 112 ++++++++++++++++++++------- 1 file changed, 83 insertions(+), 29 deletions(-) diff --git a/crates/turborepo-globwalk/src/lib.rs b/crates/turborepo-globwalk/src/lib.rs index 44ca4dfa930ec..eb5732f4c40e4 100644 --- a/crates/turborepo-globwalk/src/lib.rs +++ b/crates/turborepo-globwalk/src/lib.rs @@ -478,7 +478,7 @@ mod test { use crate::{ add_doublestar_to_dir, collapse_path, escape_glob_literals, fix_glob_pattern, globwalk, - ValidatedGlob, WalkError, WalkType, + Settings, ValidatedGlob, WalkError, WalkType, }; #[cfg(unix)] @@ -762,7 +762,8 @@ mod test { &["*.txt"], &[], &["/test.txt"], - &["/test.txt"] + &["/test.txt"], + Default::default() ; "hello world" )] #[test_case( @@ -771,7 +772,8 @@ mod test { &["subdir/test.txt", "test.txt"], &[], &["/subdir/test.txt", "/test.txt"], - &["/subdir/test.txt", "/test.txt"] + &["/subdir/test.txt", "/test.txt"], + Default::default() ; "bullet files" )] #[test_case(&[ @@ -804,7 +806,8 @@ mod test { "/repos/some-app/packages/colors/package.json", "/repos/some-app/packages/faker/package.json", "/repos/some-app/packages/left-pad/package.json", - ] + ], + Default::default() ; "finding workspace package.json files" )] #[test_case(&[ @@ -842,7 +845,8 @@ mod test { "/repos/some-app/packages/colors/package.json", "/repos/some-app/packages/faker/package.json", "/repos/some-app/packages/left-pad/package.json", - ] + ], + Default::default() ; "excludes unexpected workspace package.json files" )] #[test_case(&[ @@ -882,8 +886,37 @@ mod test { "/repos/some-app/packages/left-pad/package.json", "/repos/some-app/packages/xzibit/package.json", "/repos/some-app/packages/xzibit/packages/yo-dawg/package.json", - ] + ], + Default::default() ; "nested packages work")] + #[test_case(&[ + "/external/file.txt", + "/repos/some-app/apps/docs/package.json", + "/repos/some-app/apps/web/package.json", + "/repos/some-app/bower_components/readline/package.json", + "/repos/some-app/examples/package.json", + "/repos/some-app/node_modules/gulp/bower_components/readline/package.json", + "/repos/some-app/node_modules/react/package.json", + "/repos/some-app/package.json", + "/repos/some-app/packages/xzibit/package.json", + "/repos/some-app/packages/xzibit/node_modules/street-legal/package.json", + "/repos/some-app/packages/xzibit/node_modules/paint-colors/package.json", + "/repos/some-app/packages/xzibit/packages/yo-dawg/package.json", + "/repos/some-app/packages/xzibit/packages/yo-dawg/node_modules/meme/package.json", + "/repos/some-app/packages/xzibit/packages/yo-dawg/node_modules/yo-dawg/package.json", + "/repos/some-app/packages/colors/package.json", + "/repos/some-app/packages/faker/package.json", + "/repos/some-app/packages/left-pad/package.json", + "/repos/some-app/test/mocks/spanish-inquisition/package.json", + "/repos/some-app/tests/mocks/spanish-inquisition/package.json", + ], + "/repos/some-app/", + &["**/package.json"], + &[], + &["/repos/some-app/package.json"], + &["/repos/some-app/package.json"], + Settings::default().ignore_nested_workspaces() + ; "ignore nested workspaces setting only matches top level package")] #[test_case(&[ "/external/file.txt", "/repos/some-app/apps/docs/package.json", @@ -921,7 +954,8 @@ mod test { "/repos/some-app/packages/left-pad/package.json", "/repos/some-app/packages/xzibit/package.json", "/repos/some-app/packages/xzibit/packages/yo-dawg/package.json", - ] + ], + Default::default() ; "includes do not override excludes")] #[test_case(&[ "/external/file.txt", @@ -970,7 +1004,8 @@ mod test { "/repos/some-app/dist/js/node_modules/browserify.js", "/repos/some-app/public/dist/css/index.css", "/repos/some-app/public/dist/images/rick_astley.jpg", - ] + ], + Default::default() ; "output globbing grabs the desired content" )] #[test_case(&[ @@ -995,7 +1030,8 @@ mod test { "/repos/some-app/dist/js/index.js", "/repos/some-app/dist/js/lib.js", "/repos/some-app/dist/js/node_modules/browserify.js", - ] + ], + Default::default() ; "passing ** captures all children")] #[test_case(&[ "/repos/some-app/dist/index.html", @@ -1020,7 +1056,8 @@ mod test { "/repos/some-app/dist/js/index.js", "/repos/some-app/dist/js/lib.js", "/repos/some-app/dist/js/node_modules/browserify.js", - ] + ], + Default::default() ; "passing just a directory captures no children")] #[test_case(&[ "/repos/some-app/dist/index.html", @@ -1040,13 +1077,16 @@ mod test { "/repos/some-app/dist/js/index.js", "/repos/some-app/dist/js/lib.js", "/repos/some-app/dist/js/node_modules/browserify.js", - ] ; "redundant includes do not duplicate")] + ], + Default::default() + ; "redundant includes do not duplicate" + )] #[test_case(&[ "/repos/some-app/dist/index.html", "/repos/some-app/dist/js/index.js", "/repos/some-app/dist/js/lib.js", "/repos/some-app/dist/js/node_modules/browserify.js", - ], "/repos/some-app/", &["**"], &["**"], &[ ], &[ ] ; "exclude everything, include everything")] + ], "/repos/some-app/", &["**"], &["**"], &[ ], &[ ], Default::default() ; "exclude everything, include everything")] #[test_case(&[ "/repos/some-app/dist/index.html", "/repos/some-app/dist/js/index.js", @@ -1062,7 +1102,8 @@ mod test { ], &[ "/repos/some-app/dist/index.html", - ] + ], + Default::default() ; "passing just a directory to exclude prevents capture of children")] #[test_case(&[ "/repos/some-app/dist/index.html", @@ -1078,7 +1119,8 @@ mod test { "/repos/some-app/dist/index.html", // "/repos/some-app/dist/js", ], - &["/repos/some-app/dist/index.html",] + &["/repos/some-app/dist/index.html",], + Default::default() ; "passing ** to exclude prevents capture of children")] #[test_case(&[ "/repos/some-app/dist/index.html", @@ -1090,7 +1132,8 @@ mod test { &["**"], &["./"], &[], - &[] + &[], + Default::default() ; "exclude everything with folder . applies at base path" )] #[test_case(&[ @@ -1103,7 +1146,8 @@ mod test { &["**"], &["./dist"], &["repos/some-app"], - &[] + &[], + Default::default() ; "exclude everything with traversal applies at a non-base path" )] #[test_case(&[ @@ -1116,7 +1160,8 @@ mod test { &["**"], &["dist/../"], &[], - &[] + &[], + Default::default() ; "exclude everything with folder traversal (..) applies at base path" )] #[test_case(&[ @@ -1141,7 +1186,8 @@ mod test { "/repos/some-app/dist/js/index.js", "/repos/some-app/dist/js/lib.js", "/repos/some-app/dist/js/node_modules/browserify.js", - ] + ], + Default::default() ; "traversal works within base path" )] #[test_case(&[ @@ -1167,7 +1213,8 @@ mod test { "/repos/some-app/dist/js/index.js", "/repos/some-app/dist/js/lib.js", "/repos/some-app/dist/js/node_modules/browserify.js", - ] + ], + Default::default() ; "self references work (.)" )] #[test_case(&[ @@ -1179,7 +1226,7 @@ mod test { ], "/repos/some-app/", &["*"], &[ ], &[ "/repos/some-app/dist", "/repos/some-app/package.json", - ], &["/repos/some-app/package.json"] ; "depth of 1 includes handles folders properly")] + ], &["/repos/some-app/package.json"], Default::default() ; "depth of 1 includes handles folders properly")] #[test_case(&[ "/repos/some-app/package.json", "/repos/some-app/dist/index.html", @@ -1195,7 +1242,8 @@ mod test { "/repos/some-app/dist", "/repos/some-app/package.json", ], - &["/repos/some-app/package.json"] + &["/repos/some-app/package.json"], + Default::default() ; "depth of 1 excludes prevents capturing folders")] #[test_case(&[ "/repos/some-app/dist/index.html", @@ -1220,7 +1268,8 @@ mod test { "/repos/some-app/dist/js/index.js", "/repos/some-app/dist/js/lib.js", "/repos/some-app/dist/js/node_modules/browserify.js", - ] + ], + Default::default() ; "No-trailing slash basePath works")] #[test_case(&[ "/repos/some-app/included.txt", @@ -1229,7 +1278,7 @@ mod test { "/repos/some-app/included.txt", ], &[ "/repos/some-app/included.txt", - ] ; "exclude single file")] + ], Default::default() ; "exclude single file")] #[test_case(&[ "/repos/some-app/one/included.txt", "/repos/some-app/one/two/included.txt", @@ -1253,7 +1302,7 @@ mod test { "/repos/some-app/one/included.txt", "/repos/some-app/one/two/included.txt", "/repos/some-app/one/two/three/included.txt", - ] ; "exclude nested single file")] + ], Default::default() ; "exclude nested single file")] #[test_case(&[ "/repos/some-app/one/included.txt", "/repos/some-app/one/two/included.txt", @@ -1261,7 +1310,7 @@ mod test { "/repos/some-app/one/excluded.txt", "/repos/some-app/one/two/excluded.txt", "/repos/some-app/one/two/three/excluded.txt", - ], "/repos/some-app", &["**"], &["**"], &[], &[] ; "exclude everything")] + ], "/repos/some-app", &["**"], &["**"], &[], &[], Default::default() ; "exclude everything")] #[test_case(&[ "/repos/some-app/one/included.txt", "/repos/some-app/one/two/included.txt", @@ -1269,7 +1318,7 @@ mod test { "/repos/some-app/one/excluded.txt", "/repos/some-app/one/two/excluded.txt", "/repos/some-app/one/two/three/excluded.txt", - ], "/repos/some-app", &["**"], &["**/"], &[], &[] ; "exclude everything with slash")] + ], "/repos/some-app", &["**"], &["**/"], &[], &[], Default::default() ; "exclude everything with slash")] #[test_case(&[ "/repos/some-app/foo/bar", "/repos/some-app/some-foo/bar", @@ -1284,7 +1333,8 @@ mod test { ], &[ "/repos/some-app/included", - ] + ], + Default::default() ; "exclude everything with leading star" )] #[test_case(&[ @@ -1302,7 +1352,8 @@ mod test { ], &[ "/repos/some-app/included", - ] + ], + Default::default() ; "exclude everything with trailing star" )] fn glob_walk_files( @@ -1312,6 +1363,7 @@ mod test { exclude: &[&str], expected: &[&str], expected_files: &[&str], + settings: Settings, ) { let dir = setup_files(files); let base_path = base_path.trim_start_matches('/'); @@ -1329,7 +1381,9 @@ mod test { (crate::WalkType::Files, expected_files), (crate::WalkType::All, expected), ] { - let success = super::globwalk(&path, &include, &exclude, walk_type).unwrap(); + let success = + super::globwalk_with_settings(&path, &include, &exclude, walk_type, settings) + .unwrap(); let success = success .iter() From 4f6175ce2884b66bcbccfba51f72973368ae7a0f Mon Sep 17 00:00:00 2001 From: nicholaslyang Date: Wed, 29 Jan 2025 10:47:18 -0500 Subject: [PATCH 3/5] Some documentation and renaming --- crates/turborepo-globwalk/src/lib.rs | 16 ++++++++++------ crates/turborepo-lib/src/boundaries.rs | 2 +- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/crates/turborepo-globwalk/src/lib.rs b/crates/turborepo-globwalk/src/lib.rs index eb5732f4c40e4..b012c5a8b289c 100644 --- a/crates/turborepo-globwalk/src/lib.rs +++ b/crates/turborepo-globwalk/src/lib.rs @@ -284,13 +284,17 @@ pub struct GlobError { #[derive(Debug, Default, Copy, Clone)] pub struct Settings { - /// Don't recurse into a directory if it contains a `package.json` file - ignore_nested_workspaces: bool, + /// Don't recurse into a directory if it contains a `package.json` file. + /// NOTE: If globbing from the root of a workspace, this setting will cause + /// us to not recurse into the individual packages. Therefore, only use this + /// setting if you are globbing in an individual package at not at the + /// workspace root. + ignore_nested_packages: bool, } impl Settings { - pub fn ignore_nested_workspaces(mut self) -> Self { - self.ignore_nested_workspaces = true; + pub fn ignore_nested_packages(mut self) -> Self { + self.ignore_nested_packages = true; self } } @@ -431,7 +435,7 @@ fn walk_glob( panic!("Failed to compile exclusion globs: {}", e,) }); - if settings.ignore_nested_workspaces { + if settings.ignore_nested_packages { iter.filter_entry(|entry| { let path = entry.path(); if path.is_dir() && path != base_path_new && path.join("package.json").exists() { @@ -915,7 +919,7 @@ mod test { &[], &["/repos/some-app/package.json"], &["/repos/some-app/package.json"], - Settings::default().ignore_nested_workspaces() + Settings::default().ignore_nested_packages() ; "ignore nested workspaces setting only matches top level package")] #[test_case(&[ "/external/file.txt", diff --git a/crates/turborepo-lib/src/boundaries.rs b/crates/turborepo-lib/src/boundaries.rs index 5c95905b5e1b1..b8a075a9b1aef 100644 --- a/crates/turborepo-lib/src/boundaries.rs +++ b/crates/turborepo-lib/src/boundaries.rs @@ -202,7 +202,7 @@ impl Run { ], &["**/node_modules/**".parse().unwrap()], globwalk::WalkType::Files, - Settings::default().ignore_nested_workspaces(), + Settings::default().ignore_nested_packages(), )?; let files_checked = files.len(); From a1dc959571d36a27c31f1fa555adadd11507c61f Mon Sep 17 00:00:00 2001 From: nicholaslyang Date: Wed, 29 Jan 2025 10:55:01 -0500 Subject: [PATCH 4/5] Added test to boundaries --- turborepo-tests/integration/fixtures/boundaries/package.json | 4 ++-- .../boundaries/packages/another/nested-package/index.js | 2 ++ .../boundaries/packages/another/nested-package/package.json | 3 +++ 3 files changed, 7 insertions(+), 2 deletions(-) create mode 100644 turborepo-tests/integration/fixtures/boundaries/packages/another/nested-package/index.js create mode 100644 turborepo-tests/integration/fixtures/boundaries/packages/another/nested-package/package.json diff --git a/turborepo-tests/integration/fixtures/boundaries/package.json b/turborepo-tests/integration/fixtures/boundaries/package.json index 83520cd0fc7cb..404d3338e68cf 100644 --- a/turborepo-tests/integration/fixtures/boundaries/package.json +++ b/turborepo-tests/integration/fixtures/boundaries/package.json @@ -8,7 +8,7 @@ }, "packageManager": "npm@10.5.0", "workspaces": [ - "apps/**", - "packages/**" + "apps/*", + "packages/*" ] } diff --git a/turborepo-tests/integration/fixtures/boundaries/packages/another/nested-package/index.js b/turborepo-tests/integration/fixtures/boundaries/packages/another/nested-package/index.js new file mode 100644 index 0000000000000..5ccdd557de48e --- /dev/null +++ b/turborepo-tests/integration/fixtures/boundaries/packages/another/nested-package/index.js @@ -0,0 +1,2 @@ +// Invalid import but we're in a nested package so it doesn't get detected +import { walkThePlank } from "../../module-package/my-module.mjs"; diff --git a/turborepo-tests/integration/fixtures/boundaries/packages/another/nested-package/package.json b/turborepo-tests/integration/fixtures/boundaries/packages/another/nested-package/package.json new file mode 100644 index 0000000000000..36b2111fc8d32 --- /dev/null +++ b/turborepo-tests/integration/fixtures/boundaries/packages/another/nested-package/package.json @@ -0,0 +1,3 @@ +{ + "name": "nested-package" +} From b7c4382d91cf6b4726877639c0587c0fea34506e Mon Sep 17 00:00:00 2001 From: nicholaslyang Date: Thu, 30 Jan 2025 11:03:39 -0500 Subject: [PATCH 5/5] PR feedback --- crates/turborepo-globwalk/src/lib.rs | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/crates/turborepo-globwalk/src/lib.rs b/crates/turborepo-globwalk/src/lib.rs index b012c5a8b289c..da7678f3dc775 100644 --- a/crates/turborepo-globwalk/src/lib.rs +++ b/crates/turborepo-globwalk/src/lib.rs @@ -895,30 +895,19 @@ mod test { ; "nested packages work")] #[test_case(&[ "/external/file.txt", - "/repos/some-app/apps/docs/package.json", - "/repos/some-app/apps/web/package.json", - "/repos/some-app/bower_components/readline/package.json", - "/repos/some-app/examples/package.json", - "/repos/some-app/node_modules/gulp/bower_components/readline/package.json", - "/repos/some-app/node_modules/react/package.json", "/repos/some-app/package.json", + "/repos/some-app/index.js", + "/repos/some-app/just-a-dir/index.js", "/repos/some-app/packages/xzibit/package.json", - "/repos/some-app/packages/xzibit/node_modules/street-legal/package.json", - "/repos/some-app/packages/xzibit/node_modules/paint-colors/package.json", - "/repos/some-app/packages/xzibit/packages/yo-dawg/package.json", - "/repos/some-app/packages/xzibit/packages/yo-dawg/node_modules/meme/package.json", - "/repos/some-app/packages/xzibit/packages/yo-dawg/node_modules/yo-dawg/package.json", + "/repos/some-app/packages/xzibit/index.js", "/repos/some-app/packages/colors/package.json", - "/repos/some-app/packages/faker/package.json", - "/repos/some-app/packages/left-pad/package.json", - "/repos/some-app/test/mocks/spanish-inquisition/package.json", - "/repos/some-app/tests/mocks/spanish-inquisition/package.json", + "/repos/some-app/packages/colors/index.js", ], "/repos/some-app/", - &["**/package.json"], + &["**/*.js"], &[], - &["/repos/some-app/package.json"], - &["/repos/some-app/package.json"], + &["/repos/some-app/index.js", "/repos/some-app/just-a-dir/index.js"], + &["/repos/some-app/index.js", "/repos/some-app/just-a-dir/index.js"], Settings::default().ignore_nested_packages() ; "ignore nested workspaces setting only matches top level package")] #[test_case(&[