Merged
28 changes: 12 additions & 16 deletions checkov/kustomize/runner.py
@@ -13,6 +13,8 @@
import yaml
from typing import Optional, Dict, Any, TextIO, TYPE_CHECKING

from checkov.common.parallelizer.parallel_runner import parallel_runner


from checkov.common.graph.graph_builder import CustomAttributes
from checkov.common.graph.graph_builder.consts import GraphSource
@@ -702,23 +704,17 @@ def run_kustomize_to_k8s(
shared_kustomize_file_mappings = pickle_deepcopy(manager.dict()) # type:ignore[arg-type] # works with DictProxy
shared_kustomize_file_mappings.clear()

jobs = []
for filePath in self.kustomizeProcessedFolderAndMeta:
p = multiprocessing.Process(
target=self._run_kustomize_parser,
args=(
filePath,
shared_kustomize_file_mappings,
self.kustomizeProcessedFolderAndMeta,
self.templateRendererCommand,
self.target_folder_path
)
items = [
(
filePath,
shared_kustomize_file_mappings,
self.kustomizeProcessedFolderAndMeta,
self.templateRendererCommand,
self.target_folder_path,
)
jobs.append(p)
p.start()

for proc in jobs:
proc.join()
for filePath in self.kustomizeProcessedFolderAndMeta
]
list(parallel_runner.run_function(self._run_kustomize_parser, items))

self.kustomizeFileMappings = dict(shared_kustomize_file_mappings)

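This hunk replaces the hand-rolled multiprocessing.Process / start / join bookkeeping with checkov's shared parallel_runner helper: one argument tuple is built per kustomize directory and run_function fans the work out. A minimal sketch of the same pattern, with a hypothetical render_one worker standing in for self._run_kustomize_parser and made-up overlay paths:

from checkov.common.parallelizer.parallel_runner import parallel_runner


def render_one(file_path: str, command: str) -> str:
    # hypothetical worker; in the runner this is self._run_kustomize_parser
    return f"{command} build {file_path}"


# one argument tuple per unit of work, mirroring the items list in the hunk above
items = [(path, "kustomize") for path in ("overlays/dev", "overlays/prod")]

# run_function yields results lazily, so wrapping it in list() drives every job
# to completion, just as the new runner code does
results = list(parallel_runner.run_function(render_one, items))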
88 changes: 32 additions & 56 deletions checkov/terraform/module_loading/module_finder.py
@@ -5,20 +5,16 @@
import os
import re
from pathlib import Path
from typing import List, Callable, TYPE_CHECKING
from typing import List, Callable, TYPE_CHECKING, Any, Optional, Dict

from checkov.common.util.env_vars_config import env_vars_config
from checkov.common.parallelizer.parallel_runner import parallel_runner
from checkov.common.util.file_utils import read_file_with_any_encoding
from checkov.terraform.module_loading.registry import module_loader_registry
from checkov.terraform.parser_utils import load_or_die_quietly

if TYPE_CHECKING:
from checkov.terraform.module_loading.registry import ModuleLoaderRegistry

MODULE_NAME_PATTERN = re.compile(r'[^#]*\bmodule\s*"(?P<name>.*)"')
MODULE_SOURCE_PATTERN = re.compile(r'[^#]*\bsource\s*=\s*"(?P<link>.*)"')
MODULE_VERSION_PATTERN = re.compile(r'[^#]*\bversion\s*=\s*"(?P<operator>=|!=|>=|>|<=|<|~>\s*)?(?P<version>[\d.]+-?\w*)"')


class ModuleDownload:
def __init__(self, source_dir: str) -> None:
@@ -56,69 +52,46 @@ def find_tf_managed_modules(path: str) -> List[ModuleDownload]:
return modules_found


def find_modules(path: str) -> List[ModuleDownload]:
def find_modules(path: str, loaded_files_cache: Optional[Dict[str, Any]] = None,
parsing_errors: Optional[Dict[str, Exception]] = None, excluded_paths: Optional[list[str]] = None) -> list[ModuleDownload]:
modules_found: list[ModuleDownload] = []
if loaded_files_cache is None:
loaded_files_cache = {}
if parsing_errors is None:
parsing_errors = {}

excluded_paths_regex = re.compile('|'.join(f"({p})" for p in excluded_paths)) if excluded_paths else None
for root, _, full_file_names in os.walk(path):
for file_name in full_file_names:
if not file_name.endswith('.tf'):
if not file_name.endswith(".tf"):
continue
if root.startswith(os.path.join(path, ".terraform", "modules")):
# don't scan the modules folder used by Terraform
continue
file_path = os.path.join(root, file_name)
if excluded_paths_regex and excluded_paths_regex.search(file_path):
continue

try:
content = read_file_with_any_encoding(file_path=os.path.join(path, root, file_name))
if "module " not in content:
# if there is no "module " ref in the whole file, then no need to search line by line
continue

curr_md = None
comment_out = re.findall(r'/\*.*?\*/', content, re.DOTALL)
for line in content.splitlines():
if not curr_md:
if line.startswith('module'):
in_comment_out = [line for a in comment_out if line in a]
if in_comment_out:
# if the "module " ref in the comment out part
continue
curr_md = ModuleDownload(os.path.dirname(os.path.join(root, file_name)))

# also extract the name for easier mapping against the TF modules.json file
match = re.match(MODULE_NAME_PATTERN, line)
if match:
curr_md.module_name = match.group("name")

continue
else:
if line.startswith('}'):
if curr_md.module_link is None:
logging.warning(f'A module at {curr_md.source_dir} had no source, skipping')
else:
curr_md.address = f"{curr_md.module_link}:{curr_md.version}"
modules_found.append(curr_md)
curr_md = None
continue

if "source" in line:
match = re.match(MODULE_SOURCE_PATTERN, line)
if match:
curr_md.module_link = match.group('link')
continue

if "version" in line:
match = re.match(MODULE_VERSION_PATTERN, line)
if match:
curr_md.version = f"{match.group('operator')}{match.group('version')}" if match.group('operator') else match.group('version')
except (UnicodeDecodeError, FileNotFoundError) as e:
logging.warning(f"Skipping {os.path.join(path, root, file_name)} because of {e}")
data = load_or_die_quietly(file_path, parsing_errors)
if not data:
continue

loaded_files_cache[file_path] = data
if "module" not in data:
continue
for module in data["module"]:
for module_name, module_data in module.items():
md = ModuleDownload(os.path.dirname(file_path))
md.module_name = module_name
md.module_link = module_data.get("source", [None])[0]
md.version = module_data.get("version", [None])[0]
if md.module_link:
md.address = f"{md.module_link}:{md.version}" if md.version else md.module_link
modules_found.append(md)
return modules_found


def should_download(path: str | None) -> bool:

return path is not None and not (path.startswith('./') or path.startswith('../') or path.startswith('/'))


@@ -127,13 +100,16 @@ def load_tf_modules(
should_download_module: Callable[[str | None], bool] = should_download,
run_parallel: bool = False,
modules_to_load: List[ModuleDownload] | None = None,
stop_on_failure: bool = False
stop_on_failure: bool = False,
loaded_files_cache: dict[str, Any] | None = None,
parsing_errors: dict[str, Exception] | None = None,
excluded_paths: List[str] | None = None,
) -> None:
module_loader_registry.root_dir = path
if not modules_to_load and env_vars_config.CHECKOV_EXPERIMENTAL_TERRAFORM_MANAGED_MODULES:
modules_to_load = find_tf_managed_modules(path)
if not modules_to_load:
modules_to_load = find_modules(path)
modules_to_load = find_modules(path, loaded_files_cache=loaded_files_cache, parsing_errors=parsing_errors, excluded_paths=excluded_paths)

# To avoid duplicate work, we need to get the distinct module sources
distinct_modules = list({m.address: m for m in modules_to_load}.values())
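find_modules now parses each candidate .tf file through load_or_die_quietly and walks the resulting HCL payload instead of regex-matching lines, which is also what lets it fill the shared loaded_files_cache and record parsing_errors per file. A rough sketch of the payload shape the new loop indexes into; the literal dict is illustrative, but the list-wrapped values match the .get("source", [None])[0] access above:

# illustrative hcl2-style payload for a file declaring a single module block
data = {
    "module": [
        {
            "example_vm": {
                "source": ["terraform-aws-modules/vpc/aws"],
                "version": ["3.14.0"],
            }
        }
    ]
}

# mirrors the extraction performed inside the new find_modules loop
for module in data.get("module", []):
    for module_name, module_data in module.items():
        source = module_data.get("source", [None])[0]
        version = module_data.get("version", [None])[0]
        address = f"{source}:{version}" if version else source
        print(module_name, address)  # example_vm terraform-aws-modules/vpc/aws:3.14.0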
71 changes: 71 additions & 0 deletions checkov/terraform/parser_utils.py
@@ -0,0 +1,71 @@
from __future__ import annotations

import json
import logging
import os
import platform
import threading
from pathlib import Path
from typing import Any, cast, Optional, TextIO, Type

import hcl2

from checkov.common.util.env_vars_config import env_vars_config
from checkov.common.util.stopit import ThreadingTimeout, SignalTimeout
from checkov.common.util.stopit.utils import BaseTimeout
from checkov.terraform import validate_malformed_definitions, clean_bad_definitions
from checkov.terraform.modules.module_utils import _Hcl2Payload


def load_or_die_quietly(
file: str | Path | os.DirEntry[str], parsing_errors: dict[str, Exception], clean_definitions: bool = True
) -> Optional[_Hcl2Payload]:
"""
Load JSON or HCL, depending on filename.
:return: None if the file can't be loaded
"""
file_path = os.fspath(file)
file_name = os.path.basename(file_path)

if file_name.endswith(".tfvars"):
clean_definitions = False

try:
logging.debug(f"Parsing {file_path}")

with open(file_path, "r", encoding="utf-8-sig") as f:
if file_name.endswith(".json"):
return cast("_Hcl2Payload", json.load(f))
else:
raw_data = __parse_with_timeout(f)
non_malformed_definitions = validate_malformed_definitions(raw_data)
if clean_definitions:
return clean_bad_definitions(non_malformed_definitions)
else:
return non_malformed_definitions
except Exception as e:
logging.debug(f"failed while parsing file {file_path}", exc_info=True)
parsing_errors[file_path] = e
return None


# if we are not running in a thread, run the hcl2.load function with a timeout, to prevent from getting stuck in parsing.
def __parse_with_timeout(f: TextIO) -> dict[str, list[dict[str, Any]]]:
# setting up timeout class
timeout_class: Optional[Type[BaseTimeout]] = None
if platform.system() == "Windows":
timeout_class = ThreadingTimeout
elif threading.current_thread() is threading.main_thread():
timeout_class = SignalTimeout

# if we're not running on the main thread, don't use timeout
parsing_timeout = env_vars_config.HCL_PARSE_TIMEOUT_SEC or 0
if not timeout_class or not parsing_timeout:
return hcl2.load(f)

with timeout_class(parsing_timeout) as to_ctx_mgr:
raw_data = hcl2.load(f)
if to_ctx_mgr.state == to_ctx_mgr.TIMED_OUT:
logging.debug(f"reached timeout when parsing file {f} using hcl2")
raise Exception(f"file took more than {parsing_timeout} seconds to parse")
return raw_data
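
The new checkov/terraform/parser_utils.py above simply hosts load_or_die_quietly and its private timeout helper, which the tf_parser.py diff below removes, presumably so module_finder can import them without a circular dependency on tf_parser. A short usage sketch; the file path is hypothetical:

from checkov.terraform.parser_utils import load_or_die_quietly

parsing_errors: dict[str, Exception] = {}
definition = load_or_die_quietly("main.tf", parsing_errors)  # hypothetical path

if definition is None:
    # parse failures are recorded per file path instead of being raised
    for path, err in parsing_errors.items():
        print(f"could not parse {path}: {err}")

HCL parsing goes through __parse_with_timeout, which only enforces HCL_PARSE_TIMEOUT_SEC via SignalTimeout on the main thread or ThreadingTimeout on Windows; off the main thread on other platforms it falls back to a plain hcl2.load.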
75 changes: 6 additions & 69 deletions checkov/terraform/tf_parser.py
@@ -1,38 +1,29 @@
from __future__ import annotations

import json
import logging
import os
import platform
import threading
from collections import defaultdict
from pathlib import Path
from typing import Optional, Dict, Mapping, Set, Tuple, Callable, Any, List, cast, TYPE_CHECKING, overload, TextIO, Type

import hcl2
from typing import Optional, Dict, Mapping, Set, Tuple, Callable, Any, List, cast, TYPE_CHECKING, overload

from checkov.common.parallelizer.parallel_runner import parallel_runner
from checkov.common.runners.base_runner import filter_ignored_paths, IGNORE_HIDDEN_DIRECTORY_ENV
from checkov.common.util.consts import DEFAULT_EXTERNAL_MODULES_DIR, RESOLVED_MODULE_ENTRY_NAME
from checkov.common.util.data_structures_utils import pickle_deepcopy
from checkov.common.util.deep_merge import pickle_deep_merge
from checkov.common.util.env_vars_config import env_vars_config
from checkov.common.util.stopit import ThreadingTimeout, SignalTimeout
from checkov.common.util.stopit.utils import BaseTimeout
from checkov.common.util.type_forcers import force_list
from checkov.common.variables.context import EvaluationContext
from checkov.terraform import validate_malformed_definitions, clean_bad_definitions
from checkov.terraform.graph_builder.graph_components.block_types import BlockType
from checkov.terraform.graph_builder.graph_components.module import Module
from checkov.terraform.module_loading.content import ModuleContent
from checkov.terraform.module_loading.module_finder import load_tf_modules
from checkov.terraform.module_loading.registry import module_loader_registry as default_ml_registry, \
ModuleLoaderRegistry
from checkov.terraform.module_loading.module_finder import load_tf_modules
from checkov.common.util.parser_utils import is_acceptable_module_param
from checkov.terraform.modules.module_utils import safe_index, \
remove_module_dependency_from_path, \
clean_parser_types, serialize_definitions, _Hcl2Payload
remove_module_dependency_from_path, clean_parser_types, serialize_definitions
from checkov.terraform.modules.module_objects import TFModule, TFDefinitionKey
from checkov.terraform.parser_utils import load_or_die_quietly


if TYPE_CHECKING:
from typing_extensions import TypeGuard
@@ -105,7 +96,7 @@ def parse_directory(
default_ml_registry.download_external_modules = download_external_modules
default_ml_registry.external_modules_folder_name = external_modules_download_path
default_ml_registry.module_content_cache = external_modules_content_cache if external_modules_content_cache else {}
load_tf_modules(directory)
load_tf_modules(directory, loaded_files_cache=self.loaded_files_map, parsing_errors=self.out_parsing_errors, excluded_paths=self.excluded_paths)
self._parse_directory(dir_filter=lambda d: self._check_process_dir(d), vars_files=vars_files)
self._update_resolved_modules()
return self.out_definitions
@@ -699,57 +690,3 @@ def get_tf_definition_object_from_module_dependency(
return TFDefinitionKey(path.file_path, TFModule(path=module_dependency.file_path, name=module_dependency_name))
return TFDefinitionKey(path.file_path, TFModule(path=module_dependency.file_path, name=module_dependency_name,
nested_tf_module=module_dependency.tf_source_modules))


def load_or_die_quietly(
file: str | Path | os.DirEntry[str], parsing_errors: dict[str, Exception], clean_definitions: bool = True
) -> Optional[_Hcl2Payload]:
"""
Load JSON or HCL, depending on filename.
:return: None if the file can't be loaded
"""
file_path = os.fspath(file)
file_name = os.path.basename(file_path)

if file_name.endswith('.tfvars'):
clean_definitions = False

try:
logging.debug(f"Parsing {file_path}")

with open(file_path, "r", encoding="utf-8-sig") as f:
if file_name.endswith(".json"):
return cast("_Hcl2Payload", json.load(f))
else:
raw_data = __parse_with_timeout(f)
non_malformed_definitions = validate_malformed_definitions(raw_data)
if clean_definitions:
return clean_bad_definitions(non_malformed_definitions)
else:
return non_malformed_definitions
except Exception as e:
logging.debug(f'failed while parsing file {file_path}', exc_info=True)
parsing_errors[file_path] = e
return None


# if we are not running in a thread, run the hcl2.load function with a timeout, to prevent from getting stuck in parsing.
def __parse_with_timeout(f: TextIO) -> dict[str, list[dict[str, Any]]]:
# setting up timeout class
timeout_class: Optional[Type[BaseTimeout]] = None
if platform.system() == 'Windows':
timeout_class = ThreadingTimeout
elif threading.current_thread() is threading.main_thread():
timeout_class = SignalTimeout

# if we're not running on the main thread, don't use timeout
parsing_timeout = env_vars_config.HCL_PARSE_TIMEOUT_SEC or 0
if not timeout_class or not parsing_timeout:
return hcl2.load(f)

with timeout_class(parsing_timeout) as to_ctx_mgr:
raw_data = hcl2.load(f)
if to_ctx_mgr.state == to_ctx_mgr.TIMED_OUT:
logging.debug(f"reached timeout when parsing file {f} using hcl2")
raise Exception(f"file took more than {parsing_timeout} seconds to parse")
return raw_data
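
Besides dropping the relocated helpers, the visible change in tf_parser.py is that parse_directory now threads its own loaded_files_map, out_parsing_errors and excluded_paths into load_tf_modules, so module discovery and the main parse share one cache of parsed files and one error map. A hedged sketch of calling the extended signature directly; the directory and exclusion pattern are made up:

from checkov.terraform.module_loading.module_finder import load_tf_modules

loaded_files_cache: dict = {}
parsing_errors: dict = {}

load_tf_modules(
    "/tmp/my-terraform-project",      # hypothetical project root
    loaded_files_cache=loaded_files_cache,
    parsing_errors=parsing_errors,
    excluded_paths=[r"\.terraform"],  # hypothetical exclusion pattern
)

# every .tf file parsed during module discovery is cached under its path,
# so a later parse pass can reuse it instead of re-reading the file
print(len(loaded_files_cache), "files pre-parsed")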
2 changes: 2 additions & 0 deletions tests/kustomize/test_runner_image_referencer.py
@@ -1,6 +1,7 @@
from __future__ import annotations

import os
import sys
from pathlib import Path
from unittest import mock

@@ -21,6 +22,7 @@


@pytest.mark.xfail(reason="This is probably connected to the OS + kustomize version")
@pytest.mark.skipif((3, 9) <= sys.version_info < (3, 11), reason="fails on python 3.9 and 3.10 due to path.resolve issues.")
@pytest.mark.skipif(os.name == "nt" or not kustomize_exists(), reason="kustomize not installed or Windows OS")
@pytest.mark.parametrize("allow_kustomize_file_edits, code_lines", [
(True, "18-34"),
14 changes: 14 additions & 0 deletions tests/terraform/module_loading/data/nested_modules/main.tf
@@ -0,0 +1,14 @@
module "example_vm" {
source = "terraform-aws-modules/vpc/aws"
version = "3.14.0"

name = "my-vpc"
cidr = "10.0.0.0/16"

source_image_reference = {
publisher = "Canonical"
offer = "UbuntuServer"
sku = "18.04-LTS"
version = "3.15.2"
}
}