diff --git a/fuzzing/fuzz-targets/fuzz_submodule.py b/fuzzing/fuzz-targets/fuzz_submodule.py index ca47690ea..997133b70 100644 --- a/fuzzing/fuzz-targets/fuzz_submodule.py +++ b/fuzzing/fuzz-targets/fuzz_submodule.py @@ -3,25 +3,16 @@ import os import tempfile from configparser import ParsingError -from utils import is_expected_exception_message, get_max_filename_length - -if getattr(sys, "frozen", False) and hasattr(sys, "_MEIPASS"): # pragma: no cover - path_to_bundled_git_binary = os.path.abspath(os.path.join(os.path.dirname(__file__), "git")) - os.environ["GIT_PYTHON_GIT_EXECUTABLE"] = path_to_bundled_git_binary - +from utils import ( + setup_git_environment, + handle_exception, + get_max_filename_length, +) + +# Setup the git environment +setup_git_environment() from git import Repo, GitCommandError, InvalidGitRepositoryError -if not sys.warnoptions: # pragma: no cover - # The warnings filter below can be overridden by passing the -W option - # to the Python interpreter command line or setting the `PYTHONWARNINGS` environment variable. - import warnings - import logging - - # Fuzzing data causes some modules to generate a large number of warnings - # which are not usually interesting and make the test output hard to read, so we ignore them. - warnings.simplefilter("ignore") - logging.getLogger().setLevel(logging.ERROR) - def TestOneInput(data): fdp = atheris.FuzzedDataProvider(data) @@ -35,12 +26,13 @@ def TestOneInput(data): sub_repo = Repo.init(submodule_temp_dir, bare=fdp.ConsumeBool()) sub_repo.index.commit(fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(1, 512))) - submodule_name = f"submodule_{fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(1, 512))}" + submodule_name = fdp.ConsumeUnicodeNoSurrogates( + fdp.ConsumeIntInRange(1, max(1, get_max_filename_length(repo.working_tree_dir))) + ) submodule_path = os.path.join(repo.working_tree_dir, submodule_name) - submodule_url = sub_repo.git_dir - submodule = repo.create_submodule(submodule_name, submodule_path, url=submodule_url) - repo.index.commit(f"Added submodule {submodule_name}") + submodule = repo.create_submodule(submodule_name, submodule_path, url=sub_repo.git_dir) + repo.index.commit("Added submodule") with submodule.config_writer() as writer: key_length = fdp.ConsumeIntInRange(1, max(1, fdp.remaining_bytes())) @@ -88,18 +80,11 @@ def TestOneInput(data): BrokenPipeError, ): return -1 - except ValueError as e: - expected_messages = [ - "SHA is empty", - "Reference at", - "embedded null byte", - "This submodule instance does not exist anymore", - "cmd stdin was empty", - ] - if is_expected_exception_message(e, expected_messages): + except Exception as e: + if isinstance(e, ValueError) and "embedded null byte" in str(e): return -1 else: - raise e + return handle_exception(e) def main(): diff --git a/fuzzing/fuzz-targets/utils.py b/fuzzing/fuzz-targets/utils.py index f522d2959..97e6eab98 100644 --- a/fuzzing/fuzz-targets/utils.py +++ b/fuzzing/fuzz-targets/utils.py @@ -1,6 +1,9 @@ import atheris # pragma: no cover import os # pragma: no cover -from typing import List # pragma: no cover +import re # pragma: no cover +import traceback # pragma: no cover +import sys # pragma: no cover +from typing import Set, Tuple, List # pragma: no cover @atheris.instrument_func @@ -35,3 +38,85 @@ def get_max_filename_length(path: str) -> int: # pragma: no cover int: The maximum filename length. """ return os.pathconf(path, "PC_NAME_MAX") + + +@atheris.instrument_func +def read_lines_from_file(file_path: str) -> list: + """Read lines from a file and return them as a list.""" + try: + with open(file_path, "r") as f: + return [line.strip() for line in f if line.strip()] + except FileNotFoundError: + print(f"File not found: {file_path}") + return [] + except IOError as e: + print(f"Error reading file {file_path}: {e}") + return [] + + +@atheris.instrument_func +def load_exception_list(file_path: str = "explicit-exceptions-list.txt") -> Set[Tuple[str, str]]: + """Load and parse the exception list from a default or specified file.""" + try: + bundle_dir = os.path.dirname(os.path.abspath(__file__)) + full_path = os.path.join(bundle_dir, file_path) + lines = read_lines_from_file(full_path) + exception_list: Set[Tuple[str, str]] = set() + for line in lines: + match = re.match(r"(.+):(\d+):", line) + if match: + file_path: str = match.group(1).strip() + line_number: str = str(match.group(2).strip()) + exception_list.add((file_path, line_number)) + return exception_list + except Exception as e: + print(f"Error loading exception list: {e}") + return set() + + +@atheris.instrument_func +def match_exception_with_traceback(exception_list: Set[Tuple[str, str]], exc_traceback) -> bool: + """Match exception traceback with the entries in the exception list.""" + for filename, lineno, _, _ in traceback.extract_tb(exc_traceback): + for file_pattern, line_pattern in exception_list: + # Ensure filename and line_number are strings for regex matching + if re.fullmatch(file_pattern, filename) and re.fullmatch(line_pattern, str(lineno)): + return True + return False + + +@atheris.instrument_func +def check_exception_against_list(exc_traceback, exception_file: str = "explicit-exceptions-list.txt") -> bool: + """Check if the exception traceback matches any entry in the exception list.""" + exception_list = load_exception_list(exception_file) + return match_exception_with_traceback(exception_list, exc_traceback) + + +@atheris.instrument_func +def handle_exception(e: Exception) -> int: + """Encapsulate exception handling logic for reusability.""" + exc_traceback = e.__traceback__ + if check_exception_against_list(exc_traceback): + return -1 + else: + raise e + + +@atheris.instrument_func +def setup_git_environment() -> None: + """Set up the environment variables for Git.""" + bundle_dir = os.path.dirname(os.path.abspath(__file__)) + if getattr(sys, "frozen", False) and hasattr(sys, "_MEIPASS"): # pragma: no cover + bundled_git_binary_path = os.path.join(bundle_dir, "git") + os.environ["GIT_PYTHON_GIT_EXECUTABLE"] = bundled_git_binary_path + + if not sys.warnoptions: # pragma: no cover + # The warnings filter below can be overridden by passing the -W option + # to the Python interpreter command line or setting the `PYTHONWARNINGS` environment variable. + import warnings + import logging + + # Fuzzing data causes some modules to generate a large number of warnings + # which are not usually interesting and make the test output hard to read, so we ignore them. + warnings.simplefilter("ignore") + logging.getLogger().setLevel(logging.ERROR) diff --git a/fuzzing/oss-fuzz-scripts/build.sh b/fuzzing/oss-fuzz-scripts/build.sh index e0b3a50ab..c156e872d 100644 --- a/fuzzing/oss-fuzz-scripts/build.sh +++ b/fuzzing/oss-fuzz-scripts/build.sh @@ -15,5 +15,5 @@ find "$SRC" -maxdepth 1 \ # Build fuzzers in $OUT. find "$SRC/gitpython/fuzzing" -name 'fuzz_*.py' -print0 | while IFS= read -r -d '' fuzz_harness; do - compile_python_fuzzer "$fuzz_harness" --add-binary="$(command -v git):." + compile_python_fuzzer "$fuzz_harness" --add-binary="$(command -v git):." --add-data="$SRC/explicit-exceptions-list.txt:." done diff --git a/fuzzing/oss-fuzz-scripts/container-environment-bootstrap.sh b/fuzzing/oss-fuzz-scripts/container-environment-bootstrap.sh index bbdcf5357..924a3cbf3 100755 --- a/fuzzing/oss-fuzz-scripts/container-environment-bootstrap.sh +++ b/fuzzing/oss-fuzz-scripts/container-environment-bootstrap.sh @@ -91,6 +91,17 @@ create_seed_corpora_zips "$WORK/qa-assets/gitpython/corpora" prepare_dictionaries_for_fuzz_targets "$WORK/qa-assets/gitpython/dictionaries" "$SRC/gitpython/fuzzing" +pushd "$SRC/gitpython/" +# Search for 'raise' and 'assert' statements in Python files within GitPython's source code and submodules, saving the +# matched file path, line number, and line content to a file named 'explicit-exceptions-list.txt'. +# This file can then be used by fuzz harnesses to check exception tracebacks and filter out explicitly raised or otherwise +# anticipated exceptions to reduce false positive test failures. + +git grep -n --recurse-submodules -e '\braise\b' -e '\bassert\b' -- '*.py' -- ':!setup.py' -- ':!test/**' -- ':!fuzzing/**' > "$SRC/explicit-exceptions-list.txt" + +popd + + # The OSS-Fuzz base image has outdated dependencies by default so we upgrade them below. python3 -m pip install --upgrade pip # Upgrade to the latest versions known to work at the time the below changes were introduced: diff --git a/pyproject.toml b/pyproject.toml index 603e2597c..6cf4b3f5d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -78,6 +78,10 @@ lint.unfixable = [ "test/**" = [ "B018", # useless-expression ] +"fuzzing/fuzz-targets/**" = [ + "E402", # environment setup must happen before the `git` module is imported, thus cannot happen at top of file +] + [tool.codespell] ignore-words-list="gud,doesnt"
Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.
Alternative Proxies: