From af0cd933e84b9f83210c0f12f95a456606ee79e9 Mon Sep 17 00:00:00 2001 From: David Lakin Date: Thu, 6 Jun 2024 02:17:25 -0400 Subject: [PATCH 1/6] Fix "OSError: [Errno 36] File name too long" in fuzz_submodule Fixes a bug in the `fuzz_submodule` harness where the fuzzed data can produce file names that exceed the maximum size allowed by the OS. This issue came up previously and was fixed in #1922, but the submodule file name fixed here was missed in that PR. Fixes: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=69456 --- fuzzing/fuzz-targets/fuzz_submodule.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fuzzing/fuzz-targets/fuzz_submodule.py b/fuzzing/fuzz-targets/fuzz_submodule.py index ca47690ea..9f5828d8d 100644 --- a/fuzzing/fuzz-targets/fuzz_submodule.py +++ b/fuzzing/fuzz-targets/fuzz_submodule.py @@ -35,12 +35,13 @@ def TestOneInput(data): sub_repo = Repo.init(submodule_temp_dir, bare=fdp.ConsumeBool()) sub_repo.index.commit(fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(1, 512))) - submodule_name = f"submodule_{fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(1, 512))}" + submodule_name = fdp.ConsumeUnicodeNoSurrogates( + fdp.ConsumeIntInRange(1, max(1, get_max_filename_length(repo.working_tree_dir))) + ) submodule_path = os.path.join(repo.working_tree_dir, submodule_name) - submodule_url = sub_repo.git_dir - submodule = repo.create_submodule(submodule_name, submodule_path, url=submodule_url) - repo.index.commit(f"Added submodule {submodule_name}") + submodule = repo.create_submodule(submodule_name, submodule_path, url=sub_repo.git_dir) + repo.index.commit("Added submodule") with submodule.config_writer() as writer: key_length = fdp.ConsumeIntInRange(1, max(1, fdp.remaining_bytes())) From 7de1556d3895c718f0f0772530ff7cde5457d9d8 Mon Sep 17 00:00:00 2001 From: David Lakin Date: Thu, 8 Aug 2024 16:54:37 -0400 Subject: [PATCH 2/6] Filter out non-bug exceptions using a pre-defined exception list. 
This reduces false positive test failures by identifying and gracefully handling exceptions that are explicitly raised by GitPython, thus reducing the false-positive fuzzing test failure rate. --- fuzzing/fuzz-targets/fuzz_submodule.py | 56 +++++++++++++++---- fuzzing/oss-fuzz-scripts/build.sh | 2 +- .../container-environment-bootstrap.sh | 11 ++++ 3 files changed, 56 insertions(+), 13 deletions(-) diff --git a/fuzzing/fuzz-targets/fuzz_submodule.py b/fuzzing/fuzz-targets/fuzz_submodule.py index 9f5828d8d..05c543bf8 100644 --- a/fuzzing/fuzz-targets/fuzz_submodule.py +++ b/fuzzing/fuzz-targets/fuzz_submodule.py @@ -1,16 +1,51 @@ +# ruff: noqa: E402 import atheris import sys import os +import traceback import tempfile from configparser import ParsingError -from utils import is_expected_exception_message, get_max_filename_length +from utils import get_max_filename_length +import re + +bundle_dir = os.path.dirname(os.path.abspath(__file__)) if getattr(sys, "frozen", False) and hasattr(sys, "_MEIPASS"): # pragma: no cover - path_to_bundled_git_binary = os.path.abspath(os.path.join(os.path.dirname(__file__), "git")) - os.environ["GIT_PYTHON_GIT_EXECUTABLE"] = path_to_bundled_git_binary + bundled_git_binary_path = os.path.join(bundle_dir, "git") + os.environ["GIT_PYTHON_GIT_EXECUTABLE"] = bundled_git_binary_path from git import Repo, GitCommandError, InvalidGitRepositoryError + +def load_exception_list(file_path): + """Load and parse the exception list from a file.""" + try: + with open(file_path, "r") as file: + lines = file.readlines() + exception_list = set() + for line in lines: + match = re.match(r"(.+):(\d+):", line) + if match: + file_path = match.group(1).strip() + line_number = int(match.group(2).strip()) + exception_list.add((file_path, line_number)) + return exception_list + except FileNotFoundError: + print("File not found: %s", file_path) + return set() + except Exception as e: + print("Error loading exception list: %s", e) + return set() + + +def 
check_exception_against_list(exception_list, exc_traceback): + """Check if the exception traceback matches any entry in the exception list.""" + for filename, lineno, _, _ in traceback.extract_tb(exc_traceback): + if (filename, lineno) in exception_list: + return True + return False + + if not sys.warnoptions: # pragma: no cover # The warnings filter below can be overridden by passing the -W option # to the Python interpreter command line or setting the `PYTHONWARNINGS` environment variable. @@ -89,17 +124,14 @@ def TestOneInput(data): BrokenPipeError, ): return -1 - except ValueError as e: - expected_messages = [ - "SHA is empty", - "Reference at", - "embedded null byte", - "This submodule instance does not exist anymore", - "cmd stdin was empty", - ] - if is_expected_exception_message(e, expected_messages): + except Exception as e: + exc_traceback = e.__traceback__ + exception_list = load_exception_list(os.path.join(bundle_dir, "explicit-exceptions-list.txt")) + if check_exception_against_list(exception_list, exc_traceback): + print("Exception matches an entry in the exception list.") return -1 else: + print("Exception does not match any entry in the exception list.") raise e diff --git a/fuzzing/oss-fuzz-scripts/build.sh b/fuzzing/oss-fuzz-scripts/build.sh index e0b3a50ab..c156e872d 100644 --- a/fuzzing/oss-fuzz-scripts/build.sh +++ b/fuzzing/oss-fuzz-scripts/build.sh @@ -15,5 +15,5 @@ find "$SRC" -maxdepth 1 \ # Build fuzzers in $OUT. find "$SRC/gitpython/fuzzing" -name 'fuzz_*.py' -print0 | while IFS= read -r -d '' fuzz_harness; do - compile_python_fuzzer "$fuzz_harness" --add-binary="$(command -v git):." + compile_python_fuzzer "$fuzz_harness" --add-binary="$(command -v git):." --add-data="$SRC/explicit-exceptions-list.txt:." 
done diff --git a/fuzzing/oss-fuzz-scripts/container-environment-bootstrap.sh b/fuzzing/oss-fuzz-scripts/container-environment-bootstrap.sh index bbdcf5357..af1ddf014 100755 --- a/fuzzing/oss-fuzz-scripts/container-environment-bootstrap.sh +++ b/fuzzing/oss-fuzz-scripts/container-environment-bootstrap.sh @@ -91,6 +91,17 @@ create_seed_corpora_zips "$WORK/qa-assets/gitpython/corpora" prepare_dictionaries_for_fuzz_targets "$WORK/qa-assets/gitpython/dictionaries" "$SRC/gitpython/fuzzing" +pushd "$SRC/gitpython/" +# Search for 'raise' and 'assert' statements in Python files within GitPython's 'git/' directory and its submodules, +# remove trailing colons, and save to 'explicit-exceptions-list.txt'. This file can then be used by fuzz harnesses to +# check exception tracebacks: +# If an exception found by the fuzzer originated in a file + line number in explicit-exceptions-list.txt, then it is not a bug. + +git grep -n --recurse-submodules -e '\braise\b' -e '\bassert\b' -- "git/**/*.py" > "$SRC/explicit-exceptions-list.txt" + +popd + + # The OSS-Fuzz base image has outdated dependencies by default so we upgrade them below. python3 -m pip install --upgrade pip # Upgrade to the latest versions known to work at the time the below changes were introduced: From 799b9cae745f50f2c0c590e8b3e19bfea199c463 Mon Sep 17 00:00:00 2001 From: David Lakin Date: Thu, 8 Aug 2024 18:58:28 -0400 Subject: [PATCH 3/6] Improve `check_exception_against_list` matching logic using regex Changes: - `match_exception_with_traceback` uses regular expressions for more flexible matching of file paths and line numbers. This allows for partial matches and more complex patterns. - Improve `check_exception_against_list` by delegating to `match_exception_with_traceback` for checking tracebacks against exception list entries. - `load_exception_list`: Remains largely unchanged, as it correctly parses the file and line number from each exception entry. 
However, we ensure the set consists of regex patterns to match against tracebacks. --- fuzzing/fuzz-targets/fuzz_submodule.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/fuzzing/fuzz-targets/fuzz_submodule.py b/fuzzing/fuzz-targets/fuzz_submodule.py index 05c543bf8..37f069079 100644 --- a/fuzzing/fuzz-targets/fuzz_submodule.py +++ b/fuzzing/fuzz-targets/fuzz_submodule.py @@ -31,21 +31,27 @@ def load_exception_list(file_path): exception_list.add((file_path, line_number)) return exception_list except FileNotFoundError: - print("File not found: %s", file_path) + print(f"File not found: {file_path}") return set() except Exception as e: - print("Error loading exception list: %s", e) + print(f"Error loading exception list: {e}") return set() -def check_exception_against_list(exception_list, exc_traceback): - """Check if the exception traceback matches any entry in the exception list.""" +def match_exception_with_traceback(exception_list, exc_traceback): + """Match exception traceback with the entries in the exception list.""" for filename, lineno, _, _ in traceback.extract_tb(exc_traceback): - if (filename, lineno) in exception_list: - return True + for file_pattern, line_pattern in exception_list: + if re.fullmatch(file_pattern, filename) and re.fullmatch(line_pattern, str(lineno)): + return True return False +def check_exception_against_list(exception_list, exc_traceback): + """Check if the exception traceback matches any entry in the exception list.""" + return match_exception_with_traceback(exception_list, exc_traceback) + + if not sys.warnoptions: # pragma: no cover # The warnings filter below can be overridden by passing the -W option # to the Python interpreter command line or setting the `PYTHONWARNINGS` environment variable. 
@@ -128,10 +134,8 @@ def TestOneInput(data): exc_traceback = e.__traceback__ exception_list = load_exception_list(os.path.join(bundle_dir, "explicit-exceptions-list.txt")) if check_exception_against_list(exception_list, exc_traceback): - print("Exception matches an entry in the exception list.") return -1 else: - print("Exception does not match any entry in the exception list.") raise e From 2e9c23995b70372a18edc4d0b143b6b522d3fb39 Mon Sep 17 00:00:00 2001 From: David Lakin Date: Thu, 8 Aug 2024 19:38:06 -0400 Subject: [PATCH 4/6] Extract environment setup and exception checking boilerplate logic Changes: - Simplify exception handling in test harnesses via `handle_exception(e)` in the `except Exception as e:` block. - `setup_git_environment` is a step towards centralizing environment variable and logging configuration set up consistently across different fuzzing scripts. **Only applying it to a single test for now is an intentional choice in case it fails to work in the ClusterFuzz environment!** If it proves successful, a follow-up change set will be welcome. 
--- fuzzing/fuzz-targets/fuzz_submodule.py | 70 +++------------------ fuzzing/fuzz-targets/utils.py | 87 +++++++++++++++++++++++++- 2 files changed, 95 insertions(+), 62 deletions(-) diff --git a/fuzzing/fuzz-targets/fuzz_submodule.py b/fuzzing/fuzz-targets/fuzz_submodule.py index 37f069079..634572bf2 100644 --- a/fuzzing/fuzz-targets/fuzz_submodule.py +++ b/fuzzing/fuzz-targets/fuzz_submodule.py @@ -1,67 +1,17 @@ -# ruff: noqa: E402 import atheris import sys import os -import traceback import tempfile from configparser import ParsingError -from utils import get_max_filename_length -import re - -bundle_dir = os.path.dirname(os.path.abspath(__file__)) - -if getattr(sys, "frozen", False) and hasattr(sys, "_MEIPASS"): # pragma: no cover - bundled_git_binary_path = os.path.join(bundle_dir, "git") - os.environ["GIT_PYTHON_GIT_EXECUTABLE"] = bundled_git_binary_path - from git import Repo, GitCommandError, InvalidGitRepositoryError +from utils import ( + setup_git_environment, + handle_exception, + get_max_filename_length, +) - -def load_exception_list(file_path): - """Load and parse the exception list from a file.""" - try: - with open(file_path, "r") as file: - lines = file.readlines() - exception_list = set() - for line in lines: - match = re.match(r"(.+):(\d+):", line) - if match: - file_path = match.group(1).strip() - line_number = int(match.group(2).strip()) - exception_list.add((file_path, line_number)) - return exception_list - except FileNotFoundError: - print(f"File not found: {file_path}") - return set() - except Exception as e: - print(f"Error loading exception list: {e}") - return set() - - -def match_exception_with_traceback(exception_list, exc_traceback): - """Match exception traceback with the entries in the exception list.""" - for filename, lineno, _, _ in traceback.extract_tb(exc_traceback): - for file_pattern, line_pattern in exception_list: - if re.fullmatch(file_pattern, filename) and re.fullmatch(line_pattern, str(lineno)): - return True - return 
False - - -def check_exception_against_list(exception_list, exc_traceback): - """Check if the exception traceback matches any entry in the exception list.""" - return match_exception_with_traceback(exception_list, exc_traceback) - - -if not sys.warnoptions: # pragma: no cover - # The warnings filter below can be overridden by passing the -W option - # to the Python interpreter command line or setting the `PYTHONWARNINGS` environment variable. - import warnings - import logging - - # Fuzzing data causes some modules to generate a large number of warnings - # which are not usually interesting and make the test output hard to read, so we ignore them. - warnings.simplefilter("ignore") - logging.getLogger().setLevel(logging.ERROR) +# Setup the git environment +setup_git_environment() def TestOneInput(data): @@ -131,12 +81,10 @@ def TestOneInput(data): ): return -1 except Exception as e: - exc_traceback = e.__traceback__ - exception_list = load_exception_list(os.path.join(bundle_dir, "explicit-exceptions-list.txt")) - if check_exception_against_list(exception_list, exc_traceback): + if isinstance(e, ValueError) and "embedded null byte" in str(e): return -1 else: - raise e + return handle_exception(e) def main(): diff --git a/fuzzing/fuzz-targets/utils.py b/fuzzing/fuzz-targets/utils.py index f522d2959..97e6eab98 100644 --- a/fuzzing/fuzz-targets/utils.py +++ b/fuzzing/fuzz-targets/utils.py @@ -1,6 +1,9 @@ import atheris # pragma: no cover import os # pragma: no cover -from typing import List # pragma: no cover +import re # pragma: no cover +import traceback # pragma: no cover +import sys # pragma: no cover +from typing import Set, Tuple, List # pragma: no cover @atheris.instrument_func @@ -35,3 +38,85 @@ def get_max_filename_length(path: str) -> int: # pragma: no cover int: The maximum filename length. 
""" return os.pathconf(path, "PC_NAME_MAX") + + +@atheris.instrument_func +def read_lines_from_file(file_path: str) -> list: + """Read lines from a file and return them as a list.""" + try: + with open(file_path, "r") as f: + return [line.strip() for line in f if line.strip()] + except FileNotFoundError: + print(f"File not found: {file_path}") + return [] + except IOError as e: + print(f"Error reading file {file_path}: {e}") + return [] + + +@atheris.instrument_func +def load_exception_list(file_path: str = "explicit-exceptions-list.txt") -> Set[Tuple[str, str]]: + """Load and parse the exception list from a default or specified file.""" + try: + bundle_dir = os.path.dirname(os.path.abspath(__file__)) + full_path = os.path.join(bundle_dir, file_path) + lines = read_lines_from_file(full_path) + exception_list: Set[Tuple[str, str]] = set() + for line in lines: + match = re.match(r"(.+):(\d+):", line) + if match: + file_path: str = match.group(1).strip() + line_number: str = str(match.group(2).strip()) + exception_list.add((file_path, line_number)) + return exception_list + except Exception as e: + print(f"Error loading exception list: {e}") + return set() + + +@atheris.instrument_func +def match_exception_with_traceback(exception_list: Set[Tuple[str, str]], exc_traceback) -> bool: + """Match exception traceback with the entries in the exception list.""" + for filename, lineno, _, _ in traceback.extract_tb(exc_traceback): + for file_pattern, line_pattern in exception_list: + # Ensure filename and line_number are strings for regex matching + if re.fullmatch(file_pattern, filename) and re.fullmatch(line_pattern, str(lineno)): + return True + return False + + +@atheris.instrument_func +def check_exception_against_list(exc_traceback, exception_file: str = "explicit-exceptions-list.txt") -> bool: + """Check if the exception traceback matches any entry in the exception list.""" + exception_list = load_exception_list(exception_file) + return 
match_exception_with_traceback(exception_list, exc_traceback) + + +@atheris.instrument_func +def handle_exception(e: Exception) -> int: + """Encapsulate exception handling logic for reusability.""" + exc_traceback = e.__traceback__ + if check_exception_against_list(exc_traceback): + return -1 + else: + raise e + + +@atheris.instrument_func +def setup_git_environment() -> None: + """Set up the environment variables for Git.""" + bundle_dir = os.path.dirname(os.path.abspath(__file__)) + if getattr(sys, "frozen", False) and hasattr(sys, "_MEIPASS"): # pragma: no cover + bundled_git_binary_path = os.path.join(bundle_dir, "git") + os.environ["GIT_PYTHON_GIT_EXECUTABLE"] = bundled_git_binary_path + + if not sys.warnoptions: # pragma: no cover + # The warnings filter below can be overridden by passing the -W option + # to the Python interpreter command line or setting the `PYTHONWARNINGS` environment variable. + import warnings + import logging + + # Fuzzing data causes some modules to generate a large number of warnings + # which are not usually interesting and make the test output hard to read, so we ignore them. + warnings.simplefilter("ignore") + logging.getLogger().setLevel(logging.ERROR) From 27de8676c64b549038b4fdd994a20f1ce996ad5e Mon Sep 17 00:00:00 2001 From: David Lakin Date: Thu, 8 Aug 2024 20:35:13 -0400 Subject: [PATCH 5/6] Fix buggy `git grep` pathspec args To ensure that all necessary files are included in the explicit-exceptions-list.txt file and unwanted files and directories are not. 
--- .../container-environment-bootstrap.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fuzzing/oss-fuzz-scripts/container-environment-bootstrap.sh b/fuzzing/oss-fuzz-scripts/container-environment-bootstrap.sh index af1ddf014..924a3cbf3 100755 --- a/fuzzing/oss-fuzz-scripts/container-environment-bootstrap.sh +++ b/fuzzing/oss-fuzz-scripts/container-environment-bootstrap.sh @@ -92,12 +92,12 @@ create_seed_corpora_zips "$WORK/qa-assets/gitpython/corpora" prepare_dictionaries_for_fuzz_targets "$WORK/qa-assets/gitpython/dictionaries" "$SRC/gitpython/fuzzing" pushd "$SRC/gitpython/" -# Search for 'raise' and 'assert' statements in Python files within GitPython's 'git/' directory and its submodules, -# remove trailing colons, and save to 'explicit-exceptions-list.txt'. This file can then be used by fuzz harnesses to -# check exception tracebacks: -# If an exception found by the fuzzer originated in a file + line number in explicit-exceptions-list.txt, then it is not a bug. +# Search for 'raise' and 'assert' statements in Python files within GitPython's source code and submodules, saving the +# matched file path, line number, and line content to a file named 'explicit-exceptions-list.txt'. +# This file can then be used by fuzz harnesses to check exception tracebacks and filter out explicitly raised or otherwise +# anticipated exceptions to reduce false positive test failures. 
-git grep -n --recurse-submodules -e '\braise\b' -e '\bassert\b' -- "git/**/*.py" > "$SRC/explicit-exceptions-list.txt" +git grep -n --recurse-submodules -e '\braise\b' -e '\bassert\b' -- '*.py' -- ':!setup.py' -- ':!test/**' -- ':!fuzzing/**' > "$SRC/explicit-exceptions-list.txt" popd From 2ed33345667706c5755708e88c989ede06f2414f Mon Sep 17 00:00:00 2001 From: David Lakin Date: Fri, 9 Aug 2024 00:06:44 -0400 Subject: [PATCH 6/6] Fix order of environment setup and git module import The environment setup must happen before the `git` module is imported, otherwise GitPython won't be able to find the Git executable and raise an exception that causes the ClusterFuzz fuzzer runs to fail. --- fuzzing/fuzz-targets/fuzz_submodule.py | 2 +- pyproject.toml | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/fuzzing/fuzz-targets/fuzz_submodule.py b/fuzzing/fuzz-targets/fuzz_submodule.py index 634572bf2..997133b70 100644 --- a/fuzzing/fuzz-targets/fuzz_submodule.py +++ b/fuzzing/fuzz-targets/fuzz_submodule.py @@ -3,7 +3,6 @@ import os import tempfile from configparser import ParsingError -from git import Repo, GitCommandError, InvalidGitRepositoryError from utils import ( setup_git_environment, handle_exception, @@ -12,6 +11,7 @@ # Setup the git environment setup_git_environment() +from git import Repo, GitCommandError, InvalidGitRepositoryError def TestOneInput(data): diff --git a/pyproject.toml b/pyproject.toml index 603e2597c..6cf4b3f5d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -78,6 +78,10 @@ lint.unfixable = [ "test/**" = [ "B018", # useless-expression ] +"fuzzing/fuzz-targets/**" = [ + "E402", # environment setup must happen before the `git` module is imported, thus cannot happen at top of file +] + [tool.codespell] ignore-words-list="gud,doesnt" pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy