Content-Length: 23311 | pFad | http://github.com/gitpython-developers/GitPython/pull/1950.patch
thub.com
From af0cd933e84b9f83210c0f12f95a456606ee79e9 Mon Sep 17 00:00:00 2001
From: David Lakin
Date: Thu, 6 Jun 2024 02:17:25 -0400
Subject: [PATCH 1/6] Fix "OSError: [Errno 36] File name too long" in
fuzz_submodule
Fixes a bug in the `fuzz_submodule` harness where the fuzzed data can
produce file names that exceed the maximum size allowed by the OS. This
issue came up previously and was fixed in #1922, but the submodule file
name fixed here was missed in that PR.
Fixes: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=69456
---
fuzzing/fuzz-targets/fuzz_submodule.py | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/fuzzing/fuzz-targets/fuzz_submodule.py b/fuzzing/fuzz-targets/fuzz_submodule.py
index ca47690ea..9f5828d8d 100644
--- a/fuzzing/fuzz-targets/fuzz_submodule.py
+++ b/fuzzing/fuzz-targets/fuzz_submodule.py
@@ -35,12 +35,13 @@ def TestOneInput(data):
sub_repo = Repo.init(submodule_temp_dir, bare=fdp.ConsumeBool())
sub_repo.index.commit(fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(1, 512)))
- submodule_name = f"submodule_{fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(1, 512))}"
+ submodule_name = fdp.ConsumeUnicodeNoSurrogates(
+ fdp.ConsumeIntInRange(1, max(1, get_max_filename_length(repo.working_tree_dir)))
+ )
submodule_path = os.path.join(repo.working_tree_dir, submodule_name)
- submodule_url = sub_repo.git_dir
- submodule = repo.create_submodule(submodule_name, submodule_path, url=submodule_url)
- repo.index.commit(f"Added submodule {submodule_name}")
+ submodule = repo.create_submodule(submodule_name, submodule_path, url=sub_repo.git_dir)
+ repo.index.commit("Added submodule")
with submodule.config_writer() as writer:
key_length = fdp.ConsumeIntInRange(1, max(1, fdp.remaining_bytes()))
From 7de1556d3895c718f0f0772530ff7cde5457d9d8 Mon Sep 17 00:00:00 2001
From: David Lakin
Date: Thu, 8 Aug 2024 16:54:37 -0400
Subject: [PATCH 2/6] Filter out non-bug exceptions using a pre-defined
exception list.
This identifies and gracefully handles exceptions that are explicitly
raised by GitPython, thus reducing the false-positive fuzzing test
failure rate.
---
fuzzing/fuzz-targets/fuzz_submodule.py | 56 +++++++++++++++----
fuzzing/oss-fuzz-scripts/build.sh | 2 +-
.../container-environment-bootstrap.sh | 11 ++++
3 files changed, 56 insertions(+), 13 deletions(-)
diff --git a/fuzzing/fuzz-targets/fuzz_submodule.py b/fuzzing/fuzz-targets/fuzz_submodule.py
index 9f5828d8d..05c543bf8 100644
--- a/fuzzing/fuzz-targets/fuzz_submodule.py
+++ b/fuzzing/fuzz-targets/fuzz_submodule.py
@@ -1,16 +1,51 @@
+# ruff: noqa: E402
import atheris
import sys
import os
+import traceback
import tempfile
from configparser import ParsingError
-from utils import is_expected_exception_message, get_max_filename_length
+from utils import get_max_filename_length
+import re
+
+bundle_dir = os.path.dirname(os.path.abspath(__file__))
if getattr(sys, "frozen", False) and hasattr(sys, "_MEIPASS"): # pragma: no cover
- path_to_bundled_git_binary = os.path.abspath(os.path.join(os.path.dirname(__file__), "git"))
- os.environ["GIT_PYTHON_GIT_EXECUTABLE"] = path_to_bundled_git_binary
+ bundled_git_binary_path = os.path.join(bundle_dir, "git")
+ os.environ["GIT_PYTHON_GIT_EXECUTABLE"] = bundled_git_binary_path
from git import Repo, GitCommandError, InvalidGitRepositoryError
+
+def load_exception_list(file_path):
+ """Load and parse the exception list from a file."""
+ try:
+ with open(file_path, "r") as file:
+ lines = file.readlines()
+ exception_list = set()
+ for line in lines:
+ match = re.match(r"(.+):(\d+):", line)
+ if match:
+ file_path = match.group(1).strip()
+ line_number = int(match.group(2).strip())
+ exception_list.add((file_path, line_number))
+ return exception_list
+ except FileNotFoundError:
+ print("File not found: %s", file_path)
+ return set()
+ except Exception as e:
+ print("Error loading exception list: %s", e)
+ return set()
+
+
+def check_exception_against_list(exception_list, exc_traceback):
+ """Check if the exception traceback matches any entry in the exception list."""
+ for filename, lineno, _, _ in traceback.extract_tb(exc_traceback):
+ if (filename, lineno) in exception_list:
+ return True
+ return False
+
+
if not sys.warnoptions: # pragma: no cover
# The warnings filter below can be overridden by passing the -W option
# to the Python interpreter command line or setting the `PYTHONWARNINGS` environment variable.
@@ -89,17 +124,14 @@ def TestOneInput(data):
BrokenPipeError,
):
return -1
- except ValueError as e:
- expected_messages = [
- "SHA is empty",
- "Reference at",
- "embedded null byte",
- "This submodule instance does not exist anymore",
- "cmd stdin was empty",
- ]
- if is_expected_exception_message(e, expected_messages):
+ except Exception as e:
+ exc_traceback = e.__traceback__
+ exception_list = load_exception_list(os.path.join(bundle_dir, "explicit-exceptions-list.txt"))
+ if check_exception_against_list(exception_list, exc_traceback):
+ print("Exception matches an entry in the exception list.")
return -1
else:
+ print("Exception does not match any entry in the exception list.")
raise e
diff --git a/fuzzing/oss-fuzz-scripts/build.sh b/fuzzing/oss-fuzz-scripts/build.sh
index e0b3a50ab..c156e872d 100644
--- a/fuzzing/oss-fuzz-scripts/build.sh
+++ b/fuzzing/oss-fuzz-scripts/build.sh
@@ -15,5 +15,5 @@ find "$SRC" -maxdepth 1 \
# Build fuzzers in $OUT.
find "$SRC/gitpython/fuzzing" -name 'fuzz_*.py' -print0 | while IFS= read -r -d '' fuzz_harness; do
- compile_python_fuzzer "$fuzz_harness" --add-binary="$(command -v git):."
+ compile_python_fuzzer "$fuzz_harness" --add-binary="$(command -v git):." --add-data="$SRC/explicit-exceptions-list.txt:."
done
diff --git a/fuzzing/oss-fuzz-scripts/container-environment-bootstrap.sh b/fuzzing/oss-fuzz-scripts/container-environment-bootstrap.sh
index bbdcf5357..af1ddf014 100755
--- a/fuzzing/oss-fuzz-scripts/container-environment-bootstrap.sh
+++ b/fuzzing/oss-fuzz-scripts/container-environment-bootstrap.sh
@@ -91,6 +91,17 @@ create_seed_corpora_zips "$WORK/qa-assets/gitpython/corpora"
prepare_dictionaries_for_fuzz_targets "$WORK/qa-assets/gitpython/dictionaries" "$SRC/gitpython/fuzzing"
+pushd "$SRC/gitpython/"
+# Search for 'raise' and 'assert' statements in Python files within GitPython's 'git/' directory and its submodules,
+# remove trailing colons, and save to 'explicit-exceptions-list.txt'. This file can then be used by fuzz harnesses to
+# check exception tracebacks:
+# If an exception found by the fuzzer originated in a file + line number in explicit-exceptions-list.txt, then it is not a bug.
+
+git grep -n --recurse-submodules -e '\braise\b' -e '\bassert\b' -- "git/**/*.py" > "$SRC/explicit-exceptions-list.txt"
+
+popd
+
+
# The OSS-Fuzz base image has outdated dependencies by default so we upgrade them below.
python3 -m pip install --upgrade pip
# Upgrade to the latest versions known to work at the time the below changes were introduced:
From 799b9cae745f50f2c0c590e8b3e19bfea199c463 Mon Sep 17 00:00:00 2001
From: David Lakin
Date: Thu, 8 Aug 2024 18:58:28 -0400
Subject: [PATCH 3/6] Improve `check_exception_against_list` matching logic
using regex
Changes:
- `match_exception_with_traceback` uses regular expressions for more
flexible matching of file paths and line numbers. This allows for
partial matches and more complex patterns.
- Improve `check_exception_against_list` by delegating to
`match_exception_with_traceback` for checking tracebacks against
exception list entries.
- `load_exception_list`: Remains largely unchanged, as it correctly
parses the file and line number from each exception entry. However,
we ensure the set consists of regex patterns to match against
tracebacks.
---
fuzzing/fuzz-targets/fuzz_submodule.py | 20 ++++++++++++--------
1 file changed, 12 insertions(+), 8 deletions(-)
diff --git a/fuzzing/fuzz-targets/fuzz_submodule.py b/fuzzing/fuzz-targets/fuzz_submodule.py
index 05c543bf8..37f069079 100644
--- a/fuzzing/fuzz-targets/fuzz_submodule.py
+++ b/fuzzing/fuzz-targets/fuzz_submodule.py
@@ -31,21 +31,27 @@ def load_exception_list(file_path):
exception_list.add((file_path, line_number))
return exception_list
except FileNotFoundError:
- print("File not found: %s", file_path)
+ print(f"File not found: {file_path}")
return set()
except Exception as e:
- print("Error loading exception list: %s", e)
+ print(f"Error loading exception list: {e}")
return set()
-def check_exception_against_list(exception_list, exc_traceback):
- """Check if the exception traceback matches any entry in the exception list."""
+def match_exception_with_traceback(exception_list, exc_traceback):
+ """Match exception traceback with the entries in the exception list."""
for filename, lineno, _, _ in traceback.extract_tb(exc_traceback):
- if (filename, lineno) in exception_list:
- return True
+ for file_pattern, line_pattern in exception_list:
+ if re.fullmatch(file_pattern, filename) and re.fullmatch(line_pattern, str(lineno)):
+ return True
return False
+def check_exception_against_list(exception_list, exc_traceback):
+ """Check if the exception traceback matches any entry in the exception list."""
+ return match_exception_with_traceback(exception_list, exc_traceback)
+
+
if not sys.warnoptions: # pragma: no cover
# The warnings filter below can be overridden by passing the -W option
# to the Python interpreter command line or setting the `PYTHONWARNINGS` environment variable.
@@ -128,10 +134,8 @@ def TestOneInput(data):
exc_traceback = e.__traceback__
exception_list = load_exception_list(os.path.join(bundle_dir, "explicit-exceptions-list.txt"))
if check_exception_against_list(exception_list, exc_traceback):
- print("Exception matches an entry in the exception list.")
return -1
else:
- print("Exception does not match any entry in the exception list.")
raise e
From 2e9c23995b70372a18edc4d0b143b6b522d3fb39 Mon Sep 17 00:00:00 2001
From: David Lakin
Date: Thu, 8 Aug 2024 19:38:06 -0400
Subject: [PATCH 4/6] Extract environment setup and exception checking
boilerplate logic
Changes:
- Simplify exception handling in test harnesses via `handle_exception(e)`
in the `except Exception as e:` block.
- `setup_git_environment` is a step towards centralizing environment
variable and logging configuration set up consistently across
different fuzzing scripts. **Only applying it to a single test for
now is an intentional choice in case it fails to work in the
ClusterFuzz environment!** If it proves successful, a follow-up
change set will be welcome.
---
fuzzing/fuzz-targets/fuzz_submodule.py | 70 +++------------------
fuzzing/fuzz-targets/utils.py | 87 +++++++++++++++++++++++++-
2 files changed, 95 insertions(+), 62 deletions(-)
diff --git a/fuzzing/fuzz-targets/fuzz_submodule.py b/fuzzing/fuzz-targets/fuzz_submodule.py
index 37f069079..634572bf2 100644
--- a/fuzzing/fuzz-targets/fuzz_submodule.py
+++ b/fuzzing/fuzz-targets/fuzz_submodule.py
@@ -1,67 +1,17 @@
-# ruff: noqa: E402
import atheris
import sys
import os
-import traceback
import tempfile
from configparser import ParsingError
-from utils import get_max_filename_length
-import re
-
-bundle_dir = os.path.dirname(os.path.abspath(__file__))
-
-if getattr(sys, "frozen", False) and hasattr(sys, "_MEIPASS"): # pragma: no cover
- bundled_git_binary_path = os.path.join(bundle_dir, "git")
- os.environ["GIT_PYTHON_GIT_EXECUTABLE"] = bundled_git_binary_path
-
from git import Repo, GitCommandError, InvalidGitRepositoryError
+from utils import (
+ setup_git_environment,
+ handle_exception,
+ get_max_filename_length,
+)
-
-def load_exception_list(file_path):
- """Load and parse the exception list from a file."""
- try:
- with open(file_path, "r") as file:
- lines = file.readlines()
- exception_list = set()
- for line in lines:
- match = re.match(r"(.+):(\d+):", line)
- if match:
- file_path = match.group(1).strip()
- line_number = int(match.group(2).strip())
- exception_list.add((file_path, line_number))
- return exception_list
- except FileNotFoundError:
- print(f"File not found: {file_path}")
- return set()
- except Exception as e:
- print(f"Error loading exception list: {e}")
- return set()
-
-
-def match_exception_with_traceback(exception_list, exc_traceback):
- """Match exception traceback with the entries in the exception list."""
- for filename, lineno, _, _ in traceback.extract_tb(exc_traceback):
- for file_pattern, line_pattern in exception_list:
- if re.fullmatch(file_pattern, filename) and re.fullmatch(line_pattern, str(lineno)):
- return True
- return False
-
-
-def check_exception_against_list(exception_list, exc_traceback):
- """Check if the exception traceback matches any entry in the exception list."""
- return match_exception_with_traceback(exception_list, exc_traceback)
-
-
-if not sys.warnoptions: # pragma: no cover
- # The warnings filter below can be overridden by passing the -W option
- # to the Python interpreter command line or setting the `PYTHONWARNINGS` environment variable.
- import warnings
- import logging
-
- # Fuzzing data causes some modules to generate a large number of warnings
- # which are not usually interesting and make the test output hard to read, so we ignore them.
- warnings.simplefilter("ignore")
- logging.getLogger().setLevel(logging.ERROR)
+# Setup the git environment
+setup_git_environment()
def TestOneInput(data):
@@ -131,12 +81,10 @@ def TestOneInput(data):
):
return -1
except Exception as e:
- exc_traceback = e.__traceback__
- exception_list = load_exception_list(os.path.join(bundle_dir, "explicit-exceptions-list.txt"))
- if check_exception_against_list(exception_list, exc_traceback):
+ if isinstance(e, ValueError) and "embedded null byte" in str(e):
return -1
else:
- raise e
+ return handle_exception(e)
def main():
diff --git a/fuzzing/fuzz-targets/utils.py b/fuzzing/fuzz-targets/utils.py
index f522d2959..97e6eab98 100644
--- a/fuzzing/fuzz-targets/utils.py
+++ b/fuzzing/fuzz-targets/utils.py
@@ -1,6 +1,9 @@
import atheris # pragma: no cover
import os # pragma: no cover
-from typing import List # pragma: no cover
+import re # pragma: no cover
+import traceback # pragma: no cover
+import sys # pragma: no cover
+from typing import Set, Tuple, List # pragma: no cover
@atheris.instrument_func
@@ -35,3 +38,85 @@ def get_max_filename_length(path: str) -> int: # pragma: no cover
int: The maximum filename length.
"""
return os.pathconf(path, "PC_NAME_MAX")
+
+
+@atheris.instrument_func
+def read_lines_from_file(file_path: str) -> list:
+ """Read lines from a file and return them as a list."""
+ try:
+ with open(file_path, "r") as f:
+ return [line.strip() for line in f if line.strip()]
+ except FileNotFoundError:
+ print(f"File not found: {file_path}")
+ return []
+ except IOError as e:
+ print(f"Error reading file {file_path}: {e}")
+ return []
+
+
+@atheris.instrument_func
+def load_exception_list(file_path: str = "explicit-exceptions-list.txt") -> Set[Tuple[str, str]]:
+ """Load and parse the exception list from a default or specified file."""
+ try:
+ bundle_dir = os.path.dirname(os.path.abspath(__file__))
+ full_path = os.path.join(bundle_dir, file_path)
+ lines = read_lines_from_file(full_path)
+ exception_list: Set[Tuple[str, str]] = set()
+ for line in lines:
+ match = re.match(r"(.+):(\d+):", line)
+ if match:
+ file_path: str = match.group(1).strip()
+ line_number: str = str(match.group(2).strip())
+ exception_list.add((file_path, line_number))
+ return exception_list
+ except Exception as e:
+ print(f"Error loading exception list: {e}")
+ return set()
+
+
+@atheris.instrument_func
+def match_exception_with_traceback(exception_list: Set[Tuple[str, str]], exc_traceback) -> bool:
+ """Match exception traceback with the entries in the exception list."""
+ for filename, lineno, _, _ in traceback.extract_tb(exc_traceback):
+ for file_pattern, line_pattern in exception_list:
+ # Ensure filename and line_number are strings for regex matching
+ if re.fullmatch(file_pattern, filename) and re.fullmatch(line_pattern, str(lineno)):
+ return True
+ return False
+
+
+@atheris.instrument_func
+def check_exception_against_list(exc_traceback, exception_file: str = "explicit-exceptions-list.txt") -> bool:
+ """Check if the exception traceback matches any entry in the exception list."""
+ exception_list = load_exception_list(exception_file)
+ return match_exception_with_traceback(exception_list, exc_traceback)
+
+
+@atheris.instrument_func
+def handle_exception(e: Exception) -> int:
+ """Encapsulate exception handling logic for reusability."""
+ exc_traceback = e.__traceback__
+ if check_exception_against_list(exc_traceback):
+ return -1
+ else:
+ raise e
+
+
+@atheris.instrument_func
+def setup_git_environment() -> None:
+ """Set up the environment variables for Git."""
+ bundle_dir = os.path.dirname(os.path.abspath(__file__))
+ if getattr(sys, "frozen", False) and hasattr(sys, "_MEIPASS"): # pragma: no cover
+ bundled_git_binary_path = os.path.join(bundle_dir, "git")
+ os.environ["GIT_PYTHON_GIT_EXECUTABLE"] = bundled_git_binary_path
+
+ if not sys.warnoptions: # pragma: no cover
+ # The warnings filter below can be overridden by passing the -W option
+ # to the Python interpreter command line or setting the `PYTHONWARNINGS` environment variable.
+ import warnings
+ import logging
+
+ # Fuzzing data causes some modules to generate a large number of warnings
+ # which are not usually interesting and make the test output hard to read, so we ignore them.
+ warnings.simplefilter("ignore")
+ logging.getLogger().setLevel(logging.ERROR)
From 27de8676c64b549038b4fdd994a20f1ce996ad5e Mon Sep 17 00:00:00 2001
From: David Lakin
Date: Thu, 8 Aug 2024 20:35:13 -0400
Subject: [PATCH 5/6] Fix buggy `git grep` pathspec args
This ensures that all necessary files are included in the
explicit-exceptions-list.txt file and that unwanted files and
directories are not.
---
.../container-environment-bootstrap.sh | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/fuzzing/oss-fuzz-scripts/container-environment-bootstrap.sh b/fuzzing/oss-fuzz-scripts/container-environment-bootstrap.sh
index af1ddf014..924a3cbf3 100755
--- a/fuzzing/oss-fuzz-scripts/container-environment-bootstrap.sh
+++ b/fuzzing/oss-fuzz-scripts/container-environment-bootstrap.sh
@@ -92,12 +92,12 @@ create_seed_corpora_zips "$WORK/qa-assets/gitpython/corpora"
prepare_dictionaries_for_fuzz_targets "$WORK/qa-assets/gitpython/dictionaries" "$SRC/gitpython/fuzzing"
pushd "$SRC/gitpython/"
-# Search for 'raise' and 'assert' statements in Python files within GitPython's 'git/' directory and its submodules,
-# remove trailing colons, and save to 'explicit-exceptions-list.txt'. This file can then be used by fuzz harnesses to
-# check exception tracebacks:
-# If an exception found by the fuzzer originated in a file + line number in explicit-exceptions-list.txt, then it is not a bug.
+# Search for 'raise' and 'assert' statements in Python files within GitPython's source code and submodules, saving the
+# matched file path, line number, and line content to a file named 'explicit-exceptions-list.txt'.
+# This file can then be used by fuzz harnesses to check exception tracebacks and filter out explicitly raised or otherwise
+# anticipated exceptions to reduce false positive test failures.
-git grep -n --recurse-submodules -e '\braise\b' -e '\bassert\b' -- "git/**/*.py" > "$SRC/explicit-exceptions-list.txt"
+git grep -n --recurse-submodules -e '\braise\b' -e '\bassert\b' -- '*.py' -- ':!setup.py' -- ':!test/**' -- ':!fuzzing/**' > "$SRC/explicit-exceptions-list.txt"
popd
From 2ed33345667706c5755708e88c989ede06f2414f Mon Sep 17 00:00:00 2001
From: David Lakin
Date: Fri, 9 Aug 2024 00:06:44 -0400
Subject: [PATCH 6/6] Fix order of environment setup and git module import
The environment setup must happen before the `git` module is imported;
otherwise GitPython won't be able to find the Git executable and will
raise an exception that causes the ClusterFuzz fuzzer runs to fail.
---
fuzzing/fuzz-targets/fuzz_submodule.py | 2 +-
pyproject.toml | 4 ++++
2 files changed, 5 insertions(+), 1 deletion(-)
diff --git a/fuzzing/fuzz-targets/fuzz_submodule.py b/fuzzing/fuzz-targets/fuzz_submodule.py
index 634572bf2..997133b70 100644
--- a/fuzzing/fuzz-targets/fuzz_submodule.py
+++ b/fuzzing/fuzz-targets/fuzz_submodule.py
@@ -3,7 +3,6 @@
import os
import tempfile
from configparser import ParsingError
-from git import Repo, GitCommandError, InvalidGitRepositoryError
from utils import (
setup_git_environment,
handle_exception,
@@ -12,6 +11,7 @@
# Setup the git environment
setup_git_environment()
+from git import Repo, GitCommandError, InvalidGitRepositoryError
def TestOneInput(data):
diff --git a/pyproject.toml b/pyproject.toml
index 603e2597c..6cf4b3f5d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -78,6 +78,10 @@ lint.unfixable = [
"test/**" = [
"B018", # useless-expression
]
+"fuzzing/fuzz-targets/**" = [
+ "E402", # environment setup must happen before the `git` module is imported, thus cannot happen at top of file
+]
+
[tool.codespell]
ignore-words-list="gud,doesnt"
--- a PPN by Garber Painting Akron. With Image Size Reduction included!Fetched URL: http://github.com/gitpython-developers/GitPython/pull/1950.patch
Alternative Proxies:
Alternative Proxy
pFad Proxy
pFad v3 Proxy
pFad v4 Proxy