diff --git a/fuzzing/README.md b/fuzzing/README.md index 9d02bf72f..286f529eb 100644 --- a/fuzzing/README.md +++ b/fuzzing/README.md @@ -76,25 +76,6 @@ Contains Python files for each fuzz test. reason, fuzz tests should gracefully handle anticipated exception cases with a `try`/`except` block to avoid false positives that halt the fuzzing engine. -### Dictionaries (`dictionaries/`) - -Provides hints to the fuzzing engine about inputs that might trigger unique code paths. Each fuzz target may have a -corresponding `.dict` file. For information about dictionary syntax, refer to -the [LibFuzzer documentation on the subject](https://llvm.org/docs/LibFuzzer.html#dictionaries). - -**Things to Know**: - -- OSS-Fuzz loads dictionary files per fuzz target if one exists with the same name, all others are ignored. -- Most entries in the dictionary files found here are escaped hex or Unicode values that were recommended by the fuzzing - engine after previous runs. -- A default set of dictionary entries are created for all fuzz targets as part of the build process, regardless of an - existing file here. -- Development or updates to dictionaries should reflect the varied formats and edge cases relevant to the - functionalities under test. -- Example dictionaries (some of which are used to build the default dictionaries mentioned above) can be found here: - - [AFL++ dictionary repository](https://github.com/AFLplusplus/AFLplusplus/tree/stable/dictionaries#readme) - - [Google/fuzzing dictionary repository](https://github.com/google/fuzzing/tree/master/dictionaries) - ### OSS-Fuzz Scripts (`oss-fuzz-scripts/`) Includes scripts for building and integrating fuzz targets with OSS-Fuzz: diff --git a/fuzzing/dictionaries/fuzz_blob.dict b/fuzzing/dictionaries/fuzz_blob.dict deleted file mode 100644 index 7f123f830..000000000 --- a/fuzzing/dictionaries/fuzz_blob.dict +++ /dev/null @@ -1 +0,0 @@ -"\\377\\377\\377\\377\\377\\377\\377\\377" diff --git a/fuzzing/dictionaries/fuzz_config.dict b/fuzzing/dictionaries/fuzz_config.dict deleted file mode 100644 index b545ddfc8..000000000 --- a/fuzzing/dictionaries/fuzz_config.dict +++ /dev/null @@ -1,56 +0,0 @@ -"\\004\\000\\000\\000\\000\\000\\000\\000" -"\\006\\000\\000\\000\\000\\000\\000\\000" -"_validate_value_" -"\\000\\000\\000\\000\\000\\000\\000\\000" -"rem" -"__eq__" -"\\001\\000\\000\\000" -"__abstrac" -"_mutating_methods_" -"items" -"\\0021\\"" -"\\001\\000" -"\\000\\000\\000\\000" -"DEFAULT" -"getfloat" -"\\004\\000\\000\\000\\000\\000\\000\\000" -"news" -"\\037\\000\\000\\000\\000\\000\\000\\000" -"\\001\\000\\000\\000\\000\\000\\000\\037" -"\\000\\000\\000\\000\\000\\000\\000\\014" -"list" -"\\376\\377\\377\\377\\377\\377\\377\\377" -"items_all" -"\\004\\000\\000\\000\\000\\000\\000\\000" -"\\377\\377\\377\\377\\377\\377\\377\\014" -"\\001\\000\\000\\000" -"_acqui" -"\\000\\000\\000\\000\\000\\000\\000\\000" -"__ne__" -"__exit__" -"__modu" -"uucp" -"__str__" -"\\001\\000\\000\\000" -"\\017\\000\\000\\000\\000\\000\\000\\000" -"_has_incl" -"update" -"\\377\\377\\377\\377\\377\\377\\377\\023" -"setdef" -"setdefaul" -"\\000\\000\\000\\000" -"\\001\\000\\000\\000" -"\\001\\000" -"\\022\\000\\000\\000\\000\\000\\000\\000" -"_value_to_string" -"__abstr" -"\\001\\000\\000\\000\\000\\000\\000\\000" -"\\000\\000\\000\\000\\000\\000\\000\\022" -"\\377\\377\\377\\377" -"\\004\\000\\000\\000\\000\\000\\000\\000" -"\\000\\000\\000\\000\\000\\000\\000\\000" -"\\000\\000\\000\\000\\000\\000\\000\\037" -"\\001\\000\\000\\000\\000\\000\\000\\013" -"_OPT_TM" -"__name__" -"_get_conv" diff --git a/fuzzing/oss-fuzz-scripts/build.sh b/fuzzing/oss-fuzz-scripts/build.sh index 58c9adb5a..e0b3a50ab 100644 --- a/fuzzing/oss-fuzz-scripts/build.sh +++ b/fuzzing/oss-fuzz-scripts/build.sh @@ -7,34 +7,13 @@ set -euo pipefail python3 -m pip install . -# Directory to look in for dictionaries, options files, and seed corpora: -SEED_DATA_DIR="$SRC/seed_data" - -find "$SEED_DATA_DIR" \( -name '*_seed_corpus.zip' -o -name '*.options' -o -name '*.dict' \) \ - ! \( -name '__base.*' \) -exec printf 'Copying: %s\n' {} \; \ +find "$SRC" -maxdepth 1 \ + \( -name '*_seed_corpus.zip' -o -name '*.options' -o -name '*.dict' \) \ + -exec printf '[%s] Copying: %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" {} \; \ -exec chmod a-x {} \; \ -exec cp {} "$OUT" \; # Build fuzzers in $OUT. find "$SRC/gitpython/fuzzing" -name 'fuzz_*.py' -print0 | while IFS= read -r -d '' fuzz_harness; do compile_python_fuzzer "$fuzz_harness" --add-binary="$(command -v git):." - - common_base_dictionary_filename="$SEED_DATA_DIR/__base.dict" - if [[ -r "$common_base_dictionary_filename" ]]; then - # Strip the `.py` extension from the filename and replace it with `.dict`. - fuzz_harness_dictionary_filename="$(basename "$fuzz_harness" .py).dict" - output_file="$OUT/$fuzz_harness_dictionary_filename" - - printf 'Appending %s to %s\n' "$common_base_dictionary_filename" "$output_file" - if [[ -s "$output_file" ]]; then - # If a dictionary file for this fuzzer already exists and is not empty, - # we append a new line to the end of it before appending any new entries. - # - # LibFuzzer will happily ignore multiple empty lines in a dictionary but fail with an error - # if any single line has incorrect syntax (e.g., if we accidentally add two entries to the same line.) - # See docs for valid syntax: https://llvm.org/docs/LibFuzzer.html#id32 - echo >>"$output_file" - fi - cat "$common_base_dictionary_filename" >>"$output_file" - fi done diff --git a/fuzzing/oss-fuzz-scripts/container-environment-bootstrap.sh b/fuzzing/oss-fuzz-scripts/container-environment-bootstrap.sh index 76ec97c7f..bbdcf5357 100755 --- a/fuzzing/oss-fuzz-scripts/container-environment-bootstrap.sh +++ b/fuzzing/oss-fuzz-scripts/container-environment-bootstrap.sh @@ -9,23 +9,20 @@ set -euo pipefail # Prerequisites # ################# -for cmd in python3 git wget rsync; do +for cmd in python3 git wget zip; do command -v "$cmd" >/dev/null 2>&1 || { printf '[%s] Required command %s not found, exiting.\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$cmd" >&2 exit 1 } done -SEED_DATA_DIR="$SRC/seed_data" -mkdir -p "$SEED_DATA_DIR" - ############# # Functions # ############# download_and_concatenate_common_dictionaries() { # Assign the first argument as the target file where all contents will be concatenated - target_file="$1" + local target_file="$1" # Shift the arguments so the first argument (target_file path) is removed # and only URLs are left for the loop below. @@ -38,22 +35,61 @@ download_and_concatenate_common_dictionaries() { done } -fetch_seed_corpora() { - # Seed corpus zip files are hosted in a separate repository to avoid additional bloat in this repo. - git clone --depth 1 https://github.com/gitpython-developers/qa-assets.git qa-assets && - rsync -avc qa-assets/gitpython/corpra/ "$SEED_DATA_DIR/" && - rm -rf qa-assets # Clean up the cloned repo to keep the Docker image as slim as possible. +create_seed_corpora_zips() { + local seed_corpora_dir="$1" + local output_zip + for dir in "$seed_corpora_dir"/*; do + if [ -d "$dir" ] && [ -n "$dir" ]; then + output_zip="$SRC/$(basename "$dir")_seed_corpus.zip" + printf '[%s] Zipping the contents of %s into %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$dir" "$output_zip" + zip -jur "$output_zip" "$dir"/* + fi + done +} + +prepare_dictionaries_for_fuzz_targets() { + local dictionaries_dir="$1" + local fuzz_targets_dir="$2" + local common_base_dictionary_filename="$WORK/__base.dict" + + printf '[%s] Copying .dict files from %s to %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$dictionaries_dir" "$SRC/" + cp -v "$dictionaries_dir"/*.dict "$SRC/" + + download_and_concatenate_common_dictionaries "$common_base_dictionary_filename" \ + "https://raw.githubusercontent.com/google/fuzzing/master/dictionaries/utf8.dict" \ + "https://raw.githubusercontent.com/google/fuzzing/master/dictionaries/url.dict" + + find "$fuzz_targets_dir" -name 'fuzz_*.py' -print0 | while IFS= read -r -d '' fuzz_harness; do + if [[ -r "$common_base_dictionary_filename" ]]; then + # Strip the `.py` extension from the filename and replace it with `.dict`. + fuzz_harness_dictionary_filename="$(basename "$fuzz_harness" .py).dict" + local output_file="$SRC/$fuzz_harness_dictionary_filename" + + printf '[%s] Appending %s to %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$common_base_dictionary_filename" "$output_file" + if [[ -s "$output_file" ]]; then + # If a dictionary file for this fuzzer already exists and is not empty, + # we append a new line to the end of it before appending any new entries. + # + # LibFuzzer will happily ignore multiple empty lines in a dictionary but fail with an error + # if any single line has incorrect syntax (e.g., if we accidentally add two entries to the same line.) + # See docs for valid syntax: https://llvm.org/docs/LibFuzzer.html#id32 + echo >>"$output_file" + fi + cat "$common_base_dictionary_filename" >>"$output_file" + fi + done } ######################## # Main execution logic # ######################## +# Seed corpora and dictionaries are hosted in a separate repository to avoid additional bloat in this repo. +# We clone into the $WORK directory because OSS-Fuzz cleans it up after building the image, keeping the image small. +git clone --depth 1 https://github.com/gitpython-developers/qa-assets.git "$WORK/qa-assets" -fetch_seed_corpora +create_seed_corpora_zips "$WORK/qa-assets/gitpython/corpora" -download_and_concatenate_common_dictionaries "$SEED_DATA_DIR/__base.dict" \ - "https://raw.githubusercontent.com/google/fuzzing/master/dictionaries/utf8.dict" \ - "https://raw.githubusercontent.com/google/fuzzing/master/dictionaries/url.dict" +prepare_dictionaries_for_fuzz_targets "$WORK/qa-assets/gitpython/dictionaries" "$SRC/gitpython/fuzzing" # The OSS-Fuzz base image has outdated dependencies by default so we upgrade them below. python3 -m pip install --upgrade pip
Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.
Alternative Proxies: