diff --git a/.github/workflows/bandit.yml b/.github/workflows/bandit.yml deleted file mode 100644 index 80b383665..000000000 --- a/.github/workflows/bandit.yml +++ /dev/null @@ -1,35 +0,0 @@ -# Bandit static analysis (for Python code) -name: Bandit -on: - push: - branches-ignore: - - 'dependabot/**' - pull_request: - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - -permissions: - contents: read - -jobs: - bandit: - name: Bandit - strategy: - matrix: - os: [ubuntu-latest, windows-latest] - runs-on: ${{matrix.os}} - - steps: - - name: Checkout repository - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - with: - fetch-depth: 0 - - - name: Install Bandit - run: python3 -m pip install bandit - - # Run Bandit recursively, but omit _deps directory (with 3rd party code) - - name: Run Bandit - run: python3 -m bandit -r . -x '/_deps/' diff --git a/.github/workflows/coverity.yml b/.github/workflows/coverity.yml index f6fe2ad26..dfa03fc4f 100644 --- a/.github/workflows/coverity.yml +++ b/.github/workflows/coverity.yml @@ -18,6 +18,8 @@ permissions: jobs: coverity: name: Coverity + # run only on upstream; forks do not know Username/Password + if: github.repository == 'oneapi-src/unified-memory-framework' runs-on: ubuntu-latest steps: - name: Checkout repository diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 4fdd89766..3d9bfc29b 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -16,7 +16,7 @@ permissions: jobs: build: name: Build docs - runs-on: ubuntu-latest + runs-on: ${{ github.repository_owner == 'oneapi-src' && 'intel-ubuntu-22.04' || 'ubuntu-latest' }} steps: - name: Checkout repository @@ -29,8 +29,16 @@ jobs: sudo apt-get update sudo apt-get install -y doxygen - - name: Install pip requirements - run: python3 -m pip install -r third_party/requirements.txt + # Latest distros do not allow global pip installation + - name: 
Install Python requirements in venv + run: | + python3 -m venv .venv + . .venv/bin/activate + echo "$PATH" >> $GITHUB_PATH + python3 -m pip install -r third_party/requirements.txt + + - name: Setup PATH for python + run: echo "$HOME/.local/bin" >> $GITHUB_PATH - name: Build the documentation working-directory: scripts @@ -53,7 +61,7 @@ jobs: name: github-pages url: ${{ steps.deployment.outputs.page_url }} - runs-on: ubuntu-latest + runs-on: ${{ github.repository_owner == 'oneapi-src' && 'intel-ubuntu-22.04' || 'ubuntu-latest' }} steps: - name: Deploy the documentation to GitHub Pages diff --git a/.github/workflows/gpu.yml b/.github/workflows/gpu.yml deleted file mode 100644 index 3024b9f7e..000000000 --- a/.github/workflows/gpu.yml +++ /dev/null @@ -1,102 +0,0 @@ -# This workflow builds and tests providers using GPU memory. It requires -# "level_zero" labeled self-hosted runners installed on systems with the -# appropriate GPU and drivers. -name: GPU - -on: [workflow_call] - -permissions: - contents: read - -env: - BUILD_DIR : "${{github.workspace}}/build" - INSTL_DIR : "${{github.workspace}}/../install-dir" - -jobs: - gpu: - name: Build - env: - BUILD_TYPE: Release - VCPKG_PATH: "${{github.workspace}}/../../../../vcpkg/packages/hwloc_x64-windows;${{github.workspace}}/../../../../vcpkg/packages/tbb_x64-windows;${{github.workspace}}/../../../../vcpkg/packages/jemalloc_x64-windows" - # run only on upstream; forks will not have the HW - if: github.repository == 'oneapi-src/unified-memory-framework' - strategy: - matrix: - shared_library: ['ON', 'OFF'] - os: ['Ubuntu', 'Windows'] - include: - - os: 'Ubuntu' - compiler: {c: gcc, cxx: g++} - number_of_processors: '$(nproc)' - - os: 'Windows' - compiler: {c: cl, cxx: cl} - number_of_processors: '$Env:NUMBER_OF_PROCESSORS' - - runs-on: ["DSS-LEVEL_ZERO", "DSS-${{matrix.os}}"] - steps: - - name: Checkout - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - with: - fetch-depth: 0 - - - name: Get 
information about platform - if: matrix.os == 'Ubuntu' - run: .github/scripts/get_system_info.sh - - - name: Configure build for Win - if: matrix.os == 'Windows' - run: > - cmake - -DCMAKE_PREFIX_PATH="${{env.VCPKG_PATH}}" - -B ${{env.BUILD_DIR}} - -DCMAKE_INSTALL_PREFIX="${{env.INSTL_DIR}}" - -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} - -DCMAKE_C_COMPILER=${{matrix.compiler.c}} - -DCMAKE_CXX_COMPILER=${{matrix.compiler.cxx}} - -DUMF_BUILD_SHARED_LIBRARY=${{matrix.shared_library}} - -DUMF_BUILD_BENCHMARKS=ON - -DUMF_BUILD_TESTS=ON - -DUMF_BUILD_GPU_TESTS=ON - -DUMF_BUILD_GPU_EXAMPLES=ON - -DUMF_FORMAT_CODE_STYLE=OFF - -DUMF_DEVELOPER_MODE=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON - -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON - -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON - -DUMF_TESTS_FAIL_ON_SKIP=ON - - - name: Configure build for Ubuntu - if: matrix.os == 'Ubuntu' - run: > - cmake - -B ${{env.BUILD_DIR}} - -DCMAKE_INSTALL_PREFIX="${{env.INSTL_DIR}}" - -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} - -DCMAKE_C_COMPILER=${{matrix.compiler.c}} - -DCMAKE_CXX_COMPILER=${{matrix.compiler.cxx}} - -DUMF_BUILD_SHARED_LIBRARY=${{matrix.shared_library}} - -DUMF_BUILD_BENCHMARKS=ON - -DUMF_BUILD_TESTS=ON - -DUMF_BUILD_GPU_TESTS=ON - -DUMF_BUILD_GPU_EXAMPLES=ON - -DUMF_FORMAT_CODE_STYLE=OFF - -DUMF_DEVELOPER_MODE=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON - -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON - -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON - -DUMF_TESTS_FAIL_ON_SKIP=ON - - - name: Build UMF - run: cmake --build ${{env.BUILD_DIR}} --config ${{env.BUILD_TYPE}} -j ${{matrix.number_of_processors}} - - - name: Run tests - working-directory: ${{env.BUILD_DIR}} - run: ctest -C ${{env.BUILD_TYPE}} --output-on-failure --test-dir test - - - name: Run examples - working-directory: ${{env.BUILD_DIR}} - run: ctest --output-on-failure --test-dir examples -C ${{env.BUILD_TYPE}} - - - name: Run benchmarks - working-directory: ${{env.BUILD_DIR}} - run: ctest --output-on-failure --test-dir benchmark -C ${{env.BUILD_TYPE}} 
--exclude-regex umf-bench-multithreaded diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index f2bf8f08f..281ae0061 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -79,6 +79,7 @@ jobs: -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON -DUMF_BUILD_LEVEL_ZERO_PROVIDER=OFF + -DUMF_BUILD_CUDA_PROVIDER=OFF -DUMF_USE_VALGRIND=1 -DUMF_TESTS_FAIL_ON_SKIP=ON @@ -87,3 +88,109 @@ jobs: - name: Run tests under valgrind run: ${{github.workspace}}/test/test_valgrind.sh ${{github.workspace}} ${{github.workspace}}/build ${{matrix.tool}} + + # TODO fix #843 + #icx: + # name: ICX + # env: + # VCPKG_PATH: "${{github.workspace}}/build/vcpkg/packages/hwloc_x64-windows;${{github.workspace}}/build/vcpkg/packages/tbb_x64-windows;${{github.workspace}}/build/vcpkg/packages/jemalloc_x64-windows" + # BUILD_DIR : "${{github.workspace}}/build" + # strategy: + # matrix: + # os: ['windows-2019', 'windows-2022'] + # build_type: [Debug] + # compiler: [{c: icx, cxx: icx}] + # shared_library: ['ON', 'OFF'] + # include: + # - os: windows-2022 + # build_type: Release + # compiler: {c: icx, cxx: icx} + # shared_library: 'ON' + # + # runs-on: ${{matrix.os}} + # + # steps: + # - name: Checkout + # uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + # with: + # fetch-depth: 0 + # + # - name: Initialize vcpkg + # uses: lukka/run-vcpkg@5e0cab206a5ea620130caf672fce3e4a6b5666a1 # v11.5 + # with: + # vcpkgGitCommitId: 3dd44b931481d7a8e9ba412621fa810232b66289 + # vcpkgDirectory: ${{env.BUILD_DIR}}/vcpkg + # vcpkgJsonGlob: '**/vcpkg.json' + # + # - name: Install dependencies + # run: vcpkg install + # + # - name: Install Ninja + # uses: seanmiddleditch/gha-setup-ninja@96bed6edff20d1dd61ecff9b75cc519d516e6401 # v5 + # + # - name: Download icx compiler + # env: + # # Link source: https://www.intel.com/content/www/us/en/developer/tools/oneapi/dpc-compiler-download.html + # CMPLR_LINK: 
"https://registrationcenter-download.intel.com/akdlm/IRC_NAS/15a35578-2f9a-4f39-804b-3906e0a5f8fc/w_dpcpp-cpp-compiler_p_2024.2.1.83_offline.exe" + # run: | + # Invoke-WebRequest -Uri "${{ env.CMPLR_LINK }}" -OutFile compiler_install.exe + # + # - name: Install icx compiler + # shell: cmd + # run: | + # start /b /wait .\compiler_install.exe -s -x -f extracted --log extract.log + # extracted\bootstrapper.exe -s --action install --eula=accept -p=NEED_VS2017_INTEGRATION=0 ^ + # -p=NEED_VS2019_INTEGRATION=0 -p=NEED_VS2022_INTEGRATION=0 --log-dir=. + # + # - name: Configure build + # shell: cmd + # run: | + # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" + # call "C:\Program Files (x86)\Intel\oneAPI\setvars-vcvarsall.bat" + # cmake ^ + # -B ${{env.BUILD_DIR}} ^ + # -DCMAKE_PREFIX_PATH="${{env.VCPKG_PATH}}" ^ + # -DCMAKE_C_COMPILER=${{matrix.compiler.c}} ^ + # -DCMAKE_CXX_COMPILER=${{matrix.compiler.cxx}} ^ + # -G Ninja ^ + # -DUMF_BUILD_SHARED_LIBRARY=${{matrix.shared_library}} ^ + # -DUMF_FORMAT_CODE_STYLE=OFF ^ + # -DUMF_DEVELOPER_MODE=ON ^ + # -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON ^ + # -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON ^ + # -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON ^ + # -DUMF_BUILD_CUDA_PROVIDER=ON ^ + # -DUMF_TESTS_FAIL_ON_SKIP=ON + # + # - name: Build UMF + # shell: cmd + # run: | + # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" + # call "C:\Program Files (x86)\Intel\oneAPI\setvars-vcvarsall.bat" + # cmake --build ${{env.BUILD_DIR}} --config ${{matrix.build_type}} -j %NUMBER_OF_PROCESSORS% + # + # - name: Run tests + # shell: cmd + # working-directory: ${{env.BUILD_DIR}} + # run: | + # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" + # call "C:\Program Files (x86)\Intel\oneAPI\setvars-vcvarsall.bat" + # ctest -C ${{matrix.build_type}} --output-on-failure --test-dir test + + L0: + uses: ./.github/workflows/reusable_gpu.yml + with: + name: "LEVEL_ZERO" + CUDA: + uses: ./.github/workflows/reusable_gpu.yml + with: + name: "CUDA" + + # Full 
execution of QEMU tests + QEMU: + uses: ./.github/workflows/reusable_qemu.yml + with: + short_run: false + # Beside the 2 LTS Ubuntu, we also test this on the latest Ubuntu - to be updated + # every 6 months, so we verify the latest version of packages (compilers, etc.). + os: "['ubuntu-22.04', 'ubuntu-24.04', 'ubuntu-24.10']" diff --git a/.github/workflows/performance.yml b/.github/workflows/performance.yml new file mode 100644 index 000000000..6057df5f0 --- /dev/null +++ b/.github/workflows/performance.yml @@ -0,0 +1,115 @@ +name: Performance + +on: + # Can be triggered via manual "dispatch" (from workflow view in GitHub Actions tab) + workflow_dispatch: + inputs: + pr_no: + description: PR number (if 0, it'll run on the main) + type: number + required: true + +permissions: + contents: read + pull-requests: write + +env: + BUILD_DIR : "${{github.workspace}}/build" + +jobs: + perf-l0: + name: Build UMF and run performance tests + runs-on: "L0_PERF" + + steps: + # Workspace on self-hosted runners is not cleaned automatically. + # We have to delete the files created outside of using actions. 
+ - name: Cleanup self-hosted workspace + if: always() + run: | + ls -la ./ + rm -rf ./* || true + + - name: Add comment to PR + uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 + if: ${{ always() && inputs.pr_no != 0 }} + with: + script: | + const pr_no = '${{ inputs.pr_no }}'; + const provider = 'LEVEL_ZERO'; + const url = '${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}'; + const body = `Performance workflow for ${provider}_PROVIDER run:\n${url}`; + + github.rest.issues.createComment({ + issue_number: pr_no, + owner: context.repo.owner, + repo: context.repo.repo, + body: body + }) + + - name: Checkout UMF + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + + - name: Get information about platform + run: .github/scripts/get_system_info.sh + + # We need to fetch special ref for proper PR's merge commit. Note, this ref may be absent if the PR is already merged. + - name: Fetch PR's merge commit + if: ${{ inputs.pr_no != 0 }} + working-directory: ${{github.workspace}} + env: + PR_NO: ${{ inputs.pr_no }} + run: | + git fetch -- https://github.com/${{github.repository}} +refs/pull/${PR_NO}/*:refs/remotes/origin/pr/${PR_NO}/* + git checkout origin/pr/${PR_NO}/merge + git rev-parse origin/pr/${PR_NO}/merge + + - name: Configure build + run: > + cmake + -B ${{env.BUILD_DIR}} + -DCMAKE_BUILD_TYPE=Release + -DUMF_BUILD_SHARED_LIBRARY=ON + -DUMF_BUILD_BENCHMARKS=ON + -DUMF_BUILD_BENCHMARKS_MT=ON + -DUMF_BUILD_TESTS=OFF + -DUMF_FORMAT_CODE_STYLE=OFF + -DUMF_DEVELOPER_MODE=OFF + -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON + -DUMF_BUILD_CUDA_PROVIDER=ON + -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON + -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON + + - name: Build + run: cmake --build ${{env.BUILD_DIR}} -j $(nproc) + + - name: Run benchmarks + working-directory: ${{env.BUILD_DIR}} + id: benchmarks + run: numactl -N 1 ctest -V --test-dir benchmark -C Release + + - name: Add comment to PR + uses: 
actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 + if: ${{ always() && inputs.pr_no != 0 }} + with: + script: | + let markdown = "" + try { + const fs = require('fs'); + markdown = fs.readFileSync('umf_perf_results.md', 'utf8'); + } catch(err) { + } + + const pr_no = '${{ inputs.pr_no }}'; + const provider = 'LEVEL_ZERO'; + const url = '${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}'; + const test_status = '${{ steps.benchmarks.outcome }}'; + const job_status = '${{ job.status }}'; + const body = `Performance workflow for ${provider}_PROVIDER run:\n${url}\nJob status: ${job_status}. Test status: ${test_status}.\n ${markdown}`; + + github.rest.issues.createComment({ + issue_number: pr_no, + owner: context.repo.owner, + repo: context.repo.repo, + body: body + }) diff --git a/.github/workflows/pr_push.yml b/.github/workflows/pr_push.yml index c35664a56..9623b69f1 100644 --- a/.github/workflows/pr_push.yml +++ b/.github/workflows/pr_push.yml @@ -1,4 +1,5 @@ -# Checks required for a PR to merge. This workflow mostly call other workflows. +# Run checks required for a PR to merge and verify if post-merge commit is valid. +# This workflow only call other workflows. 
name: PR/push on: @@ -15,91 +16,75 @@ permissions: contents: read jobs: - CodeStyle: - name: Coding style - runs-on: ubuntu-latest - - steps: - - name: Checkout repository - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - with: - fetch-depth: 0 - - - name: Install apt packages - run: | - sudo apt-get update - sudo apt-get install -y black cmake clang-format-15 cmake-format libhwloc-dev - - - name: Configure CMake - run: > - cmake - -B ${{github.workspace}}/build - -DUMF_FORMAT_CODE_STYLE=ON - -DUMF_BUILD_TESTS=OFF - -DUMF_BUILD_LEVEL_ZERO_PROVIDER=OFF - -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=OFF - - - name: Check C/C++ formatting - run: cmake --build build --target clang-format-check - - - name: Check CMake formatting - run: | - cmake --build build --target cmake-format-apply - git diff --exit-code - - - name: Check Python formatting - run: cmake --build build --target black-format-check - + CodeChecks: + uses: ./.github/workflows/reusable_checks.yml DocsBuild: - name: Build docs - runs-on: ubuntu-latest - - steps: - - name: Checkout repository - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - with: - fetch-depth: 0 - - - name: Install doxygen - run: | - sudo apt-get update - sudo apt-get install -y doxygen - - - name: Install pip requirements - run: python3 -m pip install -r third_party/requirements.txt - - - name: Build the documentation - working-directory: scripts - run: python3 generate_docs.py - - Spellcheck: - uses: ./.github/workflows/spellcheck.yml + uses: ./.github/workflows/reusable_docs_build.yml FastBuild: name: Fast builds - needs: [Spellcheck, CodeStyle] - uses: ./.github/workflows/fast.yml + needs: [CodeChecks, DocsBuild] + uses: ./.github/workflows/reusable_fast.yml Build: name: Basic builds needs: [FastBuild] - uses: ./.github/workflows/basic.yml + uses: ./.github/workflows/reusable_basic.yml + DevDax: + needs: [FastBuild] + uses: ./.github/workflows/reusable_dax.yml + MultiNuma: + needs: [FastBuild] 
+ uses: ./.github/workflows/reusable_multi_numa.yml + L0: + needs: [Build] + uses: ./.github/workflows/reusable_gpu.yml + with: + name: "LEVEL_ZERO" + shared_lib: "['ON']" + CUDA: + needs: [Build] + uses: ./.github/workflows/reusable_gpu.yml + with: + name: "CUDA" + shared_lib: "['ON']" Sanitizers: needs: [FastBuild] - uses: ./.github/workflows/sanitizers.yml - Qemu: + uses: ./.github/workflows/reusable_sanitizers.yml + QEMU: needs: [FastBuild] - uses: ./.github/workflows/qemu.yml + uses: ./.github/workflows/reusable_qemu.yml + with: + short_run: true Benchmarks: needs: [Build] - uses: ./.github/workflows/benchmarks.yml + uses: ./.github/workflows/reusable_benchmarks.yml ProxyLib: needs: [Build] - uses: ./.github/workflows/proxy_lib.yml - GPU: - needs: [Build] - uses: ./.github/workflows/gpu.yml + uses: ./.github/workflows/reusable_proxy_lib.yml Valgrind: needs: [Build] - uses: ./.github/workflows/valgrind.yml - MultiNuma: + uses: ./.github/workflows/reusable_valgrind.yml + Coverage: + # total coverage (on upstream only) + if: github.repository == 'oneapi-src/unified-memory-framework' + needs: [Build, DevDax, L0, CUDA, MultiNuma, QEMU, ProxyLib] + uses: ./.github/workflows/reusable_coverage.yml + secrets: inherit + with: + trigger: "${{github.event_name}}" + Coverage_partial: + # partial coverage (on forks) + if: github.repository != 'oneapi-src/unified-memory-framework' + needs: [Build, QEMU, ProxyLib] + uses: ./.github/workflows/reusable_coverage.yml + CodeQL: + needs: [Build] + permissions: + contents: read + security-events: write + uses: ./.github/workflows/reusable_codeql.yml + Trivy: needs: [Build] - uses: ./.github/workflows/multi_numa.yml + permissions: + contents: read + security-events: write + uses: ./.github/workflows/reusable_trivy.yml diff --git a/.github/workflows/qemu.yml b/.github/workflows/qemu.yml deleted file mode 100644 index f8916c7de..000000000 --- a/.github/workflows/qemu.yml +++ /dev/null @@ -1,112 +0,0 @@ -# Builds project on qemu with 
custom hmat settings -name: Qemu - -on: workflow_call - -env: - CI_BRANCH: "${{ github.head_ref || github.ref_name }}" - -permissions: - contents: read - -jobs: - qemu-build: - name: Qemu - runs-on: ubuntu-22.04 - - steps: - - name: Checkout - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - with: - fetch-depth: 0 - - - name: Enable KVM - run: | - echo 'KERNEL=="kvm", GROUP="kvm", MODE="0666", OPTIONS+="static_node=kvm"' | sudo tee /etc/udev/rules.d/99-kvm4all.rules - sudo udevadm control --reload-rules - sudo udevadm trigger --name-match=kvm - - name: Install qemu - run: | - sudo apt update && sudo apt install -y qemu-system genisoimage qemu-utils - - name: Install libvirt and script dependencies - run: | - sudo apt-get install -y libvirt-clients libvirt-daemon-system libvirt-daemon virtinst bridge-utils - pip install -r scripts/qemu/requirements.txt - sudo usermod -a -G kvm,libvirt $USER - - name: Run ssh-keygen - run: ssh-keygen -b 4096 -N '' -f ~/.ssh/id_rsa - - name: Generate iso with user info - run: | - pub_key=$(cat ~/.ssh/id_rsa.pub) - - cat > user-data << EOF - #cloud-config - - # Add a 'cxltest' user to the system with a password - users: - - default - - name: cxltest - gecos: CXL Test User - primary_group: wheel - groups: users - sudo: ALL=(ALL) NOPASSWD:ALL - lock_passwd: false - ssh-authorized-keys: - - $pub_key - shell: /usr/bin/bash - - # Set local logins - chpasswd: - list: | - root:password - cxltest:password - expire: False - EOF - - cat > meta-data << EOF - instance-id: cxl-test - local-hostname: cxl-test - EOF - - sudo -Sk genisoimage -output ubuntu-cloud-init.iso -volid cidata -joliet -rock ./user-data ./meta-data - - name: Download ubuntu image - run: wget https://cloud-images.ubuntu.com/releases/lunar/release/ubuntu-23.04-server-cloudimg-amd64.img - - name: Resize image - run: qemu-img resize ./ubuntu-23.04-server-cloudimg-amd64.img +4G - - name: Build - run: | - scripts/qemu/start_qemu.sh 
scripts/qemu/configs/default.xml - - if [ ${{ github.event_name }} = 'pull_request' ]; then - CI_REPO="${{ github.event.pull_request.head.repo.full_name }}" - else - CI_REPO="$GITHUB_REPOSITORY" - fi - - scp -P 2222 ${{github.workspace}}/scripts/qemu/run-build.sh cxltest@127.0.0.1:/home/cxltest - scp -P 2222 ${{github.workspace}}/scripts/qemu/run-tests.sh cxltest@127.0.0.1:/home/cxltest - ssh cxltest@127.0.0.1 -p 2222 -t "bash /home/cxltest/run-build.sh https://github.com/$CI_REPO ${{env.CI_BRANCH}}" - - ssh cxltest@127.0.0.1 -p 2222 -t "sudo shutdown -h now" - - - name: Run tests - run: | - for config_file in scripts/qemu/configs/*.xml; do - config_name=$(basename $config_file .xml) - - echo testing $config_name - while ps -aux | grep qemu-system-x86_64 | grep -q -v grep; do - echo "Waiting for QEMU to shut down..." - sleep 5 - done - scripts/qemu/start_qemu.sh $config_file - - if [ ${{ github.event_name }} = 'pull_request' ]; then - CI_REPO="${{ github.event.pull_request.head.repo.full_name }}" - else - CI_REPO="$GITHUB_REPOSITORY" - fi - - ssh cxltest@127.0.0.1 -p 2222 -t "bash /home/cxltest/run-tests.sh" - ssh cxltest@127.0.0.1 -p 2222 -t "sudo shutdown -h now" - done diff --git a/.github/workflows/basic.yml b/.github/workflows/reusable_basic.yml similarity index 79% rename from .github/workflows/basic.yml rename to .github/workflows/reusable_basic.yml index 7c3d2ebc9..1c13a771b 100644 --- a/.github/workflows/basic.yml +++ b/.github/workflows/reusable_basic.yml @@ -8,9 +8,11 @@ permissions: env: # for installation testing - it should match with version set in CMake - UMF_VERSION: 0.9.0 + UMF_VERSION: 0.10.0 BUILD_DIR : "${{github.workspace}}/build" INSTL_DIR : "${{github.workspace}}/../install-dir" + COVERAGE_DIR : "${{github.workspace}}/coverage" + COVERAGE_NAME : "exports-coverage-basic" jobs: ubuntu-build: @@ -22,6 +24,7 @@ jobs: compiler: [{c: gcc, cxx: g++}] shared_library: ['OFF'] level_zero_provider: ['ON'] + cuda_provider: ['ON'] install_tbb: ['ON'] 
disable_hwloc: ['OFF'] link_hwloc_statically: ['OFF'] @@ -31,59 +34,84 @@ jobs: compiler: {c: gcc-7, cxx: g++-7} shared_library: 'OFF' level_zero_provider: 'ON' + cuda_provider: 'ON' install_tbb: 'ON' + disable_hwloc: 'OFF' + link_hwloc_statically: 'OFF' - os: 'ubuntu-22.04' build_type: Release compiler: {c: clang, cxx: clang++} shared_library: 'OFF' level_zero_provider: 'ON' + cuda_provider: 'ON' install_tbb: 'ON' + disable_hwloc: 'OFF' + link_hwloc_statically: 'OFF' - os: 'ubuntu-22.04' build_type: Release compiler: {c: gcc, cxx: g++} shared_library: 'ON' level_zero_provider: 'ON' + cuda_provider: 'ON' install_tbb: 'ON' - - os: 'ubuntu-22.04' + disable_hwloc: 'OFF' + link_hwloc_statically: 'OFF' + - os: 'ubuntu-24.04' build_type: Debug compiler: {c: gcc, cxx: g++} shared_library: 'ON' level_zero_provider: 'ON' + cuda_provider: 'ON' install_tbb: 'ON' - # test level_zero_provider='OFF' + disable_hwloc: 'OFF' + link_hwloc_statically: 'OFF' + # test level_zero_provider='OFF' and cuda_provider='OFF' - os: 'ubuntu-22.04' build_type: Release compiler: {c: gcc, cxx: g++} shared_library: 'OFF' level_zero_provider: 'OFF' + cuda_provider: 'OFF' install_tbb: 'ON' + disable_hwloc: 'OFF' + link_hwloc_statically: 'OFF' # test icx compiler - - os: 'ubuntu-22.04' - build_type: Release - compiler: {c: icx, cxx: icpx} - shared_library: 'ON' - level_zero_provider: 'ON' - install_tbb: 'ON' + # - os: 'ubuntu-22.04' + # build_type: Release + # compiler: {c: icx, cxx: icpx} + # shared_library: 'ON' + # level_zero_provider: 'ON' + # cuda_provider: 'ON' + # install_tbb: 'ON' + # disable_hwloc: 'OFF' + # link_hwloc_statically: 'OFF' # test without installing TBB - os: 'ubuntu-22.04' build_type: Release compiler: {c: gcc, cxx: g++} shared_library: 'ON' level_zero_provider: 'ON' + cuda_provider: 'ON' install_tbb: 'OFF' + disable_hwloc: 'OFF' + link_hwloc_statically: 'OFF' - os: 'ubuntu-22.04' build_type: Debug compiler: {c: gcc, cxx: g++} shared_library: 'ON' level_zero_provider: 'ON' + 
cuda_provider: 'ON' install_tbb: 'ON' disable_hwloc: 'ON' + link_hwloc_statically: 'OFF' - os: 'ubuntu-22.04' build_type: Release compiler: {c: gcc, cxx: g++} shared_library: 'ON' level_zero_provider: 'ON' + cuda_provider: 'ON' install_tbb: 'ON' + disable_hwloc: 'OFF' link_hwloc_statically: 'ON' runs-on: ${{matrix.os}} @@ -96,8 +124,8 @@ jobs: - name: Install apt packages run: | sudo apt-get update - sudo apt-get install -y clang cmake libnuma-dev libjemalloc-dev - + sudo apt-get install -y clang cmake libnuma-dev libjemalloc-dev lcov + - name: Install TBB apt package if: matrix.install_tbb == 'ON' run: | @@ -133,6 +161,7 @@ jobs: -DCMAKE_C_COMPILER=${{matrix.compiler.c}} -DCMAKE_CXX_COMPILER=${{matrix.compiler.cxx}} -DUMF_BUILD_LEVEL_ZERO_PROVIDER=${{matrix.level_zero_provider}} + -DUMF_BUILD_CUDA_PROVIDER=${{matrix.cuda_provider}} -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON @@ -140,15 +169,34 @@ jobs: -DUMF_TESTS_FAIL_ON_SKIP=ON -DUMF_DISABLE_HWLOC=${{matrix.disable_hwloc}} -DUMF_LINK_HWLOC_STATICALLY=${{matrix.link_hwloc_statically}} + ${{ matrix.build_type == 'Debug' && matrix.compiler.c == 'gcc' && '-DUMF_USE_COVERAGE=ON' || '' }} - name: Build UMF - run: cmake --build ${{env.BUILD_DIR}} -j $(nproc) + run: | + ${{ matrix.compiler.cxx == 'icpx' && '. /opt/intel/oneapi/setvars.sh' || true }} + cmake --build ${{env.BUILD_DIR}} -j $(nproc) - name: Run tests working-directory: ${{env.BUILD_DIR}} - run: > - ${{ matrix.compiler.cxx == 'icpx' && '. /opt/intel/oneapi/setvars.sh &&' || ''}} - ctest --output-on-failure --test-dir test + run: | + ${{ matrix.compiler.cxx == 'icpx' && '. 
/opt/intel/oneapi/setvars.sh' || true }} + ctest --output-on-failure # run all tests for better coverage + + - name: Check coverage + if: ${{ matrix.build_type == 'Debug' && matrix.compiler.c == 'gcc' }} + working-directory: ${{env.BUILD_DIR}} + run: | + export COVERAGE_FILE_NAME=${{env.COVERAGE_NAME}}-${{matrix.os}}-shared-${{matrix.shared_library}}-no_hwloc-${{matrix.disable_hwloc}} + echo "COVERAGE_FILE_NAME: $COVERAGE_FILE_NAME" + ../scripts/coverage/coverage_capture.sh $COVERAGE_FILE_NAME + mkdir -p ${{env.COVERAGE_DIR}} + mv ./$COVERAGE_FILE_NAME ${{env.COVERAGE_DIR}} + + - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + if: ${{ matrix.build_type == 'Debug' && matrix.compiler.c == 'gcc' }} + with: + name: ${{env.COVERAGE_NAME}}-${{matrix.os}}-shared-${{matrix.shared_library}}-no_hwloc-${{matrix.disable_hwloc}} + path: ${{env.COVERAGE_DIR}} - name: Remove the installation directory run: rm -rf ${{env.INSTL_DIR}} @@ -162,7 +210,7 @@ jobs: --build-type ${{matrix.build_type}} --disjoint-pool --jemalloc-pool - ${{ matrix.install_tbb == 'ON' && matrix.disable_hwloc != 'ON' && matrix.link_hwloc_statically != 'ON' && '--proxy' || '' }} + ${{ matrix.install_tbb == 'ON' && matrix.disable_hwloc != 'ON' && matrix.shared_library == 'ON' && '--proxy' || '' }} --umf-version ${{env.UMF_VERSION}} ${{ matrix.shared_library == 'ON' && '--shared-library' || '' }} @@ -177,23 +225,28 @@ jobs: compiler: [{c: cl, cxx: cl}] shared_library: ['ON', 'OFF'] level_zero_provider: ['ON'] + cuda_provider: ['ON'] include: - - os: 'windows-2022' - build_type: Release - compiler: {c: clang-cl, cxx: clang-cl} - shared_library: 'ON' - level_zero_provider: 'ON' - toolset: "-T ClangCL" + # temporarily disable failing CI job + #- os: 'windows-2022' + # build_type: Release + # compiler: {c: clang-cl, cxx: clang-cl} + # shared_library: 'ON' + # level_zero_provider: 'ON' + # cuda_provider: 'ON' + # toolset: "-T ClangCL" - os: 'windows-2022' build_type: Release 
compiler: {c: cl, cxx: cl} shared_library: 'ON' level_zero_provider: 'ON' + cuda_provider: 'ON' - os: 'windows-2022' build_type: Release compiler: {c: cl, cxx: cl} shared_library: 'ON' level_zero_provider: 'OFF' + cuda_provider: 'OFF' runs-on: ${{matrix.os}} @@ -229,6 +282,7 @@ jobs: -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON -DUMF_BUILD_LEVEL_ZERO_PROVIDER=${{matrix.level_zero_provider}} + -DUMF_BUILD_CUDA_PROVIDER=${{matrix.cuda_provider}} -DUMF_TESTS_FAIL_ON_SKIP=ON - name: Build UMF @@ -247,7 +301,7 @@ jobs: --build-type ${{matrix.build_type}} --disjoint-pool --jemalloc-pool - --proxy + ${{matrix.shared_library == 'ON' && '--proxy' || '' }} --umf-version ${{env.UMF_VERSION}} ${{ matrix.shared_library == 'ON' && '--shared-library' || ''}} @@ -257,7 +311,7 @@ jobs: shell: pwsh - name: check /DEPENDENTLOADFLAG in umf_proxy.dll - if: ${{matrix.compiler.cxx == 'cl'}} + if: ${{matrix.shared_library == 'ON' && matrix.compiler.cxx == 'cl'}} run: ${{github.workspace}}/.github/scripts/check_dll_flags.ps1 ${{env.BUILD_DIR}}/src/proxy_lib/${{matrix.build_type}}/umf_proxy.dll shell: pwsh @@ -287,6 +341,7 @@ jobs: -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=OFF -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON + -DUMF_BUILD_CUDA_PROVIDER=ON -DUMF_TESTS_FAIL_ON_SKIP=ON -DUMF_LINK_HWLOC_STATICALLY=ON @@ -329,6 +384,7 @@ jobs: -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=OFF -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON + -DUMF_BUILD_CUDA_PROVIDER=ON -DUMF_TESTS_FAIL_ON_SKIP=ON -DUMF_LINK_HWLOC_STATICALLY=ON @@ -393,7 +449,7 @@ jobs: name: MacOS strategy: matrix: - os: ['macos-12', 'macos-13'] + os: ['macos-13', 'macos-14'] env: BUILD_TYPE : "Release" runs-on: ${{matrix.os}} @@ -404,8 +460,13 @@ jobs: with: fetch-depth: 0 - - name: Install Python requirements - run: python3 -m pip install -r third_party/requirements.txt + # Latest distros do not allow global pip installation + - name: Install Python requirements in venv + run: 
| + python3 -m venv .venv + . .venv/bin/activate + echo "$PATH" >> $GITHUB_PATH + python3 -m pip install -r third_party/requirements.txt - name: Install hwloc run: brew install hwloc jemalloc tbb diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/reusable_benchmarks.yml similarity index 98% rename from .github/workflows/benchmarks.yml rename to .github/workflows/reusable_benchmarks.yml index de48173bf..41710029c 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/reusable_benchmarks.yml @@ -63,6 +63,7 @@ jobs: -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=OFF -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON + -DUMF_BUILD_CUDA_PROVIDER=ON -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON diff --git a/.github/workflows/reusable_checks.yml b/.github/workflows/reusable_checks.yml new file mode 100644 index 000000000..e3e264b0d --- /dev/null +++ b/.github/workflows/reusable_checks.yml @@ -0,0 +1,62 @@ +# Basic checks on the code, incl. coding style, spelling, bandit analysis. +# TODO: add license check +name: Basic checks + +on: workflow_call + +permissions: + contents: read + +jobs: + CodeChecks: + name: Basic code checks + runs-on: ${{ github.repository_owner == 'oneapi-src' && 'intel-ubuntu-22.04' || 'ubuntu-latest' }} + + steps: + - name: Checkout repository + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + with: + fetch-depth: 0 + + - name: Install dependencies + run: | + sudo apt-get update + sudo apt-get install -y black cmake clang-format-15 cmake-format libhwloc-dev + + # Latest distros do not allow global pip installation + - name: Install Python requirements in venv + run: | + python3 -m venv .venv + . 
.venv/bin/activate + echo "$PATH" >> $GITHUB_PATH + python3 -m pip install bandit + + - name: Configure CMake + run: > + cmake + -B ${{github.workspace}}/build + -DUMF_FORMAT_CODE_STYLE=ON + -DUMF_BUILD_TESTS=OFF + -DUMF_BUILD_LEVEL_ZERO_PROVIDER=OFF + -DUMF_BUILD_CUDA_PROVIDER=OFF + -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=OFF + + - name: Check C/C++ formatting + run: cmake --build build --target clang-format-check + + - name: Check CMake formatting + run: | + cmake --build build --target cmake-format-apply + git diff --exit-code + + - name: Check Python formatting + run: cmake --build build --target black-format-check + + - name: Run a spell check + uses: crate-ci/typos@b63f421581dce830bda2f597a678cb7776b41877 # v1.18.2 + with: + config: ./.github/workflows/.spellcheck-conf.toml + + # Run Bandit recursively, but omit _deps directory (with 3rd party code) and python's venv + - name: Run Bandit + run: python3 -m bandit -r . -x '/_deps/,/.venv/' diff --git a/.github/workflows/codeql.yml b/.github/workflows/reusable_codeql.yml similarity index 72% rename from .github/workflows/codeql.yml rename to .github/workflows/reusable_codeql.yml index b449eb23e..e76456310 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/reusable_codeql.yml @@ -1,19 +1,7 @@ # CodeQL static analysis name: CodeQL -# Due to lower score on Scorecard we're running this separately from -# "PR/push" workflow. For some reason permissions weren't properly set -# or recognized (by Scorecard). If Scorecard changes its behavior we can -# go back to use 'workflow_call' trigger. 
-on: - push: - branches-ignore: - - 'dependabot/**' - pull_request: - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true +on: workflow_call permissions: contents: read @@ -46,32 +34,44 @@ jobs: with: fetch-depth: 0 + - name: Setup newer Python + uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0 + with: + python-version: "3.10" + - name: Initialize CodeQL uses: github/codeql-action/init@b7bf0a3ed3ecfa44160715d7c442788f65f0f923 # v3.23.2 with: languages: cpp - - name: Initialize vcpkg - if: ${{ matrix.os == 'windows-latest' }} + - name: "[Win] Initialize vcpkg" + if: matrix.os == 'windows-latest' uses: lukka/run-vcpkg@5e0cab206a5ea620130caf672fce3e4a6b5666a1 # v11.5 with: vcpkgGitCommitId: 3dd44b931481d7a8e9ba412621fa810232b66289 vcpkgDirectory: ${{env.BUILD_DIR}}/vcpkg vcpkgJsonGlob: '**/vcpkg.json' - - name: Install dependencies - if: ${{ matrix.os == 'windows-latest' }} - run: vcpkg install - shell: pwsh # Specifies PowerShell as the shell for running the script. + - name: "[Win] Install dependencies" + if: matrix.os == 'windows-latest' + run: | + vcpkg install + python3 -m pip install -r third_party/requirements.txt - - name: Install apt packages + - name: "[Lin] Install apt packages" if: matrix.os == 'ubuntu-latest' run: | sudo apt-get update sudo apt-get install -y cmake clang libhwloc-dev libnuma-dev libjemalloc-dev libtbb-dev - - name: Install pip packages - run: python3 -m pip install -r third_party/requirements.txt + # Latest distros do not allow global pip installation + - name: "[Lin] Install Python requirements in venv" + if: matrix.os == 'ubuntu-latest' + run: | + python3 -m venv .venv + . 
.venv/bin/activate + echo "$PATH" >> $GITHUB_PATH + python3 -m pip install -r third_party/requirements.txt - name: Configure CMake run: > @@ -84,6 +84,7 @@ jobs: -DUMF_DEVELOPER_MODE=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON + -DUMF_BUILD_CUDA_PROVIDER=ON -DUMF_TESTS_FAIL_ON_SKIP=ON - name: Build diff --git a/.github/workflows/reusable_coverage.yml b/.github/workflows/reusable_coverage.yml new file mode 100644 index 000000000..c8dde20ec --- /dev/null +++ b/.github/workflows/reusable_coverage.yml @@ -0,0 +1,71 @@ +# Coverage build - gather artifacts from other builds and merge them into a single report +name: Coverage + +on: + workflow_call: + inputs: + trigger: + description: Type of workflow trigger + type: string + required: false + +permissions: + contents: read + +env: + COVERAGE_DIR : "${{github.workspace}}/coverage" + +jobs: + Coverage: + name: Coverage build + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + with: + fetch-depth: 0 + + - name: Install dependencies + run: | + sudo apt-get update + sudo apt-get install -y lcov + + - name: Download all coverage artifacts + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 + with: + pattern: exports-coverage-* + path: coverage + merge-multiple: true + + - name: Compute coverage + working-directory: ${{env.COVERAGE_DIR}} + id: coverage + run: | + echo "DIR: $(pwd)" && ls -al + ../scripts/coverage/merge_coverage_files.sh exports-coverage total_coverage + genhtml --no-function-coverage -o html_report total_coverage 2>&1 | tee output.txt + mkdir coverage_report + mv html_report ./coverage_report/ + tail -n2 output.txt >> $GITHUB_STEP_SUMMARY + echo "COV_OUT=$(tail -n1 output.txt | grep -oP "lines[.]+: [\d.]+%" | cut -d ' ' -f2 | tr -d '%')" >> $GITHUB_OUTPUT + + - name: Upload coverage report + uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 
# v4.3.3 + with: + name: coverage_html_report + path: coverage/coverage_report + + # Only update the badge on push (event is passed only for total coverage) + - name: Update coverity badge + if: ${{ success() && inputs.trigger == 'push' }} + uses: Schneegans/dynamic-badges-action@e9a478b16159b4d31420099ba146cdc50f134483 # v1.7.0 + with: + auth: ${{ secrets.BB_GIST_TOKEN }} + gistID: 3f66c77d7035df39aa75dda8a2ac75b3 + filename: umf_coverage_badge.svg + label: Coverage + message: ${{ steps.coverage.outputs.COV_OUT }}% + valColorRange: ${{ steps.coverage.outputs.COV_OUT }} + minColorRange: 50 # <= this value = color: red + maxColorRange: 90 # >= this value = color: green diff --git a/.github/workflows/reusable_dax.yml b/.github/workflows/reusable_dax.yml new file mode 100644 index 000000000..f7c5d0d21 --- /dev/null +++ b/.github/workflows/reusable_dax.yml @@ -0,0 +1,147 @@ +# +# This workflow builds and tests the DEVDAX memory provider +# and the file memory provider with FSDAX. +# It requires: +# - a DAX device (e.g. /dev/dax0.0) and +# - a FSDAX device (e.g. /dev/pmem1) +# configured and mounted in the OS. +# +# The DAX device should be specified using the +# UMF_TESTS_DEVDAX_PATH and UMF_TESTS_DEVDAX_SIZE environment variables. +# +# The FSDAX device should be mounted in the OS (e.g. /mnt/pmem1) +# and the UMF_TESTS_FSDAX_PATH environment variable +# should contain a path to a file on this FSDAX device. 
+# + +name: Dax + +on: [workflow_call] + +permissions: + contents: read + +env: + DEVDAX_NAMESPACE : "0.0" + FSDAX_NAMESPACE : "1.0" + FSDAX_PMEM: "pmem1" + UMF_TESTS_FSDAX_PATH: "/mnt/pmem1/file" + UMF_TESTS_FSDAX_PATH_2: "/mnt/pmem1/file_2" + BUILD_DIR : "${{github.workspace}}/build" + INSTL_DIR : "${{github.workspace}}/../install-dir" + COVERAGE_DIR : "${{github.workspace}}/coverage" + COVERAGE_NAME : "exports-coverage-dax" + +jobs: + dax: + name: Build + # run only on upstream; forks may not have a DAX device + if: github.repository == 'oneapi-src/unified-memory-framework' + strategy: + matrix: + build_type: [Debug, Release] + shared_library: ['ON', 'OFF'] + + runs-on: ["DSS-DEVDAX", "DSS-Ubuntu"] + steps: + - name: Check configuration of the DEVDAX + run: | + echo DEVDAX_NAMESPACE="${{env.DEVDAX_NAMESPACE}}" + ndctl list --namespace=namespace${{env.DEVDAX_NAMESPACE}} --device-dax + ls -al /dev/dax${{env.DEVDAX_NAMESPACE}} + echo UMF_TESTS_DEVDAX_PATH="/dev/dax${{env.DEVDAX_NAMESPACE}}" + echo UMF_TESTS_DEVDAX_SIZE="$(ndctl list --namespace=namespace${{env.DEVDAX_NAMESPACE}} | grep size | cut -d':' -f2 | cut -d',' -f1)" + + - name: Check configuration of the FSDAX + run: | + echo FSDAX_NAMESPACE="${{env.FSDAX_NAMESPACE}}" + echo UMF_TESTS_FSDAX_PATH="${{env.UMF_TESTS_FSDAX_PATH}}" + echo UMF_TESTS_FSDAX_PATH_2="${{env.UMF_TESTS_FSDAX_PATH_2}}" + ndctl list --namespace=namespace${{env.FSDAX_NAMESPACE}} + ls -al /dev/${{env.FSDAX_PMEM}} /mnt/${{env.FSDAX_PMEM}} + mount | grep -e "/dev/${{env.FSDAX_PMEM}}" + touch ${{env.UMF_TESTS_FSDAX_PATH}} ${{env.UMF_TESTS_FSDAX_PATH_2}} + rm -f ${{env.UMF_TESTS_FSDAX_PATH}} ${{env.UMF_TESTS_FSDAX_PATH_2}} + + - name: Checkout + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + with: + fetch-depth: 0 + + - name: Configure build + run: > + cmake + -B ${{env.BUILD_DIR}} + -DCMAKE_INSTALL_PREFIX="${{env.INSTL_DIR}}" + -DCMAKE_BUILD_TYPE=${{matrix.build_type}} + -DCMAKE_C_COMPILER=gcc + 
-DCMAKE_CXX_COMPILER=g++ + -DUMF_BUILD_SHARED_LIBRARY=${{matrix.shared_library}} + -DUMF_BUILD_BENCHMARKS=OFF + -DUMF_BUILD_TESTS=ON + -DUMF_BUILD_GPU_TESTS=OFF + -DUMF_BUILD_GPU_EXAMPLES=OFF + -DUMF_FORMAT_CODE_STYLE=OFF + -DUMF_DEVELOPER_MODE=ON + -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON + -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON + -DUMF_BUILD_LEVEL_ZERO_PROVIDER=OFF + -DUMF_TESTS_FAIL_ON_SKIP=ON + ${{ matrix.build_type == 'Debug' && '-DUMF_USE_COVERAGE=ON' || '' }} + + - name: Build UMF + run: cmake --build ${{env.BUILD_DIR}} --config ${{matrix.build_type}} -j $(nproc) + + - name: Run the DEVDAX tests + working-directory: ${{env.BUILD_DIR}} + run: > + UMF_TESTS_DEVDAX_PATH="/dev/dax${{env.DEVDAX_NAMESPACE}}" + UMF_TESTS_DEVDAX_SIZE="$(ndctl list --namespace=namespace${{env.DEVDAX_NAMESPACE}} | grep size | cut -d':' -f2 | cut -d',' -f1)" + ctest -C ${{matrix.build_type}} -V -R devdax + + - name: Run the FSDAX tests + working-directory: ${{env.BUILD_DIR}} + run: > + UMF_TESTS_FSDAX_PATH=${{env.UMF_TESTS_FSDAX_PATH}} + UMF_TESTS_FSDAX_PATH_2=${{env.UMF_TESTS_FSDAX_PATH_2}} + ctest -C ${{matrix.build_type}} -V -R "file|fsdax" + + # TODO: enable the provider_devdax_memory_ipc test when the IPC tests with the proxy library are fixed + # see the issue: https://github.com/oneapi-src/unified-memory-framework/issues/864 + - name: Run the DEVDAX tests with the proxy library + # proxy library is built only if libumf is a shared library + if: ${{ matrix.shared_library == 'ON' }} + working-directory: ${{env.BUILD_DIR}} + run: > + LD_PRELOAD=./lib/libumf_proxy.so + UMF_TESTS_DEVDAX_PATH="/dev/dax${{env.DEVDAX_NAMESPACE}}" + UMF_TESTS_DEVDAX_SIZE="$(ndctl list --namespace=namespace${{env.DEVDAX_NAMESPACE}} | grep size | cut -d':' -f2 | cut -d',' -f1)" + ctest -C ${{matrix.build_type}} -V -R devdax -E provider_devdax_memory_ipc + + # TODO: enable the provider_file_memory_ipc test when the IPC tests with the proxy library are fixed + # see the issue: 
https://github.com/oneapi-src/unified-memory-framework/issues/864 + - name: Run the FSDAX tests with the proxy library + # proxy library is built only if libumf is a shared library + if: ${{ matrix.shared_library == 'ON' }} + working-directory: ${{env.BUILD_DIR}} + run: > + LD_PRELOAD=./lib/libumf_proxy.so + UMF_TESTS_FSDAX_PATH=${{env.UMF_TESTS_FSDAX_PATH}} + UMF_TESTS_FSDAX_PATH_2=${{env.UMF_TESTS_FSDAX_PATH_2}} + ctest -C ${{matrix.build_type}} -V -R "file|fsdax" -E provider_file_memory_ipc + + - name: Check coverage + if: ${{ matrix.build_type == 'Debug' }} + working-directory: ${{env.BUILD_DIR}} + run: | + export COVERAGE_FILE_NAME=${{env.COVERAGE_NAME}}-shared-${{matrix.shared_library}} + echo "COVERAGE_FILE_NAME: $COVERAGE_FILE_NAME" + ../scripts/coverage/coverage_capture.sh $COVERAGE_FILE_NAME + mkdir -p ${{env.COVERAGE_DIR}} + mv ./$COVERAGE_FILE_NAME ${{env.COVERAGE_DIR}} + + - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + if: ${{ matrix.build_type == 'Debug' }} + with: + name: ${{env.COVERAGE_NAME}}-shared-${{matrix.shared_library}} + path: ${{env.COVERAGE_DIR}} diff --git a/.github/workflows/reusable_docs_build.yml b/.github/workflows/reusable_docs_build.yml new file mode 100644 index 000000000..269560c67 --- /dev/null +++ b/.github/workflows/reusable_docs_build.yml @@ -0,0 +1,34 @@ +name: Docs build + +on: workflow_call + +permissions: + contents: read + +jobs: + DocsBuild: + name: Docs build + runs-on: ${{ github.repository_owner == 'oneapi-src' && 'intel-ubuntu-22.04' || 'ubuntu-latest' }} + + steps: + - name: Checkout repository + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + with: + fetch-depth: 0 + + - name: Install doxygen + run: | + sudo apt-get update + sudo apt-get install -y doxygen + + # Latest distros do not allow global pip installation + - name: Install Python requirements in venv + run: | + python3 -m venv .venv + . 
.venv/bin/activate + echo "$PATH" >> $GITHUB_PATH + python3 -m pip install -r third_party/requirements.txt + + - name: Build the documentation + working-directory: scripts + run: python3 generate_docs.py diff --git a/.github/workflows/fast.yml b/.github/workflows/reusable_fast.yml similarity index 89% rename from .github/workflows/fast.yml rename to .github/workflows/reusable_fast.yml index a42f0b694..e25de68a1 100644 --- a/.github/workflows/fast.yml +++ b/.github/workflows/reusable_fast.yml @@ -54,7 +54,7 @@ jobs: build_tests: 'ON' extra_build_options: '-DCMAKE_BUILD_TYPE=Release' simple_cmake: 'ON' - runs-on: ${{matrix.os}} + runs-on: ${{ (matrix.os == 'ubuntu-latest' && github.repository_owner == 'oneapi-src') && 'intel-ubuntu-22.04' || matrix.os }} steps: - name: Checkout repository @@ -70,18 +70,18 @@ jobs: vcpkgDirectory: ${{env.BUILD_DIR}}/vcpkg vcpkgJsonGlob: '**/vcpkg.json' - - name: Install dependencies + - name: Install dependencies (windows-latest) if: matrix.os == 'windows-latest' run: vcpkg install shell: pwsh # Specifies PowerShell as the shell for running the script. 
- - name: Install apt packages (ubuntu-latest) + - name: Install dependencies (ubuntu-latest) if: matrix.os == 'ubuntu-latest' run: | sudo apt-get update sudo apt-get install -y cmake libjemalloc-dev libhwloc-dev libnuma-dev libtbb-dev - - name: Install apt packages (ubuntu-20.04) + - name: Install dependencies (ubuntu-20.04) if: matrix.os == 'ubuntu-20.04' run: | sudo apt-get update @@ -106,6 +106,7 @@ jobs: -DUMF_BUILD_TESTS=${{matrix.build_tests}} -DUMF_BUILD_EXAMPLES=ON -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON + -DUMF_BUILD_CUDA_PROVIDER=ON -DUMF_TESTS_FAIL_ON_SKIP=ON -DUMF_BUILD_SHARED_LIBRARY=ON ${{matrix.extra_build_options}} @@ -132,16 +133,6 @@ jobs: working-directory: ${{env.BUILD_DIR}} run: ctest --output-on-failure --test-dir test -C Release - - name: check /DEPENDENTLOADFLAG (Windows only) - if: matrix.os == 'windows-latest' - run: ${{github.workspace}}/.github/scripts/check_dll_flags.ps1 ${{env.BUILD_DIR}}/bin/Release/umf.dll - shell: pwsh - - - name: check /DEPENDENTLOADFLAG in umf_proxy.dll - if: matrix.os == 'windows-latest' - run: ${{github.workspace}}/.github/scripts/check_dll_flags.ps1 ${{env.BUILD_DIR}}/src/proxy_lib/Release/umf_proxy.dll - shell: pwsh - # TODO: We could add some script to verify metadata of dll's (selected fields, perhaps) # ref. https://superuser.com/questions/381276/what-are-some-nice-command-line-ways-to-inspect-dll-exe-details - name: Print metadata of our dll's diff --git a/.github/workflows/reusable_gpu.yml b/.github/workflows/reusable_gpu.yml new file mode 100644 index 000000000..913a0f0f1 --- /dev/null +++ b/.github/workflows/reusable_gpu.yml @@ -0,0 +1,133 @@ +# This workflow builds and tests providers using GPU memory. It requires properly +# labelled self-hosted runners on systems with the correct GPU and drivers. 
+name: GPU + +on: + workflow_call: + inputs: + name: + description: Provider name + type: string + required: true + os: + description: A list of OSes + type: string + default: "['Ubuntu', 'Windows']" + build_type: + description: A list of build types + type: string + default: "['Debug', 'Release']" + shared_lib: + description: A list of options for building shared library + type: string + default: "['ON', 'OFF']" + +permissions: + contents: read + +env: + BUILD_DIR : "${{github.workspace}}/build" + INSTL_DIR : "${{github.workspace}}/../install-dir" + COVERAGE_DIR : "${{github.workspace}}/coverage" + +jobs: + gpu: + name: "${{matrix.os}}, ${{matrix.build_type}}, shared=${{matrix.shared_library}}" + env: + VCPKG_PATH: "${{github.workspace}}/build/vcpkg/packages/hwloc_x64-windows;${{github.workspace}}/build/vcpkg/packages/tbb_x64-windows;${{github.workspace}}/build/vcpkg/packages/jemalloc_x64-windows;" + CUDA_PATH: "C:/cuda" + COVERAGE_NAME : "exports-coverage-${{inputs.name}}" + # run only on upstream; forks will not have the HW + if: github.repository == 'oneapi-src/unified-memory-framework' + strategy: + fail-fast: false + matrix: + shared_library: ${{ fromJSON(inputs.shared_lib)}} + os: ${{ fromJSON(inputs.os)}} + build_type: ${{ fromJSON(inputs.build_type)}} + include: + - os: 'Ubuntu' + compiler: {c: gcc, cxx: g++} + number_of_processors: '$(nproc)' + - os: 'Windows' + compiler: {c: cl, cxx: cl} + number_of_processors: '$Env:NUMBER_OF_PROCESSORS' + + runs-on: ["DSS-${{inputs.name}}", "DSS-${{matrix.os}}"] + steps: + - name: Checkout + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + with: + fetch-depth: 0 + + - name: Get information about platform + if: matrix.os == 'Ubuntu' + run: .github/scripts/get_system_info.sh + + - name: "[Win] Initialize vcpkg" + if: matrix.os == 'Windows' + uses: lukka/run-vcpkg@5e0cab206a5ea620130caf672fce3e4a6b5666a1 # v11.5 + with: + vcpkgGitCommitId: 3dd44b931481d7a8e9ba412621fa810232b66289 + 
vcpkgDirectory: ${{env.BUILD_DIR}}/vcpkg + vcpkgJsonGlob: '**/vcpkg.json' + + - name: "[Win] Install dependencies" + if: matrix.os == 'Windows' + run: vcpkg install + + # note: disable all providers except the one being tested + - name: Configure build + run: > + cmake + -DCMAKE_PREFIX_PATH="${{env.VCPKG_PATH}}${{env.CUDA_PATH}}" + -B ${{env.BUILD_DIR}} + -DCMAKE_INSTALL_PREFIX="${{env.INSTL_DIR}}" + -DCMAKE_BUILD_TYPE=${{matrix.build_type}} + -DCMAKE_C_COMPILER=${{matrix.compiler.c}} + -DCMAKE_CXX_COMPILER=${{matrix.compiler.cxx}} + -DUMF_BUILD_SHARED_LIBRARY=${{matrix.shared_library}} + -DUMF_BUILD_BENCHMARKS=ON + -DUMF_BUILD_TESTS=ON + -DUMF_BUILD_GPU_TESTS=ON + -DUMF_BUILD_GPU_EXAMPLES=ON + -DUMF_DEVELOPER_MODE=ON + -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON + -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON + -DUMF_BUILD_CUDA_PROVIDER=OFF + -DUMF_BUILD_LEVEL_ZERO_PROVIDER=OFF + -DUMF_BUILD_${{inputs.name}}_PROVIDER=ON + -DUMF_TESTS_FAIL_ON_SKIP=ON + ${{ matrix.os == 'Ubuntu' && matrix.build_type == 'Debug' && '-DUMF_USE_COVERAGE=ON' || '' }} + + - name: Build UMF + run: cmake --build ${{env.BUILD_DIR}} --config ${{matrix.build_type}} -j ${{matrix.number_of_processors}} + + - name: Run tests + working-directory: ${{env.BUILD_DIR}} + run: ctest -C ${{matrix.build_type}} --output-on-failure --test-dir test + + - name: Run examples + working-directory: ${{env.BUILD_DIR}} + run: ctest --output-on-failure --test-dir examples -C ${{matrix.build_type}} + + - name: Run benchmarks + if: matrix.build_type == 'Release' + working-directory: ${{env.BUILD_DIR}} + run: ctest --output-on-failure --test-dir benchmark -C ${{matrix.build_type}} --exclude-regex umf-bench-multithreaded + + - name: Check coverage + if: ${{ matrix.build_type == 'Debug' && matrix.os == 'Ubuntu' }} + working-directory: ${{env.BUILD_DIR}} + run: | + export COVERAGE_FILE_NAME=${{env.COVERAGE_NAME}}-shared-${{matrix.shared_library}} + echo "COVERAGE_FILE_NAME: $COVERAGE_FILE_NAME" + ../scripts/coverage/coverage_capture.sh 
$COVERAGE_FILE_NAME + mkdir -p ${{env.COVERAGE_DIR}} + mv ./$COVERAGE_FILE_NAME ${{env.COVERAGE_DIR}} + + - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + if: ${{ matrix.build_type == 'Debug' && matrix.os == 'Ubuntu' }} + with: + name: ${{env.COVERAGE_NAME}}-shared-${{matrix.shared_library}} + path: ${{env.COVERAGE_DIR}} diff --git a/.github/workflows/multi_numa.yml b/.github/workflows/reusable_multi_numa.yml similarity index 58% rename from .github/workflows/multi_numa.yml rename to .github/workflows/reusable_multi_numa.yml index a9433018e..8b30ed53e 100644 --- a/.github/workflows/multi_numa.yml +++ b/.github/workflows/reusable_multi_numa.yml @@ -6,15 +6,22 @@ on: [workflow_call] permissions: contents: read +env: + BUILD_DIR : "${{github.workspace}}/build" + COVERAGE_DIR : "${{github.workspace}}/coverage" + COVERAGE_NAME : "exports-coverage-multinuma" + jobs: multi_numa: - name: ${{matrix.os}} + name: "${{matrix.os}}, ${{matrix.build_type}}, shared=${{matrix.shared_library}}" # run only on upstream; forks will not have the HW if: github.repository == 'oneapi-src/unified-memory-framework' strategy: matrix: os: [ubuntu-22.04, rhel-9.1] + build_type: [Debug, Release] + shared_library: ['ON', 'OFF'] runs-on: ["DSS-MULTI-NUMA", "DSS-${{matrix.os}}"] steps: @@ -30,16 +37,17 @@ jobs: run: > cmake -B ${{github.workspace}}/build - -DCMAKE_BUILD_TYPE=Debug + -DCMAKE_BUILD_TYPE=${{matrix.build_type}} -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ - -DUMF_BUILD_SHARED_LIBRARY=OFF + -DUMF_BUILD_SHARED_LIBRARY=${{matrix.shared_library}} -DUMF_BUILD_BENCHMARKS=OFF -DUMF_BUILD_TESTS=ON -DUMF_DEVELOPER_MODE=ON -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON -DUMF_TESTS_FAIL_ON_SKIP=ON + ${{ matrix.build_type == 'Debug' && matrix.os == 'ubuntu-22.04' && '-DUMF_USE_COVERAGE=ON' || '' }} - name: Build UMF run: cmake --build ${{github.workspace}}/build -j $(nproc) @@ -59,3 +67,19 @@ jobs: ctest --output-on-failure --test-dir 
test -E "umf-provider_os_memory_multiple_numa_nodes" ./test/umf_test-provider_os_memory_multiple_numa_nodes \ --gtest_filter="-*checkModeLocal/*:*checkModePreferredEmptyNodeset/*:testNuma.checkModeInterleave" + + - name: Check coverage + if: ${{ matrix.build_type == 'Debug' && matrix.os == 'ubuntu-22.04' }} + working-directory: ${{env.BUILD_DIR}} + run: | + export COVERAGE_FILE_NAME=${{env.COVERAGE_NAME}}-${{matrix.os}}-shared-${{matrix.shared_library}} + echo "COVERAGE_FILE_NAME: $COVERAGE_FILE_NAME" + ../scripts/coverage/coverage_capture.sh $COVERAGE_FILE_NAME + mkdir -p ${{env.COVERAGE_DIR}} + mv ./$COVERAGE_FILE_NAME ${{env.COVERAGE_DIR}} + + - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + if: ${{ matrix.build_type == 'Debug' && matrix.os == 'ubuntu-22.04' }} + with: + name: ${{env.COVERAGE_NAME}}-${{matrix.os}}-shared-${{matrix.shared_library}} + path: ${{env.COVERAGE_DIR}} diff --git a/.github/workflows/proxy_lib.yml b/.github/workflows/reusable_proxy_lib.yml similarity index 51% rename from .github/workflows/proxy_lib.yml rename to .github/workflows/reusable_proxy_lib.yml index 8d73569f0..2a27161b3 100644 --- a/.github/workflows/proxy_lib.yml +++ b/.github/workflows/reusable_proxy_lib.yml @@ -9,6 +9,8 @@ permissions: env: BUILD_DIR : "${{github.workspace}}/build" INSTL_DIR : "${{github.workspace}}/../install-dir" + COVERAGE_DIR : "${{github.workspace}}/coverage" + COVERAGE_NAME : "exports-coverage-proxy" jobs: proxy-ubuntu: @@ -16,11 +18,10 @@ jobs: strategy: matrix: - os: ['ubuntu-22.04'] build_type: [Release, Debug] compiler: [{c: gcc, cxx: g++}] proxy_lib_pool: ['SCALABLE', 'JEMALLOC'] - runs-on: ${{matrix.os}} + runs-on: ${{ github.repository_owner == 'oneapi-src' && 'intel-ubuntu-22.04' || 'ubuntu-22.04' }} steps: - name: Checkout @@ -31,7 +32,7 @@ jobs: - name: Install apt packages run: | sudo apt-get update - sudo apt-get install -y cmake libhwloc-dev libjemalloc-dev libtbb-dev + sudo apt-get install -y cmake 
libhwloc-dev libjemalloc-dev libtbb-dev lcov - name: Set ptrace value for IPC test run: sudo bash -c "echo 0 > /proc/sys/kernel/yama/ptrace_scope" @@ -45,7 +46,7 @@ jobs: -DCMAKE_C_COMPILER=${{matrix.compiler.c}} -DCMAKE_CXX_COMPILER=${{matrix.compiler.cxx}} -DUMF_BUILD_SHARED_LIBRARY=ON - -DUMF_BUILD_BENCHMARKS=ON + -DUMF_BUILD_BENCHMARKS=OFF -DUMF_BUILD_TESTS=ON -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=OFF @@ -53,13 +54,16 @@ jobs: -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_TESTS_FAIL_ON_SKIP=ON -DUMF_PROXY_LIB_BASED_ON_POOL=${{matrix.proxy_lib_pool}} + ${{ matrix.build_type == 'Debug' && '-DUMF_USE_COVERAGE=ON' || '' }} - name: Build UMF run: cmake --build ${{env.BUILD_DIR}} -j $(nproc) + # TODO enable the provider_file_memory_ipc test when the IPC tests with the proxy library are fixed + # see the issue: https://github.com/oneapi-src/unified-memory-framework/issues/864 - name: Run "ctest --output-on-failure" with proxy library working-directory: ${{env.BUILD_DIR}} - run: LD_PRELOAD=./lib/libumf_proxy.so ctest --output-on-failure + run: LD_PRELOAD=./lib/libumf_proxy.so ctest --output-on-failure -E provider_file_memory_ipc - name: Run "./test/umf_test-memoryPool" with proxy library working-directory: ${{env.BUILD_DIR}} @@ -67,8 +71,33 @@ jobs: - name: Run "/usr/bin/ls" with proxy library working-directory: ${{env.BUILD_DIR}} - run: LD_PRELOAD=./lib/libumf_proxy.so /usr/bin/ls + run: UMF_PROXY="page.disposition=shared-fd" LD_PRELOAD=./lib/libumf_proxy.so /usr/bin/ls - name: Run "/usr/bin/date" with proxy library working-directory: ${{env.BUILD_DIR}} - run: LD_PRELOAD=./lib/libumf_proxy.so /usr/bin/date + run: UMF_PROXY="page.disposition=shared-shm" LD_PRELOAD=./lib/libumf_proxy.so /usr/bin/date + + # TODO enable the provider_file_memory_ipc test when the IPC tests with the proxy library are fixed + # see the issue: https://github.com/oneapi-src/unified-memory-framework/issues/864 + - name: Run "ctest --output-on-failure" with proxy library and 
size.threshold=128 + working-directory: ${{env.BUILD_DIR}} + run: > + UMF_PROXY="page.disposition=shared-shm;size.threshold=128" + LD_PRELOAD=./lib/libumf_proxy.so + ctest --output-on-failure -E provider_file_memory_ipc + + - name: Check coverage + if: ${{ matrix.build_type == 'Debug' }} + working-directory: ${{env.BUILD_DIR}} + run: | + export COVERAGE_FILE_NAME=${{env.COVERAGE_NAME}}-proxy_lib_pool-${{matrix.proxy_lib_pool}} + echo "COVERAGE_FILE_NAME: $COVERAGE_FILE_NAME" + ../scripts/coverage/coverage_capture.sh $COVERAGE_FILE_NAME + mkdir -p ${{env.COVERAGE_DIR}} + mv ./$COVERAGE_FILE_NAME ${{env.COVERAGE_DIR}} + + - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + if: ${{ matrix.build_type == 'Debug' }} + with: + name: ${{env.COVERAGE_NAME}}-proxy_lib_pool-${{matrix.proxy_lib_pool}} + path: ${{env.COVERAGE_DIR}} diff --git a/.github/workflows/reusable_qemu.yml b/.github/workflows/reusable_qemu.yml new file mode 100644 index 000000000..257e90f62 --- /dev/null +++ b/.github/workflows/reusable_qemu.yml @@ -0,0 +1,155 @@ +# Builds project on qemu with custom hmat settings +name: Qemu + +on: + workflow_call: + inputs: + short_run: + description: Should the workflow run only basic tests? 
+ type: boolean + default: false + os: + description: List of OSes + type: string + default: '["ubuntu-24.04"]' + +permissions: + contents: read + +jobs: + qemu-build: + name: QEMU + strategy: + matrix: + os: ${{ fromJson(inputs.os) }} + + # Host QEMU on any Linux platform + runs-on: ubuntu-22.04 + + steps: + - name: Checkout UMF + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + with: + fetch-depth: 0 + path: umf + + - name: Enable KVM + run: | + echo 'KERNEL=="kvm", GROUP="kvm", MODE="0666", OPTIONS+="static_node=kvm"' | sudo tee /etc/udev/rules.d/99-kvm4all.rules + sudo udevadm control --reload-rules + sudo udevadm trigger --name-match=kvm + + - name: Install dependencies + run: | + sudo apt-get update + sudo apt-get install -y qemu-system genisoimage qemu-utils \ + libvirt-clients libvirt-daemon-system libvirt-daemon virtinst bridge-utils + + # Latest distros do not allow global pip installation + - name: Install Python requirements in venv + run: | + python3 -m venv .venv + . 
.venv/bin/activate + echo "$PATH" >> $GITHUB_PATH + python3 -m pip install -r umf/scripts/qemu/requirements.txt + + - name: Add user to kvm group + run: sudo usermod -a -G kvm,libvirt $USER + + - name: Run ssh-keygen + run: ssh-keygen -b 4096 -N '' -f ~/.ssh/id_rsa + + - name: Generate iso with user info + run: | + pub_key=$(cat ~/.ssh/id_rsa.pub) + + cat > user-data << EOF + #cloud-config + + # Add a 'testuser' user to the system with a password + users: + - default + - name: testuser + gecos: Test User + primary_group: wheel + groups: users + sudo: ALL=(ALL) NOPASSWD:ALL + lock_passwd: false + ssh-authorized-keys: + - $pub_key + shell: /usr/bin/bash + + # Set local logins + chpasswd: + list: | + root:password + testuser:password + expire: False + EOF + + cat > meta-data << EOF + instance-id: qemu-test + local-hostname: qemu-test + EOF + + sudo -Sk genisoimage -output ubuntu-cloud-init.iso -volid cidata -joliet -rock ./user-data ./meta-data + + - name: Set vars if short run + if: ${{ inputs.short_run == true }} + run: | + echo "SHORT_RUN=true" >> $GITHUB_ENV + declare -a short_configs=("default.xml" "sock_2_var3.xml" "sock_4_var1_hmat.xml") + echo "CONFIG_OPTIONS=${short_configs[@]}" >> $GITHUB_ENV + + - name: Set vars if long run + if: ${{ inputs.short_run == false }} + run: | + echo "SHORT_RUN=false" >> $GITHUB_ENV + echo "CONFIG_OPTIONS=umf/scripts/qemu/configs/*.xml" >> $GITHUB_ENV + + - name: Download Ubuntu image + run: | + OS_VER=$(echo ${{matrix.os}} | cut -d'-' -f2) + wget https://cloud-images.ubuntu.com/releases/${OS_VER}/release/${{matrix.os}}-server-cloudimg-amd64.img -O qemu_image.img + + - name: Resize image + run: qemu-img resize ./qemu_image.img +4G + + - name: Build UMF in QEMU + run: | + umf/scripts/qemu/start_qemu.sh default.xml + + # Copy UMF repository's content into the home dir in QEMU + rsync -az -e "ssh -p 2222" ${{github.workspace}}/umf/ testuser@127.0.0.1:/home/testuser/ + ssh testuser@127.0.0.1 -p 2222 -t "sudo chown -R testuser:users 
/home/testuser" + + ssh testuser@127.0.0.1 -p 2222 -t "bash /home/testuser/scripts/qemu/run-build.sh COVERAGE" + # ssh may return non-zero error code on closing the connection in Ubuntu 22.04 + ssh testuser@127.0.0.1 -p 2222 -t "sudo shutdown -h now" || true + + - name: Run tests in QEMU + run: | + echo "Running tests for: ${CONFIG_OPTIONS}" + + for config_file in ${CONFIG_OPTIONS}; do + config_name=$(basename $config_file) + + while ps -aux | grep qemu-system-x86_64 | grep -q -v grep; do + echo "Waiting for QEMU to shut down..." + sleep 5 + done + + echo "### Testing ${config_name} ###" + umf/scripts/qemu/start_qemu.sh ${config_name} + + ssh testuser@127.0.0.1 -p 2222 -t "export SHORT_RUN=${SHORT_RUN} OS_FULL_NAME=${{matrix.os}} && /home/testuser/scripts/qemu/run-tests.sh COVERAGE ${config_name}" + scp -r -P 2222 testuser@127.0.0.1:/home/testuser/coverage ./ + # ssh may return non-zero error code on closing the connection in Ubuntu 22.04 + ssh testuser@127.0.0.1 -p 2222 -t "sudo shutdown -h now" || true + done + ls -al ./coverage + + - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + with: + name: exports-coverage-qemu-${{matrix.os}} + path: coverage diff --git a/.github/workflows/sanitizers.yml b/.github/workflows/reusable_sanitizers.yml similarity index 91% rename from .github/workflows/sanitizers.yml rename to .github/workflows/reusable_sanitizers.yml index 2ca712543..3acda6833 100644 --- a/.github/workflows/sanitizers.yml +++ b/.github/workflows/reusable_sanitizers.yml @@ -55,6 +55,7 @@ jobs: -DCMAKE_C_COMPILER=${{matrix.compiler.c}} -DCMAKE_CXX_COMPILER=${{matrix.compiler.cxx}} -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON + -DUMF_BUILD_CUDA_PROVIDER=ON -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON @@ -66,12 +67,17 @@ jobs: -DUMF_TESTS_FAIL_ON_SKIP=ON - name: Build UMF - run: cmake --build ${{env.BUILD_DIR}} -j $(nproc) + run: | + ${{ matrix.compiler.cxx == 'icpx' && '. 
/opt/intel/oneapi/setvars.sh' || true }} + cmake --build ${{env.BUILD_DIR}} -j $(nproc) - name: Run tests working-directory: ${{env.BUILD_DIR}} - run: > - ${{ matrix.compiler.cxx == 'icpx' && '. /opt/intel/oneapi/setvars.sh &&' || ''}} + env: + ASAN_OPTIONS: allocator_may_return_null=1 + TSAN_OPTIONS: allocator_may_return_null=1 + run: | + ${{ matrix.compiler.cxx == 'icpx' && '. /opt/intel/oneapi/setvars.sh' || true }} ctest --output-on-failure windows-build: @@ -130,6 +136,7 @@ jobs: -DUMF_USE_ASAN=${{matrix.sanitizers.asan}} -DUMF_BUILD_EXAMPLES=ON -DUMF_BUILD_LEVEL_ZERO_PROVIDER=OFF + -DUMF_BUILD_CUDA_PROVIDER=OFF -DUMF_TESTS_FAIL_ON_SKIP=ON - name: Build UMF @@ -137,4 +144,7 @@ jobs: - name: Run tests working-directory: ${{env.BUILD_DIR}} + env: + ASAN_OPTIONS: allocator_may_return_null=1 + TSAN_OPTIONS: allocator_may_return_null=1 run: ctest -C Debug --output-on-failure diff --git a/.github/workflows/trivy.yml b/.github/workflows/reusable_trivy.yml similarity index 66% rename from .github/workflows/trivy.yml rename to .github/workflows/reusable_trivy.yml index 1c3e63120..c10229276 100644 --- a/.github/workflows/trivy.yml +++ b/.github/workflows/reusable_trivy.yml @@ -1,22 +1,7 @@ # Runs linter for Docker files name: Trivy -# Due to lower score on Scorecard we're running this separately from -# "PR/push" workflow. For some reason permissions weren't properly set -# or recognized (by Scorecard). If Scorecard changes its behavior we can -# use 'workflow_call' trigger. 
-on: - push: - branches-ignore: - - 'dependabot/**' - pull_request: - paths: - - '.github/docker/*Dockerfile' - - '.github/workflows/trivy.yml' - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true +on: workflow_call permissions: contents: read @@ -24,7 +9,7 @@ permissions: jobs: trivy: name: Trivy - runs-on: ubuntu-latest + runs-on: ${{ github.repository_owner == 'oneapi-src' && 'intel-ubuntu-22.04' || 'ubuntu-latest' }} permissions: security-events: write diff --git a/.github/workflows/valgrind.yml b/.github/workflows/reusable_valgrind.yml similarity index 67% rename from .github/workflows/valgrind.yml rename to .github/workflows/reusable_valgrind.yml index 53569385e..86ceb68c6 100644 --- a/.github/workflows/valgrind.yml +++ b/.github/workflows/reusable_valgrind.yml @@ -1,3 +1,4 @@ +# Run tests with valgrind intstrumentation tools: memcheck, drd, helgrind name: Valgrind on: workflow_call @@ -8,10 +9,6 @@ permissions: jobs: valgrind: name: Valgrind - strategy: - fail-fast: false - matrix: - tool: ['memcheck', 'drd', 'helgrind'] runs-on: ubuntu-latest steps: @@ -35,11 +32,18 @@ jobs: -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON -DUMF_BUILD_LEVEL_ZERO_PROVIDER=OFF + -DUMF_BUILD_CUDA_PROVIDER=OFF -DUMF_USE_VALGRIND=1 -DUMF_TESTS_FAIL_ON_SKIP=ON - name: Build run: cmake --build ${{github.workspace}}/build --config Debug -j$(nproc) - - name: Run tests under valgrind - run: ${{github.workspace}}/test/test_valgrind.sh ${{github.workspace}} ${{github.workspace}}/build ${{matrix.tool}} + - name: Run tests with 'memcheck' + run: ${{github.workspace}}/test/test_valgrind.sh ${{github.workspace}} ${{github.workspace}}/build memcheck + + - name: Run tests with 'drd' + run: ${{github.workspace}}/test/test_valgrind.sh ${{github.workspace}} ${{github.workspace}}/build drd + + - name: Run tests with 'helgrind' + run: ${{github.workspace}}/test/test_valgrind.sh ${{github.workspace}} 
${{github.workspace}}/build helgrind diff --git a/.github/workflows/spellcheck.yml b/.github/workflows/spellcheck.yml deleted file mode 100644 index 07265fc17..000000000 --- a/.github/workflows/spellcheck.yml +++ /dev/null @@ -1,23 +0,0 @@ -# Checks spelling issues in the repo -name: SpellCheck - -on: workflow_call - -permissions: - contents: read - -jobs: - analyze: - name: Run spell check - runs-on: ubuntu-latest - - steps: - - name: Checkout - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - with: - fetch-depth: 0 - - - name: Run a spell check - uses: crate-ci/typos@b63f421581dce830bda2f597a678cb7776b41877 # v1.18.2 - with: - config: ./.github/workflows/.spellcheck-conf.toml diff --git a/.mailmap b/.mailmap new file mode 100644 index 000000000..de589da03 --- /dev/null +++ b/.mailmap @@ -0,0 +1,7 @@ +Rafał Rudnicki +Igor Chorążewicz + <83662296+szadam@users.noreply.github.com> +Adam Szopiński +Adam Szopiński Adam +Adam Szopiński szadam +Sergei Vinogradov diff --git a/CMakeLists.txt b/CMakeLists.txt index cb4045776..5614684bd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -36,6 +36,7 @@ find_package(PkgConfig) # Build Options option(UMF_BUILD_SHARED_LIBRARY "Build UMF as shared library" OFF) option(UMF_BUILD_LEVEL_ZERO_PROVIDER "Build Level Zero memory provider" ON) +option(UMF_BUILD_CUDA_PROVIDER "Build CUDA memory provider" ON) option(UMF_BUILD_LIBUMF_POOL_DISJOINT "Build the libumf_pool_disjoint static library" OFF) option(UMF_BUILD_LIBUMF_POOL_JEMALLOC @@ -48,6 +49,10 @@ option(UMF_BUILD_EXAMPLES "Build UMF examples" ON) option(UMF_BUILD_FUZZTESTS "Build UMF fuzz tests" OFF) option(UMF_BUILD_GPU_EXAMPLES "Build UMF GPU examples" OFF) option(UMF_DEVELOPER_MODE "Enable additional developer checks" OFF) +option( + UMF_DISABLE_HWLOC + "Disable hwloc and UMF features requiring it (OS provider, memtargets, topology discovery)" + OFF) option( UMF_LINK_HWLOC_STATICALLY "Link UMF with HWLOC library statically (supported for Linux, MacOS 
and Release build on Windows)" @@ -58,6 +63,12 @@ option(UMF_FORMAT_CODE_STYLE set(UMF_HWLOC_NAME "hwloc" CACHE STRING "Custom name for hwloc library w/o extension") +set(UMF_INSTALL_RPATH + "" + CACHE + STRING + "Set the runtime search path to the directory with dependencies (e.g. hwloc)" +) # Only a part of skips is treated as a failure now. TODO: extend to all tests option(UMF_TESTS_FAIL_ON_SKIP "Treat skips in tests as fail" OFF) @@ -66,11 +77,7 @@ option(UMF_USE_UBSAN "Enable UndefinedBehaviorSanitizer checks" OFF) option(UMF_USE_TSAN "Enable ThreadSanitizer checks" OFF) option(UMF_USE_MSAN "Enable MemorySanitizer checks" OFF) option(UMF_USE_VALGRIND "Enable Valgrind instrumentation" OFF) -option(UMF_USE_GCOV "Enable gcov support" OFF) -option( - UMF_DISABLE_HWLOC - "Disable features that requires hwloc (OS provider, memory targets, topolgy discovery)" - OFF) +option(UMF_USE_COVERAGE "Build with coverage enabled (Linux only)" OFF) # set UMF_PROXY_LIB_BASED_ON_POOL to one of: SCALABLE or JEMALLOC set(KNOWN_PROXY_LIB_POOLS SCALABLE JEMALLOC) @@ -104,16 +111,19 @@ else() message(FATAL_ERROR "Unknown OS type") endif() -if(NOT DEFINED UMF_HWLOC_REPO) - set(UMF_HWLOC_REPO "https://github.com/open-mpi/hwloc.git") -endif() +if(UMF_DISABLE_HWLOC) + message(STATUS "hwloc is disabled, hence OS provider, memtargets, " + "topology discovery, examples won't be available!") +else() + if(NOT DEFINED UMF_HWLOC_REPO) + set(UMF_HWLOC_REPO "https://github.com/open-mpi/hwloc.git") + endif() -if(NOT DEFINED UMF_HWLOC_TAG) - set(UMF_HWLOC_TAG hwloc-2.10.0) -endif() + if(NOT DEFINED UMF_HWLOC_TAG) + set(UMF_HWLOC_TAG hwloc-2.10.0) + endif() -if(NOT UMF_LINK_HWLOC_STATICALLY) - if(NOT UMF_DISABLE_HWLOC) + if(NOT UMF_LINK_HWLOC_STATICALLY) pkg_check_modules(LIBHWLOC hwloc>=2.3.0) if(NOT LIBHWLOC_FOUND) find_package(LIBHWLOC 2.3.0 REQUIRED hwloc) @@ -122,104 +132,155 @@ if(NOT UMF_LINK_HWLOC_STATICALLY) # add PATH to DLL on Windows set(DLL_PATH_LIST 
"${DLL_PATH_LIST};PATH=path_list_append:${LIBHWLOC_DLL_DIRS}") - endif() - # add PATH to DLL on Windows - set(DLL_PATH_LIST - "${DLL_PATH_LIST};PATH=path_list_append:${LIBHWLOC_DLL_DIRS}") -elseif(WINDOWS AND NOT UMF_DISABLE_HWLOC) - include(FetchContent) - set(HWLOC_ENABLE_TESTING OFF) - set(HWLOC_SKIP_LSTOPO ON) - set(HWLOC_SKIP_TOOLS ON) - set(HWLOC_PATCH - git - apply - ${PROJECT_SOURCE_DIR}/cmake/fix_coverity_issues.patch - || - (exit 0)) - - message(STATUS "Will fetch hwloc from ${UMF_HWLOC_REPO}") + elseif(WINDOWS) + include(FetchContent) + set(HWLOC_ENABLE_TESTING OFF) + set(HWLOC_SKIP_LSTOPO ON) + set(HWLOC_SKIP_TOOLS ON) - FetchContent_Declare( - hwloc_targ - GIT_REPOSITORY ${UMF_HWLOC_REPO} - GIT_TAG ${UMF_HWLOC_TAG} - PATCH_COMMAND ${HWLOC_PATCH} SOURCE_SUBDIR contrib/windows-cmake/ - FIND_PACKAGE_ARGS) - - FetchContent_GetProperties(hwloc_targ) - if(NOT hwloc_targ_POPULATED) + message( + STATUS + "Will fetch hwloc from ${UMF_HWLOC_REPO} (tag: ${UMF_HWLOC_TAG})" + ) + + FetchContent_Declare( + hwloc_targ + GIT_REPOSITORY ${UMF_HWLOC_REPO} + GIT_TAG ${UMF_HWLOC_TAG} + SOURCE_SUBDIR contrib/windows-cmake/ FIND_PACKAGE_ARGS) FetchContent_MakeAvailable(hwloc_targ) - endif() - set(LIBHWLOC_INCLUDE_DIRS - ${hwloc_targ_SOURCE_DIR}/include;${hwloc_targ_BINARY_DIR}/include) - set(LIBHWLOC_LIBRARY_DIRS - ${hwloc_targ_BINARY_DIR}/Release;${hwloc_targ_BINARY_DIR}/Debug) + set(LIBHWLOC_INCLUDE_DIRS + ${hwloc_targ_SOURCE_DIR}/include;${hwloc_targ_BINARY_DIR}/include) + set(LIBHWLOC_LIBRARY_DIRS + ${hwloc_targ_BINARY_DIR}/Release;${hwloc_targ_BINARY_DIR}/Debug) + else() + include(FetchContent) + message( + STATUS + "Will fetch hwloc from ${UMF_HWLOC_REPO} (tag: ${UMF_HWLOC_TAG})" + ) + + FetchContent_Declare( + hwloc_targ + GIT_REPOSITORY ${UMF_HWLOC_REPO} + GIT_TAG ${UMF_HWLOC_TAG}) + FetchContent_MakeAvailable(hwloc_targ) + + add_custom_command( + COMMAND ./autogen.sh + WORKING_DIRECTORY ${hwloc_targ_SOURCE_DIR} + OUTPUT ${hwloc_targ_SOURCE_DIR}/configure) + 
add_custom_command( + COMMAND + ./configure --prefix=${hwloc_targ_BINARY_DIR} + --enable-static=yes --enable-shared=no --disable-libxml2 + --disable-pci --disable-levelzero --disable-opencl + --disable-cuda --disable-nvml --disable-libudev --disable-rsmi + CFLAGS=-fPIC CXXFLAGS=-fPIC + WORKING_DIRECTORY ${hwloc_targ_SOURCE_DIR} + OUTPUT ${hwloc_targ_SOURCE_DIR}/Makefile + DEPENDS ${hwloc_targ_SOURCE_DIR}/configure) + add_custom_command( + COMMAND make + WORKING_DIRECTORY ${hwloc_targ_SOURCE_DIR} + OUTPUT ${hwloc_targ_SOURCE_DIR}/lib/libhwloc.la + DEPENDS ${hwloc_targ_SOURCE_DIR}/Makefile) + add_custom_command( + COMMAND make install + WORKING_DIRECTORY ${hwloc_targ_SOURCE_DIR} + OUTPUT ${hwloc_targ_BINARY_DIR}/lib/libhwloc.a + DEPENDS ${hwloc_targ_SOURCE_DIR}/lib/libhwloc.la) + + add_custom_target(hwloc_prod + DEPENDS ${hwloc_targ_BINARY_DIR}/lib/libhwloc.a) + add_library(hwloc INTERFACE) + target_link_libraries(hwloc + INTERFACE ${hwloc_targ_BINARY_DIR}/lib/libhwloc.a) + add_dependencies(hwloc hwloc_prod) + + set(LIBHWLOC_LIBRARY_DIRS ${hwloc_targ_BINARY_DIR}/lib) + set(LIBHWLOC_INCLUDE_DIRS ${hwloc_targ_BINARY_DIR}/include) + set(LIBHWLOC_LIBRARIES ${hwloc_targ_BINARY_DIR}/lib/libhwloc.a) + endif() message(STATUS " LIBHWLOC_LIBRARIES = ${LIBHWLOC_LIBRARIES}") message(STATUS " LIBHWLOC_INCLUDE_DIRS = ${LIBHWLOC_INCLUDE_DIRS}") message(STATUS " LIBHWLOC_LIBRARY_DIRS = ${LIBHWLOC_LIBRARY_DIRS}") -elseif(NOT UMF_DISABLE_HWLOC) +endif() + +if(hwloc_targ_SOURCE_DIR) + # apply security patch for HWLOC + execute_process( + COMMAND git apply ${PROJECT_SOURCE_DIR}/cmake/fix_coverity_issues.patch + WORKING_DIRECTORY ${hwloc_targ_SOURCE_DIR} + OUTPUT_VARIABLE UMF_HWLOC_PATCH_OUTPUT + ERROR_VARIABLE UMF_HWLOC_PATCH_ERROR) + + if(UMF_HWLOC_PATCH_OUTPUT) + message(STATUS "HWLOC patch command output:\n${UMF_HWLOC_PATCH_OUTPUT}") + endif() + if(UMF_HWLOC_PATCH_ERROR) + message(WARNING "HWLOC patch command output:\n${UMF_HWLOC_PATCH_ERROR}") + endif() +endif() + +# Fetch L0 loader 
only if needed i.e.: if building L0 provider is ON and L0 +# headers are not provided by the user (via setting UMF_LEVEL_ZERO_INCLUDE_DIR). +if(UMF_BUILD_LEVEL_ZERO_PROVIDER AND (NOT UMF_LEVEL_ZERO_INCLUDE_DIR)) include(FetchContent) - set(HWLOC_PATCH - git - apply - ${PROJECT_SOURCE_DIR}/cmake/fix_coverity_issues.patch - || - (exit 0)) - message(STATUS "Will fetch hwloc from ${UMF_HWLOC_REPO}") + set(LEVEL_ZERO_LOADER_REPO "https://github.com/oneapi-src/level-zero.git") + set(LEVEL_ZERO_LOADER_TAG v1.19.2) - FetchContent_Declare( - hwloc_targ - GIT_REPOSITORY ${UMF_HWLOC_REPO} - GIT_TAG ${UMF_HWLOC_TAG} - PATCH_COMMAND ${HWLOC_PATCH}) + message( + STATUS + "Fetching L0 loader (${LEVEL_ZERO_LOADER_TAG}) from ${LEVEL_ZERO_LOADER_REPO} ..." + ) - FetchContent_GetProperties(hwloc_targ) - if(NOT hwloc_targ_POPULATED) - FetchContent_MakeAvailable(hwloc_targ) - endif() + FetchContent_Declare( + level-zero-loader + GIT_REPOSITORY ${LEVEL_ZERO_LOADER_REPO} + GIT_TAG ${LEVEL_ZERO_LOADER_TAG} + EXCLUDE_FROM_ALL) + FetchContent_MakeAvailable(level-zero-loader) + + set(LEVEL_ZERO_INCLUDE_DIRS + ${level-zero-loader_SOURCE_DIR}/include + CACHE PATH "Path to Level Zero Headers") + message(STATUS "Level Zero include directory: ${LEVEL_ZERO_INCLUDE_DIRS}") +elseif(UMF_BUILD_LEVEL_ZERO_PROVIDER) + # Only header is needed to build UMF + set(LEVEL_ZERO_INCLUDE_DIRS ${UMF_LEVEL_ZERO_INCLUDE_DIR}) + message(STATUS "Level Zero include directory: ${LEVEL_ZERO_INCLUDE_DIRS}") +endif() - add_custom_command( - COMMAND ./autogen.sh - WORKING_DIRECTORY ${hwloc_targ_SOURCE_DIR} - OUTPUT ${hwloc_targ_SOURCE_DIR}/configure) - add_custom_command( - COMMAND - ./configure --prefix=${hwloc_targ_BINARY_DIR} --enable-static=yes - --enable-shared=no --disable-libxml2 --disable-pciaccess - --disable-levelzero --disable-opencl --disable-cuda --disable-nvml - CFLAGS=-fPIC CXXFLAGS=-fPIC - WORKING_DIRECTORY ${hwloc_targ_SOURCE_DIR} - OUTPUT ${hwloc_targ_SOURCE_DIR}/Makefile - DEPENDS 
${hwloc_targ_SOURCE_DIR}/configure) - add_custom_command( - COMMAND make - WORKING_DIRECTORY ${hwloc_targ_SOURCE_DIR} - OUTPUT ${hwloc_targ_SOURCE_DIR}/lib/libhwloc.la - DEPENDS ${hwloc_targ_SOURCE_DIR}/Makefile) - add_custom_command( - COMMAND make install - WORKING_DIRECTORY ${hwloc_targ_SOURCE_DIR} - OUTPUT ${hwloc_targ_BINARY_DIR}/lib/libhwloc.a - DEPENDS ${hwloc_targ_SOURCE_DIR}/lib/libhwloc.la) +# Fetch CUDA only if needed i.e.: if building CUDA provider is ON and CUDA +# headers are not provided by the user (via setting UMF_CUDA_INCLUDE_DIR). +if(UMF_BUILD_CUDA_PROVIDER AND (NOT UMF_CUDA_INCLUDE_DIR)) + include(FetchContent) - add_custom_target(hwloc_prod - DEPENDS ${hwloc_targ_BINARY_DIR}/lib/libhwloc.a) - add_library(hwloc INTERFACE) - target_link_libraries(hwloc - INTERFACE ${hwloc_targ_BINARY_DIR}/lib/libhwloc.a) - add_dependencies(hwloc hwloc_prod) + set(CUDA_REPO + "https://gitlab.com/nvidia/headers/cuda-individual/cudart.git") + set(CUDA_TAG cuda-12.5.1) - set(LIBHWLOC_LIBRARY_DIRS ${hwloc_targ_BINARY_DIR}/lib) - set(LIBHWLOC_INCLUDE_DIRS ${hwloc_targ_BINARY_DIR}/include) - set(LIBHWLOC_LIBRARIES ${hwloc_targ_BINARY_DIR}/lib/libhwloc.a) + message(STATUS "Fetching CUDA ${CUDA_TAG} from ${CUDA_REPO} ...") - message(STATUS " LIBHWLOC_LIBRARIES = ${LIBHWLOC_LIBRARIES}") - message(STATUS " LIBHWLOC_INCLUDE_DIRS = ${LIBHWLOC_INCLUDE_DIRS}") - message(STATUS " LIBHWLOC_LIBRARY_DIRS = ${LIBHWLOC_LIBRARY_DIRS}") + FetchContent_Declare( + cuda-headers + GIT_REPOSITORY ${CUDA_REPO} + GIT_TAG ${CUDA_TAG} + EXCLUDE_FROM_ALL) + FetchContent_MakeAvailable(cuda-headers) + + set(CUDA_INCLUDE_DIRS + ${cuda-headers_SOURCE_DIR} + CACHE PATH "Path to CUDA headers") + message(STATUS "CUDA include directory: ${CUDA_INCLUDE_DIRS}") +elseif(UMF_BUILD_CUDA_PROVIDER) + # Only header is needed to build UMF + set(CUDA_INCLUDE_DIRS ${UMF_CUDA_INCLUDE_DIR}) + message(STATUS "CUDA include directory: ${CUDA_INCLUDE_DIRS}") endif() # This build type check is not possible on Windows 
when CMAKE_BUILD_TYPE is not @@ -259,7 +320,7 @@ endif() # compiler is required. Moreover, if these options are not set, CMake will set # up a strict C build, without C++ support. set(OPTIONS_REQUIRING_CXX "UMF_BUILD_TESTS" "UMF_BUILD_LIBUMF_POOL_DISJOINT" - "UMF_BUILD_BENCHMARKS_MT") + "UMF_BUILD_BENCHMARKS_MT" "UMF_BUILD_BENCHMARKS") foreach(option_name ${OPTIONS_REQUIRING_CXX}) if(${option_name}) enable_language(CXX) @@ -272,17 +333,11 @@ endforeach() set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) set(CMAKE_UMF_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) -if(MSVC) +if(CMAKE_GENERATOR MATCHES "Visual Studio" OR CMAKE_GENERATOR MATCHES + "Ninja Multi-Config") set(CMAKE_UMF_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin/$) endif() -# Define a path for custom commands to work around MSVC -set(CUSTOM_COMMAND_BINARY_DIR ${CMAKE_UMF_OUTPUT_DIRECTORY}) -if(MSVC) - # MSVC implicitly adds $ to the output path - set(CUSTOM_COMMAND_BINARY_DIR ${CUSTOM_COMMAND_BINARY_DIR}/$) -endif() - # Sanitizer flags if(UMF_USE_ASAN) add_sanitizer_flag(address) @@ -306,8 +361,7 @@ if(UMF_BUILD_FUZZTESTS add_link_options("-fsanitize=fuzzer-no-link") endif() -# A header only library to specify include directories in transitive -# dependencies. +# A header-only lib to specify include directories in transitive dependencies add_library(umf_headers INTERFACE) # Alias target to support FetchContent. 
@@ -353,12 +407,14 @@ if(UMF_BUILD_LIBUMF_POOL_JEMALLOC) if(NOT JEMALLOC_FOUND) find_package(JEMALLOC REQUIRED jemalloc) endif() - # add PATH to DLL on Windows - set(DLL_PATH_LIST - "${DLL_PATH_LIST};PATH=path_list_append:${JEMALLOC_DLL_DIRS}") + if(JEMALLOC_FOUND OR JEMALLOC_LIBRARIES) + set(UMF_POOL_JEMALLOC_ENABLED TRUE) + # add PATH to DLL on Windows + set(DLL_PATH_LIST + "${DLL_PATH_LIST};PATH=path_list_append:${JEMALLOC_DLL_DIRS}") + endif() endif() -# set UMF_PROXY_LIB_ENABLED if(WINDOWS) # TODO: enable the proxy library in the Debug build on Windows # @@ -377,7 +433,17 @@ if(WINDOWS) ) endif() endif() -if(UMF_PROXY_LIB_BASED_ON_POOL STREQUAL SCALABLE) + +# set UMF_PROXY_LIB_ENABLED +if(UMF_DISABLE_HWLOC) + message(STATUS "Disabling the proxy library, because HWLOC is disabled") +elseif(NOT UMF_BUILD_SHARED_LIBRARY) + # TODO enable this scenario + message( + STATUS + "Disabling the proxy library, because UMF is built as static library" + ) +elseif(UMF_PROXY_LIB_BASED_ON_POOL STREQUAL SCALABLE) if(UMF_POOL_SCALABLE_ENABLED) set(UMF_PROXY_LIB_ENABLED ON) set(PROXY_LIB_USES_SCALABLE_POOL ON) @@ -406,16 +472,16 @@ else() ) endif() -set(UMF_OPTIONAL_SYMBOLS_LINUX "") -set(UMF_OPTIONAL_SYMBOLS_WINDOWS "") - -# Conditional configuration for Level Zero provider -if(UMF_BUILD_LEVEL_ZERO_PROVIDER) - add_optional_symbol(umfLevelZeroMemoryProviderOps) -endif() - -if(NOT UMF_DISABLE_HWLOC) - add_optional_symbol(umfOsMemoryProviderOps) +if((UMF_BUILD_GPU_TESTS OR UMF_BUILD_GPU_EXAMPLES) AND UMF_BUILD_CUDA_PROVIDER) + find_package(CUDA REQUIRED cuda) + if(CUDA_LIBRARIES) + set(UMF_CUDA_ENABLED TRUE) + else() + message( + STATUS "Disabling tests and examples that use the CUDA provider " + "because the CUDA libraries they require were not found.") + endif() + # TODO do the same for ze_loader endif() add_subdirectory(src) @@ -428,12 +494,8 @@ if(UMF_BUILD_BENCHMARKS) add_subdirectory(benchmark) endif() -if(UMF_BUILD_EXAMPLES) - if(NOT UMF_DISABLE_HWLOC) - 
add_subdirectory(examples) - else() - message(WARNING "Examples cannot be build - hwloc disabled") - endif() +if(UMF_BUILD_EXAMPLES AND NOT UMF_DISABLE_HWLOC) + add_subdirectory(examples) endif() if(UMF_FORMAT_CODE_STYLE) @@ -566,12 +628,12 @@ if(UMF_FORMAT_CODE_STYLE) add_custom_target( black-format-check - COMMAND ${BLACK} --check --verbose ${CMAKE_SOURCE_DIR} + COMMAND ${BLACK} --check --verbose ${UMF_CMAKE_SOURCE_DIR} COMMENT "Check Python files formatting using black formatter") add_custom_target( black-format-apply - COMMAND ${BLACK} ${CMAKE_SOURCE_DIR} + COMMAND ${BLACK} ${UMF_CMAKE_SOURCE_DIR} COMMENT "Format Python files using black formatter") endif() diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index e350cd8d0..cd4a2a790 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -89,14 +89,16 @@ $ cmake --build build --target format-apply # Remember to review introduced changes ``` -If you wish to use only `clang-format`, or `cmake-format`, or `black`, you can execute the corresponding -`clang-format-check` and `clang-format-apply` for C/C++ source files, or `cmake-format-check` and -`cmake-format-apply` for CMake files, or `black-format-check` and `black-format-apply` for Python -source files, respectively. +**NOTE**: The `format-check` and `format-apply` targets are only available if all of `clang-format`, +`cmake-format` and `black` are installed. Otherwise you can use them separately with: +- `clang-format-check` and `clang-format-apply` for C/C++ source files +- `cmake-format-check` and `cmake-format-apply` for CMake files +- `black-format-check` and `black-format-apply` for Python source files **NOTE**: We use specific versions of formatting tools to ensure consistency across the project. The required versions are: - clang-format version **15.0**, which can be installed with the command: `python -m pip install clang-format==15.0.7`. - cmake-format version **0.6**, which can be installed with the command: `python -m pip install cmake-format==0.6.13`. 
+- black (no specific version required), which can be installed with the command: `python -m pip install black`. Please ensure you have these specific versions installed before contributing to the project. @@ -207,16 +209,16 @@ origin: https://dependency_origin.com ## Code coverage After adding a new functionality add tests and check coverage before and after the change. -To do this, enable coverage instrumentation by turning on the UMF_USE_GCOV flag in CMake. +To do this, enable coverage instrumentation by turning on the UMF_USE_COVERAGE flag in CMake. Coverage instrumentation feature is supported only by GCC and Clang. An example flow might look like the following: ```bash -$ cmake -B build -DUMF_USE_GCOV=1 -DCMAKE_BUILD_TYPE=Debug +$ cmake -B build -DUMF_USE_COVERAGE=1 -DCMAKE_BUILD_TYPE=Debug $ cmake --build build -j $ cd build $ ctest $ apt install lcov $ lcov --capture --directory . --output-file coverage.info $ genhtml -o html_report coverage.info -``` \ No newline at end of file +``` diff --git a/ChangeLog b/ChangeLog index 867e59f0f..75b69fdeb 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,26 @@ +Mon Dec 09 2024 Łukasz Stolarczuk + + * Version 0.10.0 + + In this release we introduced updates in several areas, listed below. + We still don't yet guarantee a fully stable API, though. + With new parameters' API we broke the compatibility, as we no longer + support direct access to UMF params via (now internal) structures. 
+ + Significant updates: + - updated Level Zero Provider + - new API to handle UMF parameters (replacing previous struct's) + - extended IPC API testing + - new Memtarget and Memspace API + + Minor updates: + - multiple fixes in the source code + - extended code coverage reporting + - improved CI and testing + - new examples + - extended logging + - yet more fixes in the building system + Thu Sep 12 2024 Łukasz Stolarczuk * Version 0.9.0 diff --git a/README.md b/README.md index 528fe9c8b..6f1233c63 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,12 @@ # Unified Memory Framework -[![Basic builds](https://github.com/oneapi-src/unified-memory-framework/actions/workflows/basic.yml/badge.svg?branch=main)](https://github.com/oneapi-src/unified-memory-framework/actions/workflows/basic.yml) -[![CodeQL](https://github.com/oneapi-src/unified-memory-framework/actions/workflows/codeql.yml/badge.svg?branch=main)](https://github.com/oneapi-src/unified-memory-framework/actions/workflows/codeql.yml) -[![SpellCheck](https://github.com/oneapi-src/unified-memory-framework/actions/workflows/spellcheck.yml/badge.svg?branch=main)](https://github.com/oneapi-src/unified-memory-framework/actions/workflows/spellcheck.yml) +[![PR/push](https://github.com/oneapi-src/unified-memory-framework/actions/workflows/pr_push.yml/badge.svg?branch=main&event=push)](https://github.com/oneapi-src/unified-memory-framework/actions/workflows/pr_push.yml) +[![Coverage](https://gist.githubusercontent.com/bb-ur/3f66c77d7035df39aa75dda8a2ac75b3/raw/umf_coverage_badge.svg)](https://github.com/oneapi-src/unified-memory-framework/actions/workflows/pr_push.yml?query=branch%3Amain) [![GitHubPages](https://github.com/oneapi-src/unified-memory-framework/actions/workflows/docs.yml/badge.svg?branch=main)](https://github.com/oneapi-src/unified-memory-framework/actions/workflows/docs.yml) 
-[![Benchmarks](https://github.com/oneapi-src/unified-memory-framework/actions/workflows/benchmarks.yml/badge.svg?branch=main)](https://github.com/oneapi-src/unified-memory-framework/actions/workflows/benchmarks.yml) [![Nightly](https://github.com/oneapi-src/unified-memory-framework/actions/workflows/nightly.yml/badge.svg?branch=main)](https://github.com/oneapi-src/unified-memory-framework/actions/workflows/nightly.yml) -[![OpenSSF Scorecard](https://api.securityscorecards.dev/projects/github.com/oneapi-src/unified-memory-framework/badge)](https://securityscorecards.dev/viewer/?uri=github.com/oneapi-src/unified-memory-framework) [![Coverity build](https://github.com/oneapi-src/unified-memory-framework/actions/workflows/coverity.yml/badge.svg?branch=main)](https://github.com/oneapi-src/unified-memory-framework/actions/workflows/coverity.yml) [![Coverity report](https://scan.coverity.com/projects/29761/badge.svg?flat=0)](https://scan.coverity.com/projects/oneapi-src-unified-memory-framework) -[![Bandit](https://github.com/oneapi-src/unified-memory-framework/actions/workflows/bandit.yml/badge.svg?branch=main)](https://github.com/oneapi-src/unified-memory-framework/actions/workflows/bandit.yml) +[![OpenSSF Scorecard](https://api.securityscorecards.dev/projects/github.com/oneapi-src/unified-memory-framework/badge)](https://securityscorecards.dev/viewer/?uri=github.com/oneapi-src/unified-memory-framework) ## Introduction @@ -20,10 +17,11 @@ The Unified Memory Framework (UMF) is a library for constructing allocators and For a quick introduction to UMF usage, please see [examples](https://oneapi-src.github.io/unified-memory-framework/examples.html) documentation, which includes the code of the -[basic example](https://github.com/oneapi-src/unified-memory-framework/blob/main/examples/basic/basic.c) -and the more advanced one that allocates -[USM memory from the GPU device](https://github.com/oneapi-src/unified-memory-framework/blob/main/examples/basic/gpu_shared_memory.c) 
-using the Level Zero API and UMF Level Zero memory provider. +[basic example](https://github.com/oneapi-src/unified-memory-framework/blob/main/examples/basic/basic.c). +There are also more advanced examples that allocate USM memory from the +[Level Zero device](https://github.com/oneapi-src/unified-memory-framework/blob/main/examples/level_zero_shared_memory/level_zero_shared_memory.c) +using the Level Zero API and UMF Level Zero memory provider and [CUDA device](https://github.com/oneapi-src/unified-memory-framework/blob/main/examples/cuda_shared_memory/cuda_shared_memory.c) +using the CUDA API and UMF CUDA memory provider. ## Build @@ -48,23 +46,22 @@ For Level Zero memory provider tests: ### Linux -Executable and binaries will be in **build/bin** +Executable and binaries will be in **build/bin**. +The `{build_config}` can be either `Debug` or `Release`. ```bash -$ mkdir build -$ cd build -$ cmake {path_to_source_dir} -$ make +$ cmake -B build -DCMAKE_BUILD_TYPE={build_config} +$ cmake --build build -j $(nproc) ``` ### Windows -Generating Visual Studio Project. EXE and binaries will be in **build/bin/{build_config}** +Generating Visual Studio Project. EXE and binaries will be in **build/bin/{build_config}**. +The `{build_config}` can be either `Debug` or `Release`. 
```bash -$ mkdir build -$ cd build -$ cmake {path_to_source_dir} -G "Visual Studio 15 2017 Win64" +$ cmake -B build -G "Visual Studio 15 2017 Win64" +$ cmake --build build --config {build_config} -j $Env:NUMBER_OF_PROCESSORS ``` ### Benchmark @@ -102,6 +99,7 @@ List of options provided by CMake: | - | - | - | - | | UMF_BUILD_SHARED_LIBRARY | Build UMF as shared library | ON/OFF | OFF | | UMF_BUILD_LEVEL_ZERO_PROVIDER | Build Level Zero memory provider | ON/OFF | ON | +| UMF_BUILD_CUDA_PROVIDER | Build CUDA memory provider | ON/OFF | ON | | UMF_BUILD_LIBUMF_POOL_DISJOINT | Build the libumf_pool_disjoint static library | ON/OFF | OFF | | UMF_BUILD_LIBUMF_POOL_JEMALLOC | Build the libumf_pool_jemalloc static library | ON/OFF | OFF | | UMF_BUILD_TESTS | Build UMF tests | ON/OFF | ON | @@ -118,8 +116,9 @@ List of options provided by CMake: | UMF_USE_TSAN | Enable ThreadSanitizer checks | ON/OFF | OFF | | UMF_USE_MSAN | Enable MemorySanitizer checks | ON/OFF | OFF | | UMF_USE_VALGRIND | Enable Valgrind instrumentation | ON/OFF | OFF | -| UMF_USE_GCOV | Enable gcov support (Linux only) | ON/OFF | OFF | +| UMF_USE_COVERAGE | Build with coverage enabled (Linux only) | ON/OFF | OFF | | UMF_LINK_HWLOC_STATICALLY | Link UMF with HWLOC library statically (Windows+Release only) | ON/OFF | OFF | +| UMF_DISABLE_HWLOC | Disable features that require hwloc (OS provider, memory targets, topology discovery) | ON/OFF | OFF | ## Architecture: memory pools and providers @@ -133,6 +132,13 @@ More detailed documentation is available here: https://oneapi-src.github.io/unif ### Memory providers +#### Coarse Provider + +A memory provider that can provide memory from: +1) a given pre-allocated buffer (the fixed-size memory provider option) or +2) from an additional upstream provider (e.g. provider that does not support the free() operation + like the File memory provider or the DevDax memory provider - see below). 
+ #### OS memory provider A memory provider that provides memory from an operating system. @@ -141,6 +147,9 @@ OS memory provider supports two types of memory mappings (set by the `visibility 1) private memory mapping (`UMF_MEM_MAP_PRIVATE`) 2) shared memory mapping (`UMF_MEM_MAP_SHARED` - supported on Linux only yet) +IPC API requires the `UMF_MEM_MAP_SHARED` memory `visibility` mode +(`UMF_RESULT_ERROR_INVALID_ARGUMENT` is returned otherwise). + There are available two mechanisms for the shared memory mapping: 1) a named shared memory object (used if the `shm_name` parameter is not NULL) or 2) an anonymous file descriptor (used if the `shm_name` parameter is NULL) @@ -172,6 +181,58 @@ Additionally, required for tests: 5) Required packages: - liblevel-zero-dev (Linux) or level-zero-sdk (Windows) +#### DevDax memory provider (Linux only) + +A memory provider that provides memory from a device DAX (a character device file /dev/daxX.Y). +It can be used when large memory mappings are needed. + +The DevDax memory provider does not support the free operation +(`umfMemoryProviderFree()` always returns `UMF_RESULT_ERROR_NOT_SUPPORTED`), +so it should be used with a pool manager that will take over +the managing of the provided memory - for example the jemalloc pool +with the `disable_provider_free` parameter set to true. + +##### Requirements + +1) Linux OS +2) A character device file /dev/daxX.Y created in the OS. + +#### File memory provider (Linux only yet) + +A memory provider that provides memory by mapping a regular, extendable file. + +The file memory provider does not support the free operation +(`umfMemoryProviderFree()` always returns `UMF_RESULT_ERROR_NOT_SUPPORTED`), +so it should be used with a pool manager that will take over +the managing of the provided memory - for example the jemalloc pool +with the `disable_provider_free` parameter set to true. 
+ +IPC API requires the `UMF_MEM_MAP_SHARED` memory `visibility` mode +(`UMF_RESULT_ERROR_INVALID_ARGUMENT` is returned otherwise). + +The memory visibility mode parameter must be set to `UMF_MEM_MAP_SHARED` in case of FSDAX. + +##### Requirements + +1) Linux OS +2) A length of a path of a file to be mapped can be `PATH_MAX` (4096) characters at most. + +#### CUDA memory provider + +A memory provider that provides memory from CUDA device. + +##### Requirements + +1) Linux or Windows OS +2) The `UMF_BUILD_CUDA_PROVIDER` option turned `ON` (by default) + +Additionally, required for tests: + +3) The `UMF_BUILD_GPU_TESTS` option turned `ON` +4) System with CUDA compatible GPU +5) Required packages: + - nvidia-cuda-dev (Linux) or cuda-sdk (Windows) + ### Memory pool managers #### Proxy pool (part of libumf) @@ -180,6 +241,8 @@ This memory pool is distributed as part of libumf. It forwards all requests to t memory provider. Currently umfPoolRealloc, umfPoolCalloc and umfPoolMallocUsableSize functions are not supported by the proxy pool. +To enable this feature, the `UMF_BUILD_SHARED_LIBRARY` option needs to be turned `ON`. + #### Disjoint pool TODO: Add a description @@ -208,7 +271,7 @@ It is distributed as part of libumf. To use this pool, TBB must be installed in ##### Requirements -Required packages: +Packages required for using this pool and executing tests/benchmarks (not required for build): - libtbb-dev (libtbbmalloc.so.2) on Linux or tbb (tbbmalloc.dll) on Windows ### Memspaces (Linux-only) @@ -254,6 +317,13 @@ The memory used by the proxy memory allocator is mmap'ed: - `page.disposition=shared-shm` - IPC uses the named shared memory. An SHM name is generated using the `umf_proxy_lib_shm_pid_$PID` pattern, where `$PID` is the PID of the process. It creates the `/dev/shm/umf_proxy_lib_shm_pid_$PID` file. - `page.disposition=shared-fd` - IPC uses the file descriptor duplication. 
It requires using `pidfd_getfd(2)` to obtain a duplicate of another process's file descriptor. Permission to duplicate another process's file descriptor is governed by a ptrace access mode `PTRACE_MODE_ATTACH_REALCREDS` check (see `ptrace(2)`) that can be changed using the `/proc/sys/kernel/yama/ptrace_scope` interface. `pidfd_getfd(2)` is supported since Linux 5.6. +**Size threshold** + +The **size threshold** feature (Linux only) causes that all allocations of size less than the given threshold value go to the default system allocator instead of the proxy library. +It can be enabled by adding the `size.threshold=` string to the `UMF_PROXY` environment variable (with `';'` as a separator), for example: `UMF_PROXY="page.disposition=shared-shm;size.threshold=64"`. + +**Remark:** changing a size of allocation (using `realloc()` ) does not change the allocator (`realloc(malloc(threshold - 1), threshold + 1)` still belongs to the default system allocator and `realloc(malloc(threshold + 1), threshold - 1)` still belongs to the proxy library pool allocator). + #### Windows In case of Windows it requires: diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index 455b9bc06..5605519ee 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -1,7 +1,24 @@ -# Copyright (C) 2023 Intel Corporation +# Copyright (C) 2023-2024 Intel Corporation # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +include(FetchContent) +FetchContent_Declare( + googlebenchmark + GIT_REPOSITORY https://github.com/google/benchmark.git + GIT_TAG v1.9.0) + +set(BENCHMARK_ENABLE_GTEST_TESTS + OFF + CACHE BOOL "" FORCE) +set(BENCHMARK_ENABLE_TESTING + OFF + CACHE BOOL "" FORCE) +set(BENCHMARK_ENABLE_INSTALL + OFF + CACHE BOOL "" FORCE) +FetchContent_MakeAvailable(googlebenchmark) + # In MSVC builds, there is no way to determine the actual build type during the # CMake configuration step. 
Therefore, this message is printed in all MSVC # builds. @@ -32,7 +49,7 @@ function(add_umf_benchmark) "${multiValueArgs}" ${ARGN}) - set(BENCH_NAME umf-bench-${ARG_NAME}) + set(BENCH_NAME umf-${ARG_NAME}) set(BENCH_LIBS ${ARG_LIBS} umf) @@ -42,11 +59,8 @@ function(add_umf_benchmark) LIBS ${BENCH_LIBS}) target_include_directories( - ${BENCH_NAME} - PRIVATE ${UMF_CMAKE_SOURCE_DIR}/include - ${UMF_CMAKE_SOURCE_DIR}/src/utils - ${UMF_CMAKE_SOURCE_DIR}/test/common - ${UMF_CMAKE_SOURCE_DIR}/examples/common) + ${BENCH_NAME} PRIVATE ${UMF_CMAKE_SOURCE_DIR}/include + ${UMF_CMAKE_SOURCE_DIR}/src/utils) target_link_directories(${BENCH_NAME} PRIVATE ${ARG_LIBDIRS}) @@ -55,13 +69,17 @@ function(add_umf_benchmark) COMMAND ${BENCH_NAME} WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) - # Benchmark passes if it prints "PASSED" in the output, because ubench of - # scalable pool fails if the confidence interval exceeds maximum permitted - # 2.5%. - set_tests_properties( - ${BENCH_NAME} PROPERTIES - LABELS "benchmark" - PASS_REGULAR_EXPRESSION "PASSED") + if("${BENCH_NAME}" STREQUAL "umf-ubench") + # Benchmark passes if it prints "PASSED" in the output, because ubench + # of scalable pool fails if the confidence interval exceeds maximum + # permitted 2.5%. 
+ set_tests_properties( + ${BENCH_NAME} PROPERTIES + LABELS "benchmark" + PASS_REGULAR_EXPRESSION "PASSED") + else() + set_tests_properties(${BENCH_NAME} PROPERTIES LABELS "benchmark") + endif() if(WINDOWS) # append PATH to DLLs @@ -71,11 +89,11 @@ function(add_umf_benchmark) if(UMF_BUILD_LIBUMF_POOL_DISJOINT) target_compile_definitions(${BENCH_NAME} - PRIVATE UMF_BUILD_LIBUMF_POOL_DISJOINT=1) + PRIVATE UMF_POOL_DISJOINT_ENABLED=1) endif() - if(UMF_BUILD_LIBUMF_POOL_JEMALLOC) + if(UMF_POOL_JEMALLOC_ENABLED) target_compile_definitions(${BENCH_NAME} - PRIVATE UMF_BUILD_LIBUMF_POOL_JEMALLOC=1) + PRIVATE UMF_POOL_JEMALLOC_ENABLED=1) endif() if(UMF_POOL_SCALABLE_ENABLED) target_compile_definitions(${BENCH_NAME} @@ -83,7 +101,14 @@ function(add_umf_benchmark) endif() if(UMF_BUILD_LEVEL_ZERO_PROVIDER) target_compile_definitions(${BENCH_NAME} - PRIVATE UMF_BUILD_LEVEL_ZERO_PROVIDER=1) + PRIVATE UMF_PROVIDER_LEVEL_ZERO_ENABLED=1) + target_include_directories( + ${BENCH_NAME} PRIVATE ${UMF_CMAKE_SOURCE_DIR}/test/common + ${LEVEL_ZERO_INCLUDE_DIRS}) + endif() + if(UMF_BUILD_CUDA_PROVIDER) + target_compile_definitions(${BENCH_NAME} + PRIVATE UMF_BUILD_CUDA_PROVIDER=1) endif() if(UMF_BUILD_GPU_TESTS) target_compile_definitions(${BENCH_NAME} PRIVATE UMF_BUILD_GPU_TESTS=1) @@ -103,18 +128,26 @@ endif() if(LINUX) set(LIBS_OPTIONAL ${LIBS_OPTIONAL} m) endif() -if(UMF_BUILD_GPU_TESTS) +if(UMF_BUILD_GPU_TESTS AND UMF_BUILD_LEVEL_ZERO_PROVIDER) + set(SRCS_OPTIONAL ${SRCS_OPTIONAL} ../src/utils/utils_level_zero.cpp) set(LIBS_OPTIONAL ${LIBS_OPTIONAL} ze_loader) + # TODO add CUDA endif() # BENCHMARKS add_umf_benchmark( NAME ubench - SRCS ubench.c + SRCS ubench.c ${SRCS_OPTIONAL} LIBS ${LIBS_OPTIONAL} LIBDIRS ${LIB_DIRS}) +add_umf_benchmark( + NAME benchmark + SRCS benchmark.cpp + LIBS ${LIBS_OPTIONAL} benchmark::benchmark + LIBDIRS ${LIB_DIRS}) + if(UMF_BUILD_BENCHMARKS_MT) add_umf_benchmark( NAME multithreaded diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp new file mode 
100644 index 000000000..c10bbda87 --- /dev/null +++ b/benchmark/benchmark.cpp @@ -0,0 +1,362 @@ +/* + * Copyright (C) 2024 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + */ + +#include +#include +#ifdef UMF_POOL_SCALABLE_ENABLED +#include +#endif +#include + +#ifdef UMF_POOL_DISJOINT_ENABLED +#include +#endif + +#ifdef UMF_POOL_JEMALLOC_ENABLED +#include +#endif + +#include "benchmark.hpp" + +struct glibc_malloc : public allocator_interface { + unsigned SetUp([[maybe_unused]] ::benchmark::State &state, + unsigned argPos) override { + return argPos; + } + void TearDown([[maybe_unused]] ::benchmark::State &state) override{}; + void *benchAlloc(size_t size) override { return malloc(size); } + void benchFree(void *ptr, [[maybe_unused]] size_t size) override { + free(ptr); + } + static std::string name() { return "glibc"; } +}; + +struct os_provider : public provider_interface { + umf_os_memory_provider_params_handle_t params = NULL; + os_provider() { + umfOsMemoryProviderParamsCreate(¶ms); + return; + } + + ~os_provider() { + if (params != NULL) { + umfOsMemoryProviderParamsDestroy(params); + } + } + + void *getParams() override { return params; } + umf_memory_provider_ops_t *getOps() override { + return umfOsMemoryProviderOps(); + } + static std::string name() { return "os_provider"; } +}; + +template +struct proxy_pool : public pool_interface { + umf_memory_pool_ops_t * + getOps([[maybe_unused]] ::benchmark::State &state) override { + return umfProxyPoolOps(); + } + void *getParams([[maybe_unused]] ::benchmark::State &state) override { + return nullptr; + } + static std::string name() { return "proxy_pool<" + Provider::name() + ">"; } +}; + +#ifdef UMF_POOL_DISJOINT_ENABLED +template +struct disjoint_pool : public pool_interface { + umf_disjoint_pool_params_handle_t disjoint_memory_pool_params; + + disjoint_pool() { + disjoint_memory_pool_params = NULL; + 
auto ret = umfDisjointPoolParamsCreate(&disjoint_memory_pool_params); + if (ret != UMF_RESULT_SUCCESS) { + return; + } + + // those function should never fail, so error handling is minimal. + ret = umfDisjointPoolParamsSetSlabMinSize(disjoint_memory_pool_params, + 4096); + if (ret != UMF_RESULT_SUCCESS) { + goto err; + } + + ret = umfDisjointPoolParamsSetCapacity(disjoint_memory_pool_params, 4); + if (ret != UMF_RESULT_SUCCESS) { + goto err; + } + + ret = umfDisjointPoolParamsSetMinBucketSize(disjoint_memory_pool_params, + 4096); + if (ret != UMF_RESULT_SUCCESS) { + goto err; + } + + ret = umfDisjointPoolParamsSetMaxPoolableSize( + disjoint_memory_pool_params, 4096 * 16); + + if (ret != UMF_RESULT_SUCCESS) { + goto err; + } + return; + err: + + umfDisjointPoolParamsDestroy(disjoint_memory_pool_params); + disjoint_memory_pool_params = NULL; + } + + ~disjoint_pool() { + if (disjoint_memory_pool_params != NULL) { + umfDisjointPoolParamsDestroy(disjoint_memory_pool_params); + } + } + + umf_memory_pool_ops_t * + getOps([[maybe_unused]] ::benchmark::State &state) override { + return umfDisjointPoolOps(); + } + void *getParams([[maybe_unused]] ::benchmark::State &state) override { + + if (disjoint_memory_pool_params == NULL) { + state.SkipWithError("Failed to create disjoint pool params"); + } + + return disjoint_memory_pool_params; + } + static std::string name() { + return "disjoint_pool<" + Provider::name() + ">"; + } +}; +#endif + +#ifdef UMF_POOL_JEMALLOC_ENABLED +template +struct jemalloc_pool : public pool_interface { + umf_memory_pool_ops_t * + getOps([[maybe_unused]] ::benchmark::State &state) override { + return umfJemallocPoolOps(); + } + void *getParams([[maybe_unused]] ::benchmark::State &state) override { + return NULL; + } + static std::string name() { + return "jemalloc_pool<" + Provider::name() + ">"; + } +}; +#endif + +#ifdef UMF_POOL_SCALABLE_ENABLED +template +struct scalable_pool : public pool_interface { + virtual umf_memory_pool_ops_t * + 
getOps([[maybe_unused]] ::benchmark::State &state) override { + return umfScalablePoolOps(); + } + virtual void * + getParams([[maybe_unused]] ::benchmark::State &state) override { + return NULL; + } + static std::string name() { + return "scalable_pool<" + Provider::name() + ">"; + } +}; +#endif +// Benchmarks scenarios: + +UMF_BENCHMARK_TEMPLATE_DEFINE(alloc_benchmark, glibc_fix, fixed_alloc_size, + glibc_malloc); + +// The benchmark arguments specified in Args() are, in order: +// benchmark arguments, allocator arguments, size generator arguments. +// The exact meaning of each argument depends on the benchmark, allocator, and size components used. +// Refer to the 'argsName()' function in each component to find detailed descriptions of these arguments. +UMF_BENCHMARK_REGISTER_F(alloc_benchmark, glibc_fix) + ->Args({10000, 0, 4096}) + ->Args({10000, 100000, 4096}) + ->Threads(4) + ->Threads(1); + +UMF_BENCHMARK_TEMPLATE_DEFINE(alloc_benchmark, glibc_uniform, + uniform_alloc_size, glibc_malloc); +UMF_BENCHMARK_REGISTER_F(alloc_benchmark, glibc_uniform) + ->Args({10000, 0, 8, 64 * 1024, 8}) + ->Threads(4) + ->Threads(1); + +UMF_BENCHMARK_TEMPLATE_DEFINE(alloc_benchmark, os_provider, fixed_alloc_size, + provider_allocator); +UMF_BENCHMARK_REGISTER_F(alloc_benchmark, os_provider) + ->Args({10000, 0, 4096}) + ->Args({10000, 100000, 4096}) + ->Threads(4) + ->Threads(1); + +UMF_BENCHMARK_TEMPLATE_DEFINE(alloc_benchmark, proxy_pool, fixed_alloc_size, + pool_allocator>); + +UMF_BENCHMARK_REGISTER_F(alloc_benchmark, proxy_pool) + ->Args({1000, 0, 4096}) + ->Args({1000, 100000, 4096}) + ->Threads(4) + ->Threads(1); + +#ifdef UMF_POOL_DISJOINT_ENABLED +UMF_BENCHMARK_TEMPLATE_DEFINE(alloc_benchmark, disjoint_pool_fix, + fixed_alloc_size, + pool_allocator>); +UMF_BENCHMARK_REGISTER_F(alloc_benchmark, disjoint_pool_fix) + ->Args({10000, 0, 4096}) + ->Args({10000, 100000, 4096}) + ->Threads(4) + ->Threads(1); + +// TODO: debug why this crashes 
+/*UMF_BENCHMARK_TEMPLATE_DEFINE(alloc_benchmark, disjoint_pool_uniform, + uniform_alloc_size, + pool_allocator>); +UMF_BENCHMARK_REGISTER_F(alloc_benchmark, disjoint_pool_uniform) + ->Args({10000, 0, 8, 64 * 1024, 8}) + // ->Threads(4) + ->Threads(1); +*/ +#endif + +#ifdef UMF_POOL_JEMALLOC_ENABLED +UMF_BENCHMARK_TEMPLATE_DEFINE(alloc_benchmark, jemalloc_pool_fix, + fixed_alloc_size, + pool_allocator>); +UMF_BENCHMARK_REGISTER_F(alloc_benchmark, jemalloc_pool_fix) + ->Args({10000, 0, 4096}) + ->Args({10000, 100000, 4096}) + ->Threads(4) + ->Threads(1); + +UMF_BENCHMARK_TEMPLATE_DEFINE(alloc_benchmark, jemalloc_pool_uniform, + uniform_alloc_size, + pool_allocator>); +UMF_BENCHMARK_REGISTER_F(alloc_benchmark, jemalloc_pool_uniform) + ->Args({10000, 0, 8, 64 * 1024, 8}) + ->Threads(4) + ->Threads(1); + +#endif +#ifdef UMF_POOL_SCALABLE_ENABLED +UMF_BENCHMARK_TEMPLATE_DEFINE(alloc_benchmark, scalable_pool_fix, + fixed_alloc_size, + pool_allocator>); + +UMF_BENCHMARK_REGISTER_F(alloc_benchmark, scalable_pool_fix) + ->Args({10000, 0, 4096}) + ->Args({10000, 100000, 4096}) + ->Threads(4) + ->Threads(1); + +UMF_BENCHMARK_TEMPLATE_DEFINE(alloc_benchmark, scalable_pool_uniform, + uniform_alloc_size, + pool_allocator>); + +UMF_BENCHMARK_REGISTER_F(alloc_benchmark, scalable_pool_uniform) + ->Args({10000, 0, 8, 64 * 1024, 8}) + ->Threads(4) + ->Threads(1); +#endif +// Multiple allocs/free + +UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, glibc_fix, + fixed_alloc_size, glibc_malloc); + +UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, glibc_fix) + ->Args({10000, 4096}) + ->Threads(4) + ->Threads(1); + +UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, glibc_uniform, + uniform_alloc_size, glibc_malloc); +UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, glibc_uniform) + ->Args({10000, 8, 64 * 1024, 8}) + ->Threads(4) + ->Threads(1); + +UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, proxy_pool, + fixed_alloc_size, + 
pool_allocator>); + +UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, proxy_pool) + ->Args({10000, 4096}) + ->Threads(4) + ->Threads(1); + +UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, os_provider, + fixed_alloc_size, + provider_allocator); +UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, os_provider) + ->Args({10000, 4096}) + ->Threads(4) + ->Threads(1); + +#ifdef UMF_POOL_DISJOINT_ENABLED +UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, disjoint_pool_fix, + fixed_alloc_size, + pool_allocator>); +UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, disjoint_pool_fix) + ->Args({10000, 4096}) + ->Threads(4) + ->Threads(1); + +// TODO: debug why this crashes +/*UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, + disjoint_pool_uniform, uniform_alloc_size, + pool_allocator>); +UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, disjoint_pool_uniform) + ->Args({10000, 0, 8, 64 * 1024, 8}) + ->Threads(4) + ->Threads(1); +*/ +#endif + +#ifdef UMF_POOL_JEMALLOC_ENABLED +UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, jemalloc_pool_fix, + fixed_alloc_size, + pool_allocator>); +UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, jemalloc_pool_fix) + ->Args({10000, 4096}) + ->Threads(4) + ->Threads(1); + +UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, + jemalloc_pool_uniform, uniform_alloc_size, + pool_allocator>); +UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, jemalloc_pool_uniform) + ->Args({1000, 8, 64 * 1024, 8}) + ->Threads(4) + ->Threads(1); + +#endif + +#ifdef UMF_POOL_SCALABLE_ENABLED +UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, scalable_pool_fix, + fixed_alloc_size, + pool_allocator>); + +UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, scalable_pool_fix) + ->Args({10000, 4096}) + ->Threads(4) + ->Threads(1); + +UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, + scalable_pool_uniform, uniform_alloc_size, + 
pool_allocator>); + +UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, scalable_pool_uniform) + ->Args({10000, 8, 64 * 1024, 8}) + ->Threads(4) + ->Threads(1); + +#endif +BENCHMARK_MAIN(); diff --git a/benchmark/benchmark.hpp b/benchmark/benchmark.hpp new file mode 100644 index 000000000..ead6b39e7 --- /dev/null +++ b/benchmark/benchmark.hpp @@ -0,0 +1,382 @@ +/* + * Copyright (C) 2024 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + */ + +/* + * This file defines a benchmarking framework for evaluating memory allocation + * and deallocation performance using the Unified Memory Framework (UMF). The + * design is modular and extensible, allowing for flexible benchmarking of different + * allocation strategies, size distributions, and memory providers. + * + * **Key Design Features:** + * - **Modular Components**: The framework is built using interfaces and templates, + * which allows for easy extension and customization of allocation strategies, + * size distributions, and memory providers. + * - **Flexible Allocation Size Generators**: Includes classes like `fixed_alloc_size` + * and `uniform_alloc_size` that generate allocation sizes based on different + * strategies. These classes implement the `alloc_size_interface`. + * - **Abstract Allocator Interface**: The `allocator_interface` defines the basic + * methods for memory allocation and deallocation. Concrete allocators like + * `provider_allocator` and `pool_allocator` implement this interface to work + * with different memory providers and pools. + * - **Benchmarking Classes**: Classes like `alloc_benchmark` and `multiple_malloc_free_benchmark` + * templates the allocation size generator and allocator to perform benchmarks. + * It manages the setup, execution, and teardown of the benchmark. 
+ * - **Threaded Execution Support**: The benchmarks support multi-threaded execution + * by maintaining thread-specific allocation data and synchronization. + * + * **Component Interactions:** + * - **Size Generators and Allocators**: The `alloc_benchmark` class uses a size + * generator (e.g., `fixed_alloc_size` or `uniform_alloc_size`) to determine the + * sizes of memory allocations, and an allocator (e.g., `provider_allocator` or + * `pool_allocator`) to perform the actual memory operations. + * - **Benchmark Execution**: During the benchmark, `alloc_benchmark` repeatedly + * calls the `bench` method, which performs allocations and deallocations using + * the allocator and size generator. + * - **Allocator Adapters**: The `provider_allocator` and `pool_allocator` adapt + * specific memory providers and pools to the `allocator_interface`, allowing + * them to be used interchangeably in the benchmark classes. This abstraction + * enables benchmarking different memory management strategies without changing + * the core benchmarking logic. + * - **Pre-allocations and Iterations**: The `alloc_benchmark` can perform a set + * number of pre-allocations before the benchmark starts, and manages allocation + * and deallocation cycles to simulate memory pressure and fragmentation. + * - **Derived Benchmarks**: `multiple_malloc_free_benchmark` extends + * `alloc_benchmark` to perform multiple random deallocations and reallocations + * in each iteration, using a uniform distribution to select which allocations + * to free and reallocate. This models workloads with frequent memory churn. + * + * **Execution Flow:** + * 1. **Setup Phase**: + * - The benchmark class initializes the size generator and allocator. + * - Pre-allocations are performed if specified. + * - Thread-specific data structures for allocations are prepared. + * 2. **Benchmark Loop**: + * - For each iteration, the `bench` method is called. + * - The size generator provides the next allocation size. 
+ * - The allocator performs the allocation. + * - Allocations are tracked per thread. + * 3. **Teardown Phase**: + * - All remaining allocations are freed. + * - Allocator and size generator are cleaned up. + * + * **Customization and Extension:** + * - New size generators can be created by implementing the `alloc_size_interface`. + * - New allocators can be adapted by implementing the `allocator_interface`. + * - Additional benchmarking scenarios can be created by extending `benchmark_interface`. + */ + +#include +#include +#include +#include + +#include "benchmark_interfaces.hpp" + +struct alloc_data { + void *ptr; + size_t size; +}; + +#define UMF_BENCHMARK_TEMPLATE_DEFINE(BaseClass, Method, ...) \ + BENCHMARK_TEMPLATE_DEFINE_F(BaseClass, Method, __VA_ARGS__) \ + (benchmark::State & state) { \ + for (auto _ : state) { \ + bench(state); \ + } \ + } + +#define UMF_BENCHMARK_REGISTER_F(BaseClass, Method) \ + BENCHMARK_REGISTER_F(BaseClass, Method) \ + ->ArgNames( \ + BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::argsName()) \ + ->Name(BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::name()) \ + ->MinWarmUpTime(1) + +class fixed_alloc_size : public alloc_size_interface { + public: + unsigned SetUp(::benchmark::State &state, unsigned argPos) override { + size = state.range(argPos); + return argPos + 1; + } + void TearDown([[maybe_unused]] ::benchmark::State &state) override {} + size_t nextSize() override { return size; }; + static std::vector argsName() { return {"size"}; } + + private: + size_t size; +}; + +class uniform_alloc_size : public alloc_size_interface { + using distribution = std::uniform_int_distribution; + + public: + unsigned SetUp(::benchmark::State &state, unsigned argPos) override { + auto min = state.range(argPos++); + auto max = state.range(argPos++); + auto gran = state.range(argPos++); + if (min % gran != 0 && max % gran != 0) { + state.SkipWithError("min and max must be divisible by granularity"); + return argPos; + } + + 
dist.param(distribution::param_type(min / gran, max / gran)); + multiplier = gran; + return argPos; + } + void TearDown([[maybe_unused]] ::benchmark::State &state) override {} + size_t nextSize() override { return dist(generator) * multiplier; } + static std::vector argsName() { + return {"min size", "max size", "granularity"}; + } + + private: + std::default_random_engine generator; + distribution dist; + size_t multiplier; +}; + +// This class benchmarks speed of alloc() operations. +template < + typename Size, typename Alloc, + typename = + std::enable_if_t::value>, + typename = + std::enable_if_t::value>> +class alloc_benchmark : public benchmark_interface { + public: + size_t max_allocs = 1000; + size_t pre_allocs = 0; + void SetUp(::benchmark::State &state) override { + if (state.thread_index() != 0) { + return; + } + + // unpack arguments + int argPos = 0; + max_allocs = state.range(argPos++); + pre_allocs = state.range(argPos++); + // pass rest of the arguments to "alloc_size" and "allocator" + argPos = base::alloc_size.SetUp(state, argPos); + base::allocator.SetUp(state, argPos); + + // initialize allocations tracking vectors (one per thread) + // and iterators for these vectors. + allocations.resize(state.threads()); + iters.resize(state.threads()); + + for (auto &i : iters) { + i = pre_allocs; + } + + // do "pre_alloc" allocations before actual benchmark. 
+ for (auto &i : allocations) { + i.resize(max_allocs + pre_allocs); + + for (size_t j = 0; j < pre_allocs; j++) { + i[j].ptr = + base::allocator.benchAlloc(base::alloc_size.nextSize()); + if (i[j].ptr == NULL) { + state.SkipWithError("preallocation failed"); + return; + } + i[j].size = base::alloc_size.nextSize(); + } + } + } + + void TearDown(::benchmark::State &state) override { + if (state.thread_index() != 0) { + return; + } + for (auto &i : allocations) { + for (auto &j : i) { + if (j.ptr != NULL) { + base::allocator.benchFree(j.ptr, j.size); + j.ptr = NULL; + j.size = 0; + } + } + } + + base::TearDown(state); + } + + void bench(benchmark::State &state) override { + auto tid = state.thread_index(); + auto s = base::alloc_size.nextSize(); + auto &i = iters[tid]; + allocations[tid][i].ptr = base::allocator.benchAlloc(s); + if (allocations[tid][i].ptr == NULL) { + state.SkipWithError("allocation failed"); + return; + } + allocations[tid][i].size = s; + i++; + if (i >= max_allocs + pre_allocs) { + // This benchmark tests only allocations - + // if allocation tracker is full we pause benchmark to dealloc all allocations - + // excluding pre-allocated ones. 
+ state.PauseTiming(); + while (i > pre_allocs) { + auto &allocation = allocations[tid][--i]; + base::allocator.benchFree(allocation.ptr, allocation.size); + allocation.ptr = NULL; + allocation.size = 0; + } + state.ResumeTiming(); + } + } + static std::vector argsName() { + auto n = benchmark_interface::argsName(); + std::vector res = {"max_allocs", "pre_allocs"}; + res.insert(res.end(), n.begin(), n.end()); + return res; + } + static std::string name() { return base::name() + "/alloc"; } + + protected: + using base = benchmark_interface; + std::vector> allocations; + std::vector iters; +}; + +// This class benchmarks performance of random deallocations and (re)allocations +template < + typename Size, typename Alloc, + typename = + std::enable_if_t::value>, + typename = + std::enable_if_t::value>> +class multiple_malloc_free_benchmark : public alloc_benchmark { + using distribution = std::uniform_int_distribution; + using base = alloc_benchmark; + + public: + int reallocs = 100; + void SetUp(::benchmark::State &state) override { + if (state.thread_index() != 0) { + return; + } + // unpack arguments + int argPos = 0; + base::max_allocs = state.range(argPos++); + + // pass rest of the arguments to "alloc_size" and "allocator" + argPos = base::alloc_size.SetUp(state, argPos); + base::allocator.SetUp(state, argPos); + + // perform initial allocations which will be later freed and reallocated + base::allocations.resize(state.threads()); + for (auto &i : base::allocations) { + i.resize(base::max_allocs); + + for (size_t j = 0; j < base::max_allocs; j++) { + i[j].ptr = + base::allocator.benchAlloc(base::alloc_size.nextSize()); + if (i[j].ptr == NULL) { + state.SkipWithError("preallocation failed"); + return; + } + i[j].size = base::alloc_size.nextSize(); + } + } + dist.param(distribution::param_type(0, base::max_allocs - 1)); + } + + void bench(benchmark::State &state) override { + auto tid = state.thread_index(); + auto &allocation = base::allocations[tid]; + 
std::vector to_alloc; + for (int j = 0; j < reallocs; j++) { + auto idx = dist(generator); + if (allocation[idx].ptr == NULL) { + continue; + } + to_alloc.push_back(idx); + + base::allocator.benchFree(allocation[idx].ptr, + allocation[idx].size); + allocation[idx].ptr = NULL; + allocation[idx].size = 0; + } + + for (auto idx : to_alloc) { + auto s = base::alloc_size.nextSize(); + allocation[idx].ptr = base::allocator.benchAlloc(s); + if (allocation[idx].ptr == NULL) { + state.SkipWithError("allocation failed"); + } + allocation[idx].size = s; + } + } + + static std::string name() { + return base::base::name() + "/multiple_malloc_free"; + } + static std::vector argsName() { + auto n = benchmark_interface::argsName(); + std::vector res = {"max_allocs"}; + res.insert(res.end(), n.begin(), n.end()); + return res; + } + std::default_random_engine generator; + distribution dist; +}; + +template ::value>> +class provider_allocator : public allocator_interface { + public: + unsigned SetUp(::benchmark::State &state, unsigned r) override { + provider.SetUp(state); + return r; + } + + void TearDown(::benchmark::State &state) override { + provider.TearDown(state); + } + + void *benchAlloc(size_t size) override { + void *ptr; + if (umfMemoryProviderAlloc(provider.provider, size, 0, &ptr) != + UMF_RESULT_SUCCESS) { + return NULL; + } + return ptr; + } + void benchFree(void *ptr, size_t size) override { + umfMemoryProviderFree(provider.provider, ptr, size); + } + static std::string name() { return Provider::name(); } + + private: + Provider provider; +}; + +// TODO: assert Pool to be a pool_interface. 
+template class pool_allocator : public allocator_interface { + public: + unsigned SetUp(::benchmark::State &state, unsigned r) override { + pool.SetUp(state); + return r; + } + + void TearDown(::benchmark::State &state) override { pool.TearDown(state); } + + virtual void *benchAlloc(size_t size) override { + return umfPoolMalloc(pool.pool, size); + } + virtual void benchFree(void *ptr, [[maybe_unused]] size_t size) override { + umfPoolFree(pool.pool, ptr); + } + + static std::string name() { return Pool::name(); } + + private: + Pool pool; +}; diff --git a/benchmark/benchmark_interfaces.hpp b/benchmark/benchmark_interfaces.hpp new file mode 100644 index 000000000..868116062 --- /dev/null +++ b/benchmark/benchmark_interfaces.hpp @@ -0,0 +1,129 @@ +/* + * Copyright (C) 2024 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + */ + +#include +#include +#include + +#include +#include +#include + +class alloc_size_interface { + public: + virtual unsigned SetUp([[maybe_unused]] ::benchmark::State &state, + [[maybe_unused]] unsigned argPos) = 0; + virtual void TearDown([[maybe_unused]] ::benchmark::State &state) = 0; + virtual size_t nextSize() = 0; + static std::vector argsName() { return {""}; }; +}; + +class allocator_interface { + public: + virtual unsigned SetUp([[maybe_unused]] ::benchmark::State &state, + [[maybe_unused]] unsigned argPos) = 0; + virtual void TearDown([[maybe_unused]] ::benchmark::State &state) = 0; + virtual void *benchAlloc(size_t size) = 0; + virtual void benchFree(void *ptr, [[maybe_unused]] size_t size) = 0; + static std::vector argsName() { return {}; } +}; + +template +struct benchmark_interface : public benchmark::Fixture { + void SetUp(::benchmark::State &state) { + int argPos = alloc_size.SetUp(state, 0); + allocator.SetUp(state, argPos); + } + void TearDown(::benchmark::State &state) { + alloc_size.TearDown(state); + 
allocator.TearDown(state); + } + + virtual void bench(::benchmark::State &state) = 0; + + static std::vector argsName() { + auto s = Size::argsName(); + auto a = Allocator::argsName(); + std::vector res = {}; + res.insert(res.end(), s.begin(), s.end()); + res.insert(res.end(), a.begin(), a.end()); + return res; + } + static std::string name() { return Allocator::name(); } + + Size alloc_size; + Allocator allocator; +}; + +struct provider_interface { + umf_memory_provider_handle_t provider = NULL; + virtual void SetUp(::benchmark::State &state) { + if (state.thread_index() != 0) { + return; + } + auto umf_result = + umfMemoryProviderCreate(getOps(), getParams(), &provider); + if (umf_result != UMF_RESULT_SUCCESS) { + state.SkipWithError("umfMemoryProviderCreate() failed"); + } + } + + virtual void TearDown([[maybe_unused]] ::benchmark::State &state) { + if (state.thread_index() != 0) { + return; + } + + if (provider) { + umfMemoryProviderDestroy(provider); + } + } + + virtual umf_memory_provider_ops_t *getOps() { return nullptr; } + virtual void *getParams() { return nullptr; } +}; + +template ::value>> +struct pool_interface { + virtual void SetUp(::benchmark::State &state) { + provider.SetUp(state); + if (state.thread_index() != 0) { + return; + } + auto umf_result = umfPoolCreate(getOps(state), provider.provider, + getParams(state), 0, &pool); + if (umf_result != UMF_RESULT_SUCCESS) { + state.SkipWithError("umfPoolCreate() failed"); + } + } + virtual void TearDown([[maybe_unused]] ::benchmark::State &state) { + if (state.thread_index() != 0) { + return; + } + // TODO: The scalable pool destruction process can race with other threads + // performing TLS (Thread-Local Storage) destruction. + // As a temporary workaround, we introduce a delay (sleep) + // to ensure the pool is destroyed only after all threads have completed. 
+ // Issue: #933 + std::this_thread::sleep_for(std::chrono::milliseconds(500)); + if (pool) { + umfPoolDestroy(pool); + } + }; + + virtual umf_memory_pool_ops_t * + getOps([[maybe_unused]] ::benchmark::State &state) { + return nullptr; + } + virtual void *getParams([[maybe_unused]] ::benchmark::State &state) { + return nullptr; + } + T provider; + umf_memory_pool_handle_t pool; +}; diff --git a/benchmark/multithread.cpp b/benchmark/multithread.cpp index 8239c8cd4..4558942ec 100644 --- a/benchmark/multithread.cpp +++ b/benchmark/multithread.cpp @@ -91,7 +91,12 @@ static void mt_alloc_free(poolCreateExtParams params, } int main() { - auto osParams = umfOsMemoryProviderParamsDefault(); + umf_os_memory_provider_params_handle_t osParams = nullptr; + umf_result_t res = umfOsMemoryProviderParamsCreate(&osParams); + if (res != UMF_RESULT_SUCCESS) { + std::cerr << "os memory provider params create failed" << std::endl; + return -1; + } #if defined(UMF_POOL_SCALABLE_ENABLED) @@ -102,26 +107,31 @@ int main() { std::cout << "scalable_pool mt_alloc_free: "; mt_alloc_free(poolCreateExtParams{umfScalablePoolOps(), nullptr, - umfOsMemoryProviderOps(), &osParams}, + umfOsMemoryProviderOps(), osParams}, params); #else std::cout << "skipping scalable_pool mt_alloc_free" << std::endl; #endif -#if defined(UMF_BUILD_LIBUMF_POOL_JEMALLOC) +#if defined(UMF_POOL_JEMALLOC_ENABLED) std::cout << "jemalloc_pool mt_alloc_free: "; mt_alloc_free(poolCreateExtParams{umfJemallocPoolOps(), nullptr, - umfOsMemoryProviderOps(), &osParams}); + umfOsMemoryProviderOps(), osParams}); #else std::cout << "skipping jemalloc_pool mt_alloc_free" << std::endl; #endif -#if defined(UMF_BUILD_LIBUMF_POOL_DISJOINT) - auto disjointParams = umfDisjointPoolParamsDefault(); +#if defined(UMF_POOL_DISJOINT_ENABLED) + umf_disjoint_pool_params_handle_t hDisjointParams = nullptr; + umf_result_t ret = umfDisjointPoolParamsCreate(&hDisjointParams); + if (ret != UMF_RESULT_SUCCESS) { + std::cerr << "disjoint pool params create 
failed" << std::endl; + return -1; + } std::cout << "disjoint_pool mt_alloc_free: "; - mt_alloc_free(poolCreateExtParams{umfDisjointPoolOps(), &disjointParams, - umfOsMemoryProviderOps(), &osParams}); + mt_alloc_free(poolCreateExtParams{umfDisjointPoolOps(), hDisjointParams, + umfOsMemoryProviderOps(), osParams}); #else std::cout << "skipping disjoint_pool mt_alloc_free" << std::endl; #endif @@ -129,5 +139,11 @@ int main() { // ctest looks for "PASSED" in the output std::cout << "PASSED" << std::endl; + ret = umfDisjointPoolParamsDestroy(hDisjointParams); + if (ret != UMF_RESULT_SUCCESS) { + std::cerr << "disjoint pool params destroy failed" << std::endl; + return -1; + } + return 0; } diff --git a/benchmark/multithread.hpp b/benchmark/multithread.hpp index e642d2987..a3ba37541 100644 --- a/benchmark/multithread.hpp +++ b/benchmark/multithread.hpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include diff --git a/benchmark/ubench.c b/benchmark/ubench.c index 900b7b85c..5f1bfe9e4 100644 --- a/benchmark/ubench.c +++ b/benchmark/ubench.c @@ -20,17 +20,18 @@ #include #include -#ifdef UMF_BUILD_LIBUMF_POOL_DISJOINT +#ifdef UMF_POOL_DISJOINT_ENABLED #include #endif -#ifdef UMF_BUILD_LIBUMF_POOL_JEMALLOC +#ifdef UMF_POOL_JEMALLOC_ENABLED #include #endif #include "utils_common.h" -#if (defined UMF_BUILD_GPU_TESTS) +#if (defined UMF_BUILD_LIBUMF_POOL_DISJOINT && \ + defined UMF_BUILD_LEVEL_ZERO_PROVIDER && defined UMF_BUILD_GPU_TESTS) #include "utils_level_zero.h" #endif @@ -52,7 +53,7 @@ #include "ubench.h" // BENCHMARK CONFIG #define N_ITERATIONS 1000 -#define ALLOC_SIZE (util_get_page_size()) +#define ALLOC_SIZE (utils_get_page_size()) // OS MEMORY PROVIDER CONFIG #define OS_MEMORY_PROVIDER_TRACE (0) @@ -123,22 +124,6 @@ UBENCH_EX(simple, glibc_malloc) { ////////////////// OS MEMORY PROVIDER -static umf_os_memory_provider_params_t UMF_OS_MEMORY_PROVIDER_PARAMS = { - /* .protection = */ UMF_PROTECTION_READ | UMF_PROTECTION_WRITE, - /* .visibility = */ 
UMF_MEM_MAP_PRIVATE, - /* .shm_name = */ NULL, - - // NUMA config - /* .numa_list = */ NULL, - /* .numa_list_len = */ 0, - - /* .numa_mode = */ UMF_NUMA_MODE_DEFAULT, - /* .part_size = */ 0, - - /* .partitions = */ NULL, - /* .partitions_len = */ 0, -}; - static void *w_umfMemoryProviderAlloc(void *provider, size_t size, size_t alignment) { void *ptr = NULL; @@ -170,9 +155,17 @@ UBENCH_EX(simple, os_memory_provider) { umf_result_t umf_result; umf_memory_provider_handle_t os_memory_provider = NULL; - umf_result = umfMemoryProviderCreate(umfOsMemoryProviderOps(), - &UMF_OS_MEMORY_PROVIDER_PARAMS, + umf_os_memory_provider_params_handle_t os_params = NULL; + + umf_result = umfOsMemoryProviderParamsCreate(&os_params); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf(stderr, "error: umfOsMemoryProviderParamsCreate() failed\n"); + exit(-1); + } + + umf_result = umfMemoryProviderCreate(umfOsMemoryProviderOps(), os_params, &os_memory_provider); + umfOsMemoryProviderParamsDestroy(os_params); if (umf_result != UMF_RESULT_SUCCESS) { fprintf(stderr, "error: umfMemoryProviderCreate() failed\n"); exit(-1); @@ -214,9 +207,17 @@ UBENCH_EX(simple, proxy_pool_with_os_memory_provider) { umf_result_t umf_result; umf_memory_provider_handle_t os_memory_provider = NULL; - umf_result = umfMemoryProviderCreate(umfOsMemoryProviderOps(), - &UMF_OS_MEMORY_PROVIDER_PARAMS, + umf_os_memory_provider_params_handle_t os_params = NULL; + + umf_result = umfOsMemoryProviderParamsCreate(&os_params); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf(stderr, "error: umfOsMemoryProviderParamsCreate() failed\n"); + exit(-1); + } + + umf_result = umfMemoryProviderCreate(umfOsMemoryProviderOps(), os_params, &os_memory_provider); + umfOsMemoryProviderParamsDestroy(os_params); if (umf_result != UMF_RESULT_SUCCESS) { fprintf(stderr, "error: umfMemoryProviderCreate() failed\n"); exit(-1); @@ -243,7 +244,7 @@ UBENCH_EX(simple, proxy_pool_with_os_memory_provider) { free(array); } -#if (defined 
UMF_BUILD_LIBUMF_POOL_DISJOINT) +#if (defined UMF_POOL_DISJOINT_ENABLED) ////////////////// DISJOINT POOL WITH OS MEMORY PROVIDER UBENCH_EX(simple, disjoint_pool_with_os_memory_provider) { @@ -251,24 +252,63 @@ UBENCH_EX(simple, disjoint_pool_with_os_memory_provider) { umf_result_t umf_result; umf_memory_provider_handle_t os_memory_provider = NULL; - umf_result = umfMemoryProviderCreate(umfOsMemoryProviderOps(), - &UMF_OS_MEMORY_PROVIDER_PARAMS, + umf_os_memory_provider_params_handle_t os_params = NULL; + + umf_result = umfOsMemoryProviderParamsCreate(&os_params); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf(stderr, "error: umfOsMemoryProviderParamsCreate() failed\n"); + exit(-1); + } + + umf_result = umfMemoryProviderCreate(umfOsMemoryProviderOps(), os_params, &os_memory_provider); + umfOsMemoryProviderParamsDestroy(os_params); if (umf_result != UMF_RESULT_SUCCESS) { fprintf(stderr, "error: umfMemoryProviderCreate() failed\n"); exit(-1); } - umf_disjoint_pool_params_t disjoint_memory_pool_params = {0}; - disjoint_memory_pool_params.SlabMinSize = DISJOINT_POOL_SLAB_MIN_SIZE; - disjoint_memory_pool_params.MaxPoolableSize = - DISJOINT_POOL_MAX_POOLABLE_SIZE; - disjoint_memory_pool_params.Capacity = DISJOINT_POOL_CAPACITY; - disjoint_memory_pool_params.MinBucketSize = DISJOINT_POOL_MIN_BUCKET_SIZE; + umf_disjoint_pool_params_handle_t disjoint_memory_pool_params = NULL; + umf_result = umfDisjointPoolParamsCreate(&disjoint_memory_pool_params); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf(stderr, "ERROR: umfDisjointPoolParamsCreate failed\n"); + exit(-1); + } + + umf_result = umfDisjointPoolParamsSetSlabMinSize( + disjoint_memory_pool_params, DISJOINT_POOL_SLAB_MIN_SIZE); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf(stderr, + "error: umfDisjointPoolParamsSetSlabMinSize() failed\n"); + exit(-1); + } + + umf_result = umfDisjointPoolParamsSetMaxPoolableSize( + disjoint_memory_pool_params, DISJOINT_POOL_MAX_POOLABLE_SIZE); + if (umf_result != 
UMF_RESULT_SUCCESS) { + fprintf(stderr, + "error: umfDisjointPoolParamsSetMaxPoolableSize() failed\n"); + exit(-1); + } + + umf_result = umfDisjointPoolParamsSetCapacity(disjoint_memory_pool_params, + DISJOINT_POOL_CAPACITY); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf(stderr, "error: umfDisjointPoolParamsSetCapacity() failed\n"); + exit(-1); + } + + umf_result = umfDisjointPoolParamsSetMinBucketSize( + disjoint_memory_pool_params, DISJOINT_POOL_MIN_BUCKET_SIZE); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf(stderr, + "error: umfDisjointPoolParamsSetMinBucketSize() failed\n"); + exit(-1); + } umf_memory_pool_handle_t disjoint_pool; umf_result = umfPoolCreate(umfDisjointPoolOps(), os_memory_provider, - &disjoint_memory_pool_params, 0, &disjoint_pool); + disjoint_memory_pool_params, 0, &disjoint_pool); if (umf_result != UMF_RESULT_SUCCESS) { fprintf(stderr, "error: umfPoolCreate() failed\n"); exit(-1); @@ -283,12 +323,13 @@ UBENCH_EX(simple, disjoint_pool_with_os_memory_provider) { } umfPoolDestroy(disjoint_pool); + umfDisjointPoolParamsDestroy(disjoint_memory_pool_params); umfMemoryProviderDestroy(os_memory_provider); free(array); } -#endif /* (defined UMF_BUILD_LIBUMF_POOL_DISJOINT) */ +#endif /* (defined UMF_POOL_DISJOINT_ENABLED) */ -#if (defined UMF_BUILD_LIBUMF_POOL_JEMALLOC) +#if (defined UMF_POOL_JEMALLOC_ENABLED) ////////////////// JEMALLOC POOL WITH OS MEMORY PROVIDER UBENCH_EX(simple, jemalloc_pool_with_os_memory_provider) { @@ -296,9 +337,17 @@ UBENCH_EX(simple, jemalloc_pool_with_os_memory_provider) { umf_result_t umf_result; umf_memory_provider_handle_t os_memory_provider = NULL; - umf_result = umfMemoryProviderCreate(umfOsMemoryProviderOps(), - &UMF_OS_MEMORY_PROVIDER_PARAMS, + umf_os_memory_provider_params_handle_t os_params = NULL; + + umf_result = umfOsMemoryProviderParamsCreate(&os_params); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf(stderr, "error: umfOsMemoryProviderParamsCreate() failed\n"); + exit(-1); + } + + umf_result = 
umfMemoryProviderCreate(umfOsMemoryProviderOps(), os_params, &os_memory_provider); + umfOsMemoryProviderParamsDestroy(os_params); if (umf_result != UMF_RESULT_SUCCESS) { fprintf(stderr, "error: umfMemoryProviderCreate() failed\n"); exit(-1); @@ -324,7 +373,7 @@ UBENCH_EX(simple, jemalloc_pool_with_os_memory_provider) { umfMemoryProviderDestroy(os_memory_provider); free(array); } -#endif /* (defined UMF_BUILD_LIBUMF_POOL_JEMALLOC) */ +#endif /* (defined UMF_POOL_JEMALLOC_ENABLED) */ #if (defined UMF_POOL_SCALABLE_ENABLED) ////////////////// SCALABLE (TBB) POOL WITH OS MEMORY PROVIDER @@ -334,9 +383,17 @@ UBENCH_EX(simple, scalable_pool_with_os_memory_provider) { umf_result_t umf_result; umf_memory_provider_handle_t os_memory_provider = NULL; - umf_result = umfMemoryProviderCreate(umfOsMemoryProviderOps(), - &UMF_OS_MEMORY_PROVIDER_PARAMS, + umf_os_memory_provider_params_handle_t os_params = NULL; + + umf_result = umfOsMemoryProviderParamsCreate(&os_params); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf(stderr, "error: umfOsMemoryProviderParamsCreate() failed\n"); + exit(-1); + } + + umf_result = umfMemoryProviderCreate(umfOsMemoryProviderOps(), os_params, &os_memory_provider); + umfOsMemoryProviderParamsDestroy(os_params); if (umf_result != UMF_RESULT_SUCCESS) { fprintf(stderr, "error: umfMemoryProviderCreate() failed\n"); exit(-1); @@ -364,7 +421,7 @@ UBENCH_EX(simple, scalable_pool_with_os_memory_provider) { } #endif /* (defined UMF_POOL_SCALABLE_ENABLED) */ -#if (defined UMF_BUILD_LIBUMF_POOL_DISJOINT && \ +#if (defined UMF_POOL_DISJOINT_ENABLED && \ defined UMF_BUILD_LEVEL_ZERO_PROVIDER && defined UMF_BUILD_GPU_TESTS) static void do_ipc_get_put_benchmark(alloc_t *allocs, size_t num_allocs, size_t repeats, @@ -388,54 +445,85 @@ static void do_ipc_get_put_benchmark(alloc_t *allocs, size_t num_allocs, } } -int create_level_zero_params(level_zero_memory_provider_params_t *params) { +int create_level_zero_params(ze_context_handle_t *context, + 
ze_device_handle_t *device) { uint32_t driver_idx = 0; ze_driver_handle_t driver = NULL; - ze_context_handle_t context = NULL; - ze_device_handle_t device = NULL; - int ret = init_level_zero(); + int ret = utils_ze_init_level_zero(); if (ret != 0) { fprintf(stderr, "Failed to init Level 0!\n"); return ret; } - ret = find_driver_with_gpu(&driver_idx, &driver); + ret = utils_ze_find_driver_with_gpu(&driver_idx, &driver); if (ret || driver == NULL) { fprintf(stderr, "Cannot find L0 driver with GPU device!\n"); return ret; } - ret = create_context(driver, &context); + ret = utils_ze_create_context(driver, context); if (ret != 0) { fprintf(stderr, "Failed to create L0 context!\n"); return ret; } - ret = find_gpu_device(driver, &device); - if (ret || device == NULL) { + ret = utils_ze_find_gpu_device(driver, device); + if (ret) { fprintf(stderr, "Cannot find GPU device!\n"); - destroy_context(context); + utils_ze_destroy_context(*context); return ret; } - params->level_zero_context_handle = context; - params->level_zero_device_handle = device; - params->memory_type = UMF_MEMORY_TYPE_DEVICE; - return ret; } UBENCH_EX(ipc, disjoint_pool_with_level_zero_provider) { const size_t BUFFER_SIZE = 100; const size_t N_BUFFERS = 1000; - level_zero_memory_provider_params_t level_zero_params; + umf_result_t umf_result; + ze_context_handle_t context = NULL; + ze_device_handle_t device = NULL; + umf_level_zero_memory_provider_params_handle_t level_zero_params = NULL; - int ret = create_level_zero_params(&level_zero_params); + int ret = create_level_zero_params(&context, &device); if (ret != 0) { + fprintf(stderr, "error: create_level_zero_params() failed\n"); exit(-1); } + umf_result = umfLevelZeroMemoryProviderParamsCreate(&level_zero_params); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf(stderr, + "error: umfLevelZeroMemoryProviderParamsCreate() failed\n"); + goto err_destroy_context; + } + + umf_result = + umfLevelZeroMemoryProviderParamsSetContext(level_zero_params, context); 
+ if (umf_result != UMF_RESULT_SUCCESS) { + fprintf(stderr, + "error: umfLevelZeroMemoryProviderParamsSetContext() failed\n"); + goto err_destroy_params; + } + + umf_result = + umfLevelZeroMemoryProviderParamsSetDevice(level_zero_params, device); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf(stderr, + "error: umfLevelZeroMemoryProviderParamsSetDevice() failed\n"); + goto err_destroy_params; + } + + umf_result = umfLevelZeroMemoryProviderParamsSetMemoryType( + level_zero_params, UMF_MEMORY_TYPE_DEVICE); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf( + stderr, + "error: umfLevelZeroMemoryProviderParamsSetMemoryType() failed\n"); + goto err_destroy_params; + } + alloc_t *allocs = alloc_array(N_BUFFERS); if (allocs == NULL) { fprintf(stderr, "error: alloc_array() failed\n"); @@ -448,28 +536,58 @@ UBENCH_EX(ipc, disjoint_pool_with_level_zero_provider) { goto err_free_allocs; } - umf_result_t umf_result; umf_memory_provider_handle_t provider = NULL; umf_result = umfMemoryProviderCreate(umfLevelZeroMemoryProviderOps(), - &level_zero_params, &provider); + level_zero_params, &provider); if (umf_result != UMF_RESULT_SUCCESS) { fprintf(stderr, "error: umfMemoryProviderCreate() failed\n"); goto err_free_ipc_handles; } - umf_disjoint_pool_params_t disjoint_params = {0}; - disjoint_params.SlabMinSize = BUFFER_SIZE * 10; - disjoint_params.MaxPoolableSize = 4ull * 1024ull * 1024ull; - disjoint_params.Capacity = 64ull * 1024ull; - disjoint_params.MinBucketSize = 64; - umf_pool_create_flags_t flags = UMF_POOL_CREATE_FLAG_OWN_PROVIDER; + umf_disjoint_pool_params_handle_t disjoint_params = NULL; + umf_result = umfDisjointPoolParamsCreate(&disjoint_params); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf(stderr, "ERROR: umfDisjointPoolParamsCreate failed\n"); + goto err_provider_destroy; + } + + umf_result = + umfDisjointPoolParamsSetSlabMinSize(disjoint_params, BUFFER_SIZE * 10); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf(stderr, + "error: 
umfDisjointPoolParamsSetSlabMinSize() failed\n"); + goto err_params_destroy; + } + + umf_result = umfDisjointPoolParamsSetMaxPoolableSize( + disjoint_params, 4ull * 1024ull * 1024ull); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf(stderr, + "error: umfDisjointPoolParamsSetMaxPoolableSize() failed\n"); + goto err_params_destroy; + } + + umf_result = + umfDisjointPoolParamsSetCapacity(disjoint_params, 64ull * 1024ull); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf(stderr, "error: umfDisjointPoolParamsSetCapacity() failed\n"); + goto err_params_destroy; + } + + umf_result = umfDisjointPoolParamsSetMinBucketSize(disjoint_params, 64); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf(stderr, + "error: umfDisjointPoolParamsSetMinBucketSize() failed\n"); + goto err_params_destroy; + } + + umf_pool_create_flags_t flags = UMF_POOL_CREATE_FLAG_NONE; umf_memory_pool_handle_t pool; - umf_result = umfPoolCreate(umfDisjointPoolOps(), provider, &disjoint_params, + umf_result = umfPoolCreate(umfDisjointPoolOps(), provider, disjoint_params, flags, &pool); if (umf_result != UMF_RESULT_SUCCESS) { fprintf(stderr, "error: umfPoolCreate() failed\n"); - umfMemoryProviderDestroy(provider); - goto err_free_ipc_handles; + goto err_params_destroy; } for (size_t i = 0; i < N_BUFFERS; ++i) { @@ -494,16 +612,27 @@ UBENCH_EX(ipc, disjoint_pool_with_level_zero_provider) { umfPoolDestroy(pool); +err_params_destroy: + umfDisjointPoolParamsDestroy(disjoint_params); + +err_provider_destroy: + umfMemoryProviderDestroy(provider); + err_free_ipc_handles: free(ipc_handles); err_free_allocs: free(allocs); +err_destroy_params: + umfLevelZeroMemoryProviderParamsDestroy(level_zero_params); + err_destroy_context: - destroy_context(level_zero_params.level_zero_context_handle); + utils_ze_destroy_context(context); } -#endif /* (defined UMF_BUILD_LIBUMF_POOL_DISJOINT && defined UMF_BUILD_LEVEL_ZERO_PROVIDER && defined UMF_BUILD_GPU_TESTS) */ +#endif /* (defined UMF_POLL_DISJOINT_ENABLED && defined 
UMF_BUILD_LEVEL_ZERO_PROVIDER && defined UMF_BUILD_GPU_TESTS) */ + +// TODO add IPC benchmark for CUDA UBENCH_MAIN() diff --git a/cmake/FindCUDA.cmake b/cmake/FindCUDA.cmake new file mode 100644 index 000000000..5e4e2eead --- /dev/null +++ b/cmake/FindCUDA.cmake @@ -0,0 +1,35 @@ +# Copyright (C) 2024 Intel Corporation +# Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +message(STATUS "Checking for module 'cuda' using find_library()") + +find_library(CUDA_LIBRARY NAMES libcuda cuda) +set(CUDA_LIBRARIES ${CUDA_LIBRARY}) + +get_filename_component(CUDA_LIB_DIR ${CUDA_LIBRARIES} DIRECTORY) +set(CUDA_LIBRARY_DIRS ${CUDA_LIB_DIR}) + +if(WINDOWS) + find_file(CUDA_DLL NAMES "nvcuda.dll") + get_filename_component(CUDA_DLL_DIR ${CUDA_DLL} DIRECTORY) + set(CUDA_DLL_DIRS ${CUDA_DLL_DIR}) +endif() + +if(CUDA_LIBRARY) + message(STATUS " Found cuda using find_library()") + message(STATUS " CUDA_LIBRARIES = ${CUDA_LIBRARIES}") + message(STATUS " CUDA_INCLUDE_DIRS = ${CUDA_INCLUDE_DIRS}") + message(STATUS " CUDA_LIBRARY_DIRS = ${CUDA_LIBRARY_DIRS}") + if(WINDOWS) + message(STATUS " CUDA_DLL_DIRS = ${CUDA_DLL_DIRS}") + endif() +else() + set(MSG_NOT_FOUND "cuda NOT found (set CMAKE_PREFIX_PATH to point the " + "location)") + if(CUDA_FIND_REQUIRED) + message(FATAL_ERROR ${MSG_NOT_FOUND}) + else() + message(WARNING ${MSG_NOT_FOUND}) + endif() +endif() diff --git a/cmake/FindLIBHWLOC.cmake b/cmake/FindLIBHWLOC.cmake index a7c9201a0..8d7998f8d 100644 --- a/cmake/FindLIBHWLOC.cmake +++ b/cmake/FindLIBHWLOC.cmake @@ -67,7 +67,8 @@ if(LIBHWLOC_LIBRARY) endif() else() set(MSG_NOT_FOUND - "libhwloc NOT found (set CMAKE_PREFIX_PATH to point the location)") + "libhwloc NOT found (set CMAKE_PREFIX_PATH to point the location or disable with -DUMF_DISABLE_HWLOC=ON)" + ) if(LIBHWLOC_FIND_REQUIRED) message(FATAL_ERROR ${MSG_NOT_FOUND}) else() diff --git a/cmake/helpers.cmake b/cmake/helpers.cmake index 
1d3e175fa..2544a1518 100644 --- a/cmake/helpers.cmake +++ b/cmake/helpers.cmake @@ -121,6 +121,22 @@ function(set_version_variables) return() endif() + # v1.5.0-dev - we're on a development tag -> UMF ver: "1.5.0-dev" + string(REGEX MATCHALL "\^v([0-9]+\.[0-9]+\.[0-9]+)-dev\$" MATCHES + ${GIT_VERSION}) + if(MATCHES) + set(UMF_VERSION + "${CMAKE_MATCH_1}-dev" + PARENT_SCOPE) + set(UMF_CMAKE_VERSION + "${CMAKE_MATCH_1}" + PARENT_SCOPE) + set(UMF_VERSION_PRIVATE + 0 + PARENT_SCOPE) + return() + endif() + # v1.5.0-rc1-19-gb8f7a32 -> UMF ver: "1.5.0-rc1.git19.gb8f7a32" string(REGEX MATCHALL "v([0-9.]*)-rc([0-9]*)-([0-9]*)-([0-9a-g]*)" MATCHES ${GIT_VERSION}) @@ -141,6 +157,19 @@ function(set_version_variables) return() endif() + # v1.5.0-dev-19-gb8f7a32 -> UMF ver: "1.5.0-dev.git19.gb8f7a32" + string(REGEX MATCHALL "v([0-9.]*)-dev-([0-9]*)-([0-9a-g]*)" MATCHES + ${GIT_VERSION}) + if(MATCHES) + set(UMF_VERSION + "${CMAKE_MATCH_1}-dev.git${CMAKE_MATCH_2}.${CMAKE_MATCH_3}" + PARENT_SCOPE) + set(UMF_CMAKE_VERSION + "${CMAKE_MATCH_1}" + PARENT_SCOPE) + return() + endif() + # v1.5.0-19-gb8f7a32 -> UMF ver: "1.5.0-git19.gb8f7a32" string(REGEX MATCHALL "v([0-9.]*)-([0-9]*)-([0-9a-g]*)" MATCHES ${GIT_VERSION}) @@ -204,32 +233,39 @@ function(add_umf_target_compile_options name) -Wall -Wextra -Wpedantic - -Wempty-body - -Wunused-parameter - -Wformat -Wformat-security -Wcast-qual - -Wunused-result $<$:-fdiagnostics-color=auto>) if(CMAKE_BUILD_TYPE STREQUAL "Release") target_compile_definitions(${name} PRIVATE -D_FORTIFY_SOURCE=2) endif() if(UMF_DEVELOPER_MODE) - target_compile_options(${name} PRIVATE -fno-omit-frame-pointer - -fstack-protector-strong) + target_compile_options( + ${name} PRIVATE -fno-omit-frame-pointer + -fstack-protector-strong -Werror) endif() - if(UMF_USE_GCOV) + if(UMF_USE_COVERAGE) if(NOT CMAKE_BUILD_TYPE STREQUAL "Debug") - message(FATAL_ERROR "To use gcov, the build type must be Debug") + message( + FATAL_ERROR + "To use the --coverage flag, the build type 
must be Debug" + ) endif() target_compile_options(${name} PRIVATE --coverage) + if(${CMAKE_C_COMPILER} MATCHES "gcc") + # Fix for the following error: geninfo: ERROR: Unexpected + # negative count '-1' for provider_os_memory.c:1037. Perhaps you + # need to compile with '-fprofile-update=atomic + target_compile_options(${name} PRIVATE -fprofile-update=atomic + -g -O0) + endif() endif() elseif(MSVC) target_compile_options( ${name} PRIVATE /MD$<$:d> $<$:/sdl> - /analyze + $<$:/analyze> /DYNAMICBASE /W4 /Gy @@ -240,6 +276,9 @@ function(add_umf_target_compile_options name) # disable 4200 warning: nonstandard extension used: # zero-sized array in struct/union /wd4200) + if(UMF_DEVELOPER_MODE) + target_compile_options(${name} PRIVATE /WX) + endif() if(${CMAKE_C_COMPILER_ID} MATCHES "MSVC") target_compile_options( ${name} @@ -254,10 +293,12 @@ function(add_umf_target_link_options name) if(NOT MSVC) if(NOT APPLE) target_link_options(${name} PRIVATE "LINKER:-z,relro,-z,now") - if(UMF_USE_GCOV) + if(UMF_USE_COVERAGE) if(NOT CMAKE_BUILD_TYPE STREQUAL "Debug") message( - FATAL_ERROR "To use gcov, the build type must be Debug") + FATAL_ERROR + "To use the --coverage flag, the build type must be Debug" + ) endif() target_link_options(${name} PRIVATE --coverage) endif() @@ -266,17 +307,17 @@ function(add_umf_target_link_options name) target_link_options( ${name} PRIVATE - /DYNAMICBASE - /HIGHENTROPYVA + LINKER:/DYNAMICBASE + LINKER:/HIGHENTROPYVA $<$:/DEPENDENTLOADFLAG:0x2000> $<$:/DEPENDENTLOADFLAG:0x2000> - /NXCOMPAT) + LINKER:/NXCOMPAT) endif() endfunction() function(add_umf_target_exec_options name) if(MSVC) - target_link_options(${name} PRIVATE /ALLOWISOLATION) + target_link_options(${name} PRIVATE LINKER:/ALLOWISOLATION) endif() endfunction() @@ -333,7 +374,7 @@ function(add_umf_library) if(WINDOWS) target_link_options(${ARG_NAME} PRIVATE - /DEF:${ARG_WINDOWS_DEF_FILE}) + LINKER:/DEF:${ARG_WINDOWS_DEF_FILE}) elseif(LINUX) target_link_options(${ARG_NAME} PRIVATE 
"-Wl,--version-script=${ARG_LINUX_MAP_FILE}") @@ -417,12 +458,3 @@ macro(add_sanitizer_flag flag) set(CMAKE_REQUIRED_FLAGS ${SAVED_CMAKE_REQUIRED_FLAGS}) endmacro() - -function(add_optional_symbol symbol) - set(UMF_OPTIONAL_SYMBOLS_WINDOWS - "${UMF_OPTIONAL_SYMBOLS_WINDOWS} \n ${symbol}" - PARENT_SCOPE) - set(UMF_OPTIONAL_SYMBOLS_LINUX - "${UMF_OPTIONAL_SYMBOLS_LINUX} \n ${symbol};" - PARENT_SCOPE) -endfunction() diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 918dc2809..942579a30 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -44,16 +44,17 @@ endif() if(UMF_BUILD_GPU_EXAMPLES AND UMF_BUILD_LIBUMF_POOL_DISJOINT AND UMF_BUILD_LEVEL_ZERO_PROVIDER) - set(EXAMPLE_NAME umf_example_gpu_shared_memory) + set(EXAMPLE_NAME umf_example_level_zero_shared_memory) add_umf_executable( NAME ${EXAMPLE_NAME} - SRCS gpu_shared_memory/gpu_shared_memory.c + SRCS level_zero_shared_memory/level_zero_shared_memory.c + common/examples_level_zero_helpers.c LIBS disjoint_pool ze_loader umf) target_include_directories( ${EXAMPLE_NAME} - PRIVATE ${UMF_CMAKE_SOURCE_DIR}/src/utils + PRIVATE ${LEVEL_ZERO_INCLUDE_DIRS} ${UMF_CMAKE_SOURCE_DIR}/src/utils ${UMF_CMAKE_SOURCE_DIR}/include ${UMF_CMAKE_SOURCE_DIR}/examples/common) @@ -66,6 +67,44 @@ if(UMF_BUILD_GPU_EXAMPLES set_tests_properties(${EXAMPLE_NAME} PROPERTIES LABELS "example") + if(WINDOWS) + # append PATH to DLLs + set_property(TEST ${EXAMPLE_NAME} PROPERTY ENVIRONMENT_MODIFICATION + "${DLL_PATH_LIST}") + endif() +else() + message(STATUS "GPU Level Zero shared memory example requires " + "UMF_BUILD_GPU_EXAMPLES, UMF_BUILD_LEVEL_ZERO_PROVIDER and " + "UMF_BUILD_LIBUMF_POOL_DISJOINT to be turned ON - skipping") +endif() + +if(UMF_BUILD_GPU_EXAMPLES + AND UMF_BUILD_LIBUMF_POOL_DISJOINT + AND UMF_BUILD_CUDA_PROVIDER + AND UMF_CUDA_ENABLED) + set(EXAMPLE_NAME umf_example_cuda_shared_memory) + + add_umf_executable( + NAME ${EXAMPLE_NAME} + SRCS cuda_shared_memory/cuda_shared_memory.c + LIBS 
disjoint_pool cuda umf) + + target_include_directories( + ${EXAMPLE_NAME} + PRIVATE ${CUDA_INCLUDE_DIRS} ${UMF_CMAKE_SOURCE_DIR}/src/utils + ${UMF_CMAKE_SOURCE_DIR}/include + ${UMF_CMAKE_SOURCE_DIR}/examples/common) + + target_link_directories(${EXAMPLE_NAME} PRIVATE ${LIBHWLOC_LIBRARY_DIRS} + ${CUDA_LIBRARY_DIRS}) + + add_test( + NAME ${EXAMPLE_NAME} + COMMAND ${EXAMPLE_NAME} + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + + set_tests_properties(${EXAMPLE_NAME} PROPERTIES LABELS "example") + if(WINDOWS) # append PATH to DLLs set_property(TEST ${EXAMPLE_NAME} PROPERTY ENVIRONMENT_MODIFICATION @@ -74,9 +113,8 @@ if(UMF_BUILD_GPU_EXAMPLES else() message( STATUS - "GPU shared memory example requires UMF_BUILD_GPU_EXAMPLES, " - "UMF_BUILD_LEVEL_ZERO_PROVIDER and UMF_BUILD_LIBUMF_POOL_DISJOINT " - "to be turned ON - skipping") + "GPU CUDA shared memory example requires UMF_BUILD_GPU_EXAMPLES, UMF_BUILD_CUDA_PROVIDER, UMF_BUILD_LIBUMF_POOL_DISJOINT to be turned ON and installed CUDA libraries - skipping" + ) endif() # TODO: it looks like there is some problem with IPC implementation in Level @@ -90,6 +128,7 @@ if(UMF_BUILD_GPU_EXAMPLES add_umf_executable( NAME ${EXAMPLE_NAME} SRCS ipc_level_zero/ipc_level_zero.c + common/examples_level_zero_helpers.c LIBS disjoint_pool ze_loader umf) target_include_directories( @@ -134,7 +173,8 @@ function(build_umf_ipc_example name) ${EX_NAME} PRIVATE ${UMF_CMAKE_SOURCE_DIR}/src/utils ${UMF_CMAKE_SOURCE_DIR}/include) - target_link_directories(${EX_NAME} PRIVATE ${LIBHWLOC_LIBRARY_DIRS}) + target_link_directories(${EX_NAME} PRIVATE ${LIBHWLOC_LIBRARY_DIRS} + ${TBB_LIBRARY_DIRS}) endforeach(loop_var) endfunction() @@ -155,7 +195,7 @@ function(add_umf_ipc_example script) endif() endfunction() -if(LINUX) +if(LINUX AND UMF_POOL_SCALABLE_ENABLED) build_umf_ipc_example(ipc_ipcapi) add_umf_ipc_example(ipc_ipcapi_anon_fd) add_umf_ipc_example(ipc_ipcapi_shm) @@ -165,3 +205,96 @@ else() "IPC examples with UMF pool API are supported on Linux 
only - skipping" ) endif() + +if(LINUX) + set(UMF_TEST_SKIP_RETURN_CODE 125) + + set(EXAMPLE_NAME umf_example_memspace_numa) + + add_umf_executable( + NAME ${EXAMPLE_NAME} + SRCS memspace_numa/memspace_numa.c + LIBS umf ${LIBHWLOC_LIBRARIES} numa) + + target_include_directories( + ${EXAMPLE_NAME} + PRIVATE ${UMF_CMAKE_SOURCE_DIR}/src/utils + ${UMF_CMAKE_SOURCE_DIR}/include + ${UMF_CMAKE_SOURCE_DIR}/examples/common) + + target_link_directories(${EXAMPLE_NAME} PRIVATE ${LIBHWLOC_LIBRARY_DIRS}) + + add_test( + NAME ${EXAMPLE_NAME} + COMMAND ${EXAMPLE_NAME} + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + + set_tests_properties(${EXAMPLE_NAME} PROPERTIES + SKIP_RETURN_CODE ${UMF_TEST_SKIP_RETURN_CODE}) + + set(EXAMPLE_NAME umf_example_memspace_hmat) + + add_umf_executable( + NAME ${EXAMPLE_NAME} + SRCS memspace_hmat/memspace_hmat.c + LIBS umf ${LIBHWLOC_LIBRARIES} numa) + + target_include_directories( + ${EXAMPLE_NAME} + PRIVATE ${UMF_CMAKE_SOURCE_DIR}/src/utils + ${UMF_CMAKE_SOURCE_DIR}/include + ${UMF_CMAKE_SOURCE_DIR}/examples/common) + + target_link_directories(${EXAMPLE_NAME} PRIVATE ${LIBHWLOC_LIBRARY_DIRS}) + + add_test( + NAME ${EXAMPLE_NAME} + COMMAND ${EXAMPLE_NAME} + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + + set_tests_properties(${EXAMPLE_NAME} PROPERTIES + SKIP_RETURN_CODE ${UMF_TEST_SKIP_RETURN_CODE}) + + if(UMF_POOL_SCALABLE_ENABLED) + set(EXAMPLE_NAME umf_example_custom_file_provider) + + add_umf_executable( + NAME ${EXAMPLE_NAME} + SRCS custom_file_provider/custom_file_provider.c + LIBS umf ${LIBHWLOC_LIBRARIES}) + + target_include_directories( + ${EXAMPLE_NAME} PRIVATE ${UMF_CMAKE_SOURCE_DIR}/src/utils + ${UMF_CMAKE_SOURCE_DIR}/include) + + target_link_directories(${EXAMPLE_NAME} PRIVATE + ${LIBHWLOC_LIBRARY_DIRS} ${TBB_LIBRARY_DIRS}) + + add_test( + NAME ${EXAMPLE_NAME} + COMMAND ${EXAMPLE_NAME} + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + endif() + + if(UMF_POOL_JEMALLOC_ENABLED) + set(EXAMPLE_NAME umf_example_dram_and_fsdax) + + 
add_umf_executable( + NAME ${EXAMPLE_NAME} + SRCS dram_and_fsdax/dram_and_fsdax.c + LIBS umf jemalloc_pool) + + target_link_directories(${EXAMPLE_NAME} PRIVATE + ${LIBHWLOC_LIBRARY_DIRS}) + + add_test( + NAME ${EXAMPLE_NAME} + COMMAND ${EXAMPLE_NAME} + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + endif() +else() + message( + STATUS "Memspace examples API are supported on Linux only - skipping") + message( + STATUS "File provider example is supported on Linux only - skipping") +endif() diff --git a/examples/basic/basic.c b/examples/basic/basic.c index d886c4af8..846e71eda 100644 --- a/examples/basic/basic.c +++ b/examples/basic/basic.c @@ -23,10 +23,17 @@ int main(void) { // in an mmap call like this: // mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0) umf_memory_provider_ops_t *provider_ops = umfOsMemoryProviderOps(); - umf_os_memory_provider_params_t params = umfOsMemoryProviderParamsDefault(); + umf_os_memory_provider_params_handle_t params = NULL; umf_memory_provider_handle_t provider; - res = umfMemoryProviderCreate(provider_ops, ¶ms, &provider); + res = umfOsMemoryProviderParamsCreate(¶ms); + if (res != UMF_RESULT_SUCCESS) { + printf("Failed to create OS memory provider params!\n"); + return -1; + } + + res = umfMemoryProviderCreate(provider_ops, params, &provider); + umfOsMemoryProviderParamsDestroy(params); if (res != UMF_RESULT_SUCCESS) { printf("Failed to create a memory provider!\n"); return -1; diff --git a/examples/cmake/FindCUDA.cmake b/examples/cmake/FindCUDA.cmake new file mode 100644 index 000000000..5e4e2eead --- /dev/null +++ b/examples/cmake/FindCUDA.cmake @@ -0,0 +1,35 @@ +# Copyright (C) 2024 Intel Corporation +# Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +message(STATUS "Checking for module 'cuda' using find_library()") + +find_library(CUDA_LIBRARY NAMES libcuda cuda) +set(CUDA_LIBRARIES ${CUDA_LIBRARY}) + +get_filename_component(CUDA_LIB_DIR ${CUDA_LIBRARIES} DIRECTORY) +set(CUDA_LIBRARY_DIRS ${CUDA_LIB_DIR}) + +if(WINDOWS) + find_file(CUDA_DLL NAMES "nvcuda.dll") + get_filename_component(CUDA_DLL_DIR ${CUDA_DLL} DIRECTORY) + set(CUDA_DLL_DIRS ${CUDA_DLL_DIR}) +endif() + +if(CUDA_LIBRARY) + message(STATUS " Found cuda using find_library()") + message(STATUS " CUDA_LIBRARIES = ${CUDA_LIBRARIES}") + message(STATUS " CUDA_INCLUDE_DIRS = ${CUDA_INCLUDE_DIRS}") + message(STATUS " CUDA_LIBRARY_DIRS = ${CUDA_LIBRARY_DIRS}") + if(WINDOWS) + message(STATUS " CUDA_DLL_DIRS = ${CUDA_DLL_DIRS}") + endif() +else() + set(MSG_NOT_FOUND "cuda NOT found (set CMAKE_PREFIX_PATH to point the " + "location)") + if(CUDA_FIND_REQUIRED) + message(FATAL_ERROR ${MSG_NOT_FOUND}) + else() + message(WARNING ${MSG_NOT_FOUND}) + endif() +endif() diff --git a/examples/cmake/FindJEMALLOC.cmake b/examples/cmake/FindJEMALLOC.cmake new file mode 100644 index 000000000..89d488ecc --- /dev/null +++ b/examples/cmake/FindJEMALLOC.cmake @@ -0,0 +1,52 @@ +# Copyright (C) 2024 Intel Corporation +# Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +message(STATUS "Checking for module 'jemalloc' using find_library()") + +find_library(JEMALLOC_LIBRARY NAMES libjemalloc jemalloc) +set(JEMALLOC_LIBRARIES ${JEMALLOC_LIBRARY}) + +get_filename_component(JEMALLOC_LIB_DIR ${JEMALLOC_LIBRARIES} DIRECTORY) +set(JEMALLOC_LIBRARY_DIRS ${JEMALLOC_LIB_DIR}) + +find_file(JEMALLOC_HEADER NAMES "jemalloc/jemalloc.h") +if(JEMALLOC_HEADER) + get_filename_component(JEMALLOC_INCLUDE_DIR_TBB ${JEMALLOC_HEADER} + DIRECTORY) + get_filename_component(JEMALLOC_INCLUDE_DIR ${JEMALLOC_INCLUDE_DIR_TBB} + DIRECTORY) + set(JEMALLOC_INCLUDE_DIRS ${JEMALLOC_INCLUDE_DIR}) +else() + set(MSG_NOT_FOUND " header NOT found " + "(set CMAKE_PREFIX_PATH to point the location)") + if(JEMALLOC_FIND_REQUIRED) + message(FATAL_ERROR ${MSG_NOT_FOUND}) + else() + message(WARNING ${MSG_NOT_FOUND}) + endif() +endif() + +if(WINDOWS) + find_file(JEMALLOC_DLL NAMES "bin/jemalloc.dll" "jemalloc.dll") + get_filename_component(JEMALLOC_DLL_DIR ${JEMALLOC_DLL} DIRECTORY) + set(JEMALLOC_DLL_DIRS ${JEMALLOC_DLL_DIR}) +endif() + +if(JEMALLOC_LIBRARY) + message(STATUS " Found jemalloc using find_library()") + message(STATUS " JEMALLOC_LIBRARIES = ${JEMALLOC_LIBRARIES}") + message(STATUS " JEMALLOC_INCLUDE_DIRS = ${JEMALLOC_INCLUDE_DIRS}") + message(STATUS " JEMALLOC_LIBRARY_DIRS = ${JEMALLOC_LIBRARY_DIRS}") + if(WINDOWS) + message(STATUS " JEMALLOC_DLL_DIRS = ${JEMALLOC_DLL_DIRS}") + endif() +else() + set(MSG_NOT_FOUND + "jemalloc NOT found (set CMAKE_PREFIX_PATH to point the location)") + if(JEMALLOC_FIND_REQUIRED) + message(FATAL_ERROR ${MSG_NOT_FOUND}) + else() + message(WARNING ${MSG_NOT_FOUND}) + endif() +endif() diff --git a/examples/cmake/FindLIBNUMA.cmake b/examples/cmake/FindLIBNUMA.cmake new file mode 100644 index 000000000..8c23f481c --- /dev/null +++ b/examples/cmake/FindLIBNUMA.cmake @@ -0,0 +1,20 @@ +# Copyright (C) 2024 Intel Corporation +# Under the Apache License v2.0 with LLVM Exceptions. 
See LICENSE.TXT. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +message(STATUS "Checking for module 'libnuma' using find_library()") + +find_library(LIBNUMA_LIBRARY NAMES libnuma numa) +set(LIBNUMA_LIBRARIES ${LIBNUMA_LIBRARY}) + +if(LIBNUMA_LIBRARY) + message(STATUS " Found libnuma using find_library()") +else() + set(MSG_NOT_FOUND + "libnuma NOT found (set CMAKE_PREFIX_PATH to point the location)") + if(LIBNUMA_FIND_REQUIRED) + message(FATAL_ERROR ${MSG_NOT_FOUND}) + else() + message(WARNING ${MSG_NOT_FOUND}) + endif() +endif() diff --git a/examples/common/utils_level_zero.h b/examples/common/examples_level_zero_helpers.c similarity index 95% rename from examples/common/utils_level_zero.h rename to examples/common/examples_level_zero_helpers.c index 46f892278..5e00838c2 100644 --- a/examples/common/utils_level_zero.h +++ b/examples/common/examples_level_zero_helpers.c @@ -7,21 +7,12 @@ * */ -#ifndef UMF_EXAMPLE_UTILS_LEVEL_ZERO_H -#define UMF_EXAMPLE_UTILS_LEVEL_ZERO_H - #include #include -// To use the Level Zero API, the Level Zero SDK has to be installed -// on the system -#ifdef _WIN32 -#include -#else -#include -#endif +#include "examples_level_zero_helpers.h" -static int init_level_zero(void) { +int init_level_zero(void) { ze_init_flag_t flags = ZE_INIT_FLAG_GPU_ONLY; ze_result_t result = zeInit(flags); if (result != ZE_RESULT_SUCCESS) { @@ -118,8 +109,7 @@ static inline int get_devices(ze_driver_handle_t driver, uint32_t *devices_num_, return ret; } -static inline int find_driver_with_gpu(uint32_t *driver_idx, - ze_driver_handle_t *driver_) { +int find_driver_with_gpu(uint32_t *driver_idx, ze_driver_handle_t *driver_) { int ret = 0; ze_result_t ze_result; uint32_t drivers_num = 0; @@ -184,8 +174,7 @@ static inline int find_driver_with_gpu(uint32_t *driver_idx, return ret; } -static inline int find_gpu_device(ze_driver_handle_t driver, - ze_device_handle_t *device_) { +int find_gpu_device(ze_driver_handle_t driver, ze_device_handle_t 
*device_) { int ret = -1; uint32_t devices_num = 0; ze_device_handle_t *devices = NULL; @@ -415,5 +404,3 @@ int destroy_context(ze_context_handle_t context) { return 0; } - -#endif // UMF_EXAMPLE_UTILS_LEVEL_ZERO_H diff --git a/test/providers/level_zero_helpers.h b/examples/common/examples_level_zero_helpers.h similarity index 52% rename from test/providers/level_zero_helpers.h rename to examples/common/examples_level_zero_helpers.h index 6cd452c1c..2d8e92ff2 100644 --- a/test/providers/level_zero_helpers.h +++ b/examples/common/examples_level_zero_helpers.h @@ -1,25 +1,28 @@ /* + * * Copyright (C) 2024 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * */ -#ifndef UMF_TEST_LEVEL_ZERO_HELPERS_H -#define UMF_TEST_LEVEL_ZERO_HELPERS_H - -#include - -#include "ze_api.h" +#ifndef UMF_EXAMPLES_LEVEL_ZERO_H +#define UMF_EXAMPLES_LEVEL_ZERO_H -#ifdef __cplusplus -extern "C" { +// To use the Level Zero API, the Level Zero SDK has to be installed +// on the system +#ifdef _WIN32 +#include +#else +#include #endif -int get_drivers(uint32_t *drivers_num_, ze_driver_handle_t **drivers_); +int init_level_zero(void); -int get_devices(ze_driver_handle_t driver, uint32_t *devices_num_, - ze_device_handle_t **devices_); +int create_context(ze_driver_handle_t driver, ze_context_handle_t *context); + +int destroy_context(ze_context_handle_t context); int find_driver_with_gpu(uint32_t *driver_idx, ze_driver_handle_t *driver_); @@ -30,19 +33,6 @@ int level_zero_fill(ze_context_handle_t context, ze_device_handle_t device, size_t pattern_size); int level_zero_copy(ze_context_handle_t context, ze_device_handle_t device, - void *dst_ptr, const void *src_ptr, size_t size); - -int create_context(ze_driver_handle_t driver, ze_context_handle_t *context); - -int destroy_context(ze_context_handle_t context); - -ze_memory_type_t get_mem_type(ze_context_handle_t context, void *ptr); - 
-level_zero_memory_provider_params_t -create_level_zero_prov_params(umf_usm_memory_type_t memory_type); - -#ifdef __cplusplus -} -#endif + void *dst_ptr, void *src_ptr, size_t size); -#endif // UMF_TEST_LEVEL_ZERO_HELPERS_H +#endif /* UMF_EXAMPLES_LEVEL_ZERO_H */ diff --git a/examples/common/examples_utils.h b/examples/common/examples_utils.h new file mode 100644 index 000000000..9e4a93bcf --- /dev/null +++ b/examples/common/examples_utils.h @@ -0,0 +1,16 @@ +/* + * + * Copyright (C) 2024 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + */ + +#ifndef UMF_EXAMPLE_UTILS_H +#define UMF_EXAMPLE_UTILS_H + +// Needed for CI +#define TEST_SKIP_ERROR_CODE 125 + +#endif /* UMF_EXAMPLE_UTILS_H */ diff --git a/examples/cuda_shared_memory/CMakeLists.txt b/examples/cuda_shared_memory/CMakeLists.txt new file mode 100644 index 000000000..dd8567c14 --- /dev/null +++ b/examples/cuda_shared_memory/CMakeLists.txt @@ -0,0 +1,78 @@ +# Copyright (C) 2024 Intel Corporation +# Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +cmake_minimum_required(VERSION 3.14.0 FATAL_ERROR) +project(umf_example_cuda_shared_memory LANGUAGES C) +enable_testing() + +set(UMF_EXAMPLE_DIR "${CMAKE_SOURCE_DIR}/..") +list(APPEND CMAKE_MODULE_PATH "${UMF_EXAMPLE_DIR}/cmake") +message(STATUS "CMAKE_MODULE_PATH=${CMAKE_MODULE_PATH}") + +find_package(PkgConfig) +pkg_check_modules(LIBUMF libumf) +if(NOT LIBUMF_FOUND) + find_package(LIBUMF REQUIRED libumf) +endif() + +pkg_check_modules(LIBHWLOC hwloc>=2.3.0) +if(NOT LIBHWLOC_FOUND) + find_package(LIBHWLOC 2.3.0 REQUIRED hwloc) +endif() + +find_package(CUDA REQUIRED cuda) + +include(FetchContent) + +set(CUDA_REPO "https://gitlab.com/nvidia/headers/cuda-individual/cudart.git") +set(CUDA_TAG cuda-12.5.1) + +message(STATUS "Fetching CUDA ${CUDA_TAG} from ${CUDA_REPO} ...") + +FetchContent_Declare( + cuda-headers + GIT_REPOSITORY ${CUDA_REPO} + GIT_TAG ${CUDA_TAG} + EXCLUDE_FROM_ALL) +FetchContent_MakeAvailable(cuda-headers) + +set(CUDA_INCLUDE_DIRS + ${cuda-headers_SOURCE_DIR} + CACHE PATH "Path to CUDA headers") +message(STATUS "CUDA include directory: ${CUDA_INCLUDE_DIRS}") + +# build the example +set(EXAMPLE_NAME umf_example_cuda_shared_memory) +add_executable(${EXAMPLE_NAME} cuda_shared_memory.c) +target_include_directories( + ${EXAMPLE_NAME} PRIVATE ${CUDA_INCLUDE_DIRS} ${LIBUMF_INCLUDE_DIRS} + ${UMF_EXAMPLE_DIR}/common) +target_link_directories( + ${EXAMPLE_NAME} + PRIVATE + ${LIBUMF_LIBRARY_DIRS} + ${LIBHWLOC_LIBRARY_DIRS} + ${CUDA_LIBRARY_DIRS}) +target_link_options(${EXAMPLE_NAME} PRIVATE "-Wl,--start-group") +target_link_libraries( + ${EXAMPLE_NAME} PRIVATE stdc++ libdisjoint_pool.a ${CUDA_LIBRARIES} + ${LIBUMF_LIBRARIES}) + +# an optional part - adds a test of this example +add_test( + NAME ${EXAMPLE_NAME} + COMMAND ${EXAMPLE_NAME} + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + +set_tests_properties(${EXAMPLE_NAME} PROPERTIES LABELS "example-standalone") + +if(LINUX) + # set LD_LIBRARY_PATH + 
set_property( + TEST ${EXAMPLE_NAME} + PROPERTY + ENVIRONMENT_MODIFICATION + "LD_LIBRARY_PATH=path_list_append:${LIBUMF_LIBRARY_DIRS};LD_LIBRARY_PATH=path_list_append:${LIBHWLOC_LIBRARY_DIRS}" + ) +endif() diff --git a/examples/cuda_shared_memory/cuda_shared_memory.c b/examples/cuda_shared_memory/cuda_shared_memory.c new file mode 100644 index 000000000..50c8f9240 --- /dev/null +++ b/examples/cuda_shared_memory/cuda_shared_memory.c @@ -0,0 +1,183 @@ +/* + * + * Copyright (C) 2024 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + */ + +#include +#include + +#include +#include +#include + +// disable warning 4201: nonstandard extension used: nameless struct/union +#if defined(_MSC_VER) +#pragma warning(push) +#pragma warning(disable : 4201) +#endif // _MSC_VER + +#include + +#if defined(_MSC_VER) +#pragma warning(pop) +#endif // _MSC_VER + +int main(void) { + // A result object for storing UMF API result status + umf_result_t res; + + CUdevice cuDevice; + CUcontext cuContext; + int ret = 0; + + // Initialize the CUDA driver API + cuInit(0); + + // Get the handle to the first CUDA device + cuDeviceGet(&cuDevice, 0); + + // Create a context on the device + cuCtxCreate(&cuContext, 0, cuDevice); + + // Setup parameters for the CUDA Memory Provider. It will be used for + // allocating memory from CUDA devices. 
+ umf_cuda_memory_provider_params_handle_t cu_memory_provider_params = NULL; + res = umfCUDAMemoryProviderParamsCreate(&cu_memory_provider_params); + if (res != UMF_RESULT_SUCCESS) { + fprintf(stderr, "Failed to create memory provider params!\n"); + ret = -1; + goto cuda_destroy; + } + + res = umfCUDAMemoryProviderParamsSetContext(cu_memory_provider_params, + cuContext); + if (res != UMF_RESULT_SUCCESS) { + fprintf(stderr, "Failed to set context in memory provider params!\n"); + ret = -1; + goto provider_params_destroy; + } + + res = umfCUDAMemoryProviderParamsSetDevice(cu_memory_provider_params, + cuDevice); + if (res != UMF_RESULT_SUCCESS) { + fprintf(stderr, "Failed to set device in memory provider params!\n"); + ret = -1; + goto provider_params_destroy; + } + // Set the memory type to shared to allow the memory to be accessed on both + // CPU and GPU. + res = umfCUDAMemoryProviderParamsSetMemoryType(cu_memory_provider_params, + UMF_MEMORY_TYPE_SHARED); + if (res != UMF_RESULT_SUCCESS) { + fprintf(stderr, + "Failed to set memory type in memory provider params!\n"); + ret = -1; + goto provider_params_destroy; + } + + // Create CUDA memory provider + umf_memory_provider_handle_t cu_memory_provider; + res = + umfMemoryProviderCreate(umfCUDAMemoryProviderOps(), + cu_memory_provider_params, &cu_memory_provider); + if (res != UMF_RESULT_SUCCESS) { + fprintf(stderr, "Failed to create a memory provider!\n"); + ret = -1; + goto provider_params_destroy; + } + + printf("CUDA memory provider created at %p\n", (void *)cu_memory_provider); + + // Setup parameters for the Disjoint Pool. It will be used for managing the + // memory allocated using memory provider. 
+ umf_disjoint_pool_params_handle_t hDisjointParams = NULL; + res = umfDisjointPoolParamsCreate(&hDisjointParams); + if (res != UMF_RESULT_SUCCESS) { + fprintf(stderr, "disjoint pool params create failed\n"); + ret = -1; + goto memory_provider_destroy; + } + // Set the Slab Min Size to 64KB - the page size for GPU allocations + res = umfDisjointPoolParamsSetSlabMinSize(hDisjointParams, 64 * 1024L); + if (res != UMF_RESULT_SUCCESS) { + fprintf(stderr, "Failed to set the slab min size!\n"); + ret = -1; + goto pool_params_destroy; + } + // We would keep only single slab per each allocation bucket + res = umfDisjointPoolParamsSetCapacity(hDisjointParams, 1); + if (res != UMF_RESULT_SUCCESS) { + fprintf(stderr, "Failed to set the capacity!\n"); + ret = -1; + goto pool_params_destroy; + } + // Set the maximum poolable size to 64KB - objects with size above this + // limit will not be stored/allocated from the pool. + res = umfDisjointPoolParamsSetMaxPoolableSize(hDisjointParams, 64 * 1024L); + if (res != UMF_RESULT_SUCCESS) { + fprintf(stderr, "Failed to set the max poolable size!\n"); + ret = -1; + goto pool_params_destroy; + } + // Enable tracing + res = umfDisjointPoolParamsSetTrace(hDisjointParams, 1); + if (res != UMF_RESULT_SUCCESS) { + fprintf(stderr, "Failed to set the pool trace!\n"); + ret = -1; + goto pool_params_destroy; + } + + // Create Disjoint Pool memory pool. 
+ umf_memory_pool_handle_t cu_disjoint_memory_pool; + res = + umfPoolCreate(umfDisjointPoolOps(), cu_memory_provider, hDisjointParams, + UMF_POOL_CREATE_FLAG_NONE, &cu_disjoint_memory_pool); + if (res != UMF_RESULT_SUCCESS) { + fprintf(stderr, "Failed to create a memory pool!\n"); + ret = -1; + goto memory_provider_destroy; + } + + printf("Disjoint Pool created at %p\n", (void *)cu_disjoint_memory_pool); + + // Allocate some memory from the pool + int *ptr = umfPoolMalloc(cu_disjoint_memory_pool, sizeof(int)); + if (res != UMF_RESULT_SUCCESS) { + fprintf(stderr, "Failed to allocate memory from the memory pool!\n"); + ret = -1; + goto memory_pool_destroy; + } + + // Use allocated memory + *ptr = 1; + + // Free allocated memory + res = umfFree(ptr); + if (res != UMF_RESULT_SUCCESS) { + fprintf(stderr, "Failed to free memory to the pool!\n"); + ret = -1; + goto memory_pool_destroy; + } + printf("Freed memory at %p\n", (void *)ptr); + + // Cleanup +memory_pool_destroy: + umfPoolDestroy(cu_disjoint_memory_pool); + +pool_params_destroy: + umfDisjointPoolParamsDestroy(hDisjointParams); + +memory_provider_destroy: + umfMemoryProviderDestroy(cu_memory_provider); + +provider_params_destroy: + umfCUDAMemoryProviderParamsDestroy(cu_memory_provider_params); + +cuda_destroy: + ret = cuCtxDestroy(cuContext); + return ret; +} diff --git a/examples/custom_file_provider/CMakeLists.txt b/examples/custom_file_provider/CMakeLists.txt new file mode 100644 index 000000000..9d4e336c7 --- /dev/null +++ b/examples/custom_file_provider/CMakeLists.txt @@ -0,0 +1,52 @@ +# Copyright (C) 2024 Intel Corporation +# Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +cmake_minimum_required(VERSION 3.14.0 FATAL_ERROR) +project(umf_example_custom_file_provider LANGUAGES C) +enable_testing() + +set(UMF_EXAMPLE_DIR "${CMAKE_SOURCE_DIR}/..") +list(APPEND CMAKE_MODULE_PATH "${UMF_EXAMPLE_DIR}/cmake") +message(STATUS "CMAKE_MODULE_PATH=${CMAKE_MODULE_PATH}") + +find_package(PkgConfig) +pkg_check_modules(LIBUMF libumf) +if(NOT LIBUMF_FOUND) + find_package(LIBUMF REQUIRED libumf) +endif() + +pkg_check_modules(LIBHWLOC hwloc>=2.3.0) +if(NOT LIBHWLOC_FOUND) + find_package(LIBHWLOC 2.3.0 REQUIRED hwloc) +endif() + +pkg_check_modules(TBB tbb) +if(NOT TBB_FOUND) + find_package(TBB REQUIRED tbb) +endif() + +# build the example +set(EXAMPLE_NAME umf_example_custom_file_provider) +add_executable(${EXAMPLE_NAME} custom_file_provider.c) +target_include_directories(${EXAMPLE_NAME} PRIVATE ${LIBUMF_INCLUDE_DIRS}) +target_link_directories(${EXAMPLE_NAME} PRIVATE ${LIBHWLOC_LIBRARY_DIRS}) +target_link_libraries(${EXAMPLE_NAME} PRIVATE ${LIBUMF_LIBRARIES} + ${LIBHWLOC_LIBRARIES}) + +add_test( + NAME ${EXAMPLE_NAME} + COMMAND ${EXAMPLE_NAME} + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + +set_tests_properties(${EXAMPLE_NAME} PROPERTIES LABELS "example-standalone") + +if(LINUX) + # set LD_LIBRARY_PATH + set_property( + TEST ${EXAMPLE_NAME} + PROPERTY + ENVIRONMENT_MODIFICATION + "LD_LIBRARY_PATH=path_list_append:${LIBUMF_LIBRARY_DIRS};LD_LIBRARY_PATH=path_list_append:${LIBHWLOC_LIBRARY_DIRS}" + ) +endif() diff --git a/examples/custom_file_provider/custom_file_provider.c b/examples/custom_file_provider/custom_file_provider.c new file mode 100644 index 000000000..ffa61d63f --- /dev/null +++ b/examples/custom_file_provider/custom_file_provider.c @@ -0,0 +1,345 @@ +/* + * + * Copyright (C) 2024 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + */ +#define _GNU_SOURCE 1 + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +// Define the size for address reservation +#define ADDRESS_RESERVATION ((size_t)16 * 1024 * 1024 * 1024) + +// Macro to align a value up to the nearest multiple of align +#define ALIGN_UP(value, align) (((value) + (align)-1) & ~((align)-1)) + +// Struct to represent the file provider +typedef struct file_provider_t { + void *ptr; // Pointer to the reserved memory + size_t poffset; // Offset for the next allocation + int fd; // File descriptor for the backing file + size_t foffset; // Offset within the file for the next allocation + size_t page_size; // System page size +} file_provider_t; + +// Struct to represent the file parameters +typedef struct file_params_t { + const char *filename; // Filename for the backing file +} file_params_t; + +// Function to initialize the file provider +static umf_result_t file_init(void *params, void **provider) { + file_provider_t *file_provider = NULL; + + if (params == NULL || provider == NULL) { + fprintf(stderr, "Params or provider cannot be null\n"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + file_params_t *file_params = (file_params_t *)params; + int page_size = 0; + umf_result_t ret = UMF_RESULT_SUCCESS; + + // Allocate memory for the file provider + file_provider = malloc(sizeof(*file_provider)); + if (!file_provider) { + fprintf(stderr, "Failed to allocate memory for file provider\n"); + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + // Open the file + file_provider->fd = open(file_params->filename, O_RDWR | O_CREAT, 0666); + if (file_provider->fd < 0) { + perror("Failed to open file"); + ret = UMF_RESULT_ERROR_INVALID_ARGUMENT; + goto cleanup_malloc; + } + + // Reserve address space for subsequent allocations. + // This simplifies translation between addresses and offset in the file. 
+ file_provider->ptr = mmap(NULL, ADDRESS_RESERVATION, PROT_NONE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + if (file_provider->ptr == MAP_FAILED) { + perror("Failed to memory map anonymous memory"); + ret = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + goto cleanup_fd; + } + + // Get the page size + page_size = sysconf(_SC_PAGESIZE); + if (page_size < 0) { + perror("Failed to get system page size"); + ret = UMF_RESULT_ERROR_UNKNOWN; + goto cleanup_mmap; + } + + // Initialize the file provider fields + file_provider->poffset = 0; + file_provider->foffset = 0; + file_provider->page_size = (size_t)page_size; + *provider = file_provider; + + return UMF_RESULT_SUCCESS; + +cleanup_mmap: + munmap(file_provider->ptr, ADDRESS_RESERVATION); +cleanup_fd: + close(file_provider->fd); +cleanup_malloc: + free(file_provider); + return ret; +} + +// Function to deinitialize the file provider +static void file_deinit(void *provider) { + file_provider_t *file_provider = (file_provider_t *)provider; + munmap(file_provider->ptr, ADDRESS_RESERVATION); + close(file_provider->fd); + free(file_provider); +} + +// Function to allocate memory from the file provider +static umf_result_t file_alloc(void *provider, size_t size, size_t alignment, + void **ptr) { + file_provider_t *file_provider = (file_provider_t *)provider; + size_t page_size = file_provider->page_size; + + if (alignment && (alignment % page_size) && (page_size % alignment)) { + fprintf(stderr, + "Wrong alignment: %zu (not a multiple or a divider of the " + "minimum page size (%zu))", + alignment, page_size); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + size = ALIGN_UP(size, page_size); + + // calculate address for new allocation. All allocation are page aligned so + // if alignment is bigger than page size we have to adjust the address + uintptr_t ptr_offset = + (uintptr_t)file_provider->ptr + file_provider->poffset; + uintptr_t aligned_ptr = + alignment > page_size ? 
ALIGN_UP(ptr_offset, alignment) : ptr_offset; + + size_t new_offset = aligned_ptr + size - (uintptr_t)file_provider->ptr; + if (new_offset + size > ADDRESS_RESERVATION) { + fprintf(stderr, "This example limits allocation up to 10GB\n"); + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + // Ensure the file is large enough to hold the new allocation + if (fallocate(file_provider->fd, 0, file_provider->foffset, size)) { + perror("Fallocate failed"); + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + // Map the file in place of the reservation + void *ret = mmap((void *)aligned_ptr, size, PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_PRIVATE, file_provider->fd, + file_provider->foffset); + if (ret == MAP_FAILED) { + perror("Memory map failed"); + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + file_provider->poffset = new_offset; + file_provider->foffset += size; + *ptr = ret; + return UMF_RESULT_SUCCESS; +} + +// Function to free allocated memory from the file provider +static umf_result_t file_free(void *provider, void *ptr, size_t size) { + if (ptr == NULL) { + fprintf(stderr, "ptr cannot be null\n"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + file_provider_t *file_provider = (file_provider_t *)provider; + if (size == 0) { + fprintf(stderr, "Size cannot be 0\n"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + if (ptr < file_provider->ptr || + (uintptr_t)ptr >= + (uintptr_t)file_provider->ptr + file_provider->poffset) { + fprintf(stderr, "Ptr is not within the provider's memory\n"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + size = ALIGN_UP(size, file_provider->page_size); + + // Replace allocation with a reservation to free memory + void *ptr2 = mmap(ptr, size, PROT_NONE, + MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + + if (ptr2 == MAP_FAILED) { + perror("Failed to free memory"); + return UMF_RESULT_ERROR_UNKNOWN; + } + + // Free allocated blocks to the filesystem + if (fallocate(file_provider->fd, FALLOC_FL_PUNCH_HOLE | 
FALLOC_FL_KEEP_SIZE, + (uintptr_t)ptr - (uintptr_t)file_provider->ptr, size)) { + perror("Fallocate failed"); + return UMF_RESULT_ERROR_UNKNOWN; + } + + return UMF_RESULT_SUCCESS; +} + +// Function to get the name of the file provider +static const char *file_get_name(void *provider) { + (void)provider; // Unused parameter + return "File Provider"; +} + +// Function to get the last native error of the file provider +// This function is needed only if the provider returns UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC +static void file_get_last_native_error(void *provider, const char **ppMessage, + int32_t *pError) { + (void)provider; // Unused parameter + *ppMessage = ""; + *pError = 0; +} + +// Function to get the recommended page size of the file provider +static umf_result_t file_get_recommended_page_size(void *provider, size_t size, + size_t *pageSize) { + (void)size; // Unused parameter + file_provider_t *file_provider = (file_provider_t *)provider; + *pageSize = file_provider->page_size; + return UMF_RESULT_SUCCESS; +} + +// Function to get the minimum page size of the file provider +static umf_result_t file_get_min_page_size(void *provider, void *ptr, + size_t *pageSize) { + (void)ptr; // Unused parameter + file_provider_t *file_provider = (file_provider_t *)provider; + *pageSize = file_provider->page_size; + return UMF_RESULT_SUCCESS; +} + +// File provider operations +static umf_memory_provider_ops_t file_ops = { + .version = UMF_VERSION_CURRENT, + .initialize = file_init, + .finalize = file_deinit, + .alloc = file_alloc, + .get_name = file_get_name, + .get_last_native_error = file_get_last_native_error, + .get_recommended_page_size = file_get_recommended_page_size, + .get_min_page_size = file_get_min_page_size, + .ext.free = file_free, +}; + +// Main function +int main(void) { + // A result object for storing UMF API result status + umf_result_t res; + umf_memory_provider_handle_t provider; + file_params_t params; + params.filename = 
"/tmp/file_provider_example"; + + // Create a memory provider + res = umfMemoryProviderCreate(&file_ops, ¶ms, &provider); + if (res != UMF_RESULT_SUCCESS) { + fprintf(stderr, "Failed to create a memory provider!\n"); + return -1; + } + printf("OS memory provider created at %p\n", (void *)provider); + + // Allocate memory from the memory provider + size_t alloc_size = 5000; + size_t alignment = 0; + void *ptr_provider = NULL; + + res = + umfMemoryProviderAlloc(provider, alloc_size, alignment, &ptr_provider); + if (res != UMF_RESULT_SUCCESS) { + fprintf(stderr, + "Failed to allocate memory from the memory provider!\n"); + goto memory_provider_destroy; + } + + const char *strSource = "Allocated memory at"; + + // Write to the allocated memory + memset(ptr_provider, '\0', alloc_size); + strncpy(ptr_provider, strSource, alloc_size); + printf("%s %p with the memory provider at %p\n", (char *)ptr_provider, + (void *)ptr_provider, (void *)provider); + + // Free the allocated memory + res = umfMemoryProviderFree(provider, ptr_provider, alloc_size); + if (res != UMF_RESULT_SUCCESS) { + fprintf(stderr, "Failed to free memory to the provider!\n"); + goto memory_provider_destroy; + } + printf("Freed memory at %p\n", ptr_provider); + + // Create a memory pool + umf_memory_pool_ops_t *pool_ops = umfScalablePoolOps(); + void *pool_params = NULL; + umf_pool_create_flags_t flags = 0; + umf_memory_pool_handle_t pool; + + res = umfPoolCreate(pool_ops, provider, pool_params, flags, &pool); + if (res != UMF_RESULT_SUCCESS) { + fprintf(stderr, "Failed to create a pool!\n"); + goto memory_provider_destroy; + } + printf("Scalable memory pool created at %p\n", (void *)pool); + + // Allocate some memory in the pool + size_t num = 1; + alloc_size = 128; + + char *ptr = umfPoolCalloc(pool, num, alloc_size); + if (!ptr) { + fprintf(stderr, "Failed to allocate memory in the pool!\n"); + goto memory_pool_destroy; + } + + // Write a string to the allocated memory + strncpy(ptr, strSource, 
alloc_size); + printf("%s %p\n", ptr, (void *)ptr); + + // Retrieve a memory pool from a pointer, available with memory tracking + umf_memory_pool_handle_t check_pool = umfPoolByPtr(ptr); + printf("Memory at %p has been allocated from the pool at %p\n", (void *)ptr, + (void *)check_pool); + + // Retrieve a memory provider from a pool + umf_memory_provider_handle_t check_provider; + res = umfPoolGetMemoryProvider(pool, &check_provider); + if (res != UMF_RESULT_SUCCESS) { + fprintf(stderr, "Failed to retrieve a memory provider for the pool!\n"); + goto memory_pool_destroy; + } + printf("Pool at %p has been allocated from the provider at %p\n", + (void *)pool, (void *)check_provider); + + // Clean up + umfFree(ptr); + umfPoolDestroy(pool); + umfMemoryProviderDestroy(provider); + return 0; + +memory_pool_destroy: + umfPoolDestroy(pool); +memory_provider_destroy: + umfMemoryProviderDestroy(provider); + return -1; +} diff --git a/examples/dram_and_fsdax/CMakeLists.txt b/examples/dram_and_fsdax/CMakeLists.txt new file mode 100644 index 000000000..0d0bf2593 --- /dev/null +++ b/examples/dram_and_fsdax/CMakeLists.txt @@ -0,0 +1,61 @@ +# Copyright (C) 2024 Intel Corporation +# Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +cmake_minimum_required(VERSION 3.14.0 FATAL_ERROR) +project(umf_example_dram_and_fsdax LANGUAGES C) +enable_testing() + +set(UMF_EXAMPLE_DIR "${CMAKE_SOURCE_DIR}/..") +list(APPEND CMAKE_MODULE_PATH "${UMF_EXAMPLE_DIR}/cmake") +message(STATUS "CMAKE_MODULE_PATH=${CMAKE_MODULE_PATH}") + +find_package(PkgConfig) +pkg_check_modules(LIBUMF libumf) +if(NOT LIBUMF_FOUND) + find_package(LIBUMF REQUIRED libumf) +endif() + +pkg_check_modules(LIBHWLOC hwloc>=2.3.0) +if(NOT LIBHWLOC_FOUND) + find_package(LIBHWLOC 2.3.0 REQUIRED hwloc) +endif() + +pkg_check_modules(JEMALLOC jemalloc) +if(NOT JEMALLOC_FOUND) + find_package(JEMALLOC REQUIRED jemalloc) +endif() + +# build the example +set(EXAMPLE_NAME umf_example_dram_and_fsdax) +add_executable(${EXAMPLE_NAME} dram_and_fsdax.c) +target_include_directories(${EXAMPLE_NAME} PRIVATE ${LIBUMF_INCLUDE_DIRS}) + +target_link_directories( + ${EXAMPLE_NAME} + PRIVATE + ${LIBUMF_LIBRARY_DIRS} + ${LIBHWLOC_LIBRARY_DIRS} + ${JEMALLOC_LIBRARY_DIRS}) + +target_link_libraries( + ${EXAMPLE_NAME} PRIVATE hwloc jemalloc_pool ${JEMALLOC_LIBRARIES} + ${LIBUMF_LIBRARIES}) + +# an optional part - adds a test of this example +add_test( + NAME ${EXAMPLE_NAME} + COMMAND ${EXAMPLE_NAME} + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + +set_tests_properties(${EXAMPLE_NAME} PROPERTIES LABELS "example-standalone") + +if(LINUX) + # set LD_LIBRARY_PATH + set_property( + TEST ${EXAMPLE_NAME} + PROPERTY + ENVIRONMENT_MODIFICATION + "LD_LIBRARY_PATH=path_list_append:${LIBUMF_LIBRARY_DIRS};LD_LIBRARY_PATH=path_list_append:${LIBHWLOC_LIBRARY_DIRS};LD_LIBRARY_PATH=path_list_append:${JEMALLOC_LIBRARY_DIRS}" + ) +endif() diff --git a/examples/dram_and_fsdax/dram_and_fsdax.c b/examples/dram_and_fsdax/dram_and_fsdax.c new file mode 100644 index 000000000..26f451728 --- /dev/null +++ b/examples/dram_and_fsdax/dram_and_fsdax.c @@ -0,0 +1,197 @@ +/* + * Copyright (C) 2024 Intel Corporation + * + * Under the 
Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + */ + +#include +#include +#include + +#include +#include +#include + +#include +#include + +static umf_memory_pool_handle_t create_dram_pool(void) { + umf_memory_provider_handle_t provider_dram = NULL; + umf_memory_pool_handle_t pool_dram; + umf_result_t umf_result; + + umf_os_memory_provider_params_handle_t params_dram = NULL; + umf_result = umfOsMemoryProviderParamsCreate(¶ms_dram); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf(stderr, "Failed to create OS memory provider params!\n"); + return NULL; + } + + umf_result = umfMemoryProviderCreate(umfOsMemoryProviderOps(), params_dram, + &provider_dram); + umfOsMemoryProviderParamsDestroy(params_dram); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf(stderr, "Creation of the OS memory provider failed"); + return NULL; + } + + // Create a DRAM memory pool + umf_result = umfPoolCreate(umfJemallocPoolOps(), provider_dram, NULL, + UMF_POOL_CREATE_FLAG_OWN_PROVIDER, &pool_dram); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf(stderr, "Failed to create a DRAM memory pool!\n"); + umfMemoryProviderDestroy(provider_dram); + return NULL; + } + + return pool_dram; +} + +static umf_memory_pool_handle_t create_fsdax_pool(const char *path) { + umf_memory_provider_handle_t provider_fsdax = NULL; + umf_memory_pool_handle_t pool_fsdax; + umf_result_t umf_result; + + umf_file_memory_provider_params_handle_t params_fsdax = NULL; + umf_result = umfFileMemoryProviderParamsCreate(¶ms_fsdax, path); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf(stderr, "Failed to create the File Memory Provider params"); + return NULL; + } + // FSDAX requires mapping the UMF_MEM_MAP_SHARED flag + umf_result = umfFileMemoryProviderParamsSetVisibility(params_fsdax, + UMF_MEM_MAP_SHARED); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf(stderr, + "Failed to set the visibility of the FSDAX file provider"); + 
umfFileMemoryProviderParamsDestroy(params_fsdax); + return NULL; + } + + umf_result = umfMemoryProviderCreate(umfFileMemoryProviderOps(), + params_fsdax, &provider_fsdax); + umfFileMemoryProviderParamsDestroy(params_fsdax); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf(stderr, "Failed to create the FSDAX file provider"); + return NULL; + } + + // Create an FSDAX memory pool + // + // The file memory provider does not support the free operation + // (`umfMemoryProviderFree()` always returns `UMF_RESULT_ERROR_NOT_SUPPORTED`), + // so it should be used with a pool manager that will take over + // the managing of the provided memory - for example the jemalloc pool + // with the `disable_provider_free` parameter set to true. + umf_jemalloc_pool_params_handle_t pool_params; + umf_result = umfJemallocPoolParamsCreate(&pool_params); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf(stderr, "Failed to create jemalloc params!\n"); + umfMemoryProviderDestroy(provider_fsdax); + return NULL; + } + umf_result = umfJemallocPoolParamsSetKeepAllMemory(pool_params, true); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf(stderr, "Failed to set KeepAllMemory!\n"); + umfMemoryProviderDestroy(provider_fsdax); + return NULL; + } + + // Create an FSDAX memory pool + umf_result = + umfPoolCreate(umfJemallocPoolOps(), provider_fsdax, pool_params, + UMF_POOL_CREATE_FLAG_OWN_PROVIDER, &pool_fsdax); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf(stderr, "Failed to create an FSDAX memory pool!\n"); + umfMemoryProviderDestroy(provider_fsdax); + return NULL; + } + + umf_result = umfJemallocPoolParamsDestroy(pool_params); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf(stderr, "Failed to destroy jemalloc params!\n"); + } + + return pool_fsdax; +} + +int main(void) { + int ret = -1; + + // This example requires: + // - the FSDAX device to be mounted in the OS (e.g. 
/mnt/pmem1) and + // - the UMF_TESTS_FSDAX_PATH environment variable to contain + // a path to a file on this FSDAX device. + char *path = getenv("UMF_TESTS_FSDAX_PATH"); + if (path == NULL || path[0] == 0) { + fprintf( + stderr, + "Warning: UMF_TESTS_FSDAX_PATH is not set, skipping testing ...\n"); + return 0; + } + + umf_memory_pool_handle_t dram_pool = create_dram_pool(); + if (dram_pool == NULL) { + fprintf(stderr, "Failed to create a DRAM memory pool!\n"); + return -1; + } + + fprintf(stderr, "Created a DRAM memory pool\n"); + + umf_memory_pool_handle_t fsdax_pool = create_fsdax_pool(path); + if (fsdax_pool == NULL) { + fprintf(stderr, "Failed to create an FSDAX memory pool!\n"); + goto err_destroy_dram_pool; + } + + fprintf(stderr, "Created an FSDAX memory pool\n"); + + size_t size = 2 * 1024 * 1024; // == 2 MB + + // Allocate from the DRAM memory pool + char *dram_buf = umfPoolCalloc(dram_pool, 1, size); + if (dram_buf == NULL) { + fprintf(stderr, + "Failed to allocate memory from the DRAM memory pool!\n"); + goto err_destroy_pools; + } + + fprintf(stderr, "Allocated memory from the DRAM memory pool\n"); + + // Allocate from the FSDAX memory pool + char *fsdax_buf = umfPoolCalloc(fsdax_pool, 1, size); + if (fsdax_buf == NULL) { + fprintf(stderr, + "Failed to allocate memory from the FSDAX memory pool!\n"); + goto err_free_dram; + } + + fprintf(stderr, "Allocated memory from the FSDAX memory pool\n"); + + // Use the allocation from DRAM + dram_buf[0] = '.'; + + // Use the allocation from FSDAX + fsdax_buf[0] = '.'; + + // success + ret = 0; + + // The file memory provider does not support the free() operation, + // so we do not need to call: umfPoolFree(fsdax_pool, fsdax_buf); + +err_free_dram: + fprintf(stderr, "Freeing the allocation from the DRAM memory pool ...\n"); + umfPoolFree(dram_pool, dram_buf); + +err_destroy_pools: + fprintf(stderr, "Destroying the FSDAX memory pool ...\n"); + umfPoolDestroy(fsdax_pool); + +err_destroy_dram_pool: + fprintf(stderr, 
"Destroying the DRAM memory pool ...\n"); + umfPoolDestroy(dram_pool); + + return ret; +} diff --git a/examples/ipc_ipcapi/ipc_ipcapi_consumer.c b/examples/ipc_ipcapi/ipc_ipcapi_consumer.c index 05596bd16..2f55c473f 100644 --- a/examples/ipc_ipcapi/ipc_ipcapi_consumer.c +++ b/examples/ipc_ipcapi/ipc_ipcapi_consumer.c @@ -99,17 +99,33 @@ int main(int argc, char *argv[]) { int port = atoi(argv[1]); umf_memory_provider_handle_t OS_memory_provider = NULL; - umf_os_memory_provider_params_t os_params; + umf_os_memory_provider_params_handle_t os_params = NULL; enum umf_result_t umf_result; - os_params = umfOsMemoryProviderParamsDefault(); - os_params.visibility = UMF_MEM_MAP_SHARED; + umf_result = umfOsMemoryProviderParamsCreate(&os_params); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf( + stderr, + "[consumer] ERROR: creating OS memory provider params failed\n"); + return -1; + } + umf_result = + umfOsMemoryProviderParamsSetVisibility(os_params, UMF_MEM_MAP_SHARED); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf(stderr, "[consumer] ERROR: setting visibility mode failed\n"); + goto err_destroy_OS_params; + } if (argc >= 3) { - os_params.shm_name = argv[2]; + umf_result = umfOsMemoryProviderParamsSetShmName(os_params, argv[2]); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf(stderr, + "[consumer] ERROR: setting shared memory name failed\n"); + goto err_destroy_OS_params; + } } // create OS memory provider - umf_result = umfMemoryProviderCreate(umfOsMemoryProviderOps(), &os_params, + umf_result = umfMemoryProviderCreate(umfOsMemoryProviderOps(), os_params, &OS_memory_provider); if (umf_result != UMF_RESULT_SUCCESS) { fprintf(stderr, @@ -126,6 +142,13 @@ int main(int argc, char *argv[]) { goto err_destroy_OS_memory_provider; } + umf_ipc_handler_handle_t ipc_handler; + umf_result = umfPoolGetIPCHandler(scalable_pool, &ipc_handler); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf(stderr, "[producer] ERROR: get IPC handler failed\n"); + goto 
err_destroy_scalable_pool; + } + // connect to the producer producer_socket = consumer_connect_to_producer(port); if (producer_socket < 0) { @@ -193,7 +216,7 @@ int main(int argc, char *argv[]) { len); void *SHM_ptr; - umf_result = umfOpenIPCHandle(scalable_pool, IPC_handle, &SHM_ptr); + umf_result = umfOpenIPCHandle(ipc_handler, IPC_handle, &SHM_ptr); if (umf_result == UMF_RESULT_ERROR_NOT_SUPPORTED) { fprintf(stderr, "[consumer] SKIP: opening the IPC handle is not supported\n"); @@ -267,6 +290,9 @@ int main(int argc, char *argv[]) { err_destroy_OS_memory_provider: umfMemoryProviderDestroy(OS_memory_provider); +err_destroy_OS_params: + umfOsMemoryProviderParamsDestroy(os_params); + if (ret == 0) { fprintf(stderr, "[consumer] Shutting down (status OK) ...\n"); } else if (ret == 1) { diff --git a/examples/ipc_ipcapi/ipc_ipcapi_producer.c b/examples/ipc_ipcapi/ipc_ipcapi_producer.c index fcb73650f..4157e8284 100644 --- a/examples/ipc_ipcapi/ipc_ipcapi_producer.c +++ b/examples/ipc_ipcapi/ipc_ipcapi_producer.c @@ -70,17 +70,33 @@ int main(int argc, char *argv[]) { int port = atoi(argv[1]); umf_memory_provider_handle_t OS_memory_provider = NULL; - umf_os_memory_provider_params_t os_params; + umf_os_memory_provider_params_handle_t os_params = NULL; enum umf_result_t umf_result; - os_params = umfOsMemoryProviderParamsDefault(); - os_params.visibility = UMF_MEM_MAP_SHARED; + umf_result = umfOsMemoryProviderParamsCreate(&os_params); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf( + stderr, + "[producer] ERROR: creating OS memory provider params failed\n"); + return -1; + } + umf_result = + umfOsMemoryProviderParamsSetVisibility(os_params, UMF_MEM_MAP_SHARED); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf(stderr, "[producer] ERROR: setting visibility mode failed\n"); + goto err_destroy_OS_params; + } if (argc >= 3) { - os_params.shm_name = argv[2]; + umf_result = umfOsMemoryProviderParamsSetShmName(os_params, argv[2]); + if (umf_result != UMF_RESULT_SUCCESS) { + 
fprintf(stderr, + "[producer] ERROR: setting shared memory name failed\n"); + goto err_destroy_OS_params; + } } // create OS memory provider - umf_result = umfMemoryProviderCreate(umfOsMemoryProviderOps(), &os_params, + umf_result = umfMemoryProviderCreate(umfOsMemoryProviderOps(), os_params, &OS_memory_provider); if (umf_result != UMF_RESULT_SUCCESS) { fprintf(stderr, @@ -240,6 +256,9 @@ int main(int argc, char *argv[]) { err_destroy_OS_memory_provider: umfMemoryProviderDestroy(OS_memory_provider); +err_destroy_OS_params: + umfOsMemoryProviderParamsDestroy(os_params); + if (ret == 0) { fprintf(stderr, "[producer] Shutting down (status OK) ...\n"); } else if (ret == 1) { diff --git a/examples/ipc_ipcapi/ipc_ipcapi_shm.sh b/examples/ipc_ipcapi/ipc_ipcapi_shm.sh index db310d08d..57a344c1e 100755 --- a/examples/ipc_ipcapi/ipc_ipcapi_shm.sh +++ b/examples/ipc_ipcapi/ipc_ipcapi_shm.sh @@ -20,7 +20,7 @@ UMF_LOG_VAL="level:debug;flush:debug;output:stderr;pid:yes" rm -f /dev/shm/${SHM_NAME} echo "Starting ipc_ipcapi_shm CONSUMER on port $PORT ..." -UMF_LOG=$UMF_LOG_VAL ./umf_example_ipc_ipcapi_consumer $PORT $SHM_NAME & +UMF_LOG=$UMF_LOG_VAL ./umf_example_ipc_ipcapi_consumer $PORT & echo "Waiting 1 sec ..." sleep 1 diff --git a/examples/ipc_level_zero/CMakeLists.txt b/examples/ipc_level_zero/CMakeLists.txt index e38adf25f..5c17d4c9c 100644 --- a/examples/ipc_level_zero/CMakeLists.txt +++ b/examples/ipc_level_zero/CMakeLists.txt @@ -24,11 +24,11 @@ endif() include(FetchContent) set(LEVEL_ZERO_LOADER_REPO "https://github.com/oneapi-src/level-zero.git") -set(LEVEL_ZERO_LOADER_TAG v1.16.1) +set(LEVEL_ZERO_LOADER_TAG v1.19.2) message( STATUS - "Installing level-zero ${LEVEL_ZERO_LOADER_TAG} from ${LEVEL_ZERO_LOADER_REPO} ..." + "Fetching L0 loader (${LEVEL_ZERO_LOADER_TAG}) from ${LEVEL_ZERO_LOADER_REPO} ..." 
) FetchContent_Declare( @@ -36,11 +36,7 @@ FetchContent_Declare( GIT_REPOSITORY ${LEVEL_ZERO_LOADER_REPO} GIT_TAG ${LEVEL_ZERO_LOADER_TAG} EXCLUDE_FROM_ALL) - -FetchContent_GetProperties(level-zero-loader) -if(NOT level-zero-loader_POPULATED) - FetchContent_Populate(level-zero-loader) -endif() +FetchContent_MakeAvailable(level-zero-loader) set(LEVEL_ZERO_INCLUDE_DIRS ${level-zero-loader_SOURCE_DIR}/include @@ -49,7 +45,9 @@ message(STATUS "Level Zero include directory: ${LEVEL_ZERO_INCLUDE_DIRS}") # build the example set(EXAMPLE_NAME umf_example_ipc_level_zero) -add_executable(${EXAMPLE_NAME} ipc_level_zero.c) +add_executable( + ${EXAMPLE_NAME} ipc_level_zero.c + ${UMF_EXAMPLE_DIR}/common/examples_level_zero_helpers.c) target_include_directories(${EXAMPLE_NAME} PRIVATE ${LIBUMF_INCLUDE_DIRS} ${UMF_EXAMPLE_DIR}/common) target_link_directories(${EXAMPLE_NAME} PRIVATE ${LIBUMF_LIBRARY_DIRS} diff --git a/examples/ipc_level_zero/ipc_level_zero.c b/examples/ipc_level_zero/ipc_level_zero.c index e7991fce6..9579244ab 100644 --- a/examples/ipc_level_zero/ipc_level_zero.c +++ b/examples/ipc_level_zero/ipc_level_zero.c @@ -8,44 +8,90 @@ */ #include +#include #include "umf/ipc.h" #include "umf/memory_pool.h" #include "umf/pools/pool_disjoint.h" #include "umf/providers/provider_level_zero.h" -#include "utils_level_zero.h" +#include "examples_level_zero_helpers.h" int create_level_zero_pool(ze_context_handle_t context, ze_device_handle_t device, umf_memory_pool_handle_t *pool) { // setup params - level_zero_memory_provider_params_t params; - params.level_zero_context_handle = context; - params.level_zero_device_handle = device; - params.memory_type = UMF_MEMORY_TYPE_DEVICE; + umf_level_zero_memory_provider_params_handle_t provider_params = NULL; + + umf_result_t umf_result = + umfLevelZeroMemoryProviderParamsCreate(&provider_params); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf(stderr, + "ERROR: Failed to create Level Zero memory provider params!\n"); + return -1; + } + + 
umf_result = + umfLevelZeroMemoryProviderParamsSetContext(provider_params, context); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf(stderr, "ERROR: Failed to set context in Level Zero memory " + "provider params!\n"); + umfLevelZeroMemoryProviderParamsDestroy(provider_params); + return -1; + } + + umf_result = + umfLevelZeroMemoryProviderParamsSetDevice(provider_params, device); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf(stderr, "ERROR: Failed to set device in Level Zero memory " + "provider params!\n"); + umfLevelZeroMemoryProviderParamsDestroy(provider_params); + return -1; + } + + umf_result = umfLevelZeroMemoryProviderParamsSetMemoryType( + provider_params, UMF_MEMORY_TYPE_DEVICE); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf(stderr, "ERROR: Failed to set memory type in Level Zero memory " + "provider params!\n"); + umfLevelZeroMemoryProviderParamsDestroy(provider_params); + return -1; + } // create Level Zero provider umf_memory_provider_handle_t provider = 0; - umf_result_t umf_result = umfMemoryProviderCreate( - umfLevelZeroMemoryProviderOps(), ¶ms, &provider); + umf_result = umfMemoryProviderCreate(umfLevelZeroMemoryProviderOps(), + provider_params, &provider); + umfLevelZeroMemoryProviderParamsDestroy(provider_params); if (umf_result != UMF_RESULT_SUCCESS) { fprintf(stderr, "ERROR: Failed to create Level Zero memory provider!\n"); return -1; } + umf_disjoint_pool_params_handle_t disjoint_params = NULL; + umf_result = umfDisjointPoolParamsCreate(&disjoint_params); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf(stderr, "ERROR: Failed to create pool params!\n"); + goto provider_destroy; + } + // create pool umf_pool_create_flags_t flags = UMF_POOL_CREATE_FLAG_OWN_PROVIDER; - umf_disjoint_pool_params_t disjoint_params = umfDisjointPoolParamsDefault(); - umf_result = umfPoolCreate(umfDisjointPoolOps(), provider, &disjoint_params, + umf_result = umfPoolCreate(umfDisjointPoolOps(), provider, disjoint_params, flags, pool); + 
umfDisjointPoolParamsDestroy(disjoint_params); if (umf_result != UMF_RESULT_SUCCESS) { fprintf(stderr, "ERROR: Failed to create pool!\n"); - return -1; + goto provider_destroy; } return 0; + +provider_destroy: + umfMemoryProviderDestroy(provider); + + return -1; } int main(void) { @@ -134,14 +180,21 @@ int main(void) { fprintf(stdout, "Consumer pool created.\n"); + umf_ipc_handler_handle_t ipc_handler = 0; + umf_result = umfPoolGetIPCHandler(consumer_pool, &ipc_handler); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf(stderr, "ERROR: Failed to get IPC handler!\n"); + return -1; + } + void *mapped_buf = NULL; - umf_result = umfOpenIPCHandle(consumer_pool, ipc_handle, &mapped_buf); + umf_result = umfOpenIPCHandle(ipc_handler, ipc_handle, &mapped_buf); if (umf_result != UMF_RESULT_SUCCESS) { fprintf(stderr, "ERROR: Failed to open IPC handle!\n"); return -1; } - fprintf(stdout, "IPC handle opened in the consumer pool.\n"); + fprintf(stdout, "IPC handle opened.\n"); size_t *tmp_buf = malloc(BUFFER_SIZE); ret = level_zero_copy(consumer_context, device, tmp_buf, mapped_buf, diff --git a/examples/gpu_shared_memory/CMakeLists.txt b/examples/level_zero_shared_memory/CMakeLists.txt similarity index 83% rename from examples/gpu_shared_memory/CMakeLists.txt rename to examples/level_zero_shared_memory/CMakeLists.txt index 259b47d08..3711b4094 100644 --- a/examples/gpu_shared_memory/CMakeLists.txt +++ b/examples/level_zero_shared_memory/CMakeLists.txt @@ -3,7 +3,7 @@ # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception cmake_minimum_required(VERSION 3.14.0 FATAL_ERROR) -project(umf_example_gpu_shared_memory LANGUAGES C) +project(umf_example_level_zero_shared_memory LANGUAGES C) enable_testing() set(UMF_EXAMPLE_DIR "${CMAKE_SOURCE_DIR}/..") @@ -24,11 +24,11 @@ endif() include(FetchContent) set(LEVEL_ZERO_LOADER_REPO "https://github.com/oneapi-src/level-zero.git") -set(LEVEL_ZERO_LOADER_TAG v1.16.1) +set(LEVEL_ZERO_LOADER_TAG v1.19.2) message( STATUS - "Installing 
level-zero ${LEVEL_ZERO_LOADER_TAG} from ${LEVEL_ZERO_LOADER_REPO} ..." + "Fetching L0 loader (${LEVEL_ZERO_LOADER_TAG}) from ${LEVEL_ZERO_LOADER_REPO} ..." ) FetchContent_Declare( @@ -36,11 +36,7 @@ FetchContent_Declare( GIT_REPOSITORY ${LEVEL_ZERO_LOADER_REPO} GIT_TAG ${LEVEL_ZERO_LOADER_TAG} EXCLUDE_FROM_ALL) - -FetchContent_GetProperties(level-zero-loader) -if(NOT level-zero-loader_POPULATED) - FetchContent_Populate(level-zero-loader) -endif() +FetchContent_MakeAvailable(level-zero-loader) set(LEVEL_ZERO_INCLUDE_DIRS ${level-zero-loader_SOURCE_DIR}/include @@ -48,8 +44,10 @@ set(LEVEL_ZERO_INCLUDE_DIRS message(STATUS "Level Zero include directory: ${LEVEL_ZERO_INCLUDE_DIRS}") # build the example -set(EXAMPLE_NAME umf_example_gpu_shared_memory) -add_executable(${EXAMPLE_NAME} gpu_shared_memory.c) +set(EXAMPLE_NAME umf_example_level_zero_shared_memory) +add_executable( + ${EXAMPLE_NAME} level_zero_shared_memory.c + ${UMF_EXAMPLE_DIR}/common/examples_level_zero_helpers.c) target_include_directories(${EXAMPLE_NAME} PRIVATE ${LIBUMF_INCLUDE_DIRS} ${UMF_EXAMPLE_DIR}/common) target_link_directories(${EXAMPLE_NAME} PRIVATE ${LIBUMF_LIBRARY_DIRS} diff --git a/examples/gpu_shared_memory/gpu_shared_memory.c b/examples/level_zero_shared_memory/level_zero_shared_memory.c similarity index 52% rename from examples/gpu_shared_memory/gpu_shared_memory.c rename to examples/level_zero_shared_memory/level_zero_shared_memory.c index 8d34e0f59..b0f646861 100644 --- a/examples/gpu_shared_memory/gpu_shared_memory.c +++ b/examples/level_zero_shared_memory/level_zero_shared_memory.c @@ -7,11 +7,13 @@ * */ +#include + #include #include #include -#include "utils_level_zero.h" +#include "examples_level_zero_helpers.h" int main(void) { // A result object for storing UMF API result status @@ -49,22 +51,51 @@ int main(void) { // Setup parameters for the Level Zero memory provider. It will be used for // allocating memory from Level Zero devices. 
- level_zero_memory_provider_params_t ze_memory_provider_params; - ze_memory_provider_params.level_zero_context_handle = hContext; - ze_memory_provider_params.level_zero_device_handle = hDevice; + umf_level_zero_memory_provider_params_handle_t ze_memory_provider_params = + NULL; + res = umfLevelZeroMemoryProviderParamsCreate(&ze_memory_provider_params); + if (res != UMF_RESULT_SUCCESS) { + fprintf(stderr, "Failed to create memory provider params!\n"); + ret = -1; + goto level_zero_destroy; + } + + res = umfLevelZeroMemoryProviderParamsSetContext(ze_memory_provider_params, + hContext); + if (res != UMF_RESULT_SUCCESS) { + fprintf(stderr, "Failed to set context in memory provider params!\n"); + ret = -1; + goto provider_params_destroy; + } + + res = umfLevelZeroMemoryProviderParamsSetDevice(ze_memory_provider_params, + hDevice); + if (res != UMF_RESULT_SUCCESS) { + fprintf(stderr, "Failed to set device in memory provider params!\n"); + ret = -1; + goto provider_params_destroy; + } + // Set the memory type to shared to allow the memory to be accessed on both // CPU and GPU. 
- ze_memory_provider_params.memory_type = UMF_MEMORY_TYPE_SHARED; + res = umfLevelZeroMemoryProviderParamsSetMemoryType( + ze_memory_provider_params, UMF_MEMORY_TYPE_SHARED); + if (res != UMF_RESULT_SUCCESS) { + fprintf(stderr, + "Failed to set memory type in memory provider params!\n"); + ret = -1; + goto provider_params_destroy; + } // Create Level Zero memory provider umf_memory_provider_handle_t ze_memory_provider; - res = umfMemoryProviderCreate(umfLevelZeroMemoryProviderOps(), - &ze_memory_provider_params, - &ze_memory_provider); + res = + umfMemoryProviderCreate(umfLevelZeroMemoryProviderOps(), + ze_memory_provider_params, &ze_memory_provider); if (res != UMF_RESULT_SUCCESS) { fprintf(stderr, "Failed to create a memory provider!\n"); ret = -1; - goto level_zero_destroy; + goto provider_params_destroy; } printf("Level Zero memory provider created at %p\n", @@ -72,27 +103,54 @@ int main(void) { // Setup parameters for the Disjoint Pool. It will be used for managing the // memory allocated using memory provider. 
- umf_disjoint_pool_params_t disjoint_memory_pool_params = - umfDisjointPoolParamsDefault(); + umf_disjoint_pool_params_handle_t disjoint_memory_pool_params = NULL; + res = umfDisjointPoolParamsCreate(&disjoint_memory_pool_params); + if (res != UMF_RESULT_SUCCESS) { + fprintf(stderr, "Failed to create pool params!\n"); + ret = -1; + goto memory_provider_destroy; + } // Set the Slab Min Size to 64KB - the page size for GPU allocations - disjoint_memory_pool_params.SlabMinSize = 64 * 1024L; + res = umfDisjointPoolParamsSetSlabMinSize(disjoint_memory_pool_params, + 64 * 1024L); + if (res != UMF_RESULT_SUCCESS) { + fprintf(stderr, "Failed to set Slab Min Size!\n"); + ret = -1; + goto disjoint_params_destroy; + } // We would keep only single slab per each allocation bucket - disjoint_memory_pool_params.Capacity = 1; + res = umfDisjointPoolParamsSetCapacity(disjoint_memory_pool_params, 1); + if (res != UMF_RESULT_SUCCESS) { + fprintf(stderr, "Failed to set Capacity!\n"); + ret = -1; + goto disjoint_params_destroy; + } // Set the maximum poolable size to 64KB - objects with size above this // limit will not be stored/allocated from the pool. - disjoint_memory_pool_params.MaxPoolableSize = 64 * 1024L; + res = umfDisjointPoolParamsSetMaxPoolableSize(disjoint_memory_pool_params, + 64 * 1024L); + if (res != UMF_RESULT_SUCCESS) { + fprintf(stderr, "Failed to set Max Poolable Size!\n"); + ret = -1; + goto disjoint_params_destroy; + } // Enable tracing - disjoint_memory_pool_params.PoolTrace = 1; + res = umfDisjointPoolParamsSetTrace(disjoint_memory_pool_params, 1); + if (res != UMF_RESULT_SUCCESS) { + fprintf(stderr, "Failed to set Trace!\n"); + ret = -1; + goto disjoint_params_destroy; + } // Create Disjoint Pool memory pool. 
umf_memory_pool_handle_t ze_disjoint_memory_pool; res = umfPoolCreate(umfDisjointPoolOps(), ze_memory_provider, - &disjoint_memory_pool_params, UMF_POOL_CREATE_FLAG_NONE, + disjoint_memory_pool_params, UMF_POOL_CREATE_FLAG_NONE, &ze_disjoint_memory_pool); if (res != UMF_RESULT_SUCCESS) { fprintf(stderr, "Failed to create a memory pool!\n"); ret = -1; - goto memory_provider_destroy; + goto disjoint_params_destroy; } printf("Disjoint Pool created at %p\n", (void *)ze_disjoint_memory_pool); @@ -121,9 +179,15 @@ int main(void) { memory_pool_destroy: umfPoolDestroy(ze_disjoint_memory_pool); +disjoint_params_destroy: + umfDisjointPoolParamsDestroy(disjoint_memory_pool_params); + memory_provider_destroy: umfMemoryProviderDestroy(ze_memory_provider); +provider_params_destroy: + umfLevelZeroMemoryProviderParamsDestroy(ze_memory_provider_params); + level_zero_destroy: ret = destroy_context(hContext); return ret; diff --git a/examples/memspace_hmat/CMakeLists.txt b/examples/memspace_hmat/CMakeLists.txt new file mode 100644 index 000000000..5f0fffaa1 --- /dev/null +++ b/examples/memspace_hmat/CMakeLists.txt @@ -0,0 +1,58 @@ +# Copyright (C) 2024 Intel Corporation +# Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +cmake_minimum_required(VERSION 3.14.0 FATAL_ERROR) +project(umf_example_memspace_hmat LANGUAGES C) +enable_testing() + +set(UMF_EXAMPLE_DIR "${CMAKE_SOURCE_DIR}/..") +list(APPEND CMAKE_MODULE_PATH "${UMF_EXAMPLE_DIR}/cmake") +message(STATUS "CMAKE_MODULE_PATH=${CMAKE_MODULE_PATH}") + +find_package(PkgConfig) +pkg_check_modules(LIBUMF libumf) +if(NOT LIBUMF_FOUND) + find_package(LIBUMF REQUIRED libumf) +endif() + +pkg_check_modules(LIBHWLOC hwloc>=2.3.0) +if(NOT LIBHWLOC_FOUND) + find_package(LIBHWLOC 2.3.0 REQUIRED hwloc) +endif() + +pkg_check_modules(LIBNUMA numa) +if(NOT LIBNUMA_FOUND) + find_package(LIBNUMA REQUIRED numa) +endif() + +# build the example +set(EXAMPLE_NAME umf_example_memspace_hmat) +add_executable(${EXAMPLE_NAME} memspace_hmat.c) +target_include_directories(${EXAMPLE_NAME} PRIVATE ${LIBUMF_INCLUDE_DIRS} + ../common) +target_link_directories(${EXAMPLE_NAME} PRIVATE ${LIBHWLOC_LIBRARY_DIRS} + ${LIBNUMA_LIBRARY_DIRS}) +target_link_libraries( + ${EXAMPLE_NAME} PRIVATE ${LIBUMF_LIBRARIES} ${LIBHWLOC_LIBRARIES} + ${LIBNUMA_LIBRARIES}) + +add_test( + NAME ${EXAMPLE_NAME} + COMMAND ${EXAMPLE_NAME} + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + +set(UMF_TEST_SKIP_RETURN_CODE 125) +set_tests_properties(${EXAMPLE_NAME} PROPERTIES LABELS "example-standalone") +set_tests_properties(${EXAMPLE_NAME} PROPERTIES SKIP_RETURN_CODE + ${UMF_TEST_SKIP_RETURN_CODE}) + +if(LINUX) + # set LD_LIBRARY_PATH + set_property( + TEST ${EXAMPLE_NAME} + PROPERTY + ENVIRONMENT_MODIFICATION + "LD_LIBRARY_PATH=path_list_append:${LIBUMF_LIBRARY_DIRS};LD_LIBRARY_PATH=path_list_append:${LIBHWLOC_LIBRARY_DIRS};LD_LIBRARY_PATH=path_list_append:${LIBNUMA_LIBRARY_DIRS}" + ) +endif() diff --git a/examples/memspace_hmat/memspace_hmat.c b/examples/memspace_hmat/memspace_hmat.c new file mode 100644 index 000000000..9f3f8d17e --- /dev/null +++ b/examples/memspace_hmat/memspace_hmat.c @@ -0,0 +1,135 @@ +/* + * + * Copyright (C) 
2024 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + */ + +#include +#include + +#include +#include +#include +#include + +#include "examples_utils.h" + +// Function to create a memory provider which allocates memory from the specified NUMA node +int createMemoryProvider(umf_memory_provider_handle_t *hProvider, + umf_const_memspace_handle_t hMemspace) { + int ret = 0; + umf_result_t result; + umf_mempolicy_handle_t hPolicy = NULL; + if (hMemspace == NULL) { + fprintf(stderr, "Memspace is NULL - do you have HMAT enabled?\n"); + return 1; + } + // Create a mempolicy - mempolicy defines how we want to use memory from memspace. + // In this example, we want to bind memory to the best node in the memspace, + // for the thread that allocates memory. + result = umfMempolicyCreate(UMF_MEMPOLICY_BIND, &hPolicy); + if (result != UMF_RESULT_SUCCESS) { + ret = -1; + fprintf(stderr, "umfMempolicyCreate failed.\n"); + goto error_mempolicy; + } + + // Create a memory provider using the memory space and memory policy + result = umfMemoryProviderCreateFromMemspace(hMemspace, hPolicy, hProvider); + if (result != UMF_RESULT_SUCCESS) { + ret = -1; + fprintf(stderr, "umfMemoryProviderCreateFromMemspace failed.\n"); + goto error_provider; + } + + // After creating the memory provider, we can destroy the mempolicy +error_provider: + umfMempolicyDestroy(hPolicy); +error_mempolicy: + return ret; +} + +int main(void) { + umf_memory_provider_handle_t hProvider = NULL; + umf_result_t ret; + void *ptr = NULL; + size_t size = 1024; + size_t alignment = 64; + + // Check if NUMA is available + if (numa_available() < 0) { + fprintf(stderr, "NUMA is not available on this system.\n"); + return TEST_SKIP_ERROR_CODE; + } + + // Create the memory provider that allocates memory from the highest bandwidth numa nodes + ret = createMemoryProvider(&hProvider, umfMemspaceHighestBandwidthGet()); + 
if (ret != UMF_RESULT_SUCCESS) { + return ret == 1 ? TEST_SKIP_ERROR_CODE : -1; + } + + // Allocate memory from the memory provider + ret = umfMemoryProviderAlloc(hProvider, size, alignment, &ptr); + if (ret != UMF_RESULT_SUCCESS) { + fprintf(stderr, "umfMemoryProviderAlloc failed.\n"); + umfMemoryProviderDestroy(hProvider); + return -1; + } + + // Use the allocated memory (ptr) here + memset(ptr, 1, size); + + // Lets check the NUMA node of the allocated memory + int nodeId; + int retm = get_mempolicy(&nodeId, NULL, 0, ptr, MPOL_F_ADDR | MPOL_F_NODE); + if (retm != 0) { + fprintf(stderr, "get_mempolicy failed.\n"); + umfMemoryProviderFree(hProvider, ptr, size); + umfMemoryProviderDestroy(hProvider); + return -1; + } + + printf("Allocated memory at %p from the highest bandwidth node: %d\n", ptr, + nodeId); + + // Free the allocated memory + umfMemoryProviderFree(hProvider, ptr, size); + + umfMemoryProviderDestroy(hProvider); + + // Lets now allocate memory from the lowest latency node + ret = createMemoryProvider(&hProvider, umfMemspaceLowestLatencyGet()); + if (ret != UMF_RESULT_SUCCESS) { + return -1; + } + + ret = umfMemoryProviderAlloc(hProvider, size, alignment, &ptr); + + if (ret != UMF_RESULT_SUCCESS) { + fprintf(stderr, "umfMemoryProviderAlloc failed.\n"); + umfMemoryProviderDestroy(hProvider); + return -1; + } + + memset(ptr, 1, size); + + retm = get_mempolicy(&nodeId, NULL, 0, ptr, MPOL_F_ADDR | MPOL_F_NODE); + if (retm != 0) { + fprintf(stderr, "get_mempolicy failed.\n"); + umfMemoryProviderFree(hProvider, ptr, size); + umfMemoryProviderDestroy(hProvider); + return -1; + } + printf("Allocated memory at %p from the lowest latency node: %d\n", ptr, + nodeId); + + // Free the allocated memory + umfMemoryProviderFree(hProvider, ptr, size); + + umfMemoryProviderDestroy(hProvider); + + return 0; +} diff --git a/examples/memspace_numa/CMakeLists.txt b/examples/memspace_numa/CMakeLists.txt new file mode 100644 index 000000000..d9c41a843 --- /dev/null +++ 
b/examples/memspace_numa/CMakeLists.txt @@ -0,0 +1,58 @@ +# Copyright (C) 2024 Intel Corporation +# Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +cmake_minimum_required(VERSION 3.14.0 FATAL_ERROR) +project(umf_example_memspace_numa LANGUAGES C) +enable_testing() + +set(UMF_EXAMPLE_DIR "${CMAKE_SOURCE_DIR}/..") +list(APPEND CMAKE_MODULE_PATH "${UMF_EXAMPLE_DIR}/cmake") +message(STATUS "CMAKE_MODULE_PATH=${CMAKE_MODULE_PATH}") + +find_package(PkgConfig) +pkg_check_modules(LIBUMF libumf) +if(NOT LIBUMF_FOUND) + find_package(LIBUMF REQUIRED libumf) +endif() + +pkg_check_modules(LIBHWLOC hwloc>=2.3.0) +if(NOT LIBHWLOC_FOUND) + find_package(LIBHWLOC 2.3.0 REQUIRED hwloc) +endif() + +pkg_check_modules(LIBNUMA numa) +if(NOT LIBNUMA_FOUND) + find_package(LIBNUMA REQUIRED numa) +endif() + +# build the example +set(EXAMPLE_NAME umf_example_memspace_numa) +add_executable(${EXAMPLE_NAME} memspace_numa.c) +target_include_directories(${EXAMPLE_NAME} PRIVATE ${LIBUMF_INCLUDE_DIRS} + ../common) +target_link_directories(${EXAMPLE_NAME} PRIVATE ${LIBHWLOC_LIBRARY_DIRS} + ${LIBNUMA_LIBRARY_DIRS}) +target_link_libraries( + ${EXAMPLE_NAME} PRIVATE ${LIBUMF_LIBRARIES} ${LIBHWLOC_LIBRARIES} + ${LIBNUMA_LIBRARIES}) + +add_test( + NAME ${EXAMPLE_NAME} + COMMAND ${EXAMPLE_NAME} + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + +set(UMF_TEST_SKIP_RETURN_CODE 125) +set_tests_properties(${EXAMPLE_NAME} PROPERTIES LABELS "example-standalone") +set_tests_properties(${EXAMPLE_NAME} PROPERTIES SKIP_RETURN_CODE + ${UMF_TEST_SKIP_RETURN_CODE}) + +if(LINUX) + # set LD_LIBRARY_PATH + set_property( + TEST ${EXAMPLE_NAME} + PROPERTY + ENVIRONMENT_MODIFICATION + "LD_LIBRARY_PATH=path_list_append:${LIBUMF_LIBRARY_DIRS};LD_LIBRARY_PATH=path_list_append:${LIBHWLOC_LIBRARY_DIRS};LD_LIBRARY_PATH=path_list_append:${LIBNUMA_LIBRARY_DIRS}" + ) +endif() diff --git a/examples/memspace_numa/memspace_numa.c 
b/examples/memspace_numa/memspace_numa.c new file mode 100644 index 000000000..4f225cd69 --- /dev/null +++ b/examples/memspace_numa/memspace_numa.c @@ -0,0 +1,185 @@ +/* + * + * Copyright (C) 2024 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + */ + +#include +#include + +#include +#include +#include +#include + +#include "examples_utils.h" + +// Function to create a memory provider which allocates memory from the specified NUMA node +// by using umfMemspaceCreateFromNumaArray +int createMemoryProviderFromArray(umf_memory_provider_handle_t *hProvider, + unsigned numa) { + int ret = 0; + umf_result_t result; + umf_memspace_handle_t hMemspace = NULL; + umf_mempolicy_handle_t hPolicy = NULL; + + // Create a memspace - memspace is a list of memory sources. + // In this example, we create a memspace that contains single numa node; + result = umfMemspaceCreateFromNumaArray(&numa, 1, &hMemspace); + if (result != UMF_RESULT_SUCCESS) { + fprintf(stderr, "umfMemspaceCreateFromNumaArray() failed.\n"); + return -1; + } + + // Create a mempolicy - mempolicy defines how we want to use memory from memspace. + // In this example, we want to bind memory to the specified numa node. 
+ result = umfMempolicyCreate(UMF_MEMPOLICY_BIND, &hPolicy); + if (result != UMF_RESULT_SUCCESS) { + ret = -1; + fprintf(stderr, "umfMempolicyCreate failed().\n"); + goto error_memspace; + } + + // Create a memory provider using the memory space and memory policy + result = umfMemoryProviderCreateFromMemspace(hMemspace, hPolicy, hProvider); + if (result != UMF_RESULT_SUCCESS) { + ret = -1; + fprintf(stderr, "umfMemoryProviderCreateFromMemspace failed().\n"); + goto error_mempolicy; + } + + // After creating the memory provider, we can destroy the memspace and mempolicy +error_mempolicy: + umfMempolicyDestroy(hPolicy); +error_memspace: + umfMemspaceDestroy(hMemspace); + return ret; +} + +// Function to create a memory provider which allocates memory from the specified NUMA node +// by using filter function. +int createMemoryProviderByFilter(umf_memory_provider_handle_t *hProvider, + unsigned numa) { + int ret = 0; + umf_result_t result; + umf_memspace_handle_t hMemspace = NULL; + umf_mempolicy_handle_t hPolicy = NULL; + + umf_const_memspace_handle_t hostAll = umfMemspaceHostAllGet(); + if (!hostAll) { + fprintf(stderr, "umfMemspaceHostAllGet() failed\n"); + return -1; + } + + // umfMemspaceHostAllGet() return immutable memspace, so we need to create a mutable copy + result = umfMemspaceClone(hostAll, &hMemspace); + if (result != UMF_RESULT_SUCCESS) { + fprintf(stderr, "umfMempolicyClone() failed.\n"); + return -1; + } + + // Filter the memspace to contain only the specified numa node + result = umfMemspaceFilterById(hMemspace, &numa, 1); + if (result != UMF_RESULT_SUCCESS) { + ret = -1; + fprintf(stderr, "umfMemspaceFilterById() failed.\n"); + goto error_memspace; + } + + // Create a mempolicy - mempolicy defines how we want to use memory from memspace. + // In this example, we want to bind memory to the specified numa node. 
+ result = umfMempolicyCreate(UMF_MEMPOLICY_BIND, &hPolicy); + if (result != UMF_RESULT_SUCCESS) { + ret = -1; + fprintf(stderr, "umfMempolicyCreate() failed.\n"); + goto error_memspace; + } + // Create a memory provider using the memory space and memory policy + result = umfMemoryProviderCreateFromMemspace(hMemspace, hPolicy, hProvider); + if (result != UMF_RESULT_SUCCESS) { + ret = -1; + fprintf(stderr, "umfMemoryProviderCreateFromMemspace() failed.\n"); + goto error_mempolicy; + } + + // After creating the memory provider, we can destroy the memspace and mempolicy +error_mempolicy: + umfMempolicyDestroy(hPolicy); +error_memspace: + umfMemspaceDestroy(hMemspace); + return ret; +} + +int use_memory_provider(umf_memory_provider_handle_t hProvider) { + // Allocate memory from the memory provider + void *ptr = NULL; + size_t size = 1024; + size_t alignment = 64; + + umf_result_t ret = umfMemoryProviderAlloc(hProvider, size, alignment, &ptr); + if (ret != UMF_RESULT_SUCCESS) { + fprintf(stderr, "umfMemoryProviderAlloc failed.\n"); + return 1; + } + + // Use the allocated memory (ptr) here + memset(ptr, 1, size); + + // Lets check the NUMA node of the allocated memory + int nodeId; + int retm = get_mempolicy(&nodeId, NULL, 0, ptr, MPOL_F_ADDR | MPOL_F_NODE); + if (retm != 0) { + umfMemoryProviderFree(hProvider, ptr, size); + fprintf(stderr, "get_mempolicy failed.\n"); + return 1; + } + printf("Allocated memory at %p from numa_node %d\n", ptr, nodeId); + // Free the allocated memory + umfMemoryProviderFree(hProvider, ptr, size); + + return 0; +} + +int main(void) { + umf_memory_provider_handle_t hProvider = NULL; + umf_result_t ret; + + // Check if NUMA is available + if (numa_available() < 0) { + fprintf(stderr, "NUMA is not available on this system.\n"); + return TEST_SKIP_ERROR_CODE; + } + + // Create the memory provider that allocates memory from the specified NUMA node + // In this example, we allocate memory from the NUMA node 0 + ret = 
createMemoryProviderFromArray(&hProvider, 0); + if (ret != UMF_RESULT_SUCCESS) { + return -1; + } + + if (use_memory_provider(hProvider)) { + goto error; + } + + umfMemoryProviderDestroy(hProvider); + + // We can achieve the same result by using filter functions + ret = createMemoryProviderByFilter(&hProvider, 0); + if (ret != UMF_RESULT_SUCCESS) { + return -1; + } + + if (use_memory_provider(hProvider)) { + goto error; + } + + umfMemoryProviderDestroy(hProvider); + return 0; +error: + umfMemoryProviderDestroy(hProvider); + + return 1; +} diff --git a/include/umf/base.h b/include/umf/base.h index ecc0abf55..53378195d 100644 --- a/include/umf/base.h +++ b/include/umf/base.h @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023 Intel Corporation + * Copyright (C) 2023-2024 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -28,7 +28,7 @@ extern "C" { #define UMF_MINOR_VERSION(_ver) (_ver & 0x0000ffff) /// @brief Current version of the UMF headers -#define UMF_VERSION_CURRENT UMF_MAKE_VERSION(0, 9) +#define UMF_VERSION_CURRENT UMF_MAKE_VERSION(0, 10) /// @brief Operation results typedef enum umf_result_t { @@ -43,7 +43,8 @@ typedef enum umf_result_t { UMF_RESULT_ERROR_INVALID_ALIGNMENT = 4, ///< Invalid alignment of an argument UMF_RESULT_ERROR_NOT_SUPPORTED = 5, ///< Operation not supported - + UMF_RESULT_ERROR_USER_SPECIFIC = + 6, ///< Failure in user provider code (i.e in user provided callback) UMF_RESULT_ERROR_UNKNOWN = 0x7ffffffe ///< Unknown or internal error } umf_result_t; diff --git a/include/umf/ipc.h b/include/umf/ipc.h index ffe38bfc8..ab47b0971 100644 --- a/include/umf/ipc.h +++ b/include/umf/ipc.h @@ -19,6 +19,8 @@ extern "C" { typedef struct umf_ipc_data_t *umf_ipc_handle_t; +typedef void *umf_ipc_handler_handle_t; + /// /// @brief Returns the size of IPC handles for the specified pool. 
/// @param hPool [in] Pool handle @@ -44,11 +46,11 @@ umf_result_t umfPutIPCHandle(umf_ipc_handle_t ipcHandle); /// /// @brief Open IPC handle retrieved by umfGetIPCHandle. -/// @param hPool [in] Pool handle where to open the the IPC handle. +/// @param hIPCHandler [in] IPC Handler handle used to open the IPC handle. /// @param ipcHandle [in] IPC handle. /// @param ptr [out] pointer to the memory in the current process. /// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. -umf_result_t umfOpenIPCHandle(umf_memory_pool_handle_t hPool, +umf_result_t umfOpenIPCHandle(umf_ipc_handler_handle_t hIPCHandler, umf_ipc_handle_t ipcHandle, void **ptr); /// @@ -57,6 +59,13 @@ umf_result_t umfOpenIPCHandle(umf_memory_pool_handle_t hPool, /// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. umf_result_t umfCloseIPCHandle(void *ptr); +/// @brief Get handle to the IPC handler from existing pool. +/// @param hPool [in] Pool handle +/// @param hIPCHandler [out] handle to the IPC handler +/// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. +umf_result_t umfPoolGetIPCHandler(umf_memory_pool_handle_t hPool, + umf_ipc_handler_handle_t *hIPCHandler); + #ifdef __cplusplus } #endif diff --git a/include/umf/memory_provider.h b/include/umf/memory_provider.h index cec8edbcf..cff6f9eec 100644 --- a/include/umf/memory_provider.h +++ b/include/umf/memory_provider.h @@ -17,6 +17,23 @@ extern "C" { #endif +/// @brief Memory visibility mode +typedef enum umf_memory_visibility_t { + UMF_MEM_MAP_PRIVATE = 1, ///< private memory mapping + UMF_MEM_MAP_SHARED, ///< shared memory mapping (Linux only) +} umf_memory_visibility_t; + +/// @brief Protection of the memory allocations +typedef enum umf_mem_protection_flags_t { + UMF_PROTECTION_NONE = (1 << 0), ///< Memory allocations can not be accessed + UMF_PROTECTION_READ = (1 << 1), ///< Memory allocations can be read. 
+ UMF_PROTECTION_WRITE = (1 << 2), ///< Memory allocations can be written. + UMF_PROTECTION_EXEC = (1 << 3), ///< Memory allocations can be executed. + /// @cond + UMF_PROTECTION_MAX // must be the last one + /// @endcond +} umf_mem_protection_flags_t; + /// @brief A struct containing memory provider specific set of functions typedef struct umf_memory_provider_t *umf_memory_provider_handle_t; diff --git a/include/umf/memory_provider_gpu.h b/include/umf/memory_provider_gpu.h new file mode 100644 index 000000000..cc3cc3e3e --- /dev/null +++ b/include/umf/memory_provider_gpu.h @@ -0,0 +1,31 @@ +/* + * + * Copyright (C) 2023-2024 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + */ + +#ifndef UMF_MEMORY_PROVIDER_GPU_H +#define UMF_MEMORY_PROVIDER_GPU_H 1 + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/// @brief USM memory allocation type +typedef enum umf_usm_memory_type_t { + UMF_MEMORY_TYPE_UNKNOWN = 0, ///< The memory pointed to is of unknown type + UMF_MEMORY_TYPE_HOST, ///< The memory pointed to is a host allocation + UMF_MEMORY_TYPE_DEVICE, ///< The memory pointed to is a device allocation + UMF_MEMORY_TYPE_SHARED, ///< The memory pointed to is a shared ownership allocation +} umf_usm_memory_type_t; + +#ifdef __cplusplus +} +#endif + +#endif /* UMF_MEMORY_PROVIDER_GPU_H */ diff --git a/include/umf/memory_provider_ops.h b/include/umf/memory_provider_ops.h index a61e0aad0..0b9c7cfce 100644 --- a/include/umf/memory_provider_ops.h +++ b/include/umf/memory_provider_ops.h @@ -22,6 +22,15 @@ extern "C" { /// can keep them NULL. 
/// typedef struct umf_memory_provider_ext_ops_t { + /// + /// @brief Frees the memory space pointed by \p ptr from the memory \p provider + /// @param provider pointer to the memory provider + /// @param ptr pointer to the allocated memory to free + /// @param size size of the allocation + /// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure + /// + umf_result_t (*free)(void *provider, void *ptr, size_t size); + /// /// @brief Discard physical pages within the virtual memory mapping associated at the given addr /// and \p size. This call is asynchronous and may delay purging the pages indefinitely. @@ -172,15 +181,6 @@ typedef struct umf_memory_provider_ops_t { umf_result_t (*alloc)(void *provider, size_t size, size_t alignment, void **ptr); - /// - /// @brief Frees the memory space pointed by \p ptr from the memory \p provider - /// @param provider pointer to the memory provider - /// @param ptr pointer to the allocated memory to free - /// @param size size of the allocation - /// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure - /// - umf_result_t (*free)(void *provider, void *ptr, size_t size); - /// /// @brief Retrieve string representation of the underlying provider specific /// result reported by the last API that returned diff --git a/include/umf/memspace.h b/include/umf/memspace.h index 2d2d77728..85b6b3681 100644 --- a/include/umf/memspace.h +++ b/include/umf/memspace.h @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023 Intel Corporation + * Copyright (C) 2023-2024 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -14,6 +14,7 @@ #include #include #include +#include #ifdef __cplusplus extern "C" { @@ -84,6 +85,106 @@ umf_const_memspace_handle_t umfMemspaceHighestBandwidthGet(void); /// umf_const_memspace_handle_t umfMemspaceLowestLatencyGet(void); +/// \brief Creates new empty memspace, which can be populated with umfMemspaceMemtargetAdd() +/// \param hMemspace [out] handle to the newly created memspace +/// \return UMF_RESULT_SUCCESS on success or appropriate error code on failure. +/// +umf_result_t umfMemspaceNew(umf_memspace_handle_t *hMemspace); + +/// \brief Returns number of memory targets in memspace. +/// \param hMemspace handle to memspace +/// \return number of memory targets in memspace +/// +size_t umfMemspaceMemtargetNum(umf_const_memspace_handle_t hMemspace); + +/// \brief Returns memory target by index. +/// \param hMemspace handle to memspace +/// \param targetNum index of the memory target +/// \return memory target handle on success or NULL on invalid input. +/// +umf_const_memtarget_handle_t +umfMemspaceMemtargetGet(umf_const_memspace_handle_t hMemspace, + unsigned targetNum); + +/// \brief Adds memory target to memspace. +/// +/// \details +/// This function duplicates the memory target and then adds it to the memspace. +/// This means that original memtarget handle and the handle of the duplicated memtarget are different +/// and you cannot use it interchangeably. +/// You can use `umfMemspaceMemtargetGet()` to retrieve new handle. +/// +/// \param hMemspace handle to memspace +/// \param hMemtarget handle to memory target +/// \return UMF_RESULT_SUCCESS on success or appropriate error code on failure. +/// +umf_result_t umfMemspaceMemtargetAdd(umf_memspace_handle_t hMemspace, + umf_const_memtarget_handle_t hMemtarget); + +/// \brief Removes memory target from memspace. 
+/// +/// \param hMemspace handle to memspace +/// \param hMemtarget handle to memory target +/// \return UMF_RESULT_SUCCESS on success or appropriate error code on failure. +/// +umf_result_t +umfMemspaceMemtargetRemove(umf_memspace_handle_t hMemspace, + umf_const_memtarget_handle_t hMemtarget); + +/// \brief Clones memspace. +/// +/// \param hMemspace handle to memspace +/// \param hNewMemspace [out] handle to the newly created memspace +/// \return UMF_RESULT_SUCCESS on success or appropriate error code on failure. +/// +umf_result_t umfMemspaceClone(umf_const_memspace_handle_t hMemspace, + umf_memspace_handle_t *hNewMemspace); + +/// \brief Custom filter function for umfMemspaceUserFilter +/// +/// \param hMemspace handle to memspace +/// \param hMemtarget handle to memory target +/// \param args user provided arguments +/// \return zero if hMemtarget should be removed from memspace, positive otherwise, and negative on error +/// +typedef int (*umf_memspace_filter_func_t)( + umf_const_memspace_handle_t hMemspace, + umf_const_memtarget_handle_t hMemtarget, void *args); + +/// \brief Removes all memory targets with non-matching numa node ids. +/// +/// \param hMemspace handle to memspace +/// \param ids array of numa node ids +/// \param size size of the array +/// \return UMF_RESULT_SUCCESS on success or appropriate error code on failure. +/// If the error code is UMF_RESULT_UNKNOWN the memspace is corrupted, otherwise the memspace is not modified. +/// +umf_result_t umfMemspaceFilterById(umf_memspace_handle_t hMemspace, + unsigned *ids, size_t size); + +/// \brief Filters out memory targets that capacity is less than specified size. +/// +/// \param hMemspace handle to memspace +/// \param size minimum capacity of memory target +/// \return UMF_RESULT_SUCCESS on success or appropriate error code on failure. +/// If the error code is UMF_RESULT_UNKNOWN the memspace is corrupted, otherwise the memspace is not modified. 
+/// \details Negative values of size parameters are reserved for future +/// extension of functionality of this function. +/// +umf_result_t umfMemspaceFilterByCapacity(umf_memspace_handle_t hMemspace, + int64_t size); + +/// \brief Filters out memory targets based on user provided function +/// +/// \param hMemspace handle to memspace +/// \param filter user provided function +/// \param args user provided arguments +/// \return UMF_RESULT_SUCCESS on success or appropriate error code on failure. +/// If the error code is UMF_RESULT_UNKNOWN the memspace is corrupted, otherwise the memspace is not modified. +/// +umf_result_t umfMemspaceUserFilter(umf_memspace_handle_t hMemspace, + umf_memspace_filter_func_t filter, + void *args); #ifdef __cplusplus } #endif diff --git a/include/umf/memtarget.h b/include/umf/memtarget.h new file mode 100644 index 000000000..d74947f14 --- /dev/null +++ b/include/umf/memtarget.h @@ -0,0 +1,53 @@ +/* + * + * Copyright (C) 2024 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + */ + +#ifndef UMF_MEMTARGET_H +#define UMF_MEMTARGET_H 1 + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct umf_memtarget_t *umf_memtarget_handle_t; +typedef const struct umf_memtarget_t *umf_const_memtarget_handle_t; + +typedef enum umf_memtarget_type_t { + UMF_MEMTARGET_TYPE_UNKNOWN = 0, + UMF_MEMTARGET_TYPE_NUMA = 1, +} umf_memtarget_type_t; + +/// \brief Gets the type of the memory target. +/// \param hMemtarget handle to the memory target +/// \param type [out] type of the memory target +/// \return UMF_RESULT_SUCCESS on success or appropriate error code on failure. +umf_result_t umfMemtargetGetType(umf_const_memtarget_handle_t hMemtarget, + umf_memtarget_type_t *type); + +/// \brief Get size of the memory target in bytes. 
+/// \param hMemtarget handle to the memory target +/// \param capacity [out] capacity of the memory target +/// \return UMF_RESULT_SUCCESS on success or appropriate error code on failure. +umf_result_t umfMemtargetGetCapacity(umf_const_memtarget_handle_t hMemtarget, + size_t *capacity); + +/// \brief Get physical ID of the memory target. +/// \param hMemtarget handle to the memory target +/// \param id [out] id of the memory target +/// \return UMF_RESULT_SUCCESS on success or appropriate error code on failure. +umf_result_t umfMemtargetGetId(umf_const_memtarget_handle_t hMemtarget, + unsigned *id); + +#ifdef __cplusplus +} +#endif + +#endif /* UMF_MEMTARGET_H */ diff --git a/include/umf/pools/pool_disjoint.h b/include/umf/pools/pool_disjoint.h index 2fe5355a2..fdf682ae5 100644 --- a/include/umf/pools/pool_disjoint.h +++ b/include/umf/pools/pool_disjoint.h @@ -16,68 +16,93 @@ extern "C" { /// i.e. if multiple pools use the same shared limits, sum of those pools' /// sizes cannot exceed MaxSize. typedef struct umf_disjoint_pool_shared_limits_t - umf_disjoint_pool_shared_limits_t; + *umf_disjoint_pool_shared_limits_handle_t; -/// @brief Create a pool limits struct -/// @param MaxSize specifies hard limit for memory allocated from a provider -/// @return pointer to created pool limits struct -umf_disjoint_pool_shared_limits_t * +struct umf_disjoint_pool_params_t; +/// @brief handle to the parameters of the disjoint pool. +typedef struct umf_disjoint_pool_params_t *umf_disjoint_pool_params_handle_t; + +/// @brief Create a pool limits struct. +/// @param MaxSize specifies hard limit for memory allocated from a provider. +/// @return handle to the created shared limits struct. +umf_disjoint_pool_shared_limits_handle_t umfDisjointPoolSharedLimitsCreate(size_t MaxSize); -/// @brief Destroy previously created pool limits struct -/// @param PoolLimits pointer to a pool limits struct +/// @brief Destroy previously created pool limits struct. 
+/// @param hSharedLimits handle to the shared limits struct. void umfDisjointPoolSharedLimitsDestroy( - umf_disjoint_pool_shared_limits_t *PoolLimits); - -/// @brief Configuration of Disjoint Pool -typedef struct umf_disjoint_pool_params_t { - /// Minimum allocation size that will be requested from the system. - /// By default this is the minimum allocation size of each memory type. - size_t SlabMinSize; - - /// Allocations up to this limit will be subject to chunking/pooling - size_t MaxPoolableSize; - - /// When pooling, each bucket will hold a max of 'Capacity' unfreed slabs - size_t Capacity; - - /// Holds the minimum bucket size valid for allocation of a memory type. - /// This value must be a power of 2. - size_t MinBucketSize; - - /// Holds size of the pool managed by the allocator. - size_t CurPoolSize; - - /// Whether to print pool usage statistics - int PoolTrace; - - /// Memory limits that can be shared between multitple pool instances, - /// i.e. if multiple pools use the same SharedLimits sum of those pools' - /// sizes cannot exceed MaxSize. - umf_disjoint_pool_shared_limits_t *SharedLimits; - - /// Name used in traces - const char *Name; -} umf_disjoint_pool_params_t; + umf_disjoint_pool_shared_limits_handle_t hSharedLimits); + +/// @brief Create a struct to store parameters of disjoint pool. +/// @param hParams [out] handle to the newly created parameters struct. +/// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. +umf_result_t +umfDisjointPoolParamsCreate(umf_disjoint_pool_params_handle_t *hParams); + +/// @brief Destroy parameters struct. +/// @param hParams handle to the parameters of the disjoint pool. +/// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. +umf_result_t +umfDisjointPoolParamsDestroy(umf_disjoint_pool_params_handle_t hParams); + +/// @brief Set minimum allocation size that will be requested from the memory provider. 
+/// @param hParams handle to the parameters of the disjoint pool. +/// @param slabMinSize minimum allocation size. +/// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. +umf_result_t +umfDisjointPoolParamsSetSlabMinSize(umf_disjoint_pool_params_handle_t hParams, + size_t slabMinSize); + +/// @brief Set size limit for allocations that are subject to pooling. +/// @param hParams handle to the parameters of the disjoint pool. +/// @param maxPoolableSize maximum poolable size. +/// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. +umf_result_t umfDisjointPoolParamsSetMaxPoolableSize( + umf_disjoint_pool_params_handle_t hParams, size_t maxPoolableSize); + +/// @brief Set maximum capacity of each bucket. Each bucket will hold a +/// max of \p maxCapacity unfreed slabs. +/// @param hParams handle to the parameters of the disjoint pool. +/// @param maxCapacity maximum capacity of each bucket. +/// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. +umf_result_t +umfDisjointPoolParamsSetCapacity(umf_disjoint_pool_params_handle_t hParams, + size_t maxCapacity); + +/// @brief Set minimum bucket allocation size. +/// @param hParams handle to the parameters of the disjoint pool. +/// @param minBucketSize minimum bucket size. Must be power of 2. +/// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. +umf_result_t +umfDisjointPoolParamsSetMinBucketSize(umf_disjoint_pool_params_handle_t hParams, + size_t minBucketSize); + +/// @brief Set trace level for pool usage statistics. +/// @param hParams handle to the parameters of the disjoint pool. +/// @param poolTrace trace level. +/// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. +umf_result_t +umfDisjointPoolParamsSetTrace(umf_disjoint_pool_params_handle_t hParams, + int poolTrace); + +/// @brief Set shared limits for disjoint pool. +/// @param hParams handle to the parameters of the disjoint pool. 
+/// @param hSharedLimits handle to the shared limits. +/// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. +umf_result_t umfDisjointPoolParamsSetSharedLimits( + umf_disjoint_pool_params_handle_t hParams, + umf_disjoint_pool_shared_limits_handle_t hSharedLimits); + +/// @brief Set custom name of the disjoint pool to be used in the traces. +/// @param hParams handle to the parameters of the disjoint pool. +/// @param name custom name of the pool. +/// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. +umf_result_t +umfDisjointPoolParamsSetName(umf_disjoint_pool_params_handle_t hParams, + const char *name); umf_memory_pool_ops_t *umfDisjointPoolOps(void); -/// @brief Create default params struct for disjoint pool -static inline umf_disjoint_pool_params_t umfDisjointPoolParamsDefault(void) { - umf_disjoint_pool_params_t params = { - 0, /* SlabMinSize */ - 0, /* MaxPoolableSize */ - 0, /* Capacity */ - UMF_DISJOINT_POOL_MIN_BUCKET_DEFAULT_SIZE, /* MinBucketSize */ - 0, /* CurPoolSize */ - 0, /* PoolTrace */ - NULL, /* SharedLimits */ - "disjoint_pool" /* Name */ - }; - - return params; -} - #ifdef __cplusplus } #endif diff --git a/include/umf/pools/pool_jemalloc.h b/include/umf/pools/pool_jemalloc.h index dfd75746b..0cbecd38f 100644 --- a/include/umf/pools/pool_jemalloc.h +++ b/include/umf/pools/pool_jemalloc.h @@ -17,11 +17,31 @@ extern "C" { #include #include -/// @brief Configuration of Jemalloc Pool -typedef struct umf_jemalloc_pool_params_t { - /// Set to true if umfMemoryProviderFree() should never be called. - bool disable_provider_free; -} umf_jemalloc_pool_params_t; +struct umf_jemalloc_pool_params_t; + +/// @brief handle to the parameters of the jemalloc pool. +typedef struct umf_jemalloc_pool_params_t *umf_jemalloc_pool_params_handle_t; + +/// @brief Create a struct to store parameters of jemalloc pool. +/// @param hParams [out] handle to the newly created parameters struct. 
+/// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. +umf_result_t +umfJemallocPoolParamsCreate(umf_jemalloc_pool_params_handle_t *hParams); + +/// @brief Destroy parameters struct. +/// @param hParams handle to the parameters of the jemalloc pool. +/// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. +umf_result_t +umfJemallocPoolParamsDestroy(umf_jemalloc_pool_params_handle_t hParams); + +/// @brief Set if \p umfMemoryProviderFree() should never be called. +/// @param hParams handle to the parameters of the jemalloc pool. +/// @param keepAllMemory \p true if the jemalloc pool should not call +/// \p umfMemoryProviderFree, \p false otherwise. +/// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. +umf_result_t +umfJemallocPoolParamsSetKeepAllMemory(umf_jemalloc_pool_params_handle_t hParams, + bool keepAllMemory); umf_memory_pool_ops_t *umfJemallocPoolOps(void); diff --git a/include/umf/pools/pool_scalable.h b/include/umf/pools/pool_scalable.h index 3b9945f0b..072169b68 100644 --- a/include/umf/pools/pool_scalable.h +++ b/include/umf/pools/pool_scalable.h @@ -14,9 +14,47 @@ extern "C" { #endif +#include + #include #include +struct umf_scalable_pool_params_t; + +/// @brief handle to the parameters of the scalable pool. +typedef struct umf_scalable_pool_params_t *umf_scalable_pool_params_handle_t; + +/// @brief Create a struct to store parameters of scalable pool. +/// @param hParams [out] handle to the newly created parameters struct. +/// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. +umf_result_t +umfScalablePoolParamsCreate(umf_scalable_pool_params_handle_t *hParams); + +/// @brief Destroy parameters struct. +/// @param hParams handle to the parameters of the scalable pool. +/// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. 
+umf_result_t +umfScalablePoolParamsDestroy(umf_scalable_pool_params_handle_t hParams); + +/// @brief Set granularity of allocations that scalable pool requests from a memory provider. +/// @param hParams handle to the parameters of the scalable pool. +/// @param granularity granularity in bytes. +/// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. +umf_result_t +umfScalablePoolParamsSetGranularity(umf_scalable_pool_params_handle_t hParams, + size_t granularity); + +/// @brief Set if scalable pool should keep all memory allocated from memory provider till destruction. +/// @param hParams handle to the parameters of the scalable pool. +/// @param keepAllMemory \p true if the scalable pool should not call +/// \p umfMemoryProviderFree until it is destroyed, \p false otherwise. +/// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. +umf_result_t +umfScalablePoolParamsSetKeepAllMemory(umf_scalable_pool_params_handle_t hParams, + bool keepAllMemory); + +/// @brief Return \p ops structure containing pointers to the scalable pool implementation. +/// @return pointer to the \p umf_memory_pool_ops_t struct. umf_memory_pool_ops_t *umfScalablePoolOps(void); #ifdef __cplusplus diff --git a/include/umf/providers/provider_coarse.h b/include/umf/providers/provider_coarse.h new file mode 100644 index 000000000..6ed6e0fbc --- /dev/null +++ b/include/umf/providers/provider_coarse.h @@ -0,0 +1,116 @@ +/* + * Copyright (C) 2023-2024 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +*/ + +#ifndef UMF_COARSE_PROVIDER_H +#define UMF_COARSE_PROVIDER_H + +#include +#include + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/// @brief Coarse Memory Provider allocation strategy +typedef enum coarse_memory_provider_strategy_t { + /// Always allocate a free block of the (size + alignment) size + /// and cut out the properly aligned part leaving two remaining parts. + /// It is the fastest strategy but causes memory fragmentation + /// when alignment is greater than 0. + /// It is the best strategy when alignment always equals 0. + UMF_COARSE_MEMORY_STRATEGY_FASTEST = 0, + + /// Check if the first free block of the 'size' size has the correct alignment. + /// If not, use the `UMF_COARSE_MEMORY_STRATEGY_FASTEST` strategy. + UMF_COARSE_MEMORY_STRATEGY_FASTEST_BUT_ONE, + + /// Look through all free blocks of the 'size' size + /// and choose the first one with the correct alignment. + /// If none of them had the correct alignment, + /// use the `UMF_COARSE_MEMORY_STRATEGY_FASTEST` strategy. + UMF_COARSE_MEMORY_STRATEGY_CHECK_ALL_SIZE, + + /// The maximum value (it has to be the last one). + UMF_COARSE_MEMORY_STRATEGY_MAX +} coarse_memory_provider_strategy_t; + +/// @brief Coarse Memory Provider settings struct. +typedef struct coarse_memory_provider_params_t { + /// Handle to the upstream memory provider. + /// It has to be NULL if init_buffer is set + /// (exactly one of them has to be non-NULL). + umf_memory_provider_handle_t upstream_memory_provider; + + /// Memory allocation strategy. + /// See coarse_memory_provider_strategy_t for details. + coarse_memory_provider_strategy_t allocation_strategy; + + /// A pre-allocated buffer that will be the only memory that + /// the coarse provider can provide (the fixed-size memory provider option). + /// If it is non-NULL, `init_buffer_size ` has to contain its size. 
+ /// It has to be NULL if upstream_memory_provider is set + /// (exactly one of them has to be non-NULL). + void *init_buffer; + + /// Size of the initial buffer: + /// 1) `init_buffer` if it is non-NULL xor + /// 2) that will be allocated from the upstream_memory_provider + /// (if it is non-NULL) in the `.initialize` operation. + size_t init_buffer_size; + + /// When it is true and the upstream_memory_provider is given, + /// the init buffer (of `init_buffer_size` bytes) would be pre-allocated + /// during creation time using the `upstream_memory_provider`. + /// If upstream_memory_provider is not given, + /// the init_buffer is always used instead + /// (regardless of the value of this parameter). + bool immediate_init_from_upstream; + + /// Destroy upstream_memory_provider in finalize(). + bool destroy_upstream_memory_provider; +} coarse_memory_provider_params_t; + +/// @brief Coarse Memory Provider stats (TODO move to CTL) +typedef struct coarse_memory_provider_stats_t { + /// Total allocation size. + size_t alloc_size; + + /// Size of used memory. + size_t used_size; + + /// Number of memory blocks allocated from the upstream provider. + size_t num_upstream_blocks; + + /// Total number of allocated memory blocks. + size_t num_all_blocks; + + /// Number of free memory blocks. 
+ size_t num_free_blocks; +} coarse_memory_provider_stats_t; + +umf_memory_provider_ops_t *umfCoarseMemoryProviderOps(void); + +// TODO use CTL +coarse_memory_provider_stats_t +umfCoarseMemoryProviderGetStats(umf_memory_provider_handle_t provider); + +/// @brief Create default params for the coarse memory provider +static inline coarse_memory_provider_params_t +umfCoarseMemoryProviderParamsDefault(void) { + coarse_memory_provider_params_t coarse_memory_provider_params; + memset(&coarse_memory_provider_params, 0, + sizeof(coarse_memory_provider_params)); + return coarse_memory_provider_params; +} + +#ifdef __cplusplus +} +#endif + +#endif // UMF_COARSE_PROVIDER_H diff --git a/include/umf/providers/provider_cuda.h b/include/umf/providers/provider_cuda.h new file mode 100644 index 000000000..5f1d5a6e2 --- /dev/null +++ b/include/umf/providers/provider_cuda.h @@ -0,0 +1,62 @@ +/* + * Copyright (C) 2024 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +*/ + +#ifndef UMF_PROVIDER_CUDA_H +#define UMF_PROVIDER_CUDA_H + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +struct umf_cuda_memory_provider_params_t; + +typedef struct umf_cuda_memory_provider_params_t + *umf_cuda_memory_provider_params_handle_t; + +/// @brief Create a struct to store parameters of the CUDA Memory Provider. +/// @param hParams [out] handle to the newly created parameters struct. +/// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. +umf_result_t umfCUDAMemoryProviderParamsCreate( + umf_cuda_memory_provider_params_handle_t *hParams); + +/// @brief Destroy parameters struct. +/// @param hParams handle to the parameters of the CUDA Memory Provider. +/// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. 
+umf_result_t umfCUDAMemoryProviderParamsDestroy( + umf_cuda_memory_provider_params_handle_t hParams); + +/// @brief Set the CUDA context handle in the parameters struct. +/// @param hParams handle to the parameters of the CUDA Memory Provider. +/// @param hContext handle to the CUDA context. +/// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. +umf_result_t umfCUDAMemoryProviderParamsSetContext( + umf_cuda_memory_provider_params_handle_t hParams, void *hContext); + +/// @brief Set the CUDA device handle in the parameters struct. +/// @param hParams handle to the parameters of the CUDA Memory Provider. +/// @param hDevice handle to the CUDA device. +/// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. +umf_result_t umfCUDAMemoryProviderParamsSetDevice( + umf_cuda_memory_provider_params_handle_t hParams, int hDevice); + +/// @brief Set the memory type in the parameters struct. +/// @param hParams handle to the parameters of the CUDA Memory Provider. +/// @param memoryType memory type. +/// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. +umf_result_t umfCUDAMemoryProviderParamsSetMemoryType( + umf_cuda_memory_provider_params_handle_t hParams, + umf_usm_memory_type_t memoryType); + +umf_memory_provider_ops_t *umfCUDAMemoryProviderOps(void); + +#ifdef __cplusplus +} +#endif + +#endif /* UMF_PROVIDER_CUDA_H */ diff --git a/include/umf/providers/provider_devdax_memory.h b/include/umf/providers/provider_devdax_memory.h new file mode 100644 index 000000000..0fb5218bc --- /dev/null +++ b/include/umf/providers/provider_devdax_memory.h @@ -0,0 +1,74 @@ +/* + * Copyright (C) 2024 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +*/ + +#ifndef UMF_DEVDAX_MEMORY_PROVIDER_H +#define UMF_DEVDAX_MEMORY_PROVIDER_H + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/// @cond +#define UMF_DEVDAX_RESULTS_START_FROM 2000 +/// @endcond + +struct umf_devdax_memory_provider_params_t; + +typedef struct umf_devdax_memory_provider_params_t + *umf_devdax_memory_provider_params_handle_t; + +/// @brief Create a struct to store parameters of the Devdax Memory Provider. +/// @param hParams [out] handle to the newly created parameters struct. +/// @param path [in] path of the device DAX. +/// @param size [in] size of the device DAX in bytes. +/// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. +umf_result_t umfDevDaxMemoryProviderParamsCreate( + umf_devdax_memory_provider_params_handle_t *hParams, const char *path, + size_t size); + +/// @brief Destroy parameters struct. +/// @param hParams [in] handle to the parameters of the Devdax Memory Provider. +/// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. +umf_result_t umfDevDaxMemoryProviderParamsDestroy( + umf_devdax_memory_provider_params_handle_t hParams); + +/// @brief Set a device DAX in the parameters struct. Overwrites the previous value. +/// It provides an ability to use the same instance of params to create multiple +/// instances of the provider for different DAX devices. +/// @param hParams [in] handle to the parameters of the Devdax Memory Provider. +/// @param path [in] path of the device DAX. +/// @param size [in] size of the device DAX in bytes. +/// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. +umf_result_t umfDevDaxMemoryProviderParamsSetDeviceDax( + umf_devdax_memory_provider_params_handle_t hParams, const char *path, + size_t size); + +/// @brief Set the protection flags in the parameters struct. +/// @param hParams [in] handle to the parameters of the Devdax Memory Provider. 
+/// @param protection [in] combination of 'umf_mem_protection_flags_t' flags. +/// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. +umf_result_t umfDevDaxMemoryProviderParamsSetProtection( + umf_devdax_memory_provider_params_handle_t hParams, unsigned protection); + +/// @brief Devdax Memory Provider operation results +typedef enum umf_devdax_memory_provider_native_error { + UMF_DEVDAX_RESULT_SUCCESS = UMF_DEVDAX_RESULTS_START_FROM, ///< Success + UMF_DEVDAX_RESULT_ERROR_ALLOC_FAILED, ///< Memory allocation failed + UMF_DEVDAX_RESULT_ERROR_ADDRESS_NOT_ALIGNED, ///< Allocated address is not aligned + UMF_DEVDAX_RESULT_ERROR_FREE_FAILED, ///< Memory deallocation failed + UMF_DEVDAX_RESULT_ERROR_PURGE_FORCE_FAILED, ///< Force purging failed +} umf_devdax_memory_provider_native_error_t; + +umf_memory_provider_ops_t *umfDevDaxMemoryProviderOps(void); + +#ifdef __cplusplus +} +#endif + +#endif /* UMF_DEVDAX_MEMORY_PROVIDER_H */ diff --git a/include/umf/providers/provider_file_memory.h b/include/umf/providers/provider_file_memory.h new file mode 100644 index 000000000..f652e2cb8 --- /dev/null +++ b/include/umf/providers/provider_file_memory.h @@ -0,0 +1,75 @@ +/* + * Copyright (C) 2024 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +*/ + +#ifndef UMF_FILE_MEMORY_PROVIDER_H +#define UMF_FILE_MEMORY_PROVIDER_H + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/// @cond +#define UMF_FILE_RESULTS_START_FROM 3000 +/// @endcond + +struct umf_file_memory_provider_params_t; + +typedef struct umf_file_memory_provider_params_t + *umf_file_memory_provider_params_handle_t; + +/// @brief Create a struct to store parameters of the File Memory Provider. +/// @param hParams [out] handle to the newly created parameters struct. +/// @param path path to the file. +/// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. 
+umf_result_t umfFileMemoryProviderParamsCreate( + umf_file_memory_provider_params_handle_t *hParams, const char *path); + +/// @brief Destroy parameters struct. +/// @param hParams handle to the parameters of the File Memory Provider. +/// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. +umf_result_t umfFileMemoryProviderParamsDestroy( + umf_file_memory_provider_params_handle_t hParams); + +/// @brief Set the path in the parameters struct. +/// @param hParams handle to the parameters of the File Memory Provider. +/// @param path path to the file. +/// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. +umf_result_t umfFileMemoryProviderParamsSetPath( + umf_file_memory_provider_params_handle_t hParams, const char *path); + +/// @brief Set the protection in the parameters struct. +/// @param hParams handle to the parameters of the File Memory Provider. +/// @param protection protection. Combination of \p umf_mem_protection_flags_t flags +/// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. +umf_result_t umfFileMemoryProviderParamsSetProtection( + umf_file_memory_provider_params_handle_t hParams, unsigned protection); + +/// @brief Set the visibility in the parameters struct. +/// @param hParams handle to the parameters of the File Memory Provider. +/// @param visibility memory visibility mode. +/// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. 
+umf_result_t umfFileMemoryProviderParamsSetVisibility( + umf_file_memory_provider_params_handle_t hParams, + umf_memory_visibility_t visibility); + +/// @brief File Memory Provider operation results +typedef enum umf_file_memory_provider_native_error { + UMF_FILE_RESULT_SUCCESS = UMF_FILE_RESULTS_START_FROM, ///< Success + UMF_FILE_RESULT_ERROR_ALLOC_FAILED, ///< Memory allocation failed + UMF_FILE_RESULT_ERROR_FREE_FAILED, ///< Memory deallocation failed + UMF_FILE_RESULT_ERROR_PURGE_FORCE_FAILED, ///< Force purging failed +} umf_file_memory_provider_native_error_t; + +umf_memory_provider_ops_t *umfFileMemoryProviderOps(void); + +#ifdef __cplusplus +} +#endif + +#endif /* UMF_FILE_MEMORY_PROVIDER_H */ diff --git a/include/umf/providers/provider_level_zero.h b/include/umf/providers/provider_level_zero.h index f0c2acfbc..f760c5724 100644 --- a/include/umf/providers/provider_level_zero.h +++ b/include/umf/providers/provider_level_zero.h @@ -8,26 +8,65 @@ #ifndef UMF_PROVIDER_LEVEL_ZERO_H #define UMF_PROVIDER_LEVEL_ZERO_H -#include "umf/memory_provider.h" +#include <umf/memory_provider.h> #ifdef __cplusplus extern "C" { #endif -/// @brief USM memory allocation type -typedef enum umf_usm_memory_type_t { - UMF_MEMORY_TYPE_UNKNOWN = 0, ///< The memory pointed to is of unknown type - UMF_MEMORY_TYPE_HOST, ///< The memory pointed to is a host allocation - UMF_MEMORY_TYPE_DEVICE, ///< The memory pointed to is a device allocation - UMF_MEMORY_TYPE_SHARED, ///< The memory pointed to is a shared ownership allocation -} umf_usm_memory_type_t; - -/// @brief Level Zero Memory Provider settings struct -typedef struct level_zero_memory_provider_params_t { - void *level_zero_context_handle; ///< Handle to the Level Zero context - void *level_zero_device_handle; ///< Handle to the Level Zero device - umf_usm_memory_type_t memory_type; ///< Allocation memory type -} level_zero_memory_provider_params_t; +typedef struct _ze_device_handle_t *ze_device_handle_t; +typedef struct _ze_context_handle_t 
*ze_context_handle_t; + +struct umf_level_zero_memory_provider_params_t; + +/// @brief handle to the parameters of the Level Zero Memory Provider. +typedef struct umf_level_zero_memory_provider_params_t + *umf_level_zero_memory_provider_params_handle_t; + +/// @brief Create a struct to store parameters of the Level Zero Memory Provider. +/// @param hParams [out] handle to the newly created parameters struct. +/// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. +umf_result_t umfLevelZeroMemoryProviderParamsCreate( + umf_level_zero_memory_provider_params_handle_t *hParams); + +/// @brief Destroy parameters struct. +/// @param hParams handle to the parameters of the Level Zero Memory Provider. +/// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. +umf_result_t umfLevelZeroMemoryProviderParamsDestroy( + umf_level_zero_memory_provider_params_handle_t hParams); + +/// @brief Set the Level Zero context handle in the parameters struct. +/// @param hParams handle to the parameters of the Level Zero Memory Provider. +/// @param hContext handle to the Level Zero context. Cannot be \p NULL. +/// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. +umf_result_t umfLevelZeroMemoryProviderParamsSetContext( + umf_level_zero_memory_provider_params_handle_t hParams, + ze_context_handle_t hContext); + +/// @brief Set the Level Zero device handle in the parameters struct. +/// @param hParams handle to the parameters of the Level Zero Memory Provider. +/// @param hDevice handle to the Level Zero device. Can be \p NULL if memory type is \p UMF_MEMORY_TYPE_HOST. +/// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. +umf_result_t umfLevelZeroMemoryProviderParamsSetDevice( + umf_level_zero_memory_provider_params_handle_t hParams, + ze_device_handle_t hDevice); + +/// @brief Set the memory type in the parameters struct. 
+/// @param hParams handle to the parameters of the Level Zero Memory Provider. +/// @param memoryType memory type. +/// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. +umf_result_t umfLevelZeroMemoryProviderParamsSetMemoryType( + umf_level_zero_memory_provider_params_handle_t hParams, + umf_usm_memory_type_t memoryType); + +/// @brief Set the resident devices in the parameters struct. +/// @param hParams handle to the parameters of the Level Zero Memory Provider. +/// @param hDevices array of devices for which the memory should be made resident. +/// @param deviceCount number of devices for which the memory should be made resident. +/// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. +umf_result_t umfLevelZeroMemoryProviderParamsSetResidentDevices( + umf_level_zero_memory_provider_params_handle_t hParams, + ze_device_handle_t *hDevices, uint32_t deviceCount); umf_memory_provider_ops_t *umfLevelZeroMemoryProviderOps(void); diff --git a/include/umf/providers/provider_os_memory.h b/include/umf/providers/provider_os_memory.h index 1d4494547..a6bf43a7d 100644 --- a/include/umf/providers/provider_os_memory.h +++ b/include/umf/providers/provider_os_memory.h @@ -18,23 +18,6 @@ extern "C" { #define UMF_OS_RESULTS_START_FROM 1000 /// @endcond -/// @brief Protection of the memory allocations -typedef enum umf_mem_protection_flags_t { - UMF_PROTECTION_NONE = (1 << 0), ///< Memory allocations can not be accessed - UMF_PROTECTION_READ = (1 << 1), ///< Memory allocations can be read. - UMF_PROTECTION_WRITE = (1 << 2), ///< Memory allocations can be written. - UMF_PROTECTION_EXEC = (1 << 3), ///< Memory allocations can be executed. 
- /// @cond - UMF_PROTECTION_MAX // must be the last one - /// @endcond -} umf_mem_protection_flags_t; - -/// @brief Memory visibility mode -typedef enum umf_memory_visibility_t { - UMF_MEM_MAP_PRIVATE = 1, ///< private memory mapping - UMF_MEM_MAP_SHARED, ///< shared memory mapping (supported on Linux only) -} umf_memory_visibility_t; - /// @brief Memory binding mode /// Specifies how memory is bound to NUMA nodes on systems that support NUMA. /// Not every mode is supported on every system. @@ -77,32 +60,77 @@ typedef struct umf_numa_split_partition_t { unsigned target; } umf_numa_split_partition_t; -/// @brief Memory provider settings struct -typedef struct umf_os_memory_provider_params_t { - /// Combination of 'umf_mem_protection_flags_t' flags - unsigned protection; - /// memory visibility mode - umf_memory_visibility_t visibility; - /// (optional) a name of a shared memory file (valid only in case of the shared memory visibility) - char *shm_name; - - // NUMA config - /// ordered list of numa nodes - unsigned *numa_list; - /// length of numa_list - unsigned numa_list_len; - - /// Describes how node list is interpreted - umf_numa_mode_t numa_mode; - /// part size for interleave mode - 0 means default (system specific) - /// It might be rounded up because of HW constraints - size_t part_size; - - /// ordered list of the partitions for the split mode - umf_numa_split_partition_t *partitions; - /// len of the partitions array - unsigned partitions_len; -} umf_os_memory_provider_params_t; +struct umf_os_memory_provider_params_t; + +typedef struct umf_os_memory_provider_params_t + *umf_os_memory_provider_params_handle_t; + +/// @brief Create a struct to store parameters of the OS memory provider. +/// @param hParams [out] handle to the newly created parameters struct. +/// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. 
+umf_result_t umfOsMemoryProviderParamsCreate( + umf_os_memory_provider_params_handle_t *hParams); + +/// @brief Destroy parameters struct. +/// @param hParams handle to the parameters of the OS memory provider. +/// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. +umf_result_t umfOsMemoryProviderParamsDestroy( + umf_os_memory_provider_params_handle_t hParams); + +/// @brief Set protection flags for the OS memory provider. +/// @param hParams handle to the parameters of the OS memory provider. +/// @param protection combination of \p umf_mem_protection_flags_t flags. +/// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. +umf_result_t umfOsMemoryProviderParamsSetProtection( + umf_os_memory_provider_params_handle_t hParams, unsigned protection); + +/// @brief Set visibility mode for the OS memory provider. +/// @param hParams handle to the parameters of the OS memory provider. +/// @param visibility memory visibility mode. +/// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. +umf_result_t umfOsMemoryProviderParamsSetVisibility( + umf_os_memory_provider_params_handle_t hParams, + umf_memory_visibility_t visibility); + +/// @brief Set a name of a shared memory file for the OS memory provider. +/// @param hParams handle to the parameters of the OS memory provider. +/// @param shm_name a name of a shared memory file. +/// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. +umf_result_t umfOsMemoryProviderParamsSetShmName( + umf_os_memory_provider_params_handle_t hParams, const char *shm_name); + +/// @brief Set NUMA nodes for the OS memory provider. +/// @param hParams handle to the parameters of the OS memory provider. +/// @param numa_list ordered list of NUMA nodes. +/// @param numa_list_len length of the numa_list. +/// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. 
+umf_result_t umfOsMemoryProviderParamsSetNumaList( + umf_os_memory_provider_params_handle_t hParams, unsigned *numa_list, + unsigned numa_list_len); + +/// @brief Set NUMA mode for the OS memory provider. +/// @param hParams handle to the parameters of the OS memory provider. +/// @param numa_mode NUMA mode. Describes how node list is interpreted. +/// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. +umf_result_t umfOsMemoryProviderParamsSetNumaMode( + umf_os_memory_provider_params_handle_t hParams, umf_numa_mode_t numa_mode); + +/// @brief Set part size for the interleave mode. 0 means default (system specific) +/// It might be rounded up because of HW constraints. +/// @param hParams handle to the parameters of the OS memory provider. +/// @param part_size part size for interleave mode. +/// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. +umf_result_t umfOsMemoryProviderParamsSetPartSize( + umf_os_memory_provider_params_handle_t hParams, size_t part_size); + +/// @brief Set partitions for the split mode. +/// @param hParams handle to the parameters of the OS memory provider. +/// @param partitions ordered list of the partitions for the split mode. +/// @param partitions_len length of the partitions array. +/// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. 
+umf_result_t umfOsMemoryProviderParamsSetPartitions( + umf_os_memory_provider_params_handle_t hParams, + umf_numa_split_partition_t *partitions, unsigned partitions_len); /// @brief OS Memory Provider operation results typedef enum umf_os_memory_provider_native_error { @@ -118,23 +146,6 @@ typedef enum umf_os_memory_provider_native_error { umf_memory_provider_ops_t *umfOsMemoryProviderOps(void); -/// @brief Create default params for os memory provider -static inline umf_os_memory_provider_params_t -umfOsMemoryProviderParamsDefault(void) { - umf_os_memory_provider_params_t params = { - UMF_PROTECTION_READ | UMF_PROTECTION_WRITE, /* protection */ - UMF_MEM_MAP_PRIVATE, /* visibility mode */ - NULL, /* (optional) a name of a shared memory file (valid only in case of the shared memory visibility) */ - NULL, /* numa_list */ - 0, /* numa_list_len */ - UMF_NUMA_MODE_DEFAULT, /* numa_mode */ - 0, /* part_size */ - NULL, /* partitions */ - 0}; /* partitions_len*/ - - return params; -} - #ifdef __cplusplus } #endif diff --git a/licensing/third-party-programs.txt b/licensing/third-party-programs.txt index 54520c141..8ee09e2e9 100644 --- a/licensing/third-party-programs.txt +++ b/licensing/third-party-programs.txt @@ -422,4 +422,544 @@ _______________________________________________________________________________ _______________________________________________________________________________ +8. uthash: + + Copyright (c) 2005-2018, Troy D. Hanson http://troydhanson.github.com/uthash/ + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +_______________________________________________________________________________ + +9. google benchmark + + Copyright 2015 Google Inc. All rights reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +_______________________________________________________________________________ + +10. NVIDIA runtime headers + + Preface + ------- + + The Software License Agreement in Chapter 1 and the Supplement + in Chapter 2 contain license terms and conditions that govern + the use of NVIDIA software. By accepting this agreement, you + agree to comply with all the terms and conditions applicable + to the product(s) included herein. + + 1. 
License Agreement for NVIDIA Software Development Kits + --------------------------------------------------------- + + + Release Date: July 26, 2018 + --------------------------- + + + Important NoticeRead before downloading, installing, + copying or using the licensed software: + ------------------------------------------------------- + + This license agreement, including exhibits attached + ("Agreement”) is a legal agreement between you and NVIDIA + Corporation ("NVIDIA") and governs your use of a NVIDIA + software development kit (“SDK”). + + Each SDK has its own set of software and materials, but here + is a description of the types of items that may be included in + a SDK: source code, header files, APIs, data sets and assets + (examples include images, textures, models, scenes, videos, + native API input/output files), binary software, sample code, + libraries, utility programs, programming code and + documentation. + + This Agreement can be accepted only by an adult of legal age + of majority in the country in which the SDK is used. + + If you are entering into this Agreement on behalf of a company + or other legal entity, you represent that you have the legal + authority to bind the entity to this Agreement, in which case + “you” will mean the entity you represent. + + If you don’t have the required age or authority to accept + this Agreement, or if you don’t accept all the terms and + conditions of this Agreement, do not download, install or use + the SDK. + + You agree to use the SDK only for purposes that are permitted + by (a) this Agreement, and (b) any applicable law, regulation + or generally accepted practices or guidelines in the relevant + jurisdictions. + + + 1.1. License + + + 1.1.1. License Grant + + Subject to the terms of this Agreement, NVIDIA hereby grants + you a non-exclusive, non-transferable license, without the + right to sublicense (except as expressly provided in this + Agreement) to: + + 1. Install and use the SDK, + + 2. 
Modify and create derivative works of sample source code + delivered in the SDK, and + + 3. Distribute those portions of the SDK that are identified + in this Agreement as distributable, as incorporated in + object code format into a software application that meets + the distribution requirements indicated in this Agreement. + + + 1.1.2. Distribution Requirements + + These are the distribution requirements for you to exercise + the distribution grant: + + 1. Your application must have material additional + functionality, beyond the included portions of the SDK. + + 2. The distributable portions of the SDK shall only be + accessed by your application. + + 3. The following notice shall be included in modifications + and derivative works of sample source code distributed: + “This software contains source code provided by NVIDIA + Corporation.” + + 4. Unless a developer tool is identified in this Agreement + as distributable, it is delivered for your internal use + only. + + 5. The terms under which you distribute your application + must be consistent with the terms of this Agreement, + including (without limitation) terms relating to the + license grant and license restrictions and protection of + NVIDIA’s intellectual property rights. Additionally, you + agree that you will protect the privacy, security and + legal rights of your application users. + + 6. You agree to notify NVIDIA in writing of any known or + suspected distribution or use of the SDK not in compliance + with the requirements of this Agreement, and to enforce + the terms of your agreements with respect to distributed + SDK. + + + 1.1.3. Authorized Users + + You may allow employees and contractors of your entity or of + your subsidiary(ies) to access and use the SDK from your + secure network to perform work on your behalf. + + If you are an academic institution you may allow users + enrolled or employed by the academic institution to access and + use the SDK from your secure network. 
+ + You are responsible for the compliance with the terms of this + Agreement by your authorized users. If you become aware that + your authorized users didn’t follow the terms of this + Agreement, you agree to take reasonable steps to resolve the + non-compliance and prevent new occurrences. + + + 1.1.4. Pre-Release SDK + + The SDK versions identified as alpha, beta, preview or + otherwise as pre-release, may not be fully functional, may + contain errors or design flaws, and may have reduced or + different security, privacy, accessibility, availability, and + reliability standards relative to commercial versions of + NVIDIA software and materials. Use of a pre-release SDK may + result in unexpected results, loss of data, project delays or + other unpredictable damage or loss. + + You may use a pre-release SDK at your own risk, understanding + that pre-release SDKs are not intended for use in production + or business-critical systems. + + NVIDIA may choose not to make available a commercial version + of any pre-release SDK. NVIDIA may also choose to abandon + development and terminate the availability of a pre-release + SDK at any time without liability. + + + 1.1.5. Updates + + NVIDIA may, at its option, make available patches, workarounds + or other updates to this SDK. Unless the updates are provided + with their separate governing terms, they are deemed part of + the SDK licensed to you as provided in this Agreement. You + agree that the form and content of the SDK that NVIDIA + provides may change without prior notice to you. While NVIDIA + generally maintains compatibility between versions, NVIDIA may + in some cases make changes that introduce incompatibilities in + future versions of the SDK. + + + 1.1.6. Third Party Licenses + + The SDK may come bundled with, or otherwise include or be + distributed with, third party software licensed by a NVIDIA + supplier and/or open source software provided under an open + source license. 
Use of third party software is subject to the + third-party license terms, or in the absence of third party + terms, the terms of this Agreement. Copyright to third party + software is held by the copyright holders indicated in the + third-party software or license. + + + 1.1.7. Reservation of Rights + + NVIDIA reserves all rights, title, and interest in and to the + SDK, not expressly granted to you under this Agreement. + + + 1.2. Limitations + + The following license limitations apply to your use of the + SDK: + + 1. You may not reverse engineer, decompile or disassemble, + or remove copyright or other proprietary notices from any + portion of the SDK or copies of the SDK. + + 2. Except as expressly provided in this Agreement, you may + not copy, sell, rent, sublicense, transfer, distribute, + modify, or create derivative works of any portion of the + SDK. For clarity, you may not distribute or sublicense the + SDK as a stand-alone product. + + 3. Unless you have an agreement with NVIDIA for this + purpose, you may not indicate that an application created + with the SDK is sponsored or endorsed by NVIDIA. + + 4. You may not bypass, disable, or circumvent any + encryption, security, digital rights management or + authentication mechanism in the SDK. + + 5. You may not use the SDK in any manner that would cause it + to become subject to an open source software license. As + examples, licenses that require as a condition of use, + modification, and/or distribution that the SDK be: + + a. Disclosed or distributed in source code form; + + b. Licensed for the purpose of making derivative works; + or + + c. Redistributable at no charge. + + 6. Unless you have an agreement with NVIDIA for this + purpose, you may not use the SDK with any system or + application where the use or failure of the system or + application can reasonably be expected to threaten or + result in personal injury, death, or catastrophic loss. 
+ Examples include use in avionics, navigation, military, + medical, life support or other life critical applications. + NVIDIA does not design, test or manufacture the SDK for + these critical uses and NVIDIA shall not be liable to you + or any third party, in whole or in part, for any claims or + damages arising from such uses. + + 7. You agree to defend, indemnify and hold harmless NVIDIA + and its affiliates, and their respective employees, + contractors, agents, officers and directors, from and + against any and all claims, damages, obligations, losses, + liabilities, costs or debt, fines, restitutions and + expenses (including but not limited to attorney’s fees + and costs incident to establishing the right of + indemnification) arising out of or related to your use of + the SDK outside of the scope of this Agreement, or not in + compliance with its terms. + + + 1.3. Ownership + + 1. NVIDIA or its licensors hold all rights, title and + interest in and to the SDK and its modifications and + derivative works, including their respective intellectual + property rights, subject to your rights described in this + section. This SDK may include software and materials from + NVIDIA’s licensors, and these licensors are intended + third party beneficiaries that may enforce this Agreement + with respect to their intellectual property rights. + + 2. You hold all rights, title and interest in and to your + applications and your derivative works of the sample + source code delivered in the SDK, including their + respective intellectual property rights, subject to + NVIDIA’s rights described in this section. + + 3. You may, but don’t have to, provide to NVIDIA + suggestions, feature requests or other feedback regarding + the SDK, including possible enhancements or modifications + to the SDK. 
For any feedback that you voluntarily provide, + you hereby grant NVIDIA and its affiliates a perpetual, + non-exclusive, worldwide, irrevocable license to use, + reproduce, modify, license, sublicense (through multiple + tiers of sublicensees), and distribute (through multiple + tiers of distributors) it without the payment of any + royalties or fees to you. NVIDIA will use feedback at its + choice. NVIDIA is constantly looking for ways to improve + its products, so you may send feedback to NVIDIA through + the developer portal at https://developer.nvidia.com. + + + 1.4. No Warranties + + THE SDK IS PROVIDED BY NVIDIA “AS IS” AND “WITH ALL + FAULTS.” TO THE MAXIMUM EXTENT PERMITTED BY LAW, NVIDIA AND + ITS AFFILIATES EXPRESSLY DISCLAIM ALL WARRANTIES OF ANY KIND + OR NATURE, WHETHER EXPRESS, IMPLIED OR STATUTORY, INCLUDING, + BUT NOT LIMITED TO, ANY WARRANTIES OF MERCHANTABILITY, FITNESS + FOR A PARTICULAR PURPOSE, TITLE, NON-INFRINGEMENT, OR THE + ABSENCE OF ANY DEFECTS THEREIN, WHETHER LATENT OR PATENT. NO + WARRANTY IS MADE ON THE BASIS OF TRADE USAGE, COURSE OF + DEALING OR COURSE OF TRADE. + + + 1.5. Limitation of Liability + + TO THE MAXIMUM EXTENT PERMITTED BY LAW, NVIDIA AND ITS + AFFILIATES SHALL NOT BE LIABLE FOR ANY SPECIAL, INCIDENTAL, + PUNITIVE OR CONSEQUENTIAL DAMAGES, OR ANY LOST PROFITS, LOSS + OF USE, LOSS OF DATA OR LOSS OF GOODWILL, OR THE COSTS OF + PROCURING SUBSTITUTE PRODUCTS, ARISING OUT OF OR IN CONNECTION + WITH THIS AGREEMENT OR THE USE OR PERFORMANCE OF THE SDK, + WHETHER SUCH LIABILITY ARISES FROM ANY CLAIM BASED UPON BREACH + OF CONTRACT, BREACH OF WARRANTY, TORT (INCLUDING NEGLIGENCE), + PRODUCT LIABILITY OR ANY OTHER CAUSE OF ACTION OR THEORY OF + LIABILITY. IN NO EVENT WILL NVIDIA’S AND ITS AFFILIATES + TOTAL CUMULATIVE LIABILITY UNDER OR ARISING OUT OF THIS + AGREEMENT EXCEED US$10.00. THE NATURE OF THE LIABILITY OR THE + NUMBER OF CLAIMS OR SUITS SHALL NOT ENLARGE OR EXTEND THIS + LIMIT. 
+ + These exclusions and limitations of liability shall apply + regardless if NVIDIA or its affiliates have been advised of + the possibility of such damages, and regardless of whether a + remedy fails its essential purpose. These exclusions and + limitations of liability form an essential basis of the + bargain between the parties, and, absent any of these + exclusions or limitations of liability, the provisions of this + Agreement, including, without limitation, the economic terms, + would be substantially different. + + + 1.6. Termination + + 1. This Agreement will continue to apply until terminated by + either you or NVIDIA as described below. + + 2. If you want to terminate this Agreement, you may do so by + stopping to use the SDK. + + 3. NVIDIA may, at any time, terminate this Agreement if: + + a. (i) you fail to comply with any term of this + Agreement and the non-compliance is not fixed within + thirty (30) days following notice from NVIDIA (or + immediately if you violate NVIDIA’s intellectual + property rights); + + b. (ii) you commence or participate in any legal + proceeding against NVIDIA with respect to the SDK; or + + c. (iii) NVIDIA decides to no longer provide the SDK in + a country or, in NVIDIA’s sole discretion, the + continued use of it is no longer commercially viable. + + 4. Upon any termination of this Agreement, you agree to + promptly discontinue use of the SDK and destroy all copies + in your possession or control. Your prior distributions in + accordance with this Agreement are not affected by the + termination of this Agreement. Upon written request, you + will certify in writing that you have complied with your + commitments under this section. Upon any termination of + this Agreement all provisions survive except for the + license grant provisions. + + + 1.7. 
General + + If you wish to assign this Agreement or your rights and + obligations, including by merger, consolidation, dissolution + or operation of law, contact NVIDIA to ask for permission. Any + attempted assignment not approved by NVIDIA in writing shall + be void and of no effect. NVIDIA may assign, delegate or + transfer this Agreement and its rights and obligations, and if + to a non-affiliate you will be notified. + + You agree to cooperate with NVIDIA and provide reasonably + requested information to verify your compliance with this + Agreement. + + This Agreement will be governed in all respects by the laws of + the United States and of the State of Delaware as those laws + are applied to contracts entered into and performed entirely + within Delaware by Delaware residents, without regard to the + conflicts of laws principles. The United Nations Convention on + Contracts for the International Sale of Goods is specifically + disclaimed. You agree to all terms of this Agreement in the + English language. + + The state or federal courts residing in Santa Clara County, + California shall have exclusive jurisdiction over any dispute + or claim arising out of this Agreement. Notwithstanding this, + you agree that NVIDIA shall still be allowed to apply for + injunctive remedies or an equivalent type of urgent legal + relief in any jurisdiction. + + If any court of competent jurisdiction determines that any + provision of this Agreement is illegal, invalid or + unenforceable, such provision will be construed as limited to + the extent necessary to be consistent with and fully + enforceable under the law and the remaining provisions will + remain in full force and effect. Unless otherwise specified, + remedies are cumulative. + + Each party acknowledges and agrees that the other is an + independent contractor in the performance of this Agreement. 
+ + The SDK has been developed entirely at private expense and is + “commercial items” consisting of “commercial computer + software” and “commercial computer software + documentation” provided with RESTRICTED RIGHTS. Use, + duplication or disclosure by the U.S. Government or a U.S. + Government subcontractor is subject to the restrictions in + this Agreement pursuant to DFARS 227.7202-3(a) or as set forth + in subparagraphs (c)(1) and (2) of the Commercial Computer + Software - Restricted Rights clause at FAR 52.227-19, as + applicable. Contractor/manufacturer is NVIDIA, 2788 San Tomas + Expressway, Santa Clara, CA 95051. + + The SDK is subject to United States export laws and + regulations. You agree that you will not ship, transfer or + export the SDK into any country, or use the SDK in any manner, + prohibited by the United States Bureau of Industry and + Security or economic sanctions regulations administered by the + U.S. Department of Treasury’s Office of Foreign Assets + Control (OFAC), or any applicable export laws, restrictions or + regulations. These laws include restrictions on destinations, + end users and end use. By accepting this Agreement, you + confirm that you are not a resident or citizen of any country + currently embargoed by the U.S. and that you are not otherwise + prohibited from receiving the SDK. + + Any notice delivered by NVIDIA to you under this Agreement + will be delivered via mail, email or fax. You agree that any + notices that NVIDIA sends you electronically will satisfy any + legal communication requirements. Please direct your legal + notices or other correspondence to NVIDIA Corporation, 2788 + San Tomas Expressway, Santa Clara, California 95051, United + States of America, Attention: Legal Department. 
+ + This Agreement and any exhibits incorporated into this + Agreement constitute the entire agreement of the parties with + respect to the subject matter of this Agreement and supersede + all prior negotiations or documentation exchanged between the + parties relating to this SDK license. Any additional and/or + conflicting terms on documents issued by you are null, void, + and invalid. Any amendment or waiver under this Agreement + shall be in writing and signed by representatives of both + parties. + + + 2. CUDA Toolkit Supplement to Software License Agreement for + NVIDIA Software Development Kits + ------------------------------------------------------------ + + + Release date: August 16, 2018 + ----------------------------- + + The terms in this supplement govern your use of the NVIDIA + CUDA Toolkit SDK under the terms of your license agreement + (“Agreement”) as modified by this supplement. Capitalized + terms used but not defined below have the meaning assigned to + them in the Agreement. + + This supplement is an exhibit to the Agreement and is + incorporated as an integral part of the Agreement. In the + event of conflict between the terms in this supplement and the + terms in the Agreement, the terms in this supplement govern. + + + 2.1. License Scope + + The SDK is licensed for you to develop applications only for + use in systems with NVIDIA GPUs. + + + 2.2. Distribution + + The portions of the SDK that are distributable under the + Agreement are listed in Attachment A. + + + 2.3. Operating Systems + + Those portions of the SDK designed exclusively for use on the + Linux or FreeBSD operating systems, or other operating systems + derived from the source code to these operating systems, may + be copied and redistributed for use in accordance with this + Agreement, provided that the object code files are not + modified in any way (except for unzipping of compressed + files). + + + 2.4. 
Audio and Video Encoders and Decoders + + You acknowledge and agree that it is your sole responsibility + to obtain any additional third-party licenses required to + make, have made, use, have used, sell, import, and offer for + sale your products or services that include or incorporate any + third-party software and content relating to audio and/or + video encoders and decoders from, including but not limited + to, Microsoft, Thomson, Fraunhofer IIS, Sisvel S.p.A., + MPEG-LA, and Coding Technologies. NVIDIA does not grant to you + under this Agreement any necessary patent or other rights with + respect to any audio and/or video encoders and decoders. + + + 2.5. Licensing + + If the distribution terms in this Agreement are not suitable + for your organization, or for any questions regarding this + Agreement, please contact NVIDIA at + nvidia-compute-license-questions@nvidia.com. + +_______________________________________________________________________________ + *Other names and brands may be claimed as the property of others. diff --git a/scripts/coverage/coverage_capture.sh b/scripts/coverage/coverage_capture.sh new file mode 100755 index 000000000..c77f1b141 --- /dev/null +++ b/scripts/coverage/coverage_capture.sh @@ -0,0 +1,49 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +# This script calculates coverage for a single build + +set -e + +[ "$1" != "" ] && OUTPUT_NAME="$1" || OUTPUT_NAME="output_coverage" + +set -x + +lcov --capture --directory . \ +--exclude "/usr/*" \ +--exclude "*/build/*" \ +--exclude "*/benchmark/*" \ +--exclude "*/examples/*" \ +--exclude "*/test/*" \ +--exclude "*/src/critnib/*" \ +--exclude "*/src/ravl/*" \ +--exclude "*proxy_lib_new_delete.h" \ +--output-file $OUTPUT_NAME || \ + ( echo "RETRY after ERROR !!!:" && \ + lcov --capture --directory . 
\ + --exclude "/usr/*" \ + --exclude "*/build/*" \ + --exclude "*/benchmark/*" \ + --exclude "*/examples/*" \ + --exclude "*/test/*" \ + --exclude "*/src/critnib/*" \ + --exclude "*/src/ravl/*" \ + --exclude "*proxy_lib_new_delete.h" \ + --ignore-errors mismatch,unused,negative,corrupt \ + --output-file $OUTPUT_NAME ) + +# Most common UMF source code directory on most GH CI runners +COMMON_UMF_DIR=/home/runner/work/unified-memory-framework/unified-memory-framework + +# Get the current UMF source code directory +# This is ${CURRENT_UMF_DIR}/scripts/coverage/coverage_capture.sh file, so +CURRENT_UMF_DIR=$(realpath $(dirname $0)/../..) + +# Coverage (lcov) has to be run in the same directory on all runners: +# /home/runner/work/unified-memory-framework/unified-memory-framework/build +# to be able to merge all results, so we have to replace the paths if they are different: +if [ "$CURRENT_UMF_DIR" != "$COMMON_UMF_DIR" ]; then + sed -i "s|$CURRENT_UMF_DIR|$COMMON_UMF_DIR|g" $OUTPUT_NAME +fi diff --git a/scripts/coverage/merge_coverage_files.sh b/scripts/coverage/merge_coverage_files.sh new file mode 100755 index 000000000..193600556 --- /dev/null +++ b/scripts/coverage/merge_coverage_files.sh @@ -0,0 +1,27 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +# +# Arguments: +# +# This script looks for "${PREFIX}-*" lcov output files in the current directory, +# merges them and saves the merged output in the $OUTPUT_NAME file. 
+# + +[ "$1" != "" ] && PREFIX="$1" || PREFIX="exports-coverage" +[ "$2" != "" ] && OUTPUT_NAME="$2" || OUTPUT_NAME="total_coverage" + +OPTS="" +for file in $(ls -1 ${PREFIX}-*); do + OPTS="$OPTS -a $file" +done + +set -x + +lcov $OPTS -o $OUTPUT_NAME || \ + ( echo "RETRY after ERROR !!!:" && \ + lcov $OPTS \ + --ignore-errors mismatch,unused,negative,corrupt \ + --output-file $OUTPUT_NAME ) diff --git a/scripts/docs_config/api.rst b/scripts/docs_config/api.rst index 1233f59f7..7f734cad2 100644 --- a/scripts/docs_config/api.rst +++ b/scripts/docs_config/api.rst @@ -80,6 +80,19 @@ and operate on the provider. .. doxygenfile:: memory_provider.h :sections: define enum typedef func var +Coarse Provider +------------------------------------------ + +A memory provider that can provide memory from: + +1) A given pre-allocated buffer (the fixed-size memory provider option) or +2) From an additional upstream provider (e.g. provider that does not support + the free() operation like the File memory provider or the DevDax memory + provider - see below). + +.. doxygenfile:: provider_coarse.h + :sections: define enum typedef func var + OS Memory Provider ------------------------------------------ @@ -96,6 +109,22 @@ A memory provider that provides memory from L0 device. .. doxygenfile:: provider_level_zero.h :sections: define enum typedef func var +DevDax Memory Provider +------------------------------------------ + +A memory provider that provides memory from a device DAX (a character device file /dev/daxX.Y). + +.. doxygenfile:: provider_devdax_memory.h + :sections: define enum typedef func var + +File Memory Provider +------------------------------------------ + +A memory provider that provides memory by mapping a regular, extendable file. + +.. doxygenfile:: provider_file_memory.h + :sections: define enum typedef func var + Memspace ========================================== @@ -116,6 +145,15 @@ Mempolicy .. 
doxygenfile:: mempolicy.h :sections: define enum typedef func +Memtarget +========================================== + +TODO: Add general information about memtargets. + +Memtarget +------------------------------------------ +.. doxygenfile:: memtarget.h + :sections: define enum typedef func Inter-Process Communication ========================================== diff --git a/scripts/docs_config/conf.py b/scripts/docs_config/conf.py index b93d7d977..28c9b5f9f 100644 --- a/scripts/docs_config/conf.py +++ b/scripts/docs_config/conf.py @@ -22,7 +22,7 @@ author = "Intel" # The full version, including alpha/beta/rc tags -release = "0.9.0" +release = "0.10.0" # -- General configuration --------------------------------------------------- diff --git a/scripts/docs_config/examples.rst b/scripts/docs_config/examples.rst index 4098583a6..c58e7fc22 100644 --- a/scripts/docs_config/examples.rst +++ b/scripts/docs_config/examples.rst @@ -31,11 +31,20 @@ the OS Memory Provider API:: #include "umf/providers/provider_os_memory.h" -Get a pointer to the OS memory provider operations struct and -a copy of default parameters:: +Get a pointer to the OS memory provider operations struct:: umf_memory_provider_ops_t *provider_ops = umfOsMemoryProviderOps(); - umf_os_memory_provider_params_t params = umfOsMemoryProviderParamsDefault(); + +Get a default OS memory provider parameters. 
The handle to the parameters object +is returned by the :any:`umfOsMemoryProviderParamsCreate` function:: + + umf_os_memory_provider_params_handle_t params = NULL; + + res = umfOsMemoryProviderParamsCreate(¶ms); + if (res != UMF_RESULT_SUCCESS) { + printf("Failed to create OS memory provider params!\n"); + return -1; + } The handle to created memory ``provider`` object is returned as the last argument of :any:`umfMemoryProviderCreate`:: @@ -43,7 +52,10 @@ of :any:`umfMemoryProviderCreate`:: umf_memory_provider_handle_t provider; umfMemoryProviderCreate(provider_ops, ¶ms, &provider); -With this handle we can allocate a chunk of memory, call :any:`umfMemoryProviderAlloc`:: +The ``params`` object can be destroyed after the provider is created:: + umfOsMemoryProviderParamsDestroy(params); + +With the ``provider`` handle we can allocate a chunk of memory, call :any:`umfMemoryProviderAlloc`:: size_t alloc_size = 5000; size_t alignment = 0; @@ -111,7 +123,23 @@ Freeing memory is as easy as can be:: GPU shared memory ============================================================================== -You can find the full example code in the `examples/gpu_shared_memory/gpu_shared_memory.c`_ file +You can find the full example code in the `examples/level_zero_shared_memory/level_zero_shared_memory.c`_ file +or `examples/cuda_shared_memory/cuda_shared_memory.c`_ file in the UMF repository. + +TODO + +Memspace +============================================================================== + +You can find the full examples code in the `examples/memspace`_ directory +in the UMF repository. + +TODO + +Custom memory provider +============================================================================== + +You can find the full examples code in the `examples/custom_file_provider/custom_file_provider.c`_ file in the UMF repository. TODO @@ -166,12 +194,15 @@ to another process it can be opened by the :any:`umfOpenIPCHandle` function. .. 
code-block:: c + umf_ipc_handler_handle_t ipc_handler = 0; + umf_result = umfPoolGetIPCHandler(consumer_pool, &ipc_handler); + void *mapped_buf = NULL; - umf_result = umfOpenIPCHandle(consumer_pool, ipc_handle, &mapped_buf); + umf_result = umfOpenIPCHandle(ipc_handler, ipc_handle, &mapped_buf); -The :any:`umfOpenIPCHandle` function requires the memory pool handle and the IPC handle as input parameters. It maps +The :any:`umfOpenIPCHandle` function requires the IPC handler and the IPC handle as input parameters. The IPC handler maps the handle to the current process address space and returns the pointer to the same memory region that was allocated -in the producer process. +in the producer process. To retrieve the IPC handler, the :any:`umfPoolGetIPCHandler` function is used. .. note:: The virtual addresses of the memory region referred to by the IPC handle may not be the same in the producer and consumer processes. @@ -193,8 +224,11 @@ function is called on the consumer side. The memory mappings on the consumer sid the :any:`umfCloseIPCHandle` function is called. .. _examples/basic/basic.c: https://github.com/oneapi-src/unified-memory-framework/blob/main/examples/basic/basic.c -.. _examples/gpu_shared_memory/gpu_shared_memory.c: https://github.com/oneapi-src/unified-memory-framework/blob/main/examples/gpu_shared_memory/gpu_shared_memory.c +.. _examples/level_zero_shared_memory/level_zero_shared_memory.c: https://github.com/oneapi-src/unified-memory-framework/blob/main/examples/level_zero_shared_memory/level_zero_shared_memory.c +.. _examples/cuda_shared_memory/cuda_shared_memory.c: https://github.com/oneapi-src/unified-memory-framework/blob/main/examples/cuda_shared_memory/cuda_shared_memory.c .. _examples/ipc_level_zero/ipc_level_zero.c: https://github.com/oneapi-src/unified-memory-framework/blob/main/examples/ipc_level_zero/ipc_level_zero.c +.. 
_examples/custom_file_provider/custom_file_provider.c: https://github.com/oneapi-src/unified-memory-framework/blob/main/examples/custom_file_provider/custom_file_provider.c +.. _examples/memspace: https://github.com/oneapi-src/unified-memory-framework/blob/main/examples/memspace/ .. _README: https://github.com/oneapi-src/unified-memory-framework/blob/main/README.md#memory-pool-managers .. _umf/ipc.h: https://github.com/oneapi-src/unified-memory-framework/blob/main/include/umf/ipc.h .. _provider_os_memory.h: https://github.com/oneapi-src/unified-memory-framework/blob/main/include/umf/providers/provider_os_memory.h diff --git a/scripts/docs_config/introduction.rst b/scripts/docs_config/introduction.rst index d47439047..f90b26b41 100644 --- a/scripts/docs_config/introduction.rst +++ b/scripts/docs_config/introduction.rst @@ -99,7 +99,7 @@ defined pool allocators if they implement the UMF interface. Memory Pools ============ -A memory pool consists of a pool allocator and a memory provider instancies +A memory pool consists of a pool allocator and a memory provider instances along with their properties and allocation policies. Memory pools are used by the :ref:`allocation API ` as a first argument. There is also a possibility to retrieve a memory pool from an existing memory pointer that points to a memory diff --git a/scripts/qemu/qemu_config.py b/scripts/qemu/parse_config.py similarity index 70% rename from scripts/qemu/qemu_config.py rename to scripts/qemu/parse_config.py index 20dc67c50..3d14fb344 100644 --- a/scripts/qemu/qemu_config.py +++ b/scripts/qemu/parse_config.py @@ -1,3 +1,10 @@ +""" + Copyright (C) 2023-2024 Intel Corporation + + Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
+ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +""" + import re import subprocess # nosec import sys @@ -6,10 +13,13 @@ import psutil import shutil -# If you want to manually run this script please install deps by: pip install -r requirements.txt -# To get virsh please install libvirt-clients +# This script parses the topology xml file and returns QEMU arguments. +# +# Before running this script: +# - install python deps for this script: pip install -r requirements.txt +# - install 'libvirt-clients' package (for virsh) # -# Enable verbose mode by using environment variable ENABLE_VERBOSE=1 +# Enable verbose mode by setting environment variable: ENABLE_VERBOSE=1 TopologyCfg = collections.namedtuple( "TopologyCfg", ["name", "hmat", "cpu_model", "cpu_options", "mem_options"] @@ -20,7 +30,7 @@ def enable_verbose(): """ - Parse command line arguments + Check if env var ENABLE_VERBOSE is set and enable verbose mode """ global verbose_mode verbose_mode = os.getenv("ENABLE_VERBOSE", False) @@ -43,9 +53,13 @@ def parse_topology_xml(tpg_file_name: str) -> TopologyCfg: result.check_returncode() libvirt_args = result.stdout.decode("utf-8").strip() + if verbose_mode != False: + print(f"\nFull libvirt_args: {libvirt_args}\n") + + hmat_search = re.search(r"hmat=(\w+)", libvirt_args) tpg_cfg = { "name": re.search(r"guest=(\w+)", libvirt_args).group(1), - "hmat": "hmat=on" in libvirt_args, + "hmat": hmat_search.group(0) if hmat_search else "hmat=off", "cpu_model": re.search(r"cpu (\S+)", libvirt_args).group(1), "cpu_options": re.search("(?=-smp)(.*)threads=[0-9]+", libvirt_args).group( 0 @@ -67,7 +81,7 @@ def parse_topology_xml(tpg_file_name: str) -> TopologyCfg: except subprocess.CalledProcessError: sys.exit(f"\n XML file: {tpg_file_name} error in virsh parsing") except Exception: - sys.exit(f"\n Provided file is missing or missing virsh.") + sys.exit(f"\n Provided file ({tpg_file_name}) is missing or missing virsh.") return tpg @@ -76,13 +90,16 @@ def 
get_qemu_args(tpg_file_name: str) -> str: Get QEMU arguments from topology xml file """ tpg = parse_topology_xml(tpg_file_name) - qemu_args = f"-name {tpg.name} {calculate_memory(tpg)} -cpu {tpg.cpu_model} {tpg.cpu_options} {tpg.mem_options}" + qemu_args = ( + f"-machine q35,usb=off,{tpg.hmat} -name {tpg.name} " + f"{calculate_memory(tpg)} -cpu {tpg.cpu_model} {tpg.cpu_options} {tpg.mem_options}" + ) return qemu_args def calculate_memory(tpg: TopologyCfg) -> str: """ - Memory used by QEMU + Total memory required by given QEMU config """ if tpg.mem_options: mem_needed = 0 @@ -105,4 +122,6 @@ def calculate_memory(tpg: TopologyCfg) -> str: tpg_file_name = sys.argv[1] else: sys.exit(f"\n Usage: {sys.argv[0]} ") + + # Print QEMU arguments as a result of this script print(get_qemu_args(tpg_file_name)) diff --git a/scripts/qemu/run-build.sh b/scripts/qemu/run-build.sh index 91c2e4f61..06d6043f6 100755 --- a/scripts/qemu/run-build.sh +++ b/scripts/qemu/run-build.sh @@ -5,15 +5,16 @@ set -e -repo=$1 -branch=$2 +[ "$1" = "COVERAGE" ] && COVERAGE=ON || COVERAGE=OFF -echo password | sudo -Sk apt update -echo password | sudo -Sk apt install -y git cmake gcc g++ numactl libnuma-dev libhwloc-dev libjemalloc-dev libtbb-dev pkg-config valgrind hwloc +# This is ${UMF_DIR}/scripts/qemu/run-build.sh file, so +UMF_DIR=$(dirname $0)/../.. +cd $UMF_DIR +pwd -git clone $repo umf -cd umf -git checkout $branch +echo password | sudo -Sk apt-get update +echo password | sudo -Sk apt-get install -y git cmake gcc g++ pkg-config \ + numactl libnuma-dev hwloc libhwloc-dev libjemalloc-dev libtbb-dev valgrind lcov mkdir build cd build @@ -21,11 +22,13 @@ cd build cmake .. 
\ -DCMAKE_BUILD_TYPE=Debug \ -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON \ + -DUMF_BUILD_CUDA_PROVIDER=ON \ -DUMF_FORMAT_CODE_STYLE=OFF \ -DUMF_DEVELOPER_MODE=ON \ -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON \ -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON \ -DUMF_BUILD_EXAMPLES=ON \ + -DUMF_USE_COVERAGE=${COVERAGE} \ -DUMF_TESTS_FAIL_ON_SKIP=ON make -j $(nproc) diff --git a/scripts/qemu/run-tests.sh b/scripts/qemu/run-tests.sh index 00fdfb8fd..9d855590b 100755 --- a/scripts/qemu/run-tests.sh +++ b/scripts/qemu/run-tests.sh @@ -3,8 +3,23 @@ # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# If env var SHORT_RUN is set to true, part of the tests are skipped here. +# For coverage, OS_FULL_NAME env variable has to be set to the name of the OS. + set -e +COVERAGE=$1 +XML_CONFIG_FILE=$2 + +CONFIG_NAME=$(echo $XML_CONFIG_FILE | cut -d. -f1) # remove the '.xml' extension +COVERAGE_DIR=${HOME}/coverage +mkdir -p $COVERAGE_DIR + +# This is ${UMF_DIR}/scripts/qemu/run-tests.sh file, so +UMF_DIR=$(dirname $0)/../.. +cd $UMF_DIR +UMF_DIR=$(pwd) + # Drop caches, restores free memory on NUMA nodes echo password | sudo sync; echo password | sudo sh -c "/usr/bin/echo 3 > /proc/sys/vm/drop_caches" @@ -13,14 +28,23 @@ echo password | sudo bash -c "echo 0 > /proc/sys/kernel/yama/ptrace_scope" numactl -H -cd umf/build +cd build +echo "## Running all tests ..." ctest --verbose -# run tests bound to a numa node +echo "## Running tests bound to a numa node 0 and node 1 ..." numactl -N 0 ctest --output-on-failure numactl -N 1 ctest --output-on-failure -# run tests under valgrind -echo "Running tests under valgrind memcheck ..." -../test/test_valgrind.sh .. . 
memcheck +if [ "$COVERAGE" = "COVERAGE" ]; then + COVERAGE_FILE_NAME=exports-coverage-qemu-${OS_FULL_NAME}-${CONFIG_NAME} + echo "COVERAGE_FILE_NAME: $COVERAGE_FILE_NAME" + ../scripts/coverage/coverage_capture.sh $COVERAGE_FILE_NAME + mv ./$COVERAGE_FILE_NAME $COVERAGE_DIR +fi +# run tests under valgrind only on long run or for default configuration +if [ "${SHORT_RUN}" != "true" ] || [ "${CONFIG_NAME}" == "default" ]; then + echo "## Running tests under valgrind memcheck ..." + ../test/test_valgrind.sh .. . memcheck +fi diff --git a/scripts/qemu/start_qemu.sh b/scripts/qemu/start_qemu.sh index 0962dd98a..c4758ac17 100755 --- a/scripts/qemu/start_qemu.sh +++ b/scripts/qemu/start_qemu.sh @@ -3,28 +3,25 @@ # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -set -x set -e -config_file=$1 +# The config file name (should be located in ./configs/ sub-dir) +config_name=$1 -python3 scripts/qemu/qemu_config.py $config_file +# Parse the config file to get topology info and fix escaped single quotes +parsed_config=$(python3 "$(dirname $0)/parse_config.py" "$(dirname $0)/configs/${config_name}" | sed s/''\''/'/g) -if grep -q '' "$config_file"; then - hmat="on" -else - hmat="off" -fi +set -x sudo qemu-system-x86_64 \ - -drive file=./ubuntu-23.04-server-cloudimg-amd64.img,format=qcow2,index=0,media=disk,id=hd \ + -drive file=./qemu_image.img,format=qcow2,index=0,media=disk,id=hd \ -cdrom ./ubuntu-cloud-init.iso \ - -machine q35,usb=off,hmat=$hmat \ -enable-kvm \ -net nic -net user,hostfwd=tcp::2222-:22 \ - $(python3 scripts/qemu/qemu_config.py $config_file | sed s/''\''/'/g) \ + ${parsed_config} \ -daemonize -display none +# Enable ssh connection to the VM until ssh-keyscan -p 2222 -H 127.0.0.1 >> ~/.ssh/known_hosts 2>/dev/null; do echo "Waiting for SSH..." 
sleep 1 diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 0ebd1160f..b4736ed0f 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -4,39 +4,18 @@ include(${UMF_CMAKE_SOURCE_DIR}/cmake/helpers.cmake) +set(UMF_LEVEL_ZERO_INCLUDE_DIR + "" + CACHE PATH "Directory containing the Level Zero headers") +set(UMF_CUDA_INCLUDE_DIR + "" + CACHE PATH "Directory containing the CUDA headers") + # Compile definitions for UMF library. # # TODO: Cleanup the compile definitions across all the CMake files set(UMF_COMMON_COMPILE_DEFINITIONS UMF_VERSION=${UMF_VERSION}) -if(UMF_BUILD_LEVEL_ZERO_PROVIDER) - include(FetchContent) - - set(LEVEL_ZERO_LOADER_REPO "https://github.com/oneapi-src/level-zero.git") - set(LEVEL_ZERO_LOADER_TAG v1.16.1) - - message( - STATUS - "Installing level-zero ${LEVEL_ZERO_LOADER_TAG} from ${LEVEL_ZERO_LOADER_REPO} ..." - ) - - FetchContent_Declare( - level-zero-loader - GIT_REPOSITORY ${LEVEL_ZERO_LOADER_REPO} - GIT_TAG ${LEVEL_ZERO_LOADER_TAG} - EXCLUDE_FROM_ALL) - - FetchContent_GetProperties(level-zero-loader) - if(NOT level-zero-loader_POPULATED) - FetchContent_Populate(level-zero-loader) - endif() - - set(LEVEL_ZERO_INCLUDE_DIRS - ${level-zero-loader_SOURCE_DIR}/include - CACHE PATH "Path to Level Zero Headers") - message(STATUS "Level Zero include directory: ${LEVEL_ZERO_INCLUDE_DIRS}") -endif() - add_subdirectory(utils) set(UMF_LIBS $) @@ -67,80 +46,64 @@ set(UMF_SOURCES ${BA_SOURCES} libumf.c ipc.c + ipc_cache.c memory_pool.c memory_provider.c memory_provider_get_last_failed.c - memory_target.c + memtarget.c mempolicy.c memspace.c + memspaces/memspace_host_all.c + memspaces/memspace_highest_capacity.c + memspaces/memspace_highest_bandwidth.c + memspaces/memspace_lowest_latency.c + memspaces/memspace_numa.c + provider/provider_coarse.c + provider/provider_cuda.c + provider/provider_devdax_memory.c + provider/provider_file_memory.c + provider/provider_level_zero.c + provider/provider_os_memory.c provider/provider_tracking.c 
critnib/critnib.c + ravl/ravl.c pool/pool_proxy.c pool/pool_scalable.c) if(NOT UMF_DISABLE_HWLOC) - set(UMF_SOURCES ${UMF_SOURCES} ${HWLOC_DEPENDENT_SOURCES}) + set(UMF_SOURCES ${UMF_SOURCES} ${HWLOC_DEPENDENT_SOURCES} + memtargets/memtarget_numa.c) + set(UMF_LIBS ${UMF_LIBS} ${LIBHWLOC_LIBRARIES}) + set(UMF_PRIVATE_LIBRARY_DIRS ${UMF_PRIVATE_LIBRARY_DIRS} + ${LIBHWLOC_LIBRARY_DIRS}) +else() + set(UMF_COMMON_COMPILE_DEFINITIONS ${UMF_COMMON_COMPILE_DEFINITIONS} + "UMF_NO_HWLOC=1") endif() set(UMF_SOURCES_LINUX libumf_linux.c) - set(UMF_SOURCES_MACOSX libumf_linux.c) - set(UMF_SOURCES_WINDOWS libumf_windows.c) -set(UMF_SOURCES_COMMON_LINUX_MACOSX - provider/provider_os_memory.c - provider/provider_os_memory_posix.c - memory_targets/memory_target_numa.c - memspaces/memspace_numa.c - memspaces/memspace_host_all.c - memspaces/memspace_highest_capacity.c - memspaces/memspace_highest_bandwidth.c - memspaces/memspace_lowest_latency.c) - -if(NOT UMF_DISABLE_HWLOC) - set(UMF_SOURCES_LINUX - ${UMF_SOURCES_LINUX} ${UMF_SOURCES_COMMON_LINUX_MACOSX} - provider/provider_os_memory_linux.c) - - set(UMF_SOURCES_MACOSX - ${UMF_SOURCES_MACOSX} ${UMF_SOURCES_COMMON_LINUX_MACOSX} - provider/provider_os_memory_macosx.c) - - set(UMF_SOURCES_WINDOWS - ${UMF_SOURCES_WINDOWS} provider/provider_os_memory.c - provider/provider_os_memory_windows.c) - - set(UMF_LIBS ${UMF_LIBS} ${LIBHWLOC_LIBRARIES}) - - if(NOT WINDOWS) - add_optional_symbol(umfMemspaceCreateFromNumaArray) - add_optional_symbol(umfMemspaceHighestBandwidthGet) - add_optional_symbol(umfMemspaceHighestCapacityGet) - add_optional_symbol(umfMemspaceHostAllGet) - add_optional_symbol(umfMemspaceLowestLatencyGet) - endif() +# Add compile definitions to handle unsupported functions +if(NOT UMF_BUILD_CUDA_PROVIDER) + set(UMF_COMMON_COMPILE_DEFINITIONS ${UMF_COMMON_COMPILE_DEFINITIONS} + "UMF_NO_CUDA_PROVIDER=1") endif() - -if(WINDOWS) - message(STATUS "UMF_OPTIONAL_SYMBOLS: ${UMF_OPTIONAL_SYMBOLS_WINDOWS}") -else() - message(STATUS 
"UMF_OPTIONAL_SYMBOLS: ${UMF_OPTIONAL_SYMBOLS_LINUX}") +if(NOT UMF_BUILD_LEVEL_ZERO_PROVIDER) + set(UMF_COMMON_COMPILE_DEFINITIONS ${UMF_COMMON_COMPILE_DEFINITIONS} + "UMF_NO_LEVEL_ZERO_PROVIDER=1") +endif() +if(UMF_DISABLE_HWLOC OR WINDOWS) + set(UMF_COMMON_COMPILE_DEFINITIONS ${UMF_COMMON_COMPILE_DEFINITIONS} + "UMF_NO_DEVDAX_PROVIDER=1") + set(UMF_COMMON_COMPILE_DEFINITIONS ${UMF_COMMON_COMPILE_DEFINITIONS} + "UMF_NO_FILE_PROVIDER=1") endif() - -# Configure the DEF file based on whether Level Zero provider is built -configure_file("${CMAKE_CURRENT_SOURCE_DIR}/libumf.def.in" - "${CMAKE_CURRENT_BINARY_DIR}/libumf.def" @ONLY) - -configure_file("${CMAKE_CURRENT_SOURCE_DIR}/libumf.map.in" - "${CMAKE_CURRENT_BINARY_DIR}/libumf.map" @ONLY) - -set(UMF_PRIVATE_LIBRARY_DIRS ${UMF_PRIVATE_LIBRARY_DIRS} - ${LIBHWLOC_LIBRARY_DIRS}) if(LINUX) set(UMF_SOURCES ${UMF_SOURCES} ${UMF_SOURCES_LINUX}) - set(UMF_LIBS ${UMF_LIBS} dl rt) # librt for shm_open() + set(UMF_LIBS ${UMF_LIBS} dl) elseif(WINDOWS) set(UMF_SOURCES ${UMF_SOURCES} ${UMF_SOURCES_WINDOWS}) @@ -153,6 +116,10 @@ elseif(MACOSX) endif() if(UMF_BUILD_SHARED_LIBRARY) + if(UMF_INSTALL_RPATH) + set(CMAKE_INSTALL_RPATH "${UMF_INSTALL_RPATH}") + endif() + if(NOT UMF_DISABLE_HWLOC) set(HWLOC_LIB ${UMF_HWLOC_NAME}) endif() @@ -161,8 +128,8 @@ if(UMF_BUILD_SHARED_LIBRARY) TYPE SHARED SRCS ${UMF_SOURCES} LIBS ${UMF_LIBS} ${HWLOC_LIB} - LINUX_MAP_FILE ${CMAKE_CURRENT_BINARY_DIR}/libumf.map - WINDOWS_DEF_FILE ${CMAKE_CURRENT_BINARY_DIR}/libumf.def) + LINUX_MAP_FILE ${CMAKE_CURRENT_SOURCE_DIR}/libumf.map + WINDOWS_DEF_FILE ${CMAKE_CURRENT_SOURCE_DIR}/libumf.def) set(UMF_COMMON_COMPILE_DEFINITIONS ${UMF_COMMON_COMPILE_DEFINITIONS} "UMF_SHARED_LIBRARY") set_target_properties( @@ -178,11 +145,6 @@ else() LIBS ${UMF_LIBS}) endif() -if(UMF_DISABLE_HWLOC) - set(UMF_COMMON_COMPILE_DEFINITIONS ${UMF_COMMON_COMPILE_DEFINITIONS} - UMF_NO_HWLOC=1) -endif() - if(UMF_LINK_HWLOC_STATICALLY) add_dependencies(umf ${UMF_HWLOC_NAME}) endif() @@ 
-192,8 +154,6 @@ target_link_directories(umf PRIVATE ${UMF_PRIVATE_LIBRARY_DIRS}) target_compile_definitions(umf PRIVATE ${UMF_COMMON_COMPILE_DEFINITIONS}) if(UMF_BUILD_LEVEL_ZERO_PROVIDER) - target_sources(umf PRIVATE provider/provider_level_zero.c) - if(LINUX) # WA for error ze_api.h:14234:20: no newline at end of file # [-Werror,-Wnewline-eof] @@ -206,6 +166,11 @@ if(UMF_BUILD_LEVEL_ZERO_PROVIDER) "UMF_BUILD_LEVEL_ZERO_PROVIDER=1") endif() +if(UMF_BUILD_CUDA_PROVIDER) + set(UMF_COMPILE_DEFINITIONS ${UMF_COMPILE_DEFINITIONS} + "UMF_BUILD_CUDA_PROVIDER=1") +endif() + add_library(${PROJECT_NAME}::umf ALIAS umf) if(LIBHWLOC_INCLUDE_DIRS) @@ -216,22 +181,26 @@ if(LEVEL_ZERO_INCLUDE_DIRS) target_include_directories(umf PRIVATE ${LEVEL_ZERO_INCLUDE_DIRS}) endif() +if(CUDA_INCLUDE_DIRS) + target_include_directories(umf PRIVATE ${CUDA_INCLUDE_DIRS}) +endif() + target_include_directories( umf PUBLIC $ $ + $ $ $ $ - $ + $ + $ $) install(TARGETS umf EXPORT ${PROJECT_NAME}-targets) add_subdirectory(pool) -if(UMF_PROXY_LIB_ENABLED - AND NOT UMF_LINK_HWLOC_STATICALLY - AND NOT UMF_DISABLE_HWLOC) +if(UMF_PROXY_LIB_ENABLED) add_subdirectory(proxy_lib) endif() diff --git a/src/base_alloc/base_alloc.c b/src/base_alloc/base_alloc.c index 144f5423b..209ace7fe 100644 --- a/src/base_alloc/base_alloc.c +++ b/src/base_alloc/base_alloc.c @@ -36,7 +36,7 @@ struct umf_ba_chunk_t { struct umf_ba_main_pool_meta_t { size_t pool_size; // size of each pool (argument of each ba_os_alloc() call) size_t chunk_size; // size of all memory chunks in this pool - os_mutex_t free_lock; // lock of free_list + utils_mutex_t free_lock; // lock of free_list umf_ba_chunk_t *free_list; // list of free chunks size_t n_allocs; // number of allocated chunks #ifndef NDEBUG @@ -134,8 +134,11 @@ static void *ba_os_alloc_annotated(size_t pool_size) { } umf_ba_pool_t *umf_ba_create(size_t size) { - size_t chunk_size = ALIGN_UP(size, MEMORY_ALIGNMENT); - size_t mutex_size = ALIGN_UP(util_mutex_get_size(), 
MEMORY_ALIGNMENT); + size_t chunk_size = ALIGN_UP_SAFE(size, MEMORY_ALIGNMENT); + if (chunk_size == 0) { + return NULL; + } + size_t mutex_size = ALIGN_UP(utils_mutex_get_size(), MEMORY_ALIGNMENT); size_t metadata_size = sizeof(struct umf_ba_main_pool_meta_t); size_t pool_size = sizeof(void *) + metadata_size + mutex_size + @@ -144,7 +147,10 @@ umf_ba_pool_t *umf_ba_create(size_t size) { pool_size = MINIMUM_POOL_SIZE; } - pool_size = ALIGN_UP(pool_size, ba_os_get_page_size()); + pool_size = ALIGN_UP_SAFE(pool_size, ba_os_get_page_size()); + if (pool_size == 0) { + return NULL; + } umf_ba_pool_t *pool = (umf_ba_pool_t *)ba_os_alloc_annotated(pool_size); if (!pool) { @@ -168,10 +174,11 @@ umf_ba_pool_t *umf_ba_create(size_t size) { char *data_ptr = (char *)&pool->data; size_t size_left = pool_size - offsetof(umf_ba_pool_t, data); - util_align_ptr_size((void **)&data_ptr, &size_left, MEMORY_ALIGNMENT); + utils_align_ptr_up_size_down((void **)&data_ptr, &size_left, + MEMORY_ALIGNMENT); // init free_lock - os_mutex_t *mutex = util_mutex_init(&pool->metadata.free_lock); + utils_mutex_t *mutex = utils_mutex_init(&pool->metadata.free_lock); if (!mutex) { ba_os_free(pool, pool_size); return NULL; @@ -184,13 +191,13 @@ umf_ba_pool_t *umf_ba_create(size_t size) { } void *umf_ba_alloc(umf_ba_pool_t *pool) { - util_mutex_lock(&pool->metadata.free_lock); + utils_mutex_lock(&pool->metadata.free_lock); if (pool->metadata.free_list == NULL) { umf_ba_next_pool_t *new_pool = (umf_ba_next_pool_t *)ba_os_alloc_annotated( pool->metadata.pool_size); if (!new_pool) { - util_mutex_unlock(&pool->metadata.free_lock); + utils_mutex_unlock(&pool->metadata.free_lock); return NULL; } @@ -209,7 +216,8 @@ void *umf_ba_alloc(umf_ba_pool_t *pool) { size_t size_left = pool->metadata.pool_size - offsetof(umf_ba_next_pool_t, data); - util_align_ptr_size((void **)&data_ptr, &size_left, MEMORY_ALIGNMENT); + utils_align_ptr_up_size_down((void **)&data_ptr, &size_left, + MEMORY_ALIGNMENT); 
ba_divide_memory_into_chunks(pool, data_ptr, size_left); } @@ -234,7 +242,7 @@ void *umf_ba_alloc(umf_ba_pool_t *pool) { VALGRIND_DO_MALLOCLIKE_BLOCK(chunk, pool->metadata.chunk_size, 0, 0); utils_annotate_memory_undefined(chunk, pool->metadata.chunk_size); - util_mutex_unlock(&pool->metadata.free_lock); + utils_mutex_unlock(&pool->metadata.free_lock); return chunk; } @@ -269,7 +277,7 @@ void umf_ba_free(umf_ba_pool_t *pool, void *ptr) { umf_ba_chunk_t *chunk = (umf_ba_chunk_t *)ptr; - util_mutex_lock(&pool->metadata.free_lock); + utils_mutex_lock(&pool->metadata.free_lock); assert(pool_contains_pointer(pool, ptr)); chunk->next = pool->metadata.free_list; pool->metadata.free_list = chunk; @@ -281,14 +289,14 @@ void umf_ba_free(umf_ba_pool_t *pool, void *ptr) { VALGRIND_DO_FREELIKE_BLOCK(chunk, 0); utils_annotate_memory_inaccessible(chunk, pool->metadata.chunk_size); - util_mutex_unlock(&pool->metadata.free_lock); + utils_mutex_unlock(&pool->metadata.free_lock); } void umf_ba_destroy(umf_ba_pool_t *pool) { // Do not destroy if we are running in the proxy library, // because it may need those resources till // the very end of exiting the application. 
- if (pool->metadata.n_allocs && util_is_running_in_proxy_lib()) { + if (pool->metadata.n_allocs && utils_is_running_in_proxy_lib()) { return; } @@ -308,6 +316,6 @@ void umf_ba_destroy(umf_ba_pool_t *pool) { ba_os_free(current_pool, size); } - util_mutex_destroy_not_free(&pool->metadata.free_lock); + utils_mutex_destroy_not_free(&pool->metadata.free_lock); ba_os_free(pool, size); } diff --git a/src/base_alloc/base_alloc_global.c b/src/base_alloc/base_alloc_global.c index b5660d440..2aca5d29c 100644 --- a/src/base_alloc/base_alloc_global.c +++ b/src/base_alloc/base_alloc_global.c @@ -67,6 +67,8 @@ static void umf_ba_create_global(void) { size_t smallestSize = BASE_ALLOC.ac_sizes[0]; BASE_ALLOC.smallest_ac_size_log2 = log2Utils(smallestSize); + + LOG_DEBUG("UMF base allocator created"); } // returns index of the allocation class for a given size @@ -96,8 +98,12 @@ static void *add_metadata_and_align(void *ptr, size_t size, size_t alignment) { if (alignment <= ALLOC_METADATA_SIZE) { user_ptr = (void *)((uintptr_t)ptr + ALLOC_METADATA_SIZE); } else { - user_ptr = - (void *)ALIGN_UP((uintptr_t)ptr + ALLOC_METADATA_SIZE, alignment); + user_ptr = (void *)ALIGN_UP_SAFE((uintptr_t)ptr + ALLOC_METADATA_SIZE, + alignment); + if (!user_ptr) { + LOG_ERR("base_alloc: pointer alignment overflow"); + return NULL; + } } size_t ptr_offset_from_original = (uintptr_t)user_ptr - (uintptr_t)ptr; @@ -149,24 +155,30 @@ static void *get_original_alloc(void *user_ptr, size_t *total_size, } void *umf_ba_global_aligned_alloc(size_t size, size_t alignment) { - util_init_once(&ba_is_initialized, umf_ba_create_global); + utils_init_once(&ba_is_initialized, umf_ba_create_global); if (size == 0) { return NULL; } + if (size > SIZE_MAX - ALLOC_METADATA_SIZE) { + LOG_ERR("base_alloc: allocation size (%zu) too large.", size); + return NULL; + } + // for metadata size += ALLOC_METADATA_SIZE; if (alignment > ALLOC_METADATA_SIZE) { + if (size > SIZE_MAX - alignment) { + LOG_ERR("base_alloc: allocation 
size (%zu) too large.", size); + return NULL; + } size += alignment; } int ac_index = size_to_idx(size); if (ac_index >= NUM_ALLOCATION_CLASSES) { - LOG_WARN("base_alloc: allocation size (%zu) larger than the biggest " - "allocation class. Falling back to OS memory allocation.", - size); return add_metadata_and_align(ba_os_alloc(size), size, alignment); } diff --git a/src/base_alloc/base_alloc_linear.c b/src/base_alloc/base_alloc_linear.c index be7b0943c..a35a6c243 100644 --- a/src/base_alloc/base_alloc_linear.c +++ b/src/base_alloc/base_alloc_linear.c @@ -31,7 +31,7 @@ typedef struct umf_ba_next_linear_pool_t umf_ba_next_linear_pool_t; // metadata is set and used only in the main (the first) pool typedef struct umf_ba_main_linear_pool_meta_t { size_t pool_size; // size of this pool (argument of ba_os_alloc() call) - os_mutex_t lock; + utils_mutex_t lock; char *data_ptr; size_t size_left; size_t pool_n_allocs; // number of allocations in this pool @@ -88,7 +88,11 @@ umf_ba_linear_pool_t *umf_ba_linear_create(size_t pool_size) { pool_size = MINIMUM_LINEAR_POOL_SIZE; } - pool_size = ALIGN_UP(pool_size, ba_os_get_page_size()); + pool_size = ALIGN_UP_SAFE(pool_size, ba_os_get_page_size()); + if (pool_size == 0) { + LOG_ERR("pool_size page alignment overflow"); + return NULL; + } umf_ba_linear_pool_t *pool = (umf_ba_linear_pool_t *)ba_os_alloc(pool_size); if (!pool) { @@ -98,7 +102,7 @@ umf_ba_linear_pool_t *umf_ba_linear_create(size_t pool_size) { void *data_ptr = &pool->data; size_t size_left = pool_size - offsetof(umf_ba_linear_pool_t, data); - util_align_ptr_size(&data_ptr, &size_left, MEMORY_ALIGNMENT); + utils_align_ptr_up_size_down(&data_ptr, &size_left, MEMORY_ALIGNMENT); pool->metadata.pool_size = pool_size; pool->metadata.data_ptr = data_ptr; @@ -109,7 +113,7 @@ umf_ba_linear_pool_t *umf_ba_linear_create(size_t pool_size) { _DEBUG_EXECUTE(pool->metadata.global_n_allocs = 0); // init lock - os_mutex_t *lock = util_mutex_init(&pool->metadata.lock); + 
utils_mutex_t *lock = utils_mutex_init(&pool->metadata.lock); if (!lock) { ba_os_free(pool, pool_size); return NULL; @@ -122,15 +126,24 @@ void *umf_ba_linear_alloc(umf_ba_linear_pool_t *pool, size_t size) { if (size == 0) { return NULL; } - size_t aligned_size = ALIGN_UP(size, MEMORY_ALIGNMENT); - util_mutex_lock(&pool->metadata.lock); + size_t aligned_size = ALIGN_UP_SAFE(size, MEMORY_ALIGNMENT); + if (aligned_size == 0) { + LOG_ERR("size alignment overflow"); + return NULL; + } + utils_mutex_lock(&pool->metadata.lock); if (pool->metadata.size_left < aligned_size) { size_t pool_size = MINIMUM_LINEAR_POOL_SIZE; size_t usable_size = pool_size - offsetof(umf_ba_next_linear_pool_t, data); if (usable_size < aligned_size) { pool_size += aligned_size - usable_size; - pool_size = ALIGN_UP(pool_size, ba_os_get_page_size()); + pool_size = ALIGN_UP_SAFE(pool_size, ba_os_get_page_size()); + if (pool_size == 0) { + utils_mutex_unlock(&pool->metadata.lock); + LOG_ERR("pool_size page alignment overflow"); + return NULL; + } } assert(pool_size - offsetof(umf_ba_next_linear_pool_t, data) >= @@ -139,7 +152,7 @@ void *umf_ba_linear_alloc(umf_ba_linear_pool_t *pool, size_t size) { umf_ba_next_linear_pool_t *new_pool = (umf_ba_next_linear_pool_t *)ba_os_alloc(pool_size); if (!new_pool) { - util_mutex_unlock(&pool->metadata.lock); + utils_mutex_unlock(&pool->metadata.lock); return NULL; } @@ -149,7 +162,7 @@ void *umf_ba_linear_alloc(umf_ba_linear_pool_t *pool, size_t size) { void *data_ptr = &new_pool->data; size_t size_left = new_pool->pool_size - offsetof(umf_ba_next_linear_pool_t, data); - util_align_ptr_size(&data_ptr, &size_left, MEMORY_ALIGNMENT); + utils_align_ptr_up_size_down(&data_ptr, &size_left, MEMORY_ALIGNMENT); pool->metadata.data_ptr = data_ptr; pool->metadata.size_left = size_left; @@ -171,7 +184,7 @@ void *umf_ba_linear_alloc(umf_ba_linear_pool_t *pool, size_t size) { } _DEBUG_EXECUTE(pool->metadata.global_n_allocs++); _DEBUG_EXECUTE(ba_debug_checks(pool)); - 
util_mutex_unlock(&pool->metadata.lock); + utils_mutex_unlock(&pool->metadata.lock); return ptr; } @@ -188,7 +201,7 @@ static inline int pool_contains_ptr(void *pool, size_t pool_size, // 0 - ptr belonged to the pool and was freed // -1 - ptr doesn't belong to the pool and wasn't freed int umf_ba_linear_free(umf_ba_linear_pool_t *pool, void *ptr) { - util_mutex_lock(&pool->metadata.lock); + utils_mutex_lock(&pool->metadata.lock); _DEBUG_EXECUTE(ba_debug_checks(pool)); if (pool_contains_ptr(pool, pool->metadata.pool_size, pool->data, ptr)) { pool->metadata.pool_n_allocs--; @@ -204,7 +217,7 @@ int umf_ba_linear_free(umf_ba_linear_pool_t *pool, void *ptr) { pool->metadata.pool_size = page_size; } _DEBUG_EXECUTE(ba_debug_checks(pool)); - util_mutex_unlock(&pool->metadata.lock); + utils_mutex_unlock(&pool->metadata.lock); return 0; } @@ -227,14 +240,14 @@ int umf_ba_linear_free(umf_ba_linear_pool_t *pool, void *ptr) { ba_os_free(next_pool_ptr, size); } _DEBUG_EXECUTE(ba_debug_checks(pool)); - util_mutex_unlock(&pool->metadata.lock); + utils_mutex_unlock(&pool->metadata.lock); return 0; } prev_pool = next_pool; next_pool = next_pool->next_pool; } - util_mutex_unlock(&pool->metadata.lock); + utils_mutex_unlock(&pool->metadata.lock); // ptr doesn't belong to the pool and wasn't freed return -1; } @@ -243,7 +256,7 @@ void umf_ba_linear_destroy(umf_ba_linear_pool_t *pool) { // Do not destroy if we are running in the proxy library, // because it may need those resources till // the very end of exiting the application. 
- if (util_is_running_in_proxy_lib()) { + if (utils_is_running_in_proxy_lib()) { return; } @@ -262,7 +275,7 @@ void umf_ba_linear_destroy(umf_ba_linear_pool_t *pool) { ba_os_free(current_pool, current_pool->pool_size); } - util_mutex_destroy_not_free(&pool->metadata.lock); + utils_mutex_destroy_not_free(&pool->metadata.lock); ba_os_free(pool, pool->metadata.pool_size); } @@ -272,12 +285,12 @@ void umf_ba_linear_destroy(umf_ba_linear_pool_t *pool) { // to the end of the pool if ptr belongs to the pool size_t umf_ba_linear_pool_contains_pointer(umf_ba_linear_pool_t *pool, void *ptr) { - util_mutex_lock(&pool->metadata.lock); + utils_mutex_lock(&pool->metadata.lock); char *cptr = (char *)ptr; if (cptr >= pool->data && cptr < ((char *)(pool)) + pool->metadata.pool_size) { size_t size = ((char *)(pool)) + pool->metadata.pool_size - cptr; - util_mutex_unlock(&pool->metadata.lock); + utils_mutex_unlock(&pool->metadata.lock); return size; } @@ -286,12 +299,12 @@ size_t umf_ba_linear_pool_contains_pointer(umf_ba_linear_pool_t *pool, if (cptr >= next_pool->data && cptr < ((char *)(next_pool)) + next_pool->pool_size) { size_t size = ((char *)(next_pool)) + next_pool->pool_size - cptr; - util_mutex_unlock(&pool->metadata.lock); + utils_mutex_unlock(&pool->metadata.lock); return size; } next_pool = next_pool->next_pool; } - util_mutex_unlock(&pool->metadata.lock); + utils_mutex_unlock(&pool->metadata.lock); return 0; } diff --git a/src/base_alloc/base_alloc_linux.c b/src/base_alloc/base_alloc_linux.c index 3e5456b2c..260eec5aa 100644 --- a/src/base_alloc/base_alloc_linux.c +++ b/src/base_alloc/base_alloc_linux.c @@ -37,6 +37,6 @@ void ba_os_free(void *ptr, size_t size) { static void _ba_os_init_page_size(void) { Page_size = sysconf(_SC_PAGE_SIZE); } size_t ba_os_get_page_size(void) { - util_init_once(&Page_size_is_initialized, _ba_os_init_page_size); + utils_init_once(&Page_size_is_initialized, _ba_os_init_page_size); return Page_size; } diff --git 
a/src/base_alloc/base_alloc_windows.c b/src/base_alloc/base_alloc_windows.c index 6f6c58fbc..2e9da23d9 100644 --- a/src/base_alloc/base_alloc_windows.c +++ b/src/base_alloc/base_alloc_windows.c @@ -28,6 +28,6 @@ static void _ba_os_init_page_size(void) { } size_t ba_os_get_page_size(void) { - util_init_once(&Page_size_is_initialized, _ba_os_init_page_size); + utils_init_once(&Page_size_is_initialized, _ba_os_init_page_size); return Page_size; } diff --git a/src/cpp_helpers.hpp b/src/cpp_helpers.hpp index 86204a20e..6316ccbc7 100644 --- a/src/cpp_helpers.hpp +++ b/src/cpp_helpers.hpp @@ -79,12 +79,12 @@ template umf_memory_pool_ops_t poolOpsBase() { return ops; } -template umf_memory_provider_ops_t providerOpsBase() { +template constexpr umf_memory_provider_ops_t providerOpsBase() { umf_memory_provider_ops_t ops{}; ops.version = UMF_VERSION_CURRENT; ops.finalize = [](void *obj) { delete reinterpret_cast(obj); }; UMF_ASSIGN_OP(ops, T, alloc, UMF_RESULT_ERROR_UNKNOWN); - UMF_ASSIGN_OP(ops, T, free, UMF_RESULT_ERROR_UNKNOWN); + UMF_ASSIGN_OP(ops.ext, T, free, UMF_RESULT_ERROR_UNKNOWN); UMF_ASSIGN_OP_NORETURN(ops, T, get_last_native_error); UMF_ASSIGN_OP(ops, T, get_recommended_page_size, UMF_RESULT_ERROR_UNKNOWN); UMF_ASSIGN_OP(ops, T, get_min_page_size, UMF_RESULT_ERROR_UNKNOWN); @@ -134,7 +134,7 @@ template umf_memory_pool_ops_t poolMakeCOps() { // C API. 'params' from ops.initialize will be casted to 'ParamType*' // and passed to T::initialize() function. 
template -umf_memory_provider_ops_t providerMakeCOps() { +constexpr umf_memory_provider_ops_t providerMakeCOps() { umf_memory_provider_ops_t ops = detail::providerOpsBase(); ops.initialize = []([[maybe_unused]] void *params, void **obj) { diff --git a/src/critnib/critnib.c b/src/critnib/critnib.c index c8ee202e8..62d14af73 100644 --- a/src/critnib/critnib.c +++ b/src/critnib/critnib.c @@ -130,25 +130,25 @@ struct critnib { uint64_t remove_count; - struct os_mutex_t mutex; /* writes/removes */ + struct utils_mutex_t mutex; /* writes/removes */ }; /* * atomic load */ static void load(void *src, void *dst) { - util_atomic_load_acquire((word *)src, (word *)dst); + utils_atomic_load_acquire((word *)src, (word *)dst); } static void load64(uint64_t *src, uint64_t *dst) { - util_atomic_load_acquire(src, dst); + utils_atomic_load_acquire(src, dst); } /* * atomic store */ static void store(void *dst, void *src) { - util_atomic_store_release((word *)dst, (word)src); + utils_atomic_store_release((word *)dst, (word)src); } /* @@ -187,7 +187,7 @@ struct critnib *critnib_new(void) { memset(c, 0, sizeof(struct critnib)); - void *mutex_ptr = util_mutex_init(&c->mutex); + void *mutex_ptr = utils_mutex_init(&c->mutex); if (!mutex_ptr) { goto err_free_critnib; } @@ -226,7 +226,7 @@ void critnib_delete(struct critnib *c) { delete_node(c, c->root); } - util_mutex_destroy_not_free(&c->mutex); + utils_mutex_destroy_not_free(&c->mutex); for (struct critnib_node *m = c->deleted_node; m;) { struct critnib_node *mm = m->child[0]; @@ -315,7 +315,7 @@ static struct critnib_leaf *alloc_leaf(struct critnib *__restrict c) { } /* - * crinib_insert -- write a key:value pair to the critnib structure + * critnib_insert -- write a key:value pair to the critnib structure * * Returns: * • 0 on success @@ -325,11 +325,11 @@ static struct critnib_leaf *alloc_leaf(struct critnib *__restrict c) { * Takes a global write lock but doesn't stall any readers. 
*/ int critnib_insert(struct critnib *c, word key, void *value, int update) { - util_mutex_lock(&c->mutex); + utils_mutex_lock(&c->mutex); struct critnib_leaf *k = alloc_leaf(c); if (!k) { - util_mutex_unlock(&c->mutex); + utils_mutex_unlock(&c->mutex); return ENOMEM; } @@ -345,7 +345,7 @@ int critnib_insert(struct critnib *c, word key, void *value, int update) { if (!n) { store(&c->root, kn); - util_mutex_unlock(&c->mutex); + utils_mutex_unlock(&c->mutex); return 0; } @@ -363,7 +363,7 @@ int critnib_insert(struct critnib *c, word key, void *value, int update) { n = prev; store(&n->child[slice_index(key, n->shift)], kn); - util_mutex_unlock(&c->mutex); + utils_mutex_unlock(&c->mutex); return 0; } @@ -377,22 +377,22 @@ int critnib_insert(struct critnib *c, word key, void *value, int update) { if (update) { to_leaf(n)->value = value; - util_mutex_unlock(&c->mutex); + utils_mutex_unlock(&c->mutex); return 0; } else { - util_mutex_unlock(&c->mutex); + utils_mutex_unlock(&c->mutex); return EEXIST; } } /* and convert that to an index. 
*/ - sh_t sh = util_mssb_index(at) & (sh_t) ~(SLICE - 1); + sh_t sh = utils_mssb_index(at) & (sh_t) ~(SLICE - 1); struct critnib_node *m = alloc_node(c); if (!m) { free_leaf(c, to_leaf(kn)); - util_mutex_unlock(&c->mutex); + utils_mutex_unlock(&c->mutex); return ENOMEM; } @@ -408,7 +408,7 @@ int critnib_insert(struct critnib *c, word key, void *value, int update) { m->path = key & path_mask(sh); store(parent, m); - util_mutex_unlock(&c->mutex); + utils_mutex_unlock(&c->mutex); return 0; } @@ -420,14 +420,14 @@ void *critnib_remove(struct critnib *c, word key) { struct critnib_leaf *k; void *value = NULL; - util_mutex_lock(&c->mutex); + utils_mutex_lock(&c->mutex); struct critnib_node *n = c->root; if (!n) { goto not_found; } - word del = (util_atomic_increment(&c->remove_count) - 1) % DELETED_LIFE; + word del = (utils_atomic_increment(&c->remove_count) - 1) % DELETED_LIFE; free_node(c, c->pending_del_nodes[del]); free_leaf(c, c->pending_del_leaves[del]); c->pending_del_nodes[del] = NULL; @@ -490,7 +490,7 @@ void *critnib_remove(struct critnib *c, word key) { c->pending_del_leaves[del] = k; not_found: - util_mutex_unlock(&c->mutex); + utils_mutex_unlock(&c->mutex); return value; } @@ -813,9 +813,9 @@ static int iter(struct critnib_node *__restrict n, word min, word max, void critnib_iter(critnib *c, uintptr_t min, uintptr_t max, int (*func)(uintptr_t key, void *value, void *privdata), void *privdata) { - util_mutex_lock(&c->mutex); + utils_mutex_lock(&c->mutex); if (c->root) { iter(c->root, min, max, func, privdata); } - util_mutex_unlock(&c->mutex); + utils_mutex_unlock(&c->mutex); } diff --git a/src/ipc.c b/src/ipc.c index b266004f3..1b479fd7c 100644 --- a/src/ipc.c +++ b/src/ipc.c @@ -51,6 +51,11 @@ umf_result_t umfPoolGetIPCHandleSize(umf_memory_pool_handle_t hPool, umf_result_t umfGetIPCHandle(const void *ptr, umf_ipc_handle_t *umfIPCHandle, size_t *size) { + if (ptr == NULL || umfIPCHandle == NULL || size == NULL) { + LOG_ERR("invalid argument."); + return 
UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + size_t ipcHandleSize = 0; umf_alloc_info_t allocInfo; umf_result_t ret = umfMemoryTrackerGetAllocInfo(ptr, &allocInfo); @@ -85,6 +90,8 @@ umf_result_t umfGetIPCHandle(const void *ptr, umf_ipc_handle_t *umfIPCHandle, return ret; } + // ipcData->handle_id is filled by tracking provider + ipcData->base = allocInfo.base; ipcData->pid = utils_getpid(); ipcData->baseSize = allocInfo.baseSize; ipcData->offset = (uintptr_t)ptr - (uintptr_t)allocInfo.base; @@ -102,7 +109,7 @@ umf_result_t umfPutIPCHandle(umf_ipc_handle_t umfIPCHandle) { // implementation does nothing in Put function. Tracking memory // provider relies on IPC cache and actually Put IPC handle back // to upstream memory provider when umfMemoryProviderFree is called. - // To support incapsulation we should not take into account + // To support encapsulation we should not take into account // implementation details of tracking memory provider and find the // appropriate pool, get memory provider of that pool and call // umfMemoryProviderPutIPCHandle(hProvider, @@ -112,12 +119,18 @@ umf_result_t umfPutIPCHandle(umf_ipc_handle_t umfIPCHandle) { return ret; } -umf_result_t umfOpenIPCHandle(umf_memory_pool_handle_t hPool, +umf_result_t umfOpenIPCHandle(umf_ipc_handler_handle_t hIPCHandler, umf_ipc_handle_t umfIPCHandle, void **ptr) { - // We cannot use umfPoolGetMemoryProvider function because it returns - // upstream provider but we need tracking one - umf_memory_provider_handle_t hProvider = hPool->provider; + // IPC handler is an instance of tracking memory provider + if (*(uint32_t *)hIPCHandler != UMF_VERSION_CURRENT) { + // It is a temporary hack to verify that user passes correct IPC handler, + // not a pool handle, as it was required in previous version. 
+ LOG_ERR("Invalid IPC handler."); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + umf_memory_provider_handle_t hProvider = hIPCHandler; void *base = NULL; umf_result_t ret = umfMemoryProviderOpenIPCHandle( @@ -146,3 +159,26 @@ umf_result_t umfCloseIPCHandle(void *ptr) { return umfMemoryProviderCloseIPCHandle(hProvider, allocInfo.base, allocInfo.baseSize); } + +umf_result_t umfPoolGetIPCHandler(umf_memory_pool_handle_t hPool, + umf_ipc_handler_handle_t *hIPCHandler) { + if (hPool == NULL) { + LOG_ERR("Pool handle is NULL."); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + if (hIPCHandler == NULL) { + LOG_ERR("hIPCHandler is NULL."); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + // We cannot use umfPoolGetMemoryProvider function because it returns + // upstream provider but we need tracking one + umf_memory_provider_handle_t hProvider = hPool->provider; + + // We are using tracking provider as an IPC handler because + // it is doing IPC caching. + *hIPCHandler = (umf_ipc_handler_handle_t)hProvider; + + return UMF_RESULT_SUCCESS; +} diff --git a/src/ipc_cache.c b/src/ipc_cache.c new file mode 100644 index 000000000..60072d4df --- /dev/null +++ b/src/ipc_cache.c @@ -0,0 +1,244 @@ +/* + * + * Copyright (C) 2024 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + */ + +#include + +#include "base_alloc_global.h" +#include "ipc_cache.h" +#include "uthash.h" +#include "utils_common.h" +#include "utils_concurrency.h" +#include "utils_log.h" +#include "utlist.h" + +// HASH_ADD macro produces `warning C4702: unreachable code` on MSVC +#ifdef _MSC_VER +#pragma warning(disable : 4702) +#endif + +struct ipc_handle_cache_entry_t; + +typedef struct ipc_handle_cache_entry_t *hash_map_t; +typedef struct ipc_handle_cache_entry_t *lru_list_t; + +typedef struct ipc_handle_cache_entry_t { + UT_hash_handle hh; + struct ipc_handle_cache_entry_t *next, *prev; + ipc_mapped_handle_cache_key_t key; + uint64_t ref_count; + uint64_t handle_id; + hash_map_t + *hash_table; // pointer to the hash table to which the entry belongs + ipc_mapped_handle_cache_value_t value; +} ipc_handle_cache_entry_t; + +typedef struct ipc_mapped_handle_cache_global_t { + utils_mutex_t cache_lock; + umf_ba_pool_t *cache_allocator; + size_t max_size; + size_t cur_size; + lru_list_t lru_list; +} ipc_mapped_handle_cache_global_t; + +typedef struct ipc_mapped_handle_cache_t { + ipc_mapped_handle_cache_global_t *global; + hash_map_t hash_table; + ipc_mapped_handle_cache_eviction_cb_t eviction_cb; +} ipc_mapped_handle_cache_t; + +ipc_mapped_handle_cache_global_t *IPC_MAPPED_CACHE_GLOBAL = NULL; + +umf_result_t umfIpcCacheGlobalInit(void) { + umf_result_t ret = UMF_RESULT_SUCCESS; + ipc_mapped_handle_cache_global_t *cache_global = + umf_ba_global_alloc(sizeof(*cache_global)); + if (!cache_global) { + LOG_ERR("Failed to allocate memory for the IPC cache global data"); + ret = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + goto err_exit; + } + + if (NULL == utils_mutex_init(&(cache_global->cache_lock))) { + LOG_ERR("Failed to initialize mutex for the IPC global cache"); + ret = UMF_RESULT_ERROR_UNKNOWN; + goto err_cache_global_free; + } + + cache_global->cache_allocator = + umf_ba_create(sizeof(ipc_handle_cache_entry_t)); + 
if (!cache_global->cache_allocator) { + LOG_ERR("Failed to create IPC cache allocator"); + ret = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + goto err_mutex_destroy; + } + + // TODO: make max_size configurable via environment variable + cache_global->max_size = 0; + cache_global->cur_size = 0; + cache_global->lru_list = NULL; + + IPC_MAPPED_CACHE_GLOBAL = cache_global; + goto err_exit; + +err_mutex_destroy: + utils_mutex_destroy_not_free(&(cache_global->cache_lock)); +err_cache_global_free: + umf_ba_global_free(cache_global); +err_exit: + return ret; +} + +#ifndef NDEBUG +static size_t getGlobalLruListSize(lru_list_t lru_list) { + size_t size = 0; + ipc_handle_cache_entry_t *tmp; + DL_COUNT(lru_list, tmp, size); + return size; +} +#endif /* NDEBUG */ + +void umfIpcCacheGlobalTearDown(void) { + ipc_mapped_handle_cache_global_t *cache_global = IPC_MAPPED_CACHE_GLOBAL; + IPC_MAPPED_CACHE_GLOBAL = NULL; + + if (!cache_global) { + return; + } + + assert(cache_global->cur_size == 0); + assert(getGlobalLruListSize(cache_global->lru_list) == 0); + + umf_ba_destroy(cache_global->cache_allocator); + utils_mutex_destroy_not_free(&(cache_global->cache_lock)); + umf_ba_global_free(cache_global); +} + +ipc_mapped_handle_cache_handle_t umfIpcHandleMappedCacheCreate( + ipc_mapped_handle_cache_eviction_cb_t eviction_cb) { + if (eviction_cb == NULL) { + LOG_ERR("Eviction callback is NULL"); + return NULL; + } + + ipc_mapped_handle_cache_t *cache = umf_ba_global_alloc(sizeof(*cache)); + + if (!cache) { + LOG_ERR("Failed to allocate memory for the IPC cache"); + return NULL; + } + + assert(IPC_MAPPED_CACHE_GLOBAL != NULL); + + cache->global = IPC_MAPPED_CACHE_GLOBAL; + cache->hash_table = NULL; + cache->eviction_cb = eviction_cb; + + return cache; +} + +void umfIpcHandleMappedCacheDestroy(ipc_mapped_handle_cache_handle_t cache) { + ipc_handle_cache_entry_t *entry, *tmp; + HASH_ITER(hh, cache->hash_table, entry, tmp) { + DL_DELETE(cache->global->lru_list, entry); + 
HASH_DEL(cache->hash_table, entry); + cache->global->cur_size -= 1; + cache->eviction_cb(&entry->key, &entry->value); + utils_mutex_destroy_not_free(&(entry->value.mmap_lock)); + umf_ba_free(cache->global->cache_allocator, entry); + } + HASH_CLEAR(hh, cache->hash_table); + + umf_ba_global_free(cache); +} + +umf_result_t +umfIpcHandleMappedCacheGet(ipc_mapped_handle_cache_handle_t cache, + const ipc_mapped_handle_cache_key_t *key, + uint64_t handle_id, + ipc_mapped_handle_cache_value_t **retEntry) { + ipc_handle_cache_entry_t *entry = NULL; + umf_result_t ret = UMF_RESULT_SUCCESS; + bool evicted = false; + ipc_mapped_handle_cache_value_t evicted_value; + + if (!cache || !key || !retEntry) { + LOG_ERR("Some arguments are NULL, cache=%p, key=%p, retEntry=%p", + (void *)cache, (const void *)key, (void *)retEntry); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + assert(cache->global != NULL); + + utils_mutex_lock(&(cache->global->cache_lock)); + + HASH_FIND(hh, cache->hash_table, key, sizeof(*key), entry); + if (entry && entry->handle_id == handle_id) { // cache hit + // update frequency list + // remove the entry from the current position + DL_DELETE(cache->global->lru_list, entry); + // add the entry to the head of the list + DL_PREPEND(cache->global->lru_list, entry); + } else { //cache miss + // Look for eviction candidate + if (entry == NULL && cache->global->max_size != 0 && + cache->global->cur_size >= cache->global->max_size) { + // If max_size is set and the cache is full, evict the least recently used entry. 
+ entry = cache->global->lru_list->prev; + } + + if (entry) { // we have eviction candidate + // remove the entry from the frequency list + DL_DELETE(cache->global->lru_list, entry); + // remove the entry from the hash table it belongs to + HASH_DEL(*(entry->hash_table), entry); + cache->global->cur_size -= 1; + evicted_value.mapped_base_ptr = entry->value.mapped_base_ptr; + evicted_value.mapped_size = entry->value.mapped_size; + evicted = true; + } else { // allocate the new entry + entry = umf_ba_alloc(cache->global->cache_allocator); + if (!entry) { + ret = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + LOG_ERR("Failed to allocate memory for a new IPC cache entry"); + goto exit; + } + if (NULL == utils_mutex_init(&(entry->value.mmap_lock))) { + LOG_ERR("Failed to initialize mutex for the IPC cache entry"); + umf_ba_global_free(entry); + ret = UMF_RESULT_ERROR_UNKNOWN; + goto exit; + } + } + + entry->key = *key; + entry->ref_count = 0; + entry->handle_id = handle_id; + entry->hash_table = &cache->hash_table; + entry->value.mapped_size = 0; + entry->value.mapped_base_ptr = NULL; + + HASH_ADD(hh, cache->hash_table, key, sizeof(entry->key), entry); + DL_PREPEND(cache->global->lru_list, entry); + cache->global->cur_size += 1; + } + +exit: + if (ret == UMF_RESULT_SUCCESS) { + utils_atomic_increment(&entry->ref_count); + *retEntry = &entry->value; + } + + utils_mutex_unlock(&(cache->global->cache_lock)); + + if (evicted) { + cache->eviction_cb(key, &evicted_value); + } + + return ret; +} diff --git a/src/ipc_cache.h b/src/ipc_cache.h new file mode 100644 index 000000000..59ae28787 --- /dev/null +++ b/src/ipc_cache.h @@ -0,0 +1,52 @@ +/* + * + * Copyright (C) 2024 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + */ + +#ifndef UMF_IPC_CACHE_H +#define UMF_IPC_CACHE_H 1 + +#include + +#include "utils_concurrency.h" + +typedef struct ipc_mapped_handle_cache_key_t { + void *remote_base_ptr; + umf_memory_provider_handle_t local_provider; + int remote_pid; +} ipc_mapped_handle_cache_key_t; + +typedef struct ipc_mapped_handle_cache_value_t { + void *mapped_base_ptr; + size_t mapped_size; + utils_mutex_t mmap_lock; +} ipc_mapped_handle_cache_value_t; + +struct ipc_mapped_handle_cache_t; + +typedef struct ipc_mapped_handle_cache_t *ipc_mapped_handle_cache_handle_t; + +umf_result_t umfIpcCacheGlobalInit(void); +void umfIpcCacheGlobalTearDown(void); + +// define pointer to the eviction callback function +typedef void (*ipc_mapped_handle_cache_eviction_cb_t)( + const ipc_mapped_handle_cache_key_t *key, + const ipc_mapped_handle_cache_value_t *value); + +ipc_mapped_handle_cache_handle_t umfIpcHandleMappedCacheCreate( + ipc_mapped_handle_cache_eviction_cb_t eviction_cb); + +void umfIpcHandleMappedCacheDestroy(ipc_mapped_handle_cache_handle_t cache); + +umf_result_t +umfIpcHandleMappedCacheGet(ipc_mapped_handle_cache_handle_t cache, + const ipc_mapped_handle_cache_key_t *key, + uint64_t handle_id, + ipc_mapped_handle_cache_value_t **retEntry); + +#endif /* UMF_IPC_CACHE_H */ diff --git a/src/ipc_internal.h b/src/ipc_internal.h index 0f45b24e2..103214407 100644 --- a/src/ipc_internal.h +++ b/src/ipc_internal.h @@ -21,8 +21,10 @@ extern "C" { // providerIpcData is a Flexible Array Member because its size varies // depending on the provider. 
typedef struct umf_ipc_data_t { - int pid; // process ID of the process that allocated the memory - size_t baseSize; // size of base (coarse-grain) allocation + uint64_t handle_id; // unique ID of this handle + void *base; // base address of the memory + int pid; // process ID of the process that allocated the memory + size_t baseSize; // size of base (coarse-grain) allocation uint64_t offset; char providerIpcData[]; } umf_ipc_data_t; diff --git a/src/libumf.c b/src/libumf.c index 1d99ab26a..b89e5c844 100644 --- a/src/libumf.c +++ b/src/libumf.c @@ -10,8 +10,10 @@ #include #include "base_alloc_global.h" +#include "ipc_cache.h" #include "memspace_internal.h" #include "provider_tracking.h" +#include "utils_common.h" #include "utils_log.h" #if !defined(UMF_NO_HWLOC) #include "topology.h" @@ -22,16 +24,34 @@ umf_memory_tracker_handle_t TRACKER = NULL; static unsigned long long umfRefCount = 0; int umfInit(void) { - if (util_fetch_and_add64(&umfRefCount, 1) == 0) { - util_log_init(); + if (utils_fetch_and_add64(&umfRefCount, 1) == 0) { + utils_log_init(); TRACKER = umfMemoryTrackerCreate(); + if (!TRACKER) { + LOG_ERR("Failed to create memory tracker"); + return -1; + } + + LOG_DEBUG("UMF tracker created"); + + umf_result_t umf_result = umfIpcCacheGlobalInit(); + if (umf_result != UMF_RESULT_SUCCESS) { + LOG_ERR("Failed to initialize IPC cache"); + return -1; + } + + LOG_DEBUG("UMF IPC cache initialized"); + } + + if (TRACKER) { + LOG_DEBUG("UMF library initialized"); } - return (TRACKER) ? 
0 : -1; + return 0; } void umfTearDown(void) { - if (util_fetch_and_add64(&umfRefCount, -1) == 1) { + if (utils_fetch_and_add64(&umfRefCount, -1) == 1) { #if !defined(_WIN32) && !defined(UMF_NO_HWLOC) umfMemspaceHostAllDestroy(); umfMemspaceHighestCapacityDestroy(); @@ -39,12 +59,27 @@ void umfTearDown(void) { umfMemspaceLowestLatencyDestroy(); umfDestroyTopology(); #endif + umfIpcCacheGlobalTearDown(); + + if (utils_is_running_in_proxy_lib_with_size_threshold()) { + // We cannot destroy the TRACKER nor the base allocator + // when we are running in the proxy library with a size threshold, + // because it could result in calling the system free() + // with an invalid pointer and a segfault. + goto fini_umfTearDown; + } + // make sure TRACKER is not used after being destroyed umf_memory_tracker_handle_t t = TRACKER; TRACKER = NULL; umfMemoryTrackerDestroy(t); + LOG_DEBUG("UMF tracker destroyed"); umf_ba_destroy_global(); + LOG_DEBUG("UMF base allocator destroyed"); + + fini_umfTearDown: + LOG_DEBUG("UMF library finalized"); } } diff --git a/src/libumf.def b/src/libumf.def new file mode 100644 index 000000000..33c09f4b9 --- /dev/null +++ b/src/libumf.def @@ -0,0 +1,118 @@ +;;;; Begin Copyright Notice +; Copyright (C) 2024 Intel Corporation +; Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
+; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +;;;; End Copyright Notice + +LIBRARY umf + +VERSION 1.0 + +EXPORTS + DllMain + umfInit + umfTearDown + umfGetCurrentVersion + umfCloseIPCHandle + umfCoarseMemoryProviderGetStats + umfCoarseMemoryProviderOps + umfCUDAMemoryProviderOps + umfCUDAMemoryProviderParamsCreate + umfCUDAMemoryProviderParamsDestroy + umfCUDAMemoryProviderParamsSetContext + umfCUDAMemoryProviderParamsSetDevice + umfCUDAMemoryProviderParamsSetMemoryType + umfDevDaxMemoryProviderOps + umfDevDaxMemoryProviderParamsCreate + umfDevDaxMemoryProviderParamsDestroy + umfDevDaxMemoryProviderParamsSetDeviceDax + umfDevDaxMemoryProviderParamsSetProtection + umfFree + umfFileMemoryProviderOps + umfFileMemoryProviderParamsCreate + umfFileMemoryProviderParamsDestroy + umfFileMemoryProviderParamsSetPath + umfFileMemoryProviderParamsSetProtection + umfFileMemoryProviderParamsSetVisibility + umfGetIPCHandle + umfGetLastFailedMemoryProvider + umfLevelZeroMemoryProviderOps + umfLevelZeroMemoryProviderParamsCreate + umfLevelZeroMemoryProviderParamsDestroy + umfLevelZeroMemoryProviderParamsSetContext + umfLevelZeroMemoryProviderParamsSetDevice + umfLevelZeroMemoryProviderParamsSetMemoryType + umfLevelZeroMemoryProviderParamsSetResidentDevices + umfMemoryProviderAlloc + umfMemoryProviderAllocationMerge + umfMemoryProviderAllocationSplit + umfMemoryProviderCloseIPCHandle + umfMemoryProviderCreate + umfMemoryProviderCreateFromMemspace + umfMemoryProviderDestroy + umfMemoryProviderFree + umfMemoryProviderGetIPCHandle + umfMemoryProviderGetIPCHandleSize + umfMemoryProviderGetLastNativeError + umfMemoryProviderGetMinPageSize + umfMemoryProviderGetName + umfMemoryProviderGetRecommendedPageSize + umfMemoryProviderOpenIPCHandle + umfMemoryProviderPurgeForce + umfMemoryProviderPurgeLazy + umfMemoryProviderPutIPCHandle + umfMemoryTrackerGetAllocInfo + umfMempolicyCreate + umfMempolicyDestroy + umfMempolicySetCustomSplitPartitions + umfMempolicySetInterleavePartSize 
+ umfMemspaceClone + umfMemspaceCreateFromNumaArray + umfMemspaceDestroy + umfMemspaceFilterByCapacity + umfMemspaceFilterById + umfMemspaceHighestBandwidthGet + umfMemspaceHighestCapacityGet + umfMemspaceHostAllGet + umfMemspaceLowestLatencyGet + umfMemspaceMemtargetAdd + umfMemspaceMemtargetGet + umfMemspaceMemtargetNum + umfMemspaceMemtargetRemove + umfMemspaceNew + umfMemspaceUserFilter + umfMemtargetGetCapacity + umfMemtargetGetId + umfMemtargetGetType + umfOpenIPCHandle + umfOsMemoryProviderOps + umfOsMemoryProviderParamsCreate + umfOsMemoryProviderParamsDestroy + umfOsMemoryProviderParamsSetProtection + umfOsMemoryProviderParamsSetVisibility + umfOsMemoryProviderParamsSetShmName + umfOsMemoryProviderParamsSetNumaList + umfOsMemoryProviderParamsSetNumaMode + umfOsMemoryProviderParamsSetPartSize + umfOsMemoryProviderParamsSetPartitions + umfPoolAlignedMalloc + umfPoolByPtr + umfPoolCalloc + umfPoolCreate + umfPoolCreateFromMemspace + umfPoolDestroy + umfPoolFree + umfPoolGetIPCHandler + umfPoolGetIPCHandleSize + umfPoolGetLastAllocationError + umfPoolGetMemoryProvider + umfPoolMalloc + umfPoolMallocUsableSize + umfPoolRealloc + umfProxyPoolOps + umfPutIPCHandle + umfScalablePoolOps + umfScalablePoolParamsCreate + umfScalablePoolParamsDestroy + umfScalablePoolParamsSetGranularity + umfScalablePoolParamsSetKeepAllMemory diff --git a/src/libumf.def.in b/src/libumf.def.in deleted file mode 100644 index aa78d0953..000000000 --- a/src/libumf.def.in +++ /dev/null @@ -1,61 +0,0 @@ -;;;; Begin Copyright Notice -; Copyright (C) 2024 Intel Corporation -; Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
-; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -;;;; End Copyright Notice - -LIBRARY umf - -VERSION 1.0 - -EXPORTS - DllMain - umfInit - umfTearDown - umfGetCurrentVersion - umfCloseIPCHandle - umfFree - umfGetIPCHandle - umfGetLastFailedMemoryProvider - umfMemoryTrackerGetAllocInfo - umfMemoryProviderAlloc - umfMemoryProviderAllocationMerge - umfMemoryProviderAllocationSplit - umfMemoryProviderCloseIPCHandle - umfMemoryProviderCreate - umfMemoryProviderCreateFromMemspace - umfMemoryProviderDestroy - umfMemoryProviderFree - umfMemoryProviderGetIPCHandle - umfMemoryProviderGetIPCHandleSize - umfMemoryProviderGetLastNativeError - umfMemoryProviderGetMinPageSize - umfMemoryProviderGetName - umfMemoryProviderGetRecommendedPageSize - umfMemoryProviderOpenIPCHandle - umfMemoryProviderPurgeForce - umfMemoryProviderPurgeLazy - umfMemoryProviderPutIPCHandle - umfMempolicyCreate - umfMempolicyDestroy - umfMempolicySetCustomSplitPartitions - umfMempolicySetInterleavePartSize - umfMemspaceDestroy - umfOpenIPCHandle - umfPoolAlignedMalloc - umfPoolByPtr - umfPoolCalloc - umfPoolCreate - umfPoolCreateFromMemspace - umfPoolDestroy - umfPoolFree - umfPoolGetIPCHandleSize - umfPoolGetLastAllocationError - umfPoolGetMemoryProvider - umfPoolMalloc - umfPoolMallocUsableSize - umfPoolRealloc - umfProxyPoolOps - umfPutIPCHandle - umfScalablePoolOps - @UMF_OPTIONAL_SYMBOLS_WINDOWS@ diff --git a/src/libumf.map b/src/libumf.map new file mode 100644 index 000000000..c1e1fd62c --- /dev/null +++ b/src/libumf.map @@ -0,0 +1,115 @@ +# Copyright (C) 2024 Intel Corporation +# Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +UMF_1.0 { + global: + umfInit; + umfTearDown; + umfGetCurrentVersion; + umfCloseIPCHandle; + umfCoarseMemoryProviderGetStats; + umfCoarseMemoryProviderOps; + umfCUDAMemoryProviderOps; + umfCUDAMemoryProviderParamsCreate; + umfCUDAMemoryProviderParamsDestroy; + umfCUDAMemoryProviderParamsSetContext; + umfCUDAMemoryProviderParamsSetDevice; + umfCUDAMemoryProviderParamsSetMemoryType; + umfDevDaxMemoryProviderOps; + umfDevDaxMemoryProviderParamsCreate; + umfDevDaxMemoryProviderParamsDestroy; + umfDevDaxMemoryProviderParamsSetDeviceDax; + umfDevDaxMemoryProviderParamsSetProtection; + umfFree; + umfFileMemoryProviderOps; + umfFileMemoryProviderParamsCreate; + umfFileMemoryProviderParamsDestroy; + umfFileMemoryProviderParamsSetPath; + umfFileMemoryProviderParamsSetProtection; + umfFileMemoryProviderParamsSetVisibility; + umfGetIPCHandle; + umfGetLastFailedMemoryProvider; + umfLevelZeroMemoryProviderOps; + umfLevelZeroMemoryProviderParamsCreate; + umfLevelZeroMemoryProviderParamsDestroy; + umfLevelZeroMemoryProviderParamsSetContext; + umfLevelZeroMemoryProviderParamsSetDevice; + umfLevelZeroMemoryProviderParamsSetMemoryType; + umfLevelZeroMemoryProviderParamsSetResidentDevices; + umfMemoryProviderAlloc; + umfMemoryProviderAllocationMerge; + umfMemoryProviderAllocationSplit; + umfMemoryProviderCloseIPCHandle; + umfMemoryProviderCreate; + umfMemoryProviderCreateFromMemspace; + umfMemoryProviderDestroy; + umfMemoryProviderFree; + umfMemoryProviderGetIPCHandle; + umfMemoryProviderGetIPCHandleSize; + umfMemoryProviderGetLastNativeError; + umfMemoryProviderGetMinPageSize; + umfMemoryProviderGetName; + umfMemoryProviderGetRecommendedPageSize; + umfMemoryProviderOpenIPCHandle; + umfMemoryProviderPurgeForce; + umfMemoryProviderPurgeLazy; + umfMemoryProviderPutIPCHandle; + umfMemoryTrackerGetAllocInfo; + umfMempolicyCreate; + umfMempolicyDestroy; + umfMempolicySetCustomSplitPartitions; + umfMempolicySetInterleavePartSize; 
+ umfMemspaceClone; + umfMemspaceCreateFromNumaArray; + umfMemspaceDestroy; + umfMemspaceFilterByCapacity; + umfMemspaceFilterById; + umfMemspaceHighestBandwidthGet; + umfMemspaceHighestCapacityGet; + umfMemspaceHostAllGet; + umfMemspaceLowestLatencyGet; + umfMemspaceMemtargetAdd; + umfMemspaceMemtargetGet; + umfMemspaceMemtargetNum; + umfMemspaceMemtargetRemove; + umfMemspaceNew; + umfMemspaceUserFilter; + umfMemtargetGetCapacity; + umfMemtargetGetId; + umfMemtargetGetType; + umfOpenIPCHandle; + umfOsMemoryProviderOps; + umfOsMemoryProviderParamsCreate; + umfOsMemoryProviderParamsDestroy; + umfOsMemoryProviderParamsSetProtection; + umfOsMemoryProviderParamsSetVisibility; + umfOsMemoryProviderParamsSetShmName; + umfOsMemoryProviderParamsSetNumaList; + umfOsMemoryProviderParamsSetNumaMode; + umfOsMemoryProviderParamsSetPartSize; + umfOsMemoryProviderParamsSetPartitions; + umfPoolAlignedMalloc; + umfPoolByPtr; + umfPoolCalloc; + umfPoolCreate; + umfPoolCreateFromMemspace; + umfPoolDestroy; + umfPoolFree; + umfPoolGetIPCHandler; + umfPoolGetIPCHandleSize; + umfPoolGetLastAllocationError; + umfPoolGetMemoryProvider; + umfPoolMalloc; + umfPoolMallocUsableSize; + umfPoolRealloc; + umfProxyPoolOps; + umfPutIPCHandle; + umfScalablePoolOps; + umfScalablePoolParamsCreate; + umfScalablePoolParamsDestroy; + umfScalablePoolParamsSetGranularity; + umfScalablePoolParamsSetKeepAllMemory; + local: + *; +}; diff --git a/src/libumf.map.in b/src/libumf.map.in deleted file mode 100644 index 20031f16e..000000000 --- a/src/libumf.map.in +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
-# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -UMF_1.0 { - global: - umfInit; - umfTearDown; - umfGetCurrentVersion; - umfCloseIPCHandle; - umfFree; - umfGetIPCHandle; - umfGetLastFailedMemoryProvider; - umfMemoryTrackerGetAllocInfo; - umfMemoryProviderAlloc; - umfMemoryProviderAllocationMerge; - umfMemoryProviderAllocationSplit; - umfMemoryProviderCloseIPCHandle; - umfMemoryProviderCreate; - umfMemoryProviderCreateFromMemspace; - umfMemoryProviderDestroy; - umfMemoryProviderFree; - umfMemoryProviderGetIPCHandle; - umfMemoryProviderGetIPCHandleSize; - umfMemoryProviderGetLastNativeError; - umfMemoryProviderGetMinPageSize; - umfMemoryProviderGetName; - umfMemoryProviderGetRecommendedPageSize; - umfMemoryProviderOpenIPCHandle; - umfMemoryProviderPurgeForce; - umfMemoryProviderPurgeLazy; - umfMemoryProviderPutIPCHandle; - umfMempolicyCreate; - umfMempolicyDestroy; - umfMempolicySetCustomSplitPartitions; - umfMempolicySetInterleavePartSize; - umfMemspaceDestroy; - umfOpenIPCHandle; - umfPoolAlignedMalloc; - umfPoolByPtr; - umfPoolCalloc; - umfPoolCreate; - umfPoolCreateFromMemspace; - umfPoolDestroy; - umfPoolFree; - umfPoolGetIPCHandleSize; - umfPoolGetLastAllocationError; - umfPoolGetMemoryProvider; - umfPoolMalloc; - umfPoolMallocUsableSize; - umfPoolRealloc; - umfProxyPoolOps; - umfPutIPCHandle; - umfScalablePoolOps; - @UMF_OPTIONAL_SYMBOLS_LINUX@ - local: - *; -}; diff --git a/src/memory_pool.c b/src/memory_pool.c index 7d65acf36..4a85955ef 100644 --- a/src/memory_pool.c +++ b/src/memory_pool.c @@ -41,8 +41,11 @@ static umf_result_t umfPoolCreateInternal(const umf_memory_pool_ops_t *ops, assert(ops->version == UMF_VERSION_CURRENT); if (!(flags & UMF_POOL_CREATE_FLAG_DISABLE_TRACKING)) { - // wrap provider with memory tracking provider - ret = umfTrackingMemoryProviderCreate(provider, pool, &pool->provider); + // Wrap provider with memory tracking provider. + // Check if the provider supports the free() operation. 
+ bool upstreamDoesNotFree = umfIsFreeOpDefault(provider); + ret = umfTrackingMemoryProviderCreate(provider, pool, &pool->provider, + upstreamDoesNotFree); if (ret != UMF_RESULT_SUCCESS) { goto err_provider_create; } @@ -73,18 +76,20 @@ static umf_result_t umfPoolCreateInternal(const umf_memory_pool_ops_t *ops, void umfPoolDestroy(umf_memory_pool_handle_t hPool) { hPool->ops.finalize(hPool->pool_priv); - if (hPool->flags & UMF_POOL_CREATE_FLAG_OWN_PROVIDER) { - // Destroy associated memory provider. - umf_memory_provider_handle_t hProvider = NULL; - umfPoolGetMemoryProvider(hPool, &hProvider); - umfMemoryProviderDestroy(hProvider); - } + + umf_memory_provider_handle_t hUpstreamProvider = NULL; + umfPoolGetMemoryProvider(hPool, &hUpstreamProvider); if (!(hPool->flags & UMF_POOL_CREATE_FLAG_DISABLE_TRACKING)) { // Destroy tracking provider. umfMemoryProviderDestroy(hPool->provider); } + if (hPool->flags & UMF_POOL_CREATE_FLAG_OWN_PROVIDER) { + // Destroy associated memory provider. + umfMemoryProviderDestroy(hUpstreamProvider); + } + LOG_INFO("Memory pool destroyed: %p", (void *)hPool); // TODO: this free keeps memory in base allocator, so it can lead to OOM in some scenarios (it should be optimized) diff --git a/src/memory_provider.c b/src/memory_provider.c index f6e07af62..883f1be26 100644 --- a/src/memory_provider.c +++ b/src/memory_provider.c @@ -25,6 +25,13 @@ typedef struct umf_memory_provider_t { void *provider_priv; } umf_memory_provider_t; +static umf_result_t umfDefaultFree(void *provider, void *ptr, size_t size) { + (void)provider; + (void)ptr; + (void)size; + return UMF_RESULT_ERROR_NOT_SUPPORTED; +} + static umf_result_t umfDefaultPurgeLazy(void *provider, void *ptr, size_t size) { (void)provider; @@ -99,6 +106,9 @@ static umf_result_t umfDefaultCloseIPCHandle(void *provider, void *ptr, } void assignOpsExtDefaults(umf_memory_provider_ops_t *ops) { + if (!ops->ext.free) { + ops->ext.free = umfDefaultFree; + } if (!ops->ext.purge_lazy) { 
ops->ext.purge_lazy = umfDefaultPurgeLazy; } @@ -133,7 +143,7 @@ void assignOpsIpcDefaults(umf_memory_provider_ops_t *ops) { static bool validateOpsMandatory(const umf_memory_provider_ops_t *ops) { // Mandatory ops should be non-NULL - return ops->alloc && ops->free && ops->get_recommended_page_size && + return ops->alloc && ops->get_recommended_page_size && ops->get_min_page_size && ops->initialize && ops->finalize && ops->get_last_native_error && ops->get_name; } @@ -159,6 +169,10 @@ static bool validateOps(const umf_memory_provider_ops_t *ops) { validateOpsIpc(&(ops->ipc)); } +bool umfIsFreeOpDefault(umf_memory_provider_handle_t hProvider) { + return (hProvider->ops.ext.free == umfDefaultFree); +} + umf_result_t umfMemoryProviderCreate(const umf_memory_provider_ops_t *ops, void *params, umf_memory_provider_handle_t *hProvider) { @@ -195,8 +209,10 @@ umf_result_t umfMemoryProviderCreate(const umf_memory_provider_ops_t *ops, } void umfMemoryProviderDestroy(umf_memory_provider_handle_t hProvider) { - hProvider->ops.finalize(hProvider->provider_priv); - umf_ba_global_free(hProvider); + if (hProvider) { + hProvider->ops.finalize(hProvider->provider_priv); + umf_ba_global_free(hProvider); + } } static void @@ -210,6 +226,7 @@ checkErrorAndSetLastProvider(umf_result_t result, umf_result_t umfMemoryProviderAlloc(umf_memory_provider_handle_t hProvider, size_t size, size_t alignment, void **ptr) { UMF_CHECK((hProvider != NULL), UMF_RESULT_ERROR_INVALID_ARGUMENT); + UMF_CHECK((ptr != NULL), UMF_RESULT_ERROR_INVALID_ARGUMENT); umf_result_t res = hProvider->ops.alloc(hProvider->provider_priv, size, alignment, ptr); checkErrorAndSetLastProvider(res, hProvider); @@ -219,7 +236,8 @@ umf_result_t umfMemoryProviderAlloc(umf_memory_provider_handle_t hProvider, umf_result_t umfMemoryProviderFree(umf_memory_provider_handle_t hProvider, void *ptr, size_t size) { UMF_CHECK((hProvider != NULL), UMF_RESULT_ERROR_INVALID_ARGUMENT); - umf_result_t res = 
hProvider->ops.free(hProvider->provider_priv, ptr, size); + umf_result_t res = + hProvider->ops.ext.free(hProvider->provider_priv, ptr, size); checkErrorAndSetLastProvider(res, hProvider); return res; } @@ -241,6 +259,7 @@ umf_result_t umfMemoryProviderGetRecommendedPageSize(umf_memory_provider_handle_t hProvider, size_t size, size_t *pageSize) { UMF_CHECK((hProvider != NULL), UMF_RESULT_ERROR_INVALID_ARGUMENT); + UMF_CHECK((pageSize != NULL), UMF_RESULT_ERROR_INVALID_ARGUMENT); umf_result_t res = hProvider->ops.get_recommended_page_size( hProvider->provider_priv, size, pageSize); checkErrorAndSetLastProvider(res, hProvider); @@ -251,6 +270,7 @@ umf_result_t umfMemoryProviderGetMinPageSize(umf_memory_provider_handle_t hProvider, void *ptr, size_t *pageSize) { UMF_CHECK((hProvider != NULL), UMF_RESULT_ERROR_INVALID_ARGUMENT); + UMF_CHECK((pageSize != NULL), UMF_RESULT_ERROR_INVALID_ARGUMENT); umf_result_t res = hProvider->ops.get_min_page_size( hProvider->provider_priv, ptr, pageSize); checkErrorAndSetLastProvider(res, hProvider); @@ -265,6 +285,7 @@ const char *umfMemoryProviderGetName(umf_memory_provider_handle_t hProvider) { umf_result_t umfMemoryProviderPurgeLazy(umf_memory_provider_handle_t hProvider, void *ptr, size_t size) { UMF_CHECK((hProvider != NULL), UMF_RESULT_ERROR_INVALID_ARGUMENT); + UMF_CHECK((ptr != NULL), UMF_RESULT_ERROR_INVALID_ARGUMENT); umf_result_t res = hProvider->ops.ext.purge_lazy(hProvider->provider_priv, ptr, size); checkErrorAndSetLastProvider(res, hProvider); @@ -274,6 +295,7 @@ umf_result_t umfMemoryProviderPurgeLazy(umf_memory_provider_handle_t hProvider, umf_result_t umfMemoryProviderPurgeForce(umf_memory_provider_handle_t hProvider, void *ptr, size_t size) { UMF_CHECK((hProvider != NULL), UMF_RESULT_ERROR_INVALID_ARGUMENT); + UMF_CHECK((ptr != NULL), UMF_RESULT_ERROR_INVALID_ARGUMENT); umf_result_t res = hProvider->ops.ext.purge_force(hProvider->provider_priv, ptr, size); checkErrorAndSetLastProvider(res, hProvider); @@ -288,15 
+310,11 @@ umf_result_t umfMemoryProviderAllocationSplit(umf_memory_provider_handle_t hProvider, void *ptr, size_t totalSize, size_t firstSize) { - if (!ptr) { - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - if (firstSize == 0 || totalSize == 0) { - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - if (firstSize >= totalSize) { - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } + UMF_CHECK((hProvider != NULL), UMF_RESULT_ERROR_INVALID_ARGUMENT); + UMF_CHECK((ptr != NULL), UMF_RESULT_ERROR_INVALID_ARGUMENT); + UMF_CHECK((firstSize != 0 && totalSize != 0), + UMF_RESULT_ERROR_INVALID_ARGUMENT); + UMF_CHECK((firstSize < totalSize), UMF_RESULT_ERROR_INVALID_ARGUMENT); umf_result_t res = hProvider->ops.ext.allocation_split( hProvider->provider_priv, ptr, totalSize, firstSize); @@ -308,18 +326,13 @@ umf_result_t umfMemoryProviderAllocationMerge(umf_memory_provider_handle_t hProvider, void *lowPtr, void *highPtr, size_t totalSize) { - if (!lowPtr || !highPtr) { - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - if (totalSize == 0) { - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - if ((uintptr_t)lowPtr >= (uintptr_t)highPtr) { - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - if ((uintptr_t)highPtr - (uintptr_t)lowPtr > totalSize) { - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } + UMF_CHECK((hProvider != NULL), UMF_RESULT_ERROR_INVALID_ARGUMENT); + UMF_CHECK((lowPtr && highPtr), UMF_RESULT_ERROR_INVALID_ARGUMENT); + UMF_CHECK((totalSize != 0), UMF_RESULT_ERROR_INVALID_ARGUMENT); + UMF_CHECK(((uintptr_t)lowPtr < (uintptr_t)highPtr), + UMF_RESULT_ERROR_INVALID_ARGUMENT); + UMF_CHECK(((uintptr_t)highPtr - (uintptr_t)lowPtr < totalSize), + UMF_RESULT_ERROR_INVALID_ARGUMENT); umf_result_t res = hProvider->ops.ext.allocation_merge( hProvider->provider_priv, lowPtr, highPtr, totalSize); @@ -330,6 +343,8 @@ umfMemoryProviderAllocationMerge(umf_memory_provider_handle_t hProvider, umf_result_t umfMemoryProviderGetIPCHandleSize(umf_memory_provider_handle_t hProvider, size_t *size) { + 
UMF_CHECK((hProvider != NULL), UMF_RESULT_ERROR_INVALID_ARGUMENT); + UMF_CHECK((size != NULL), UMF_RESULT_ERROR_INVALID_ARGUMENT); return hProvider->ops.ipc.get_ipc_handle_size(hProvider->provider_priv, size); } @@ -338,6 +353,9 @@ umf_result_t umfMemoryProviderGetIPCHandle(umf_memory_provider_handle_t hProvider, const void *ptr, size_t size, void *providerIpcData) { + UMF_CHECK((hProvider != NULL), UMF_RESULT_ERROR_INVALID_ARGUMENT); + UMF_CHECK((ptr != NULL), UMF_RESULT_ERROR_INVALID_ARGUMENT); + UMF_CHECK((providerIpcData != NULL), UMF_RESULT_ERROR_INVALID_ARGUMENT); return hProvider->ops.ipc.get_ipc_handle(hProvider->provider_priv, ptr, size, providerIpcData); } @@ -345,6 +363,8 @@ umfMemoryProviderGetIPCHandle(umf_memory_provider_handle_t hProvider, umf_result_t umfMemoryProviderPutIPCHandle(umf_memory_provider_handle_t hProvider, void *providerIpcData) { + UMF_CHECK((hProvider != NULL), UMF_RESULT_ERROR_INVALID_ARGUMENT); + UMF_CHECK((providerIpcData != NULL), UMF_RESULT_ERROR_INVALID_ARGUMENT); return hProvider->ops.ipc.put_ipc_handle(hProvider->provider_priv, providerIpcData); } @@ -352,6 +372,9 @@ umfMemoryProviderPutIPCHandle(umf_memory_provider_handle_t hProvider, umf_result_t umfMemoryProviderOpenIPCHandle(umf_memory_provider_handle_t hProvider, void *providerIpcData, void **ptr) { + UMF_CHECK((hProvider != NULL), UMF_RESULT_ERROR_INVALID_ARGUMENT); + UMF_CHECK((providerIpcData != NULL), UMF_RESULT_ERROR_INVALID_ARGUMENT); + UMF_CHECK((ptr != NULL), UMF_RESULT_ERROR_INVALID_ARGUMENT); return hProvider->ops.ipc.open_ipc_handle(hProvider->provider_priv, providerIpcData, ptr); } @@ -359,6 +382,8 @@ umfMemoryProviderOpenIPCHandle(umf_memory_provider_handle_t hProvider, umf_result_t umfMemoryProviderCloseIPCHandle(umf_memory_provider_handle_t hProvider, void *ptr, size_t size) { + UMF_CHECK((hProvider != NULL), UMF_RESULT_ERROR_INVALID_ARGUMENT); + UMF_CHECK((ptr != NULL), UMF_RESULT_ERROR_INVALID_ARGUMENT); return 
hProvider->ops.ipc.close_ipc_handle(hProvider->provider_priv, ptr, size); } diff --git a/src/memory_provider_internal.h b/src/memory_provider_internal.h index 4e858992d..49b2f2e53 100644 --- a/src/memory_provider_internal.h +++ b/src/memory_provider_internal.h @@ -10,6 +10,8 @@ #ifndef UMF_MEMORY_PROVIDER_INTERNAL_H #define UMF_MEMORY_PROVIDER_INTERNAL_H 1 +#include + #include #ifdef __cplusplus @@ -18,6 +20,7 @@ extern "C" { void *umfMemoryProviderGetPriv(umf_memory_provider_handle_t hProvider); umf_memory_provider_handle_t *umfGetLastFailedMemoryProviderPtr(void); +bool umfIsFreeOpDefault(umf_memory_provider_handle_t hProvider); #ifdef __cplusplus } diff --git a/src/memory_target.c b/src/memory_target.c deleted file mode 100644 index 3cbdb09d9..000000000 --- a/src/memory_target.c +++ /dev/null @@ -1,111 +0,0 @@ -/* - * - * Copyright (C) 2023-2024 Intel Corporation - * - * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. - * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - * - */ - -#include -#include - -#include "base_alloc_global.h" -#include "libumf.h" -#include "memory_target.h" -#include "memory_target_ops.h" -#include "utils_concurrency.h" - -umf_result_t umfMemoryTargetCreate(const umf_memory_target_ops_t *ops, - void *params, - umf_memory_target_handle_t *memoryTarget) { - libumfInit(); - if (!ops || !memoryTarget) { - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - umf_memory_target_handle_t target = - umf_ba_global_alloc(sizeof(umf_memory_target_t)); - if (!target) { - return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - assert(ops->version == UMF_VERSION_CURRENT); - - target->ops = ops; - - void *target_priv; - umf_result_t ret = ops->initialize(params, &target_priv); - if (ret != UMF_RESULT_SUCCESS) { - umf_ba_global_free(target); - return ret; - } - - target->priv = target_priv; - - *memoryTarget = target; - - return UMF_RESULT_SUCCESS; -} - -void umfMemoryTargetDestroy(umf_memory_target_handle_t memoryTarget) { - 
assert(memoryTarget); - memoryTarget->ops->finalize(memoryTarget->priv); - umf_ba_global_free(memoryTarget); -} - -umf_result_t umfMemoryTargetClone(umf_memory_target_handle_t memoryTarget, - umf_memory_target_handle_t *outHandle) { - assert(memoryTarget); - assert(outHandle); - - *outHandle = umf_ba_global_alloc(sizeof(umf_memory_target_t)); - if (!*outHandle) { - return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - void *outPriv; - umf_result_t ret = memoryTarget->ops->clone(memoryTarget->priv, &outPriv); - if (ret != UMF_RESULT_SUCCESS) { - umf_ba_global_free(*outHandle); - return ret; - } - - (*outHandle)->ops = memoryTarget->ops; - (*outHandle)->priv = outPriv; - - return UMF_RESULT_SUCCESS; -} - -umf_result_t umfMemoryTargetGetCapacity(umf_memory_target_handle_t memoryTarget, - size_t *capacity) { - if (!memoryTarget || !capacity) { - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - return memoryTarget->ops->get_capacity(memoryTarget->priv, capacity); -} - -umf_result_t -umfMemoryTargetGetBandwidth(umf_memory_target_handle_t srcMemoryTarget, - umf_memory_target_handle_t dstMemoryTarget, - size_t *bandwidth) { - if (!srcMemoryTarget || !dstMemoryTarget || !bandwidth) { - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - return srcMemoryTarget->ops->get_bandwidth( - srcMemoryTarget->priv, dstMemoryTarget->priv, bandwidth); -} - -umf_result_t -umfMemoryTargetGetLatency(umf_memory_target_handle_t srcMemoryTarget, - umf_memory_target_handle_t dstMemoryTarget, - size_t *latency) { - if (!srcMemoryTarget || !dstMemoryTarget || !latency) { - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - return srcMemoryTarget->ops->get_latency(srcMemoryTarget->priv, - dstMemoryTarget->priv, latency); -} diff --git a/src/memory_target.h b/src/memory_target.h deleted file mode 100644 index c522cce24..000000000 --- a/src/memory_target.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - * - * Copyright (C) 2023-2024 Intel Corporation - * - * Under the Apache License v2.0 with LLVM Exceptions. 
See LICENSE.TXT. - * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - * - */ - -#ifndef UMF_MEMORY_TARGET_H -#define UMF_MEMORY_TARGET_H 1 - -#include - -#ifdef __cplusplus -extern "C" { -#endif - -struct umf_memory_target_ops_t; -typedef struct umf_memory_target_ops_t umf_memory_target_ops_t; - -typedef struct umf_memory_target_t { - const umf_memory_target_ops_t *ops; - void *priv; -} umf_memory_target_t; - -typedef umf_memory_target_t *umf_memory_target_handle_t; - -umf_result_t umfMemoryTargetCreate(const umf_memory_target_ops_t *ops, - void *params, - umf_memory_target_handle_t *memoryTarget); -void umfMemoryTargetDestroy(umf_memory_target_handle_t memoryTarget); - -umf_result_t umfMemoryTargetClone(umf_memory_target_handle_t memoryTarget, - umf_memory_target_handle_t *outHandle); -umf_result_t umfMemoryTargetGetCapacity(umf_memory_target_handle_t memoryTarget, - size_t *capacity); -umf_result_t -umfMemoryTargetGetBandwidth(umf_memory_target_handle_t srcMemoryTarget, - umf_memory_target_handle_t dstMemoryTarget, - size_t *bandwidth); -umf_result_t -umfMemoryTargetGetLatency(umf_memory_target_handle_t srcMemoryTarget, - umf_memory_target_handle_t dstMemoryTarget, - size_t *latency); - -#ifdef __cplusplus -} -#endif - -#endif /* UMF_MEMORY_TARGET_H */ diff --git a/src/memspace.c b/src/memspace.c index 03e716c84..1cd80e1fa 100644 --- a/src/memspace.c +++ b/src/memspace.c @@ -13,9 +13,10 @@ #include #include "base_alloc_global.h" -#include "memory_target.h" -#include "memory_target_ops.h" #include "memspace_internal.h" +#include "memtarget_internal.h" +#include "memtarget_ops.h" +#include "utils_log.h" #ifndef NDEBUG static umf_result_t @@ -25,7 +26,7 @@ verifyMemTargetsTypes(umf_const_memspace_handle_t memspace) { return UMF_RESULT_ERROR_INVALID_ARGUMENT; } - const struct umf_memory_target_ops_t *ops = memspace->nodes[0]->ops; + const struct umf_memtarget_ops_t *ops = memspace->nodes[0]->ops; for (size_t i = 1; i < memspace->size; i++) { if 
(memspace->nodes[i]->ops != ops) { return UMF_RESULT_ERROR_INVALID_ARGUMENT; @@ -60,13 +61,17 @@ umf_result_t umfPoolCreateFromMemspace(umf_const_memspace_handle_t memspace, return UMF_RESULT_ERROR_INVALID_ARGUMENT; } + if (memspace->size == 0) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + void **privs = NULL; umf_result_t ret = memoryTargetHandlesToPriv(memspace, &privs); if (ret != UMF_RESULT_SUCCESS) { return ret; } - // TODO: for now, we only support memspaces that consist of memory_targets + // TODO: for now, we only support memspaces that consist of memtargets // of the same type. Fix this. assert(verifyMemTargetsTypes(memspace) == UMF_RESULT_SUCCESS); ret = memspace->nodes[0]->ops->pool_create_from_memspace( @@ -85,13 +90,17 @@ umfMemoryProviderCreateFromMemspace(umf_const_memspace_handle_t memspace, return UMF_RESULT_ERROR_INVALID_ARGUMENT; } + if (memspace->size == 0) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + void **privs = NULL; umf_result_t ret = memoryTargetHandlesToPriv(memspace, &privs); if (ret != UMF_RESULT_SUCCESS) { return ret; } - // TODO: for now, we only support memspaces that consist of memory_targets + // TODO: for now, we only support memspaces that consist of memtargets // of the same type. Fix this. 
assert(verifyMemTargetsTypes(memspace) == UMF_RESULT_SUCCESS); ret = memspace->nodes[0]->ops->memory_provider_create_from_memspace( @@ -102,10 +111,29 @@ umfMemoryProviderCreateFromMemspace(umf_const_memspace_handle_t memspace, return ret; } +umf_result_t umfMemspaceNew(umf_memspace_handle_t *hMemspace) { + if (!hMemspace) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + umf_memspace_handle_t memspace = + umf_ba_global_alloc(sizeof(struct umf_memspace_t)); + if (!memspace) { + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + memspace->size = 0; + memspace->nodes = NULL; + + *hMemspace = memspace; + + return UMF_RESULT_SUCCESS; +} + void umfMemspaceDestroy(umf_memspace_handle_t memspace) { assert(memspace); for (size_t i = 0; i < memspace->size; i++) { - umfMemoryTargetDestroy(memspace->nodes[i]); + umfMemtargetDestroy(memspace->nodes[i]); } umf_ba_global_free(memspace->nodes); @@ -126,7 +154,7 @@ umf_result_t umfMemspaceClone(umf_const_memspace_handle_t hMemspace, clone->size = hMemspace->size; clone->nodes = - umf_ba_global_alloc(sizeof(umf_memory_target_handle_t) * clone->size); + umf_ba_global_alloc(sizeof(umf_memtarget_handle_t) * clone->size); if (!clone->nodes) { umf_ba_global_free(clone); return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; @@ -136,7 +164,7 @@ umf_result_t umfMemspaceClone(umf_const_memspace_handle_t hMemspace, umf_result_t ret; for (i = 0; i < clone->size; i++) { - ret = umfMemoryTargetClone(hMemspace->nodes[i], &clone->nodes[i]); + ret = umfMemtargetClone(hMemspace->nodes[i], &clone->nodes[i]); if (ret != UMF_RESULT_SUCCESS) { goto err; } @@ -148,21 +176,21 @@ umf_result_t umfMemspaceClone(umf_const_memspace_handle_t hMemspace, err: while (i != 0) { i--; - umfMemoryTargetDestroy(clone->nodes[i]); + umfMemtargetDestroy(clone->nodes[i]); } umf_ba_global_free(clone->nodes); umf_ba_global_free(clone); return ret; } -struct memory_target_sort_entry { +struct memtarget_sort_entry { uint64_t property; - umf_memory_target_handle_t node; + 
umf_memtarget_handle_t node; }; static int propertyCmp(const void *a, const void *b) { - const struct memory_target_sort_entry *entryA = a; - const struct memory_target_sort_entry *entryB = b; + const struct memtarget_sort_entry *entryA = a; + const struct memtarget_sort_entry *entryB = b; if (entryA->property < entryB->property) { return 1; @@ -173,16 +201,14 @@ static int propertyCmp(const void *a, const void *b) { } } -umf_result_t -umfMemspaceSortDesc(umf_memspace_handle_t hMemspace, - umf_result_t (*getProperty)(umf_memory_target_handle_t node, - uint64_t *property)) { +umf_result_t umfMemspaceSortDesc(umf_memspace_handle_t hMemspace, + umfGetPropertyFn getProperty) { if (!hMemspace || !getProperty) { return UMF_RESULT_ERROR_INVALID_ARGUMENT; } - struct memory_target_sort_entry *entries = umf_ba_global_alloc( - sizeof(struct memory_target_sort_entry) * hMemspace->size); + struct memtarget_sort_entry *entries = umf_ba_global_alloc( + sizeof(struct memtarget_sort_entry) * hMemspace->size); if (!entries) { return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; } @@ -198,7 +224,7 @@ umfMemspaceSortDesc(umf_memspace_handle_t hMemspace, } } - qsort(entries, hMemspace->size, sizeof(struct memory_target_sort_entry), + qsort(entries, hMemspace->size, sizeof(struct memtarget_sort_entry), propertyCmp); // apply the order to the original array @@ -218,7 +244,7 @@ umf_result_t umfMemspaceFilter(umf_const_memspace_handle_t hMemspace, return UMF_RESULT_ERROR_INVALID_ARGUMENT; } - umf_memory_target_handle_t *uniqueBestNodes = + umf_memtarget_handle_t *uniqueBestNodes = umf_ba_global_alloc(hMemspace->size * sizeof(*uniqueBestNodes)); if (!uniqueBestNodes) { return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; @@ -228,7 +254,7 @@ umf_result_t umfMemspaceFilter(umf_const_memspace_handle_t hMemspace, size_t numUniqueBestNodes = 0; for (size_t nodeIdx = 0; nodeIdx < hMemspace->size; nodeIdx++) { - umf_memory_target_handle_t target = NULL; + umf_memtarget_handle_t target = NULL; ret = 
getTarget(hMemspace->nodes[nodeIdx], hMemspace->nodes, hMemspace->size, &target); if (ret != UMF_RESULT_SUCCESS) { @@ -268,8 +294,8 @@ umf_result_t umfMemspaceFilter(umf_const_memspace_handle_t hMemspace, size_t cloneIdx = 0; for (cloneIdx = 0; cloneIdx < newMemspace->size; cloneIdx++) { - ret = umfMemoryTargetClone(uniqueBestNodes[cloneIdx], - &newMemspace->nodes[cloneIdx]); + ret = umfMemtargetClone(uniqueBestNodes[cloneIdx], + &newMemspace->nodes[cloneIdx]); if (ret != UMF_RESULT_SUCCESS) { goto err_free_cloned_nodes; } @@ -283,7 +309,7 @@ umf_result_t umfMemspaceFilter(umf_const_memspace_handle_t hMemspace, err_free_cloned_nodes: while (cloneIdx != 0) { cloneIdx--; - umfMemoryTargetDestroy(newMemspace->nodes[cloneIdx]); + umfMemtargetDestroy(newMemspace->nodes[cloneIdx]); } umf_ba_global_free(newMemspace->nodes); err_free_new_memspace: @@ -292,3 +318,261 @@ umf_result_t umfMemspaceFilter(umf_const_memspace_handle_t hMemspace, umf_ba_global_free(uniqueBestNodes); return ret; } + +size_t umfMemspaceMemtargetNum(umf_const_memspace_handle_t hMemspace) { + if (!hMemspace) { + return 0; + } + return hMemspace->size; +} + +umf_const_memtarget_handle_t +umfMemspaceMemtargetGet(umf_const_memspace_handle_t hMemspace, + unsigned targetNum) { + if (!hMemspace || targetNum >= hMemspace->size) { + return NULL; + } + return hMemspace->nodes[targetNum]; +} + +umf_result_t umfMemspaceMemtargetAdd(umf_memspace_handle_t hMemspace, + umf_const_memtarget_handle_t hMemtarget) { + if (!hMemspace || !hMemtarget) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + for (size_t i = 0; i < hMemspace->size; i++) { + int cmp; + umf_result_t ret = + umfMemtargetCompare(hMemspace->nodes[i], hMemtarget, &cmp); + if (ret != UMF_RESULT_SUCCESS) { + return ret; + } + + if (cmp == 0) { + LOG_ERR("Memory target already exists in the memspace"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } else if (cmp < 0) { + LOG_ERR("You can't mix different memory target types in the same " + "memspace"); + 
return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + } + + umf_memtarget_handle_t *newNodes = + umf_ba_global_alloc(sizeof(*newNodes) * (hMemspace->size + 1)); + if (!newNodes) { + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + for (size_t i = 0; i < hMemspace->size; i++) { + newNodes[i] = hMemspace->nodes[i]; + } + umf_memtarget_t *hMemtargetClone; + + umf_result_t ret = umfMemtargetClone(hMemtarget, &hMemtargetClone); + if (ret != UMF_RESULT_SUCCESS) { + umf_ba_global_free(newNodes); + return ret; + } + newNodes[hMemspace->size++] = hMemtargetClone; + + umf_ba_global_free(hMemspace->nodes); + hMemspace->nodes = newNodes; + return UMF_RESULT_SUCCESS; +} + +umf_result_t +umfMemspaceMemtargetRemove(umf_memspace_handle_t hMemspace, + umf_const_memtarget_handle_t hMemtarget) { + if (!hMemspace || !hMemtarget) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + unsigned i; + for (i = 0; i < hMemspace->size; i++) { + int cmp; + umf_result_t ret = + umfMemtargetCompare(hMemspace->nodes[i], hMemtarget, &cmp); + + if (ret != UMF_RESULT_SUCCESS) { + return ret; + } + + if (cmp == 0) { + break; + } + } + + if (i == hMemspace->size) { + LOG_ERR("Memory target not found in the memspace"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + umf_memtarget_handle_t *newNodes = + umf_ba_global_alloc(sizeof(*newNodes) * (hMemspace->size - 1)); + if (!newNodes) { + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + for (unsigned j = 0, z = 0; j < hMemspace->size; j++) { + if (j != i) { + newNodes[z++] = hMemspace->nodes[j]; + } + } + + umfMemtargetDestroy(hMemspace->nodes[i]); + umf_ba_global_free(hMemspace->nodes); + hMemspace->nodes = newNodes; + hMemspace->size--; + return UMF_RESULT_SUCCESS; +} + +// Helper function - returns zero on success, negative in case of error in filter function +// and positive error code, in case of other errors. 
+static int umfMemspaceFilterHelper(umf_memspace_handle_t memspace, + umf_memspace_filter_func_t filter, + void *args) { + + if (!memspace || !filter) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + size_t idx = 0; + int ret; + umf_memtarget_handle_t *nodesToRemove = + umf_ba_global_alloc(sizeof(*nodesToRemove) * memspace->size); + if (!nodesToRemove) { + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + for (size_t i = 0; i < memspace->size; i++) { + ret = filter(memspace, memspace->nodes[i], args); + if (ret < 0) { + LOG_ERR("filter function failed"); + goto free_mem; + } else if (ret == 0) { + nodesToRemove[idx++] = memspace->nodes[i]; + } + } + + size_t i = 0; + for (; i < idx; i++) { + ret = umfMemspaceMemtargetRemove(memspace, nodesToRemove[i]); + if (ret != UMF_RESULT_SUCCESS) { + goto re_add; + } + } + + umf_ba_global_free(nodesToRemove); + return UMF_RESULT_SUCCESS; + +re_add: + // If target removal failed, add back previously removed targets. + for (size_t j = 0; j < i; j++) { + umf_result_t ret2 = umfMemspaceMemtargetAdd(memspace, nodesToRemove[j]); + if (ret2 != UMF_RESULT_SUCCESS) { + ret = + UMF_RESULT_ERROR_UNKNOWN; // indicate that memspace is corrupted + break; + } + } +free_mem: + umf_ba_global_free(nodesToRemove); + return ret; +} + +umf_result_t umfMemspaceUserFilter(umf_memspace_handle_t memspace, + umf_memspace_filter_func_t filter, + void *args) { + + if (!memspace || !filter) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + int ret = umfMemspaceFilterHelper(memspace, filter, args); + if (ret < 0) { + return UMF_RESULT_ERROR_USER_SPECIFIC; + } + + return ret; +} + +typedef struct filter_by_id_args { + unsigned *ids; // array of numa nodes ids + size_t size; // size of the array +} filter_by_id_args_t; + +/* + * The following predefined filter callbacks returns umf_result_t codes as negative value + * because only negative values are treated as errors. 
umfMemspaceFilterHelper() will pass + * this error code through and umfMemspaceFilterBy*() functions will translate this code to positive + * umf_result_t code. + */ + +static int filterById(umf_const_memspace_handle_t memspace, + umf_const_memtarget_handle_t target, void *args) { + if (!memspace || !target || !args) { + return -UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + filter_by_id_args_t *filterArgs = args; + for (size_t i = 0; i < filterArgs->size; i++) { + unsigned id; + umf_result_t ret = umfMemtargetGetId(target, &id); + if (ret != UMF_RESULT_SUCCESS) { + return -ret; + } + + if (id == filterArgs->ids[i]) { + return 1; + } + } + return 0; +} + +static int filterByCapacity(umf_const_memspace_handle_t memspace, + umf_const_memtarget_handle_t target, void *args) { + if (!memspace || !target || !args) { + return -UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + size_t capacity; + umf_result_t ret = umfMemtargetGetCapacity(target, &capacity); + if (ret != UMF_RESULT_SUCCESS) { + return -ret; + } + + size_t *targetCapacity = args; + return (capacity >= *targetCapacity) ? 1 : 0; +} + +umf_result_t umfMemspaceFilterById(umf_memspace_handle_t memspace, + unsigned *ids, size_t size) { + if (!memspace || !ids || size == 0) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + filter_by_id_args_t args = {ids, size}; + int ret = umfMemspaceFilterHelper(memspace, &filterById, &args); + + // if umfMemspaceFilter() returned negative umf_result_t change it to positive + return ret < 0 ? -ret : ret; +} + +umf_result_t umfMemspaceFilterByCapacity(umf_memspace_handle_t memspace, + int64_t capacity) { + if (!memspace) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + // TODO: At this moment this function filters out memory targets that capacity is + // less than specified size. We can extend this function to support reverse filter, + // by using negative values of capacity parameter. + // For now we just return invalid argument. 
+ if (capacity < 0) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + int ret = umfMemspaceFilterHelper(memspace, &filterByCapacity, &capacity); + + // if umfMemspaceFilter() returned negative umf_result_t change it to positive + return ret < 0 ? -ret : ret; +} diff --git a/src/memspace_internal.h b/src/memspace_internal.h index b570a7472..e31898e84 100644 --- a/src/memspace_internal.h +++ b/src/memspace_internal.h @@ -13,7 +13,7 @@ #include #include "base_alloc.h" -#include "memory_target.h" +#include "memtarget_internal.h" #ifdef __cplusplus extern "C" { @@ -21,16 +21,10 @@ extern "C" { struct umf_memspace_t { size_t size; - umf_memory_target_handle_t *nodes; + umf_memtarget_handle_t *nodes; }; -/// -/// \brief Clones memspace -/// -umf_result_t umfMemspaceClone(umf_const_memspace_handle_t hMemspace, - umf_memspace_handle_t *outHandle); - -typedef umf_result_t (*umfGetPropertyFn)(umf_memory_target_handle_t, +typedef umf_result_t (*umfGetPropertyFn)(umf_const_memtarget_handle_t, uint64_t *); /// @@ -39,10 +33,10 @@ typedef umf_result_t (*umfGetPropertyFn)(umf_memory_target_handle_t, umf_result_t umfMemspaceSortDesc(umf_memspace_handle_t hMemspace, umfGetPropertyFn getProperty); -typedef umf_result_t (*umfGetTargetFn)(umf_memory_target_handle_t initiator, - umf_memory_target_handle_t *nodes, +typedef umf_result_t (*umfGetTargetFn)(umf_memtarget_handle_t initiator, + umf_memtarget_handle_t *nodes, size_t numNodes, - umf_memory_target_handle_t *target); + umf_memtarget_handle_t *target); /// /// \brief Filters the targets using getTarget() to create a new memspace diff --git a/src/memspaces/memspace_highest_bandwidth.c b/src/memspaces/memspace_highest_bandwidth.c index d82e5f4f1..93fede2cd 100644 --- a/src/memspaces/memspace_highest_bandwidth.c +++ b/src/memspaces/memspace_highest_bandwidth.c @@ -11,24 +11,38 @@ #include #include +#include +#include + +// UMF_MEMSPACE_HIGHEST_BANDWIDTH requires HWLOC +// Additionally, it is currently unsupported on Win +#if 
defined(_WIN32) || defined(UMF_NO_HWLOC) + +umf_const_memspace_handle_t umfMemspaceHighestBandwidthGet(void) { + // not supported + return NULL; +} + +#else // !defined(_WIN32) && !defined(UMF_NO_HWLOC) + #include "base_alloc_global.h" -#include "memory_target_numa.h" #include "memspace_internal.h" +#include "memtarget_numa.h" #include "topology.h" #include "utils_common.h" #include "utils_concurrency.h" #include "utils_log.h" -static umf_result_t getBestBandwidthTarget(umf_memory_target_handle_t initiator, - umf_memory_target_handle_t *nodes, +static umf_result_t getBestBandwidthTarget(umf_memtarget_handle_t initiator, + umf_memtarget_handle_t *nodes, size_t numNodes, - umf_memory_target_handle_t *target) { + umf_memtarget_handle_t *target) { size_t bestNodeIdx = 0; size_t bestBandwidth = 0; for (size_t nodeIdx = 0; nodeIdx < numNodes; nodeIdx++) { size_t bandwidth = 0; umf_result_t ret = - umfMemoryTargetGetBandwidth(initiator, nodes[nodeIdx], &bandwidth); + umfMemtargetGetBandwidth(initiator, nodes[nodeIdx], &bandwidth); if (ret) { return ret; } @@ -96,7 +110,9 @@ static void umfMemspaceHighestBandwidthInit(void) { } umf_const_memspace_handle_t umfMemspaceHighestBandwidthGet(void) { - util_init_once(&UMF_MEMSPACE_HBW_INITIALIZED, - umfMemspaceHighestBandwidthInit); + utils_init_once(&UMF_MEMSPACE_HBW_INITIALIZED, + umfMemspaceHighestBandwidthInit); return UMF_MEMSPACE_HIGHEST_BANDWIDTH; } + +#endif // !defined(_WIN32) && !defined(UMF_NO_HWLOC) diff --git a/src/memspaces/memspace_highest_capacity.c b/src/memspaces/memspace_highest_capacity.c index 0b8f3522e..4a195316a 100644 --- a/src/memspaces/memspace_highest_capacity.c +++ b/src/memspaces/memspace_highest_capacity.c @@ -10,9 +10,23 @@ #include #include +#include +#include + +// UMF_MEMSPACE_HIGHEST_CAPACITY requires HWLOC +// Additionally, it is currently unsupported on Win +#if defined(_WIN32) || defined(UMF_NO_HWLOC) + +umf_const_memspace_handle_t umfMemspaceHighestCapacityGet(void) { + // not supported + 
return NULL; +} + +#else // !defined(_WIN32) && !defined(UMF_NO_HWLOC) + #include "base_alloc_global.h" -#include "memory_target_numa.h" #include "memspace_internal.h" +#include "memtarget_numa.h" #include "topology.h" #include "utils_concurrency.h" @@ -34,7 +48,7 @@ umfMemspaceHighestCapacityCreate(umf_memspace_handle_t *hMemspace) { } ret = umfMemspaceSortDesc(highCapacityMemspace, - (umfGetPropertyFn)&umfMemoryTargetGetCapacity); + (umfGetPropertyFn)&umfMemtargetGetCapacity); if (ret != UMF_RESULT_SUCCESS) { return ret; } @@ -68,7 +82,9 @@ static void umfMemspaceHighestCapacityInit(void) { } umf_const_memspace_handle_t umfMemspaceHighestCapacityGet(void) { - util_init_once(&UMF_MEMSPACE_HIGHEST_CAPACITY_INITIALIZED, - umfMemspaceHighestCapacityInit); + utils_init_once(&UMF_MEMSPACE_HIGHEST_CAPACITY_INITIALIZED, + umfMemspaceHighestCapacityInit); return UMF_MEMSPACE_HIGHEST_CAPACITY; } + +#endif // !defined(_WIN32) && !defined(UMF_NO_HWLOC) diff --git a/src/memspaces/memspace_host_all.c b/src/memspaces/memspace_host_all.c index 62c968743..4b7db69d4 100644 --- a/src/memspaces/memspace_host_all.c +++ b/src/memspaces/memspace_host_all.c @@ -10,9 +10,23 @@ #include #include +#include +#include + +// UMF_MEMSPACE_HOST_ALL requires HWLOC +// Additionally, it is currently unsupported on Win + +#if defined(_WIN32) || defined(UMF_NO_HWLOC) +umf_const_memspace_handle_t umfMemspaceHostAllGet(void) { + // not supported + return NULL; +} + +#else // !defined(_WIN32) && !defined(UMF_NO_HWLOC) + #include "base_alloc_global.h" -#include "memory_target_numa.h" #include "memspace_internal.h" +#include "memtarget_numa.h" #include "topology.h" #include "utils_concurrency.h" @@ -90,6 +104,8 @@ static void umfMemspaceHostAllInit(void) { } umf_const_memspace_handle_t umfMemspaceHostAllGet(void) { - util_init_once(&UMF_MEMSPACE_HOST_ALL_INITIALIZED, umfMemspaceHostAllInit); + utils_init_once(&UMF_MEMSPACE_HOST_ALL_INITIALIZED, umfMemspaceHostAllInit); return UMF_MEMSPACE_HOST_ALL; } + 
+#endif // !defined(_WIN32) && !defined(UMF_NO_HWLOC) diff --git a/src/memspaces/memspace_lowest_latency.c b/src/memspaces/memspace_lowest_latency.c index 2c6656ab2..5ca369fee 100644 --- a/src/memspaces/memspace_lowest_latency.c +++ b/src/memspaces/memspace_lowest_latency.c @@ -11,24 +11,38 @@ #include #include +#include +#include + +// UMF_MEMSPACE_LOWEST_LATENCY requires HWLOC +// Additionally, it is currently unsupported on Win +#if defined(_WIN32) || defined(UMF_NO_HWLOC) + +umf_const_memspace_handle_t umfMemspaceLowestLatencyGet(void) { + // not supported + return NULL; +} + +#else // !defined(_WIN32) && !defined(UMF_NO_HWLOC) + #include "base_alloc_global.h" -#include "memory_target_numa.h" #include "memspace_internal.h" +#include "memtarget_numa.h" #include "topology.h" #include "utils_common.h" #include "utils_concurrency.h" #include "utils_log.h" -static umf_result_t getBestLatencyTarget(umf_memory_target_handle_t initiator, - umf_memory_target_handle_t *nodes, +static umf_result_t getBestLatencyTarget(umf_memtarget_handle_t initiator, + umf_memtarget_handle_t *nodes, size_t numNodes, - umf_memory_target_handle_t *target) { + umf_memtarget_handle_t *target) { size_t bestNodeIdx = 0; size_t bestLatency = SIZE_MAX; for (size_t nodeIdx = 0; nodeIdx < numNodes; nodeIdx++) { size_t latency = SIZE_MAX; umf_result_t ret = - umfMemoryTargetGetLatency(initiator, nodes[nodeIdx], &latency); + umfMemtargetGetLatency(initiator, nodes[nodeIdx], &latency); if (ret) { return ret; } @@ -96,7 +110,9 @@ static void umfMemspaceLowestLatencyInit(void) { } umf_const_memspace_handle_t umfMemspaceLowestLatencyGet(void) { - util_init_once(&UMF_MEMSPACE_LOWEST_LATENCY_INITIALIZED, - umfMemspaceLowestLatencyInit); + utils_init_once(&UMF_MEMSPACE_LOWEST_LATENCY_INITIALIZED, + umfMemspaceLowestLatencyInit); return UMF_MEMSPACE_LOWEST_LATENCY; } + +#endif // !defined(_WIN32) && !defined(UMF_NO_HWLOC) diff --git a/src/memspaces/memspace_numa.c b/src/memspaces/memspace_numa.c index 
52dc85b64..0028e394d 100644 --- a/src/memspaces/memspace_numa.c +++ b/src/memspaces/memspace_numa.c @@ -9,8 +9,26 @@ #include -#include "../memory_targets/memory_target_numa.h" +#include +#include + +// umfMemspaceCreateFromNumaArray requires HWLOC +// Additionally, it is currently unsupported on Win +#if defined(_WIN32) || defined(UMF_NO_HWLOC) +umf_result_t umfMemspaceCreateFromNumaArray(unsigned *nodeIds, size_t numIds, + umf_memspace_handle_t *hMemspace) { + (void)nodeIds; + (void)numIds; + (void)hMemspace; + + // not supported + return UMF_RESULT_ERROR_NOT_SUPPORTED; +} + +#else // !defined(_WIN32) && !defined(UMF_NO_HWLOC) + #include "../memspace_internal.h" +#include "../memtargets/memtarget_numa.h" #include "base_alloc_global.h" umf_result_t umfMemspaceCreateFromNumaArray(unsigned *nodeIds, size_t numIds, @@ -27,8 +45,8 @@ umf_result_t umfMemspaceCreateFromNumaArray(unsigned *nodeIds, size_t numIds, } memspace->size = numIds; - memspace->nodes = umf_ba_global_alloc(memspace->size * - sizeof(umf_memory_target_handle_t)); + memspace->nodes = + umf_ba_global_alloc(memspace->size * sizeof(umf_memtarget_handle_t)); if (!memspace->nodes) { ret = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; goto err_nodes_alloc; @@ -36,9 +54,9 @@ umf_result_t umfMemspaceCreateFromNumaArray(unsigned *nodeIds, size_t numIds, size_t nodeIdx; for (nodeIdx = 0; nodeIdx < numIds; nodeIdx++) { - struct umf_numa_memory_target_config_t config = {nodeIds[nodeIdx]}; - ret = umfMemoryTargetCreate(&UMF_MEMORY_TARGET_NUMA_OPS, &config, - &memspace->nodes[nodeIdx]); + struct umf_numa_memtarget_config_t config = {nodeIds[nodeIdx]}; + ret = umfMemtargetCreate(&UMF_MEMTARGET_NUMA_OPS, &config, + &memspace->nodes[nodeIdx]); if (ret) { goto err_target_create; } @@ -51,9 +69,11 @@ umf_result_t umfMemspaceCreateFromNumaArray(unsigned *nodeIds, size_t numIds, err_target_create: umf_ba_global_free(memspace->nodes); for (size_t i = 0; i < nodeIdx; i++) { - umfMemoryTargetDestroy(memspace->nodes[i]); + 
umfMemtargetDestroy(memspace->nodes[i]); } err_nodes_alloc: umf_ba_global_free(memspace); return ret; } + +#endif // !defined(_WIN32) && !defined(UMF_NO_HWLOC) diff --git a/src/memtarget.c b/src/memtarget.c new file mode 100644 index 000000000..a89708460 --- /dev/null +++ b/src/memtarget.c @@ -0,0 +1,156 @@ +/* + * + * Copyright (C) 2023-2024 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + */ + +#include +#include + +#include "base_alloc_global.h" +#include "libumf.h" +#include "memtarget_internal.h" +#include "memtarget_ops.h" +#include "utils_concurrency.h" + +umf_result_t umfMemtargetCreate(const umf_memtarget_ops_t *ops, void *params, + umf_memtarget_handle_t *memoryTarget) { + libumfInit(); + if (!ops || !memoryTarget) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + umf_memtarget_handle_t target = + umf_ba_global_alloc(sizeof(umf_memtarget_t)); + if (!target) { + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + assert(ops->version == UMF_VERSION_CURRENT); + + target->ops = ops; + + void *target_priv; + umf_result_t ret = ops->initialize(params, &target_priv); + if (ret != UMF_RESULT_SUCCESS) { + umf_ba_global_free(target); + return ret; + } + + target->priv = target_priv; + + *memoryTarget = target; + + return UMF_RESULT_SUCCESS; +} + +void umfMemtargetDestroy(umf_memtarget_handle_t memoryTarget) { + assert(memoryTarget); + memoryTarget->ops->finalize(memoryTarget->priv); + umf_ba_global_free(memoryTarget); +} + +umf_result_t umfMemtargetClone(umf_const_memtarget_handle_t memoryTarget, + umf_memtarget_handle_t *outHandle) { + assert(memoryTarget); + assert(outHandle); + + *outHandle = umf_ba_global_alloc(sizeof(umf_memtarget_t)); + if (!*outHandle) { + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + void *outPriv; + umf_result_t ret = memoryTarget->ops->clone(memoryTarget->priv, &outPriv); + if (ret != UMF_RESULT_SUCCESS) { + 
umf_ba_global_free(*outHandle); + return ret; + } + + (*outHandle)->ops = memoryTarget->ops; + (*outHandle)->priv = outPriv; + + return UMF_RESULT_SUCCESS; +} + +umf_result_t umfMemtargetGetCapacity(umf_const_memtarget_handle_t memoryTarget, + size_t *capacity) { + if (!memoryTarget || !capacity) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + return memoryTarget->ops->get_capacity(memoryTarget->priv, capacity); +} + +umf_result_t umfMemtargetGetBandwidth(umf_memtarget_handle_t srcMemoryTarget, + umf_memtarget_handle_t dstMemoryTarget, + size_t *bandwidth) { + if (!srcMemoryTarget || !dstMemoryTarget || !bandwidth) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + return srcMemoryTarget->ops->get_bandwidth( + srcMemoryTarget->priv, dstMemoryTarget->priv, bandwidth); +} + +umf_result_t umfMemtargetGetLatency(umf_memtarget_handle_t srcMemoryTarget, + umf_memtarget_handle_t dstMemoryTarget, + size_t *latency) { + if (!srcMemoryTarget || !dstMemoryTarget || !latency) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + return srcMemoryTarget->ops->get_latency(srcMemoryTarget->priv, + dstMemoryTarget->priv, latency); +} + +umf_result_t umfMemtargetGetId(umf_const_memtarget_handle_t hMemtarget, + unsigned *id) { + if (!hMemtarget || !id) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + return hMemtarget->ops->get_id(hMemtarget->priv, id); +} + +umf_result_t umfMemtargetGetType(umf_const_memtarget_handle_t memoryTarget, + umf_memtarget_type_t *type) { + if (!memoryTarget || !type) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + return memoryTarget->ops->get_type(memoryTarget->priv, type); +} + +umf_result_t umfMemtargetCompare(umf_const_memtarget_handle_t a, + umf_const_memtarget_handle_t b, int *result) { + umf_memtarget_type_t typeA, typeB; + umf_result_t ret = umfMemtargetGetType(a, &typeA); + if (ret != UMF_RESULT_SUCCESS) { + return ret; + } + + ret = umfMemtargetGetType(b, &typeB); + if (ret != UMF_RESULT_SUCCESS) { + return ret; + } + + if 
(typeA != typeB) { + *result = -1; + return UMF_RESULT_SUCCESS; + } + + ret = a->ops->compare(a->priv, b->priv, result); + if (ret != UMF_RESULT_SUCCESS) { + return ret; + } + + if (*result) { + *result = 1; + } + + return UMF_RESULT_SUCCESS; +} diff --git a/src/memtarget_internal.h b/src/memtarget_internal.h new file mode 100644 index 000000000..c5b9a61c5 --- /dev/null +++ b/src/memtarget_internal.h @@ -0,0 +1,49 @@ +/* + * + * Copyright (C) 2023-2024 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + */ + +#ifndef UMF_MEMTARGET_INTERNAL_H +#define UMF_MEMTARGET_INTERNAL_H 1 + +#include +#include +#ifdef __cplusplus +extern "C" { +#endif + +struct umf_memtarget_ops_t; +typedef struct umf_memtarget_ops_t umf_memtarget_ops_t; + +typedef struct umf_memtarget_t { + const umf_memtarget_ops_t *ops; + void *priv; +} umf_memtarget_t; + +umf_result_t umfMemtargetCreate(const umf_memtarget_ops_t *ops, void *params, + umf_memtarget_handle_t *memoryTarget); +void umfMemtargetDestroy(umf_memtarget_handle_t memoryTarget); + +umf_result_t umfMemtargetClone(umf_const_memtarget_handle_t memoryTarget, + umf_memtarget_handle_t *outHandle); + +umf_result_t umfMemtargetGetBandwidth(umf_memtarget_handle_t srcMemoryTarget, + umf_memtarget_handle_t dstMemoryTarget, + size_t *bandwidth); +umf_result_t umfMemtargetGetLatency(umf_memtarget_handle_t srcMemoryTarget, + umf_memtarget_handle_t dstMemoryTarget, + size_t *latency); + +/// return 0 if memtargets are equal, -1 if they are of different types, +/// and 1 if they are different targets of the same type +umf_result_t umfMemtargetCompare(umf_const_memtarget_handle_t a, + umf_const_memtarget_handle_t b, int *result); +#ifdef __cplusplus +} +#endif + +#endif /* UMF_MEMTARGET_INTERNAL_H */ diff --git a/src/memory_target_ops.h b/src/memtarget_ops.h similarity index 74% rename from src/memory_target_ops.h rename to src/memtarget_ops.h 
index 24e4e8108..75e16447e 100644 --- a/src/memory_target_ops.h +++ b/src/memtarget_ops.h @@ -1,25 +1,24 @@ /* * - * Copyright (C) 2023 Intel Corporation + * Copyright (C) 2023-2024 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * */ -#ifndef UMF_MEMORY_TARGET_OPS_H -#define UMF_MEMORY_TARGET_OPS_H 1 +#ifndef UMF_MEMTARGET_OPS_H +#define UMF_MEMTARGET_OPS_H 1 #include #include +#include #ifdef __cplusplus extern "C" { #endif -typedef struct umf_memory_target_t *umf_memory_target_handle_t; - -typedef struct umf_memory_target_ops_t { +typedef struct umf_memtarget_ops_t { /// Version of the ops structure. /// Should be initialized using UMF_VERSION_CURRENT uint32_t version; @@ -44,10 +43,15 @@ typedef struct umf_memory_target_ops_t { size_t *bandwidth); umf_result_t (*get_latency)(void *srcMemoryTarget, void *dstMemoryTarget, size_t *latency); -} umf_memory_target_ops_t; + + umf_result_t (*get_type)(void *memoryTarget, umf_memtarget_type_t *type); + umf_result_t (*get_id)(void *memoryTarget, unsigned *type); + umf_result_t (*compare)(void *memTarget, void *otherMemTarget, int *result); + +} umf_memtarget_ops_t; #ifdef __cplusplus } #endif -#endif /* #ifndef UMF_MEMORY_TARGET_OPS_H */ +#endif /* #ifndef UMF_MEMTARGET_OPS_H */ diff --git a/src/memory_targets/memory_target_numa.c b/src/memtargets/memtarget_numa.c similarity index 65% rename from src/memory_targets/memory_target_numa.c rename to src/memtargets/memtarget_numa.c index aa33a1853..f32774ebb 100644 --- a/src/memory_targets/memory_target_numa.c +++ b/src/memtargets/memtarget_numa.c @@ -16,13 +16,13 @@ #include "../memory_pool_internal.h" #include "base_alloc.h" #include "base_alloc_global.h" -#include "memory_target_numa.h" #include "mempolicy_internal.h" +#include "memtarget_numa.h" #include "topology.h" #include "utils_assert.h" #include "utils_log.h" -struct numa_memory_target_t { +struct numa_memtarget_t { 
unsigned physical_id; }; @@ -31,16 +31,16 @@ static umf_result_t numa_initialize(void *params, void **memTarget) { return UMF_RESULT_ERROR_INVALID_ARGUMENT; } - struct umf_numa_memory_target_config_t *config = - (struct umf_numa_memory_target_config_t *)params; + struct umf_numa_memtarget_config_t *config = + (struct umf_numa_memtarget_config_t *)params; - struct numa_memory_target_t *numaTarget = - umf_ba_global_alloc(sizeof(struct numa_memory_target_t)); + struct numa_memtarget_t *numaTarget = + umf_ba_global_alloc(sizeof(struct numa_memtarget_t)); if (!numaTarget) { return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; } - numaTarget->physical_id = config->physical_id; + numaTarget->physical_id = (unsigned)config->physical_id; *memTarget = numaTarget; return UMF_RESULT_SUCCESS; } @@ -52,8 +52,8 @@ static umf_result_t numa_memory_provider_create_from_memspace( umf_const_mempolicy_handle_t policy, umf_memory_provider_handle_t *provider) { - struct numa_memory_target_t **numaTargets = - (struct numa_memory_target_t **)memTargets; + struct numa_memtarget_t **numaTargets = + (struct numa_memtarget_t **)memTargets; size_t numNodesProvider; @@ -69,22 +69,27 @@ static umf_result_t numa_memory_provider_create_from_memspace( return UMF_RESULT_ERROR_INVALID_ARGUMENT; } - umf_os_memory_provider_params_t params = umfOsMemoryProviderParamsDefault(); + umf_numa_mode_t numa_mode = UMF_NUMA_MODE_DEFAULT; + size_t part_size = 0; + umf_numa_split_partition_t *partitions = NULL; + unsigned partitions_len = 0; + unsigned *numa_list = NULL; + unsigned numa_list_len = 0; if (policy) { switch (policy->type) { case UMF_MEMPOLICY_INTERLEAVE: - params.numa_mode = UMF_NUMA_MODE_INTERLEAVE; - params.part_size = policy->ops.interleave.part_size; + numa_mode = UMF_NUMA_MODE_INTERLEAVE; + part_size = policy->ops.interleave.part_size; break; case UMF_MEMPOLICY_BIND: - params.numa_mode = UMF_NUMA_MODE_BIND; + numa_mode = UMF_NUMA_MODE_BIND; break; case UMF_MEMPOLICY_PREFERRED: - params.numa_mode = 
UMF_NUMA_MODE_PREFERRED; + numa_mode = UMF_NUMA_MODE_PREFERRED; break; case UMF_MEMPOLICY_SPLIT: - params.numa_mode = UMF_NUMA_MODE_SPLIT; + numa_mode = UMF_NUMA_MODE_SPLIT; // compile time check to ensure we can just cast // umf_mempolicy_split_partition_t to @@ -98,9 +103,8 @@ static umf_result_t numa_memory_provider_create_from_memspace( offsetof(umf_mempolicy_split_partition_t, target) != offsetof(umf_numa_split_partition_t, target)); - params.partitions = - (umf_numa_split_partition_t *)policy->ops.split.part; - params.partitions_len = policy->ops.split.part_len; + partitions = (umf_numa_split_partition_t *)policy->ops.split.part; + partitions_len = (unsigned)policy->ops.split.part_len; break; default: return UMF_RESULT_ERROR_INVALID_ARGUMENT; @@ -109,44 +113,80 @@ static umf_result_t numa_memory_provider_create_from_memspace( if (memspace == umfMemspaceHostAllGet()) { // For the default memspace, we use the default mode without any // call to mbind - params.numa_mode = UMF_NUMA_MODE_DEFAULT; + numa_mode = UMF_NUMA_MODE_DEFAULT; } else { - params.numa_mode = UMF_NUMA_MODE_BIND; + numa_mode = UMF_NUMA_MODE_BIND; } } if (memspace == umfMemspaceHostAllGet() && policy == NULL) { // For default memspace with default policy we use all numa nodes so // simply left numa list empty - params.numa_list_len = 0; - params.numa_list = NULL; + numa_list_len = 0; + numa_list = NULL; } else { - params.numa_list = - umf_ba_global_alloc(sizeof(*params.numa_list) * numNodesProvider); + numa_list = umf_ba_global_alloc(sizeof(*numa_list) * numNodesProvider); - if (!params.numa_list) { + if (!numa_list) { return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; } for (size_t i = 0; i < numNodesProvider; i++) { - params.numa_list[i] = numaTargets[i]->physical_id; + numa_list[i] = numaTargets[i]->physical_id; } - params.numa_list_len = numNodesProvider; + numa_list_len = (unsigned)numNodesProvider; } - umf_memory_provider_handle_t numaProvider = NULL; - int ret = 
umfMemoryProviderCreate(umfOsMemoryProviderOps(), ¶ms, - &numaProvider); + umf_os_memory_provider_params_handle_t params = NULL; + umf_result_t ret = umfOsMemoryProviderParamsCreate(¶ms); + if (ret != UMF_RESULT_SUCCESS) { + LOG_ERR("Creating OS memory provider params failed"); + goto destroy_numa_list; + } - umf_ba_global_free(params.numa_list); + ret = umfOsMemoryProviderParamsSetNumaMode(params, numa_mode); + if (ret != UMF_RESULT_SUCCESS) { + LOG_ERR("Setting NUMA mode failed"); + goto destroy_provider_params; + } - if (ret) { - return ret; + ret = + umfOsMemoryProviderParamsSetNumaList(params, numa_list, numa_list_len); + if (ret != UMF_RESULT_SUCCESS) { + LOG_ERR("Setting NUMA list failed"); + goto destroy_provider_params; + } + + ret = umfOsMemoryProviderParamsSetPartitions(params, partitions, + partitions_len); + if (ret != UMF_RESULT_SUCCESS) { + LOG_ERR("Setting partitions failed"); + goto destroy_provider_params; + } + + ret = umfOsMemoryProviderParamsSetPartSize(params, part_size); + if (ret != UMF_RESULT_SUCCESS) { + LOG_ERR("Setting part size failed"); + goto destroy_provider_params; + } + + umf_memory_provider_handle_t numaProvider = NULL; + ret = umfMemoryProviderCreate(umfOsMemoryProviderOps(), params, + &numaProvider); + if (ret != UMF_RESULT_SUCCESS) { + LOG_ERR("Creating OS memory provider failed"); + goto destroy_provider_params; } *provider = numaProvider; - return UMF_RESULT_SUCCESS; +destroy_provider_params: + umfOsMemoryProviderParamsDestroy(params); + +destroy_numa_list: + umf_ba_global_free(numa_list); + + return ret; } static umf_result_t numa_pool_create_from_memspace( @@ -161,10 +201,9 @@ static umf_result_t numa_pool_create_from_memspace( } static umf_result_t numa_clone(void *memTarget, void **outMemTarget) { - struct numa_memory_target_t *numaTarget = - (struct numa_memory_target_t *)memTarget; - struct numa_memory_target_t *newNumaTarget = - umf_ba_global_alloc(sizeof(struct numa_memory_target_t)); + struct numa_memtarget_t 
*numaTarget = (struct numa_memtarget_t *)memTarget; + struct numa_memtarget_t *newNumaTarget = + umf_ba_global_alloc(sizeof(struct numa_memtarget_t)); if (!newNumaTarget) { return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; } @@ -185,7 +224,7 @@ static umf_result_t numa_get_capacity(void *memTarget, size_t *capacity) { } hwloc_obj_t numaNode = hwloc_get_numanode_obj_by_os_index( - topology, ((struct numa_memory_target_t *)memTarget)->physical_id); + topology, ((struct numa_memtarget_t *)memTarget)->physical_id); if (!numaNode) { return UMF_RESULT_ERROR_INVALID_ARGUMENT; } @@ -226,7 +265,7 @@ static umf_result_t query_attribute_value(void *srcMemoryTarget, hwloc_obj_t srcNumaNode = hwloc_get_obj_by_type( topology, HWLOC_OBJ_NUMANODE, - ((struct numa_memory_target_t *)srcMemoryTarget)->physical_id); + ((struct numa_memtarget_t *)srcMemoryTarget)->physical_id); if (!srcNumaNode) { LOG_PERR("Getting HWLOC object by type failed"); return UMF_RESULT_ERROR_INVALID_ARGUMENT; @@ -234,7 +273,7 @@ static umf_result_t query_attribute_value(void *srcMemoryTarget, hwloc_obj_t dstNumaNode = hwloc_get_obj_by_type( topology, HWLOC_OBJ_NUMANODE, - ((struct numa_memory_target_t *)dstMemoryTarget)->physical_id); + ((struct numa_memtarget_t *)dstMemoryTarget)->physical_id); if (!dstNumaNode) { LOG_PERR("Getting HWLOC object by type failed"); return UMF_RESULT_ERROR_INVALID_ARGUMENT; @@ -292,8 +331,8 @@ static umf_result_t numa_get_bandwidth(void *srcMemoryTarget, bandwidth, MEMATTR_TYPE_BANDWIDTH); if (ret) { LOG_ERR("Retrieving bandwidth for initiator node %u to node %u failed.", - ((struct numa_memory_target_t *)srcMemoryTarget)->physical_id, - ((struct numa_memory_target_t *)dstMemoryTarget)->physical_id); + ((struct numa_memtarget_t *)srcMemoryTarget)->physical_id, + ((struct numa_memtarget_t *)dstMemoryTarget)->physical_id); return ret; } @@ -310,15 +349,47 @@ static umf_result_t numa_get_latency(void *srcMemoryTarget, latency, MEMATTR_TYPE_LATENCY); if (ret) { LOG_ERR("Retrieving 
latency for initiator node %u to node %u failed.", - ((struct numa_memory_target_t *)srcMemoryTarget)->physical_id, - ((struct numa_memory_target_t *)dstMemoryTarget)->physical_id); + ((struct numa_memtarget_t *)srcMemoryTarget)->physical_id, + ((struct numa_memtarget_t *)dstMemoryTarget)->physical_id); return ret; } return UMF_RESULT_SUCCESS; } -struct umf_memory_target_ops_t UMF_MEMORY_TARGET_NUMA_OPS = { +static umf_result_t numa_get_type(void *memTarget, umf_memtarget_type_t *type) { + if (!memTarget || !type) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + *type = UMF_MEMTARGET_TYPE_NUMA; + return UMF_RESULT_SUCCESS; +} + +static umf_result_t numa_get_id(void *memTarget, unsigned *id) { + if (!memTarget || !id) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + *id = ((struct numa_memtarget_t *)memTarget)->physical_id; + return UMF_RESULT_SUCCESS; +} + +static umf_result_t numa_compare(void *memTarget, void *otherMemTarget, + int *result) { + if (!memTarget || !otherMemTarget || !result) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + struct numa_memtarget_t *numaTarget = (struct numa_memtarget_t *)memTarget; + struct numa_memtarget_t *otherNumaTarget = + (struct numa_memtarget_t *)otherMemTarget; + + *result = numaTarget->physical_id != otherNumaTarget->physical_id; + return UMF_RESULT_SUCCESS; +} + +struct umf_memtarget_ops_t UMF_MEMTARGET_NUMA_OPS = { .version = UMF_VERSION_CURRENT, .initialize = numa_initialize, .finalize = numa_finalize, @@ -327,5 +398,8 @@ struct umf_memory_target_ops_t UMF_MEMORY_TARGET_NUMA_OPS = { .get_capacity = numa_get_capacity, .get_bandwidth = numa_get_bandwidth, .get_latency = numa_get_latency, + .get_type = numa_get_type, + .get_id = numa_get_id, + .compare = numa_compare, .memory_provider_create_from_memspace = numa_memory_provider_create_from_memspace}; diff --git a/src/memory_targets/memory_target_numa.h b/src/memtargets/memtarget_numa.h similarity index 54% rename from src/memory_targets/memory_target_numa.h 
rename to src/memtargets/memtarget_numa.h index 843610a2a..2d3e3fd70 100644 --- a/src/memory_targets/memory_target_numa.h +++ b/src/memtargets/memtarget_numa.h @@ -7,27 +7,27 @@ * */ -#ifndef UMF_MEMORY_TARGET_NUMA_H -#define UMF_MEMORY_TARGET_NUMA_H 1 +#ifndef UMF_MEMTARGET_NUMA_H +#define UMF_MEMTARGET_NUMA_H 1 #include #include -#include "../memory_target.h" -#include "../memory_target_ops.h" +#include "../memtarget_internal.h" +#include "../memtarget_ops.h" #ifdef __cplusplus extern "C" { #endif -struct umf_numa_memory_target_config_t { +struct umf_numa_memtarget_config_t { size_t physical_id; }; -extern struct umf_memory_target_ops_t UMF_MEMORY_TARGET_NUMA_OPS; +extern struct umf_memtarget_ops_t UMF_MEMTARGET_NUMA_OPS; #ifdef __cplusplus } #endif -#endif /* UMF_MEMORY_TARGET_NUMA_H */ +#endif /* UMF_MEMTARGET_NUMA_H */ diff --git a/src/pool/pool_disjoint.cpp b/src/pool/pool_disjoint.cpp index edb5fc649..e0298b43d 100644 --- a/src/pool/pool_disjoint.cpp +++ b/src/pool/pool_disjoint.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -57,13 +58,43 @@ typedef struct umf_disjoint_pool_shared_limits_t { std::atomic TotalSize; } umf_disjoint_pool_shared_limits_t; +// Configuration of Disjoint Pool +typedef struct umf_disjoint_pool_params_t { + // Minimum allocation size that will be requested from the memory provider. + size_t SlabMinSize; + + // Allocations up to this limit will be subject to chunking/pooling + size_t MaxPoolableSize; + + // When pooling, each bucket will hold a max of 'Capacity' unfreed slabs + size_t Capacity; + + // Holds the minimum bucket size valid for allocation of a memory type. + // This value must be a power of 2. + size_t MinBucketSize; + + // Holds size of the pool managed by the allocator. + size_t CurPoolSize; + + // Whether to print pool usage statistics + int PoolTrace; + + // Memory limits that can be shared between multitple pool instances, + // i.e. 
if multiple pools use the same SharedLimits sum of those pools' + // sizes cannot exceed MaxSize. + umf_disjoint_pool_shared_limits_handle_t SharedLimits; + + // Name used in traces + char *Name; +} umf_disjoint_pool_params_t; + class DisjointPool { public: class AllocImpl; using Config = umf_disjoint_pool_params_t; umf_result_t initialize(umf_memory_provider_handle_t provider, - umf_disjoint_pool_params_t *parameters); + umf_disjoint_pool_params_handle_t parameters); void *malloc(size_t size); void *calloc(size_t, size_t); void *realloc(void *, size_t); @@ -85,8 +116,151 @@ umfDisjointPoolSharedLimitsCreate(size_t MaxSize) { } void umfDisjointPoolSharedLimitsDestroy( - umf_disjoint_pool_shared_limits_t *limits) { - delete limits; + umf_disjoint_pool_shared_limits_handle_t hSharedLimits) { + delete hSharedLimits; +} + +umf_result_t +umfDisjointPoolParamsCreate(umf_disjoint_pool_params_handle_t *hParams) { + static const char *DEFAULT_NAME = "disjoint_pool"; + + if (!hParams) { + LOG_ERR("disjoint pool params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + umf_disjoint_pool_params_handle_t params = new umf_disjoint_pool_params_t{}; + if (params == nullptr) { + LOG_ERR("cannot allocate memory for disjoint pool params"); + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + params->SlabMinSize = 0; + params->MaxPoolableSize = 0; + params->Capacity = 0; + params->MinBucketSize = UMF_DISJOINT_POOL_MIN_BUCKET_DEFAULT_SIZE; + params->CurPoolSize = 0; + params->PoolTrace = 0; + params->SharedLimits = nullptr; + params->Name = nullptr; + + umf_result_t ret = umfDisjointPoolParamsSetName(params, DEFAULT_NAME); + if (ret != UMF_RESULT_SUCCESS) { + delete params; + return ret; + } + + *hParams = params; + + return UMF_RESULT_SUCCESS; +} + +umf_result_t +umfDisjointPoolParamsDestroy(umf_disjoint_pool_params_handle_t hParams) { + if (hParams) { + delete[] hParams->Name; + delete hParams; + } + + return UMF_RESULT_SUCCESS; +} + +umf_result_t 
+umfDisjointPoolParamsSetSlabMinSize(umf_disjoint_pool_params_handle_t hParams, + size_t slabMinSize) { + if (!hParams) { + LOG_ERR("disjoint pool params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + hParams->SlabMinSize = slabMinSize; + return UMF_RESULT_SUCCESS; +} + +umf_result_t umfDisjointPoolParamsSetMaxPoolableSize( + umf_disjoint_pool_params_handle_t hParams, size_t maxPoolableSize) { + if (!hParams) { + LOG_ERR("disjoint pool params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + hParams->MaxPoolableSize = maxPoolableSize; + return UMF_RESULT_SUCCESS; +} + +umf_result_t +umfDisjointPoolParamsSetCapacity(umf_disjoint_pool_params_handle_t hParams, + size_t maxCapacity) { + if (!hParams) { + LOG_ERR("disjoint pool params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + hParams->Capacity = maxCapacity; + return UMF_RESULT_SUCCESS; +} + +umf_result_t +umfDisjointPoolParamsSetMinBucketSize(umf_disjoint_pool_params_handle_t hParams, + size_t minBucketSize) { + if (!hParams) { + LOG_ERR("disjoint pool params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + // minBucketSize parameter must be a power of 2 and greater than 0. 
+ if (minBucketSize == 0 || (minBucketSize & (minBucketSize - 1))) { + LOG_ERR("minBucketSize must be a power of 2 and greater than 0"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + hParams->MinBucketSize = minBucketSize; + return UMF_RESULT_SUCCESS; +} + +umf_result_t +umfDisjointPoolParamsSetTrace(umf_disjoint_pool_params_handle_t hParams, + int poolTrace) { + if (!hParams) { + LOG_ERR("disjoint pool params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + hParams->PoolTrace = poolTrace; + return UMF_RESULT_SUCCESS; +} + +umf_result_t umfDisjointPoolParamsSetSharedLimits( + umf_disjoint_pool_params_handle_t hParams, + umf_disjoint_pool_shared_limits_handle_t hSharedLimits) { + if (!hParams) { + LOG_ERR("disjoint pool params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + hParams->SharedLimits = hSharedLimits; + return UMF_RESULT_SUCCESS; +} + +umf_result_t +umfDisjointPoolParamsSetName(umf_disjoint_pool_params_handle_t hParams, + const char *name) { + if (!hParams) { + LOG_ERR("disjoint pool params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + char *newName = new char[std::strlen(name) + 1]; + if (newName == nullptr) { + LOG_ERR("cannot allocate memory for disjoint pool name"); + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + delete[] hParams->Name; + hParams->Name = newName; + std::strcpy(hParams->Name, name); + + return UMF_RESULT_SUCCESS; } // Allocations are a minimum of 4KB/64KB/2MB even when a smaller size is @@ -237,7 +411,7 @@ class Bucket { // When a slab becomes entirely free we have to decide whether to return it // to the provider or keep it allocated. A simple check for size of the // Available list is not sufficient to check whether any slab has been - // pooled yet.We would have to traverse the entire Available listand check + // pooled yet. We would have to traverse the entire Available list and check // if any of them is entirely free. 
Instead we keep a counter of entirely // empty slabs within the Available list to speed up the process of checking // if a slab in this bucket is already pooled. @@ -351,11 +525,15 @@ class DisjointPool::AllocImpl { public: AllocImpl(umf_memory_provider_handle_t hProvider, - umf_disjoint_pool_params_t *params) + umf_disjoint_pool_params_handle_t params) : MemHandle{hProvider}, params(*params) { VALGRIND_DO_CREATE_MEMPOOL(this, 0, 0); + // deep copy of the Name + this->params.Name = new char[std::strlen(params->Name) + 1]; + std::strcpy(this->params.Name, params->Name); + // Generate buckets sized such as: 64, 96, 128, 192, ..., CutOff. // Powers of 2 and the value halfway between the powers of 2. auto Size1 = this->params.MinBucketSize; @@ -379,7 +557,10 @@ class DisjointPool::AllocImpl { } } - ~AllocImpl() { VALGRIND_DO_DESTROY_MEMPOOL(this); } + ~AllocImpl() { + VALGRIND_DO_DESTROY_MEMPOOL(this); + delete[] this->params.Name; + } void *allocate(size_t Size, size_t Alignment, bool &FromPool); void *allocate(size_t Size, bool &FromPool); @@ -1015,8 +1196,9 @@ void DisjointPool::AllocImpl::printStats(bool &TitlePrinted, } } -umf_result_t DisjointPool::initialize(umf_memory_provider_handle_t provider, - umf_disjoint_pool_params_t *parameters) { +umf_result_t +DisjointPool::initialize(umf_memory_provider_handle_t provider, + umf_disjoint_pool_params_handle_t parameters) { if (!provider) { return UMF_RESULT_ERROR_INVALID_ARGUMENT; } diff --git a/src/pool/pool_jemalloc.c b/src/pool/pool_jemalloc.c index 094ceeaf7..3ec7c7805 100644 --- a/src/pool/pool_jemalloc.c +++ b/src/pool/pool_jemalloc.c @@ -41,6 +41,12 @@ typedef struct jemalloc_memory_pool_t { bool disable_provider_free; } jemalloc_memory_pool_t; +// Configuration of Jemalloc Pool +typedef struct umf_jemalloc_pool_params_t { + /// Set to true if umfMemoryProviderFree() should never be called. 
+ bool disable_provider_free; +} umf_jemalloc_pool_params_t; + static __TLS umf_result_t TLS_last_allocation_error; static jemalloc_memory_pool_t *pool_by_arena_index[MALLCTL_ARENAS_ALL]; @@ -53,6 +59,52 @@ static jemalloc_memory_pool_t *get_pool_by_arena_index(unsigned arena_ind) { return pool_by_arena_index[arena_ind]; } +umf_result_t +umfJemallocPoolParamsCreate(umf_jemalloc_pool_params_handle_t *hParams) { + if (!hParams) { + LOG_ERR("jemalloc pool params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + umf_jemalloc_pool_params_t *params_data = + umf_ba_global_alloc(sizeof(*params_data)); + if (!params_data) { + LOG_ERR("cannot allocate memory for jemalloc poolparams"); + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + params_data->disable_provider_free = false; + + *hParams = (umf_jemalloc_pool_params_handle_t)params_data; + + return UMF_RESULT_SUCCESS; +} + +umf_result_t +umfJemallocPoolParamsDestroy(umf_jemalloc_pool_params_handle_t hParams) { + if (!hParams) { + LOG_ERR("jemalloc pool params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + umf_ba_global_free(hParams); + + return UMF_RESULT_SUCCESS; +} + +umf_result_t +umfJemallocPoolParamsSetKeepAllMemory(umf_jemalloc_pool_params_handle_t hParams, + bool keepAllMemory) { + if (!hParams) { + LOG_ERR("jemalloc pool params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + hParams->disable_provider_free = keepAllMemory; + + return UMF_RESULT_SUCCESS; +} + // arena_extent_alloc - an extent allocation function conforms to the extent_alloc_t type and upon // success returns a pointer to size bytes of mapped memory on behalf of arena arena_ind such that // the extent's base address is a multiple of alignment, as well as setting *zero to indicate @@ -203,7 +255,7 @@ static bool arena_extent_decommit(extent_hooks_t *extent_hooks, void *addr, // physical pages within the virtual memory mapping associated with an extent at given addr and size // at 
offset bytes, extending for length on behalf of arena arena_ind. A lazy extent purge function // (e.g. implemented via madvise(...MADV_FREE)) can delay purging indefinitely and leave the pages -// within the purged virtual memory range in an indeterminite state, whereas a forced extent purge +// within the purged virtual memory range in an indeterminate state, whereas a forced extent purge // function immediately purges, and the pages within the virtual memory range will be zero-filled // the next time they are accessed. If the function returns true, this indicates failure to purge. // (from https://jemalloc.net/jemalloc.3.html) @@ -401,8 +453,8 @@ static umf_result_t op_initialize(umf_memory_provider_handle_t provider, assert(provider); assert(out_pool); - umf_jemalloc_pool_params_t *je_params = - (umf_jemalloc_pool_params_t *)params; + umf_jemalloc_pool_params_handle_t je_params = + (umf_jemalloc_pool_params_handle_t)params; extent_hooks_t *pHooks = &arena_extent_hooks; size_t unsigned_size = sizeof(unsigned); diff --git a/src/pool/pool_scalable.c b/src/pool/pool_scalable.c index cb5d5b157..6ee364344 100644 --- a/src/pool/pool_scalable.c +++ b/src/pool/pool_scalable.c @@ -19,6 +19,7 @@ #include #include "base_alloc_global.h" +#include "libumf.h" #include "utils_common.h" #include "utils_concurrency.h" #include "utils_load_library.h" @@ -31,6 +32,7 @@ typedef void (*raw_free_tbb_type)(intptr_t, void *, size_t); static __TLS umf_result_t TLS_last_allocation_error; static __TLS umf_result_t TLS_last_free_error; +static const size_t DEFAULT_GRANULARITY = 2 * 1024 * 1024; // 2MB typedef struct tbb_mem_pool_policy_t { raw_alloc_tbb_type pAlloc; raw_free_tbb_type pFree; @@ -39,6 +41,11 @@ typedef struct tbb_mem_pool_policy_t { unsigned fixed_pool : 1, keep_all_memory : 1, reserved : 30; } tbb_mem_pool_policy_t; +typedef struct umf_scalable_pool_params_t { + size_t granularity; + bool keep_all_memory; +} umf_scalable_pool_params_t; + typedef struct tbb_callbacks_t { void 
*(*pool_malloc)(void *, size_t); void *(*pool_realloc)(void *, void *, size_t); @@ -106,7 +113,7 @@ static int init_tbb_callbacks(tbb_callbacks_t *tbb_callbacks) { assert(tbb_callbacks); const char *lib_name = tbb_symbol[TBB_LIB_NAME]; - tbb_callbacks->lib_handle = util_open_library(lib_name, 0); + tbb_callbacks->lib_handle = utils_open_library(lib_name, 0); if (!tbb_callbacks->lib_handle) { LOG_ERR("%s required by Scalable Pool not found - install TBB malloc " "or make sure it is in the default search paths.", @@ -114,22 +121,22 @@ static int init_tbb_callbacks(tbb_callbacks_t *tbb_callbacks) { return -1; } - *(void **)&tbb_callbacks->pool_malloc = util_get_symbol_addr( + *(void **)&tbb_callbacks->pool_malloc = utils_get_symbol_addr( tbb_callbacks->lib_handle, tbb_symbol[TBB_POOL_MALLOC], lib_name); - *(void **)&tbb_callbacks->pool_realloc = util_get_symbol_addr( + *(void **)&tbb_callbacks->pool_realloc = utils_get_symbol_addr( tbb_callbacks->lib_handle, tbb_symbol[TBB_POOL_REALLOC], lib_name); *(void **)&tbb_callbacks->pool_aligned_malloc = - util_get_symbol_addr(tbb_callbacks->lib_handle, - tbb_symbol[TBB_POOL_ALIGNED_MALLOC], lib_name); - *(void **)&tbb_callbacks->pool_free = util_get_symbol_addr( + utils_get_symbol_addr(tbb_callbacks->lib_handle, + tbb_symbol[TBB_POOL_ALIGNED_MALLOC], lib_name); + *(void **)&tbb_callbacks->pool_free = utils_get_symbol_addr( tbb_callbacks->lib_handle, tbb_symbol[TBB_POOL_FREE], lib_name); - *(void **)&tbb_callbacks->pool_create_v1 = util_get_symbol_addr( + *(void **)&tbb_callbacks->pool_create_v1 = utils_get_symbol_addr( tbb_callbacks->lib_handle, tbb_symbol[TBB_POOL_CREATE_V1], lib_name); - *(void **)&tbb_callbacks->pool_destroy = util_get_symbol_addr( + *(void **)&tbb_callbacks->pool_destroy = utils_get_symbol_addr( tbb_callbacks->lib_handle, tbb_symbol[TBB_POOL_DESTROY], lib_name); - *(void **)&tbb_callbacks->pool_identify = util_get_symbol_addr( + *(void **)&tbb_callbacks->pool_identify = utils_get_symbol_addr( 
tbb_callbacks->lib_handle, tbb_symbol[TBB_POOL_IDENTIFY], lib_name); - *(void **)&tbb_callbacks->pool_msize = util_get_symbol_addr( + *(void **)&tbb_callbacks->pool_msize = utils_get_symbol_addr( tbb_callbacks->lib_handle, tbb_symbol[TBB_POOL_MSIZE], lib_name); if (!tbb_callbacks->pool_malloc || !tbb_callbacks->pool_realloc || @@ -137,7 +144,7 @@ static int init_tbb_callbacks(tbb_callbacks_t *tbb_callbacks) { !tbb_callbacks->pool_create_v1 || !tbb_callbacks->pool_destroy || !tbb_callbacks->pool_identify) { LOG_ERR("Could not find symbols in %s", lib_name); - util_close_library(tbb_callbacks->lib_handle); + utils_close_library(tbb_callbacks->lib_handle); return -1; } @@ -167,19 +174,89 @@ static void tbb_raw_free_wrapper(intptr_t pool_id, void *ptr, size_t bytes) { } } +umf_result_t +umfScalablePoolParamsCreate(umf_scalable_pool_params_handle_t *hParams) { + libumfInit(); + if (!hParams) { + LOG_ERR("scalable pool params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + umf_scalable_pool_params_t *params_data = + umf_ba_global_alloc(sizeof(umf_scalable_pool_params_t)); + if (!params_data) { + LOG_ERR("cannot allocate memory for scalable pool params"); + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + params_data->granularity = DEFAULT_GRANULARITY; + params_data->keep_all_memory = false; + + *hParams = (umf_scalable_pool_params_handle_t)params_data; + + return UMF_RESULT_SUCCESS; +} + +umf_result_t +umfScalablePoolParamsDestroy(umf_scalable_pool_params_handle_t hParams) { + if (!hParams) { + LOG_ERR("scalable pool params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + umf_ba_global_free(hParams); + + return UMF_RESULT_SUCCESS; +} + +umf_result_t +umfScalablePoolParamsSetGranularity(umf_scalable_pool_params_handle_t hParams, + size_t granularity) { + if (!hParams) { + LOG_ERR("scalable pool params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + if (granularity == 0) { + LOG_ERR("granularity cannot be 
0"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + hParams->granularity = granularity; + + return UMF_RESULT_SUCCESS; +} + +umf_result_t +umfScalablePoolParamsSetKeepAllMemory(umf_scalable_pool_params_handle_t hParams, + bool keepAllMemory) { + if (!hParams) { + LOG_ERR("scalable pool params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + hParams->keep_all_memory = keepAllMemory; + + return UMF_RESULT_SUCCESS; +} + static umf_result_t tbb_pool_initialize(umf_memory_provider_handle_t provider, void *params, void **pool) { - (void)params; // unused - - const size_t GRANULARITY = 2 * 1024 * 1024; tbb_mem_pool_policy_t policy = {.pAlloc = tbb_raw_alloc_wrapper, .pFree = tbb_raw_free_wrapper, - .granularity = GRANULARITY, + .granularity = DEFAULT_GRANULARITY, .version = 1, .fixed_pool = false, .keep_all_memory = false, .reserved = 0}; + if (params) { + umf_scalable_pool_params_handle_t scalable_params = + (umf_scalable_pool_params_handle_t)params; + policy.granularity = scalable_params->granularity; + policy.keep_all_memory = scalable_params->keep_all_memory; + } + tbb_memory_pool_t *pool_data = umf_ba_global_alloc(sizeof(tbb_memory_pool_t)); if (!pool_data) { @@ -208,7 +285,7 @@ static umf_result_t tbb_pool_initialize(umf_memory_provider_handle_t provider, static void tbb_pool_finalize(void *pool) { tbb_memory_pool_t *pool_data = (tbb_memory_pool_t *)pool; pool_data->tbb_callbacks.pool_destroy(pool_data->tbb_pool); - util_close_library(pool_data->tbb_callbacks.lib_handle); + utils_close_library(pool_data->tbb_callbacks.lib_handle); umf_ba_global_free(pool_data); } diff --git a/src/provider/provider_coarse.c b/src/provider/provider_coarse.c new file mode 100644 index 000000000..c3027b91d --- /dev/null +++ b/src/provider/provider_coarse.c @@ -0,0 +1,1707 @@ +/* + * Copyright (C) 2023-2024 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +*/ + +#include +#include +#include +#include +#include +#include + +#include + +#include "base_alloc_global.h" +#include "memory_provider_internal.h" +#include "ravl.h" +#include "utils_common.h" +#include "utils_concurrency.h" +#include "utils_log.h" + +#define COARSE_BASE_NAME "coarse" + +#define IS_ORIGIN_OF_BLOCK(origin, block) \ + (((uintptr_t)(block)->data >= (uintptr_t)(origin)->data) && \ + ((uintptr_t)(block)->data + (block)->size <= \ + (uintptr_t)(origin)->data + (origin)->size)) + +typedef struct coarse_memory_provider_t { + umf_memory_provider_handle_t upstream_memory_provider; + + // destroy upstream_memory_provider in finalize() + bool destroy_upstream_memory_provider; + + // memory allocation strategy + coarse_memory_provider_strategy_t allocation_strategy; + + void *init_buffer; + + size_t used_size; + size_t alloc_size; + + // upstream_blocks - tree of all blocks allocated from the upstream provider + struct ravl *upstream_blocks; + + // all_blocks - tree of all blocks - sorted by an address of data + struct ravl *all_blocks; + + // free_blocks - tree of free blocks - sorted by a size of data, + // each node contains a pointer (ravl_free_blocks_head_t) + // to the head of the list of free blocks of the same size + struct ravl *free_blocks; + + struct utils_mutex_t lock; + + // Name of the provider with the upstream provider: + // "coarse ()" + // for example: "coarse (L0)" + char *name; + + // Set to true if the free() operation of the upstream memory provider is not supported + // (i.e. 
if (umfMemoryProviderFree(upstream_memory_provider, NULL, 0) == UMF_RESULT_ERROR_NOT_SUPPORTED) + bool disable_upstream_provider_free; +} coarse_memory_provider_t; + +typedef struct ravl_node ravl_node_t; + +typedef enum check_free_blocks_t { + CHECK_ONLY_THE_FIRST_BLOCK = 0, + CHECK_ALL_BLOCKS_OF_SIZE, +} check_free_blocks_t; + +typedef struct block_t { + size_t size; + unsigned char *data; + bool used; + + // Node in the list of free blocks of the same size pointing to this block. + // The list is located in the (coarse_provider->free_blocks) RAVL tree. + struct ravl_free_blocks_elem_t *free_list_ptr; +} block_t; + +// A general node in a RAVL tree. +// 1) coarse_provider->all_blocks RAVL tree (tree of all blocks - sorted by an address of data): +// key - pointer (block_t->data) to the beginning of the block data +// value - pointer (block_t) to the block of the allocation +// 2) coarse_provider->free_blocks RAVL tree (tree of free blocks - sorted by a size of data): +// key - size of the allocation (block_t->size) +// value - pointer (ravl_free_blocks_head_t) to the head of the list of free blocks of the same size +typedef struct ravl_data_t { + uintptr_t key; + void *value; +} ravl_data_t; + +// The head of the list of free blocks of the same size. 
+typedef struct ravl_free_blocks_head_t { + struct ravl_free_blocks_elem_t *head; +} ravl_free_blocks_head_t; + +// The node of the list of free blocks of the same size +typedef struct ravl_free_blocks_elem_t { + struct block_t *block; + struct ravl_free_blocks_elem_t *next; + struct ravl_free_blocks_elem_t *prev; +} ravl_free_blocks_elem_t; + +// The compare function of a RAVL tree +static int coarse_ravl_comp(const void *lhs, const void *rhs) { + const ravl_data_t *lhs_ravl = (const ravl_data_t *)lhs; + const ravl_data_t *rhs_ravl = (const ravl_data_t *)rhs; + + if (lhs_ravl->key < rhs_ravl->key) { + return -1; + } + + if (lhs_ravl->key > rhs_ravl->key) { + return 1; + } + + // lhs_ravl->key == rhs_ravl->key + return 0; +} + +static inline block_t *get_node_block(ravl_node_t *node) { + ravl_data_t *node_data = ravl_data(node); + assert(node_data); + assert(node_data->value); + return node_data->value; +} + +static inline ravl_node_t *get_node_prev(ravl_node_t *node) { + return ravl_node_predecessor(node); +} + +static inline ravl_node_t *get_node_next(ravl_node_t *node) { + return ravl_node_successor(node); +} + +#ifndef NDEBUG +static block_t *get_block_prev(ravl_node_t *node) { + ravl_node_t *ravl_prev = ravl_node_predecessor(node); + if (!ravl_prev) { + return NULL; + } + + return get_node_block(ravl_prev); +} + +static block_t *get_block_next(ravl_node_t *node) { + ravl_node_t *ravl_next = ravl_node_successor(node); + if (!ravl_next) { + return NULL; + } + + return get_node_block(ravl_next); +} +#endif /* NDEBUG */ + +static bool is_same_origin(struct ravl *upstream_blocks, block_t *block1, + block_t *block2) { + ravl_data_t rdata1 = {(uintptr_t)block1->data, NULL}; + ravl_node_t *ravl_origin1 = + ravl_find(upstream_blocks, &rdata1, RAVL_PREDICATE_LESS_EQUAL); + assert(ravl_origin1); + + block_t *origin1 = get_node_block(ravl_origin1); + assert(IS_ORIGIN_OF_BLOCK(origin1, block1)); + + return (IS_ORIGIN_OF_BLOCK(origin1, block2)); +} + +// The functions 
"coarse_ravl_*" handle lists of blocks: +// - coarse_provider->all_blocks and coarse_provider->upstream_blocks +// sorted by a pointer (block_t->data) to the beginning of the block data. +// +// coarse_ravl_add_new - allocate and add a new block to the tree +// and link this block to the next and the previous one. +static block_t *coarse_ravl_add_new(struct ravl *rtree, unsigned char *data, + size_t size, ravl_node_t **node) { + assert(rtree); + assert(data); + assert(size); + + // TODO add valgrind annotations + block_t *block = umf_ba_global_alloc(sizeof(*block)); + if (block == NULL) { + return NULL; + } + + block->data = data; + block->size = size; + block->free_list_ptr = NULL; + + ravl_data_t rdata = {(uintptr_t)block->data, block}; + assert(NULL == ravl_find(rtree, &data, RAVL_PREDICATE_EQUAL)); + int ret = ravl_emplace_copy(rtree, &rdata); + if (ret) { + umf_ba_global_free(block); + return NULL; + } + + ravl_node_t *new_node = ravl_find(rtree, &rdata, RAVL_PREDICATE_EQUAL); + assert(NULL != new_node); + + if (node) { + *node = new_node; + } + + return block; +} + +// coarse_ravl_find_node - find the node in the tree +static ravl_node_t *coarse_ravl_find_node(struct ravl *rtree, void *ptr) { + ravl_data_t data = {(uintptr_t)ptr, NULL}; + return ravl_find(rtree, &data, RAVL_PREDICATE_EQUAL); +} + +// coarse_ravl_rm - remove the block from the tree +static block_t *coarse_ravl_rm(struct ravl *rtree, void *ptr) { + ravl_data_t data = {(uintptr_t)ptr, NULL}; + ravl_node_t *node; + node = ravl_find(rtree, &data, RAVL_PREDICATE_EQUAL); + if (node) { + ravl_data_t *node_data = ravl_data(node); + assert(node_data); + block_t *block = node_data->value; + assert(block); + ravl_remove(rtree, node); + assert(NULL == ravl_find(rtree, &data, RAVL_PREDICATE_EQUAL)); + return block; + } + return NULL; +} + +// The functions "node_list_*" handle lists of free blocks of the same size. 
+// The heads (ravl_free_blocks_head_t) of those lists are stored in nodes of +// the coarse_provider->free_blocks RAVL tree. +// +// node_list_add - add a free block to the list of free blocks of the same size +static ravl_free_blocks_elem_t * +node_list_add(ravl_free_blocks_head_t *head_node, struct block_t *block) { + assert(head_node); + assert(block); + + ravl_free_blocks_elem_t *node = umf_ba_global_alloc(sizeof(*node)); + if (node == NULL) { + return NULL; + } + + if (head_node->head) { + head_node->head->prev = node; + } + + node->block = block; + node->next = head_node->head; + node->prev = NULL; + head_node->head = node; + + return node; +} + +// node_list_rm - remove the given free block from the list of free blocks of the same size +static block_t *node_list_rm(ravl_free_blocks_head_t *head_node, + ravl_free_blocks_elem_t *node) { + assert(head_node); + assert(node); + + if (!head_node->head) { + return NULL; + } + + if (node == head_node->head) { + assert(node->prev == NULL); + head_node->head = node->next; + } + + ravl_free_blocks_elem_t *node_next = node->next; + ravl_free_blocks_elem_t *node_prev = node->prev; + if (node_next) { + node_next->prev = node_prev; + } + + if (node_prev) { + node_prev->next = node_next; + } + + struct block_t *block = node->block; + block->free_list_ptr = NULL; + umf_ba_global_free(node); + + return block; +} + +// node_list_rm_first - remove the first free block from the list of free blocks of the same size only if it can be properly aligned +static block_t *node_list_rm_first(ravl_free_blocks_head_t *head_node, + size_t alignment) { + assert(head_node); + + if (!head_node->head) { + return NULL; + } + + ravl_free_blocks_elem_t *node = head_node->head; + assert(node->prev == NULL); + struct block_t *block = node->block; + + if (IS_NOT_ALIGNED(block->size, alignment)) { + return NULL; + } + + if (node->next) { + node->next->prev = NULL; + } + + head_node->head = node->next; + block->free_list_ptr = NULL; + 
umf_ba_global_free(node); + + return block; +} + +// node_list_rm_with_alignment - remove the first free block with the correct alignment from the list of free blocks of the same size +static block_t *node_list_rm_with_alignment(ravl_free_blocks_head_t *head_node, + size_t alignment) { + assert(head_node); + + if (!head_node->head) { + return NULL; + } + + assert(((ravl_free_blocks_elem_t *)head_node->head)->prev == NULL); + + ravl_free_blocks_elem_t *node; + for (node = head_node->head; node != NULL; node = node->next) { + if (IS_ALIGNED(node->block->size, alignment)) { + return node_list_rm(head_node, node); + } + } + + return NULL; +} + +// The functions "free_blocks_*" handle the coarse_provider->free_blocks RAVL tree +// sorted by a size of the allocation (block_t->size). +// This is a tree of heads (ravl_free_blocks_head_t) of lists of free blocks of the same size. +// +// free_blocks_add - add a free block to the list of free blocks of the same size +static int free_blocks_add(struct ravl *free_blocks, block_t *block) { + ravl_free_blocks_head_t *head_node = NULL; + int rv; + + ravl_data_t head_node_data = {(uintptr_t)block->size, NULL}; + ravl_node_t *node; + node = ravl_find(free_blocks, &head_node_data, RAVL_PREDICATE_EQUAL); + if (node) { + ravl_data_t *node_data = ravl_data(node); + assert(node_data); + head_node = node_data->value; + assert(head_node); + } else { // no head_node + head_node = umf_ba_global_alloc(sizeof(*head_node)); + if (!head_node) { + return -1; + } + + head_node->head = NULL; + + ravl_data_t data = {(uintptr_t)block->size, head_node}; + rv = ravl_emplace_copy(free_blocks, &data); + if (rv) { + umf_ba_global_free(head_node); + return -1; + } + } + + block->free_list_ptr = node_list_add(head_node, block); + if (!block->free_list_ptr) { + return -1; + } + + assert(block->free_list_ptr->block->size == block->size); + + return 0; +} + +// free_blocks_rm_ge - remove the first free block of a size greater or equal to the given size only 
if it can be properly aligned +// If it was the last block, the head node is freed and removed from the tree. +// It is used during memory allocation (looking for a free block). +static block_t *free_blocks_rm_ge(struct ravl *free_blocks, size_t size, + size_t alignment, + check_free_blocks_t check_blocks) { + ravl_data_t data = {(uintptr_t)size, NULL}; + ravl_node_t *node; + node = ravl_find(free_blocks, &data, RAVL_PREDICATE_GREATER_EQUAL); + if (!node) { + return NULL; + } + + ravl_data_t *node_data = ravl_data(node); + assert(node_data); + assert(node_data->key >= size); + + ravl_free_blocks_head_t *head_node = node_data->value; + assert(head_node); + + block_t *block; + switch (check_blocks) { + case CHECK_ONLY_THE_FIRST_BLOCK: + block = node_list_rm_first(head_node, alignment); + break; + case CHECK_ALL_BLOCKS_OF_SIZE: + block = node_list_rm_with_alignment(head_node, alignment); + break; + // wrong value of check_blocks + default: + abort(); + } + + if (head_node->head == NULL) { + umf_ba_global_free(head_node); + ravl_remove(free_blocks, node); + } + + return block; +} + +// free_blocks_rm_node - remove the free block pointed by the given node. +// If it was the last block, the head node is freed and removed from the tree. +// It is used during merging free blocks and destroying the coarse_provider->free_blocks tree. 
+static block_t *free_blocks_rm_node(struct ravl *free_blocks, + ravl_free_blocks_elem_t *node) { + assert(free_blocks); + assert(node); + size_t size = node->block->size; + ravl_data_t data = {(uintptr_t)size, NULL}; + ravl_node_t *ravl_node; + ravl_node = ravl_find(free_blocks, &data, RAVL_PREDICATE_EQUAL); + assert(ravl_node); + + ravl_data_t *node_data = ravl_data(ravl_node); + assert(node_data); + assert(node_data->key == size); + + ravl_free_blocks_head_t *head_node = node_data->value; + assert(head_node); + + block_t *block = node_list_rm(head_node, node); + + if (head_node->head == NULL) { + umf_ba_global_free(head_node); + ravl_remove(free_blocks, ravl_node); + } + + return block; +} + +// user_block_merge - merge two blocks from one of two lists of user blocks: all_blocks or free_blocks +static umf_result_t user_block_merge(coarse_memory_provider_t *coarse_provider, + ravl_node_t *node1, ravl_node_t *node2, + bool used, ravl_node_t **merged_node) { + assert(node1); + assert(node2); + assert(node1 == get_node_prev(node2)); + assert(node2 == get_node_next(node1)); + assert(merged_node); + + *merged_node = NULL; + + struct ravl *upstream_blocks = coarse_provider->upstream_blocks; + struct ravl *all_blocks = coarse_provider->all_blocks; + struct ravl *free_blocks = coarse_provider->free_blocks; + + block_t *block1 = get_node_block(node1); + block_t *block2 = get_node_block(node2); + assert(block1->data < block2->data); + + bool same_used = ((block1->used == used) && (block2->used == used)); + bool contignous_data = (block1->data + block1->size == block2->data); + bool same_origin = is_same_origin(upstream_blocks, block1, block2); + + // check if blocks can be merged + if (!same_used || !contignous_data || !same_origin) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + if (block1->free_list_ptr) { + free_blocks_rm_node(free_blocks, block1->free_list_ptr); + block1->free_list_ptr = NULL; + } + + if (block2->free_list_ptr) { + 
free_blocks_rm_node(free_blocks, block2->free_list_ptr); + block2->free_list_ptr = NULL; + } + + // update the size + block1->size += block2->size; + + block_t *block_rm = coarse_ravl_rm(all_blocks, block2->data); + assert(block_rm == block2); + (void)block_rm; // WA for unused variable error + umf_ba_global_free(block2); + + *merged_node = node1; + + return UMF_RESULT_SUCCESS; +} + +// free_block_merge_with_prev - merge the given free block +// with the previous one if both are unused and have continuous data. +// Remove the merged block from the tree of free blocks. +static ravl_node_t * +free_block_merge_with_prev(coarse_memory_provider_t *coarse_provider, + ravl_node_t *node) { + ravl_node_t *node_prev = get_node_prev(node); + if (!node_prev) { + return node; + } + + ravl_node_t *merged_node = NULL; + umf_result_t umf_result = + user_block_merge(coarse_provider, node_prev, node, false, &merged_node); + if (umf_result != UMF_RESULT_SUCCESS) { + return node; + } + + assert(merged_node != NULL); + + return merged_node; +} + +// free_block_merge_with_next - merge the given free block +// with the next one if both are unused and have continuous data. +// Remove the merged block from the tree of free blocks. 
+static ravl_node_t * +free_block_merge_with_next(coarse_memory_provider_t *coarse_provider, + ravl_node_t *node) { + ravl_node_t *node_next = get_node_next(node); + if (!node_next) { + return node; + } + + ravl_node_t *merged_node = NULL; + umf_result_t umf_result = + user_block_merge(coarse_provider, node, node_next, false, &merged_node); + if (umf_result != UMF_RESULT_SUCCESS) { + return node; + } + + assert(merged_node != NULL); + + return merged_node; +} + +// upstream_block_merge - merge the given two upstream blocks +static umf_result_t +upstream_block_merge(coarse_memory_provider_t *coarse_provider, + ravl_node_t *node1, ravl_node_t *node2, + ravl_node_t **merged_node) { + assert(node1); + assert(node2); + assert(merged_node); + + *merged_node = NULL; + + umf_memory_provider_handle_t upstream_provider = + coarse_provider->upstream_memory_provider; + if (!upstream_provider) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + block_t *block1 = get_node_block(node1); + block_t *block2 = get_node_block(node2); + assert(block1->data < block2->data); + + bool contiguous_data = (block1->data + block1->size == block2->data); + if (!contiguous_data) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + // check if blocks can be merged by the upstream provider + umf_result_t merge_status = umfMemoryProviderAllocationMerge( + coarse_provider->upstream_memory_provider, block1->data, block2->data, + block1->size + block2->size); + if (merge_status != UMF_RESULT_SUCCESS) { + return merge_status; + } + + // update the size + block1->size += block2->size; + + struct ravl *upstream_blocks = coarse_provider->upstream_blocks; + block_t *block_rm = coarse_ravl_rm(upstream_blocks, block2->data); + assert(block_rm == block2); + (void)block_rm; // WA for unused variable error + umf_ba_global_free(block2); + + *merged_node = node1; + + return UMF_RESULT_SUCCESS; +} + +// upstream_block_merge_with_prev - merge the given upstream block +// with the previous one if both have 
continuous data. +// Remove the merged block from the tree of upstream blocks. +static ravl_node_t * +upstream_block_merge_with_prev(coarse_memory_provider_t *coarse_provider, + ravl_node_t *node) { + assert(node); + + ravl_node_t *node_prev = get_node_prev(node); + if (!node_prev) { + return node; + } + + ravl_node_t *merged_node = NULL; + umf_result_t umf_result = + upstream_block_merge(coarse_provider, node_prev, node, &merged_node); + if (umf_result != UMF_RESULT_SUCCESS) { + return node; + } + + assert(merged_node != NULL); + + return merged_node; +} + +// upstream_block_merge_with_next - merge the given upstream block +// with the next one if both have continuous data. +// Remove the merged block from the tree of upstream blocks. +static ravl_node_t * +upstream_block_merge_with_next(coarse_memory_provider_t *coarse_provider, + ravl_node_t *node) { + assert(node); + + ravl_node_t *node_next = get_node_next(node); + if (!node_next) { + return node; + } + + ravl_node_t *merged_node = NULL; + umf_result_t umf_result = + upstream_block_merge(coarse_provider, node, node_next, &merged_node); + if (umf_result != UMF_RESULT_SUCCESS) { + return node; + } + + assert(merged_node != NULL); + + return merged_node; +} + +#ifndef NDEBUG // begin of DEBUG code + +typedef struct debug_cb_args_t { + coarse_memory_provider_t *provider; + size_t sum_used; + size_t sum_blocks_size; + size_t num_all_blocks; + size_t num_free_blocks; + size_t num_alloc_blocks; + size_t sum_alloc_size; +} debug_cb_args_t; + +static void debug_verify_all_blocks_cb(void *data, void *arg) { + assert(data); + assert(arg); + + ravl_data_t *node_data = data; + block_t *block = node_data->value; + assert(block); + + debug_cb_args_t *cb_args = (debug_cb_args_t *)arg; + coarse_memory_provider_t *provider = cb_args->provider; + + ravl_node_t *node = + ravl_find(provider->all_blocks, data, RAVL_PREDICATE_EQUAL); + assert(node); + + block_t *block_next = get_block_next(node); + block_t *block_prev = 
get_block_prev(node); + + cb_args->num_all_blocks++; + if (!block->used) { + cb_args->num_free_blocks++; + } + + assert(block->data); + assert(block->size > 0); + + // There shouldn't be two adjacent unused blocks + // if they are continuous and have the same origin. + if (block_prev && !block_prev->used && !block->used && + (block_prev->data + block_prev->size == block->data)) { + assert(!is_same_origin(provider->upstream_blocks, block_prev, block)); + } + + if (block_next && !block_next->used && !block->used && + (block->data + block->size == block_next->data)) { + assert(!is_same_origin(provider->upstream_blocks, block, block_next)); + } + + // data addresses in the list are in ascending order + if (block_prev) { + assert(block_prev->data < block->data); + } + + if (block_next) { + assert(block->data < block_next->data); + } + + // two block's data should not overlap + if (block_next) { + assert((block->data + block->size) <= block_next->data); + } + + cb_args->sum_blocks_size += block->size; + if (block->used) { + cb_args->sum_used += block->size; + } +} + +static void debug_verify_upstream_blocks_cb(void *data, void *arg) { + assert(data); + assert(arg); + + ravl_data_t *node_data = data; + block_t *alloc = node_data->value; + assert(alloc); + + debug_cb_args_t *cb_args = (debug_cb_args_t *)arg; + coarse_memory_provider_t *provider = cb_args->provider; + + ravl_node_t *node = + ravl_find(provider->upstream_blocks, data, RAVL_PREDICATE_EQUAL); + assert(node); + + block_t *alloc_next = get_block_next(node); + block_t *alloc_prev = get_block_prev(node); + + cb_args->num_alloc_blocks++; + cb_args->sum_alloc_size += alloc->size; + + assert(alloc->data); + assert(alloc->size > 0); + + // data addresses in the list are in ascending order + if (alloc_prev) { + assert(alloc_prev->data < alloc->data); + } + + if (alloc_next) { + assert(alloc->data < alloc_next->data); + } + + // data should not overlap + if (alloc_next) { + assert((alloc->data + alloc->size) <= 
alloc_next->data); + } +} + +static umf_result_t +coarse_memory_provider_get_stats(void *provider, + coarse_memory_provider_stats_t *stats); + +static bool debug_check(coarse_memory_provider_t *provider) { + assert(provider); + + coarse_memory_provider_stats_t stats = {0}; + coarse_memory_provider_get_stats(provider, &stats); + + debug_cb_args_t cb_args = {0}; + cb_args.provider = provider; + + // verify the all_blocks list + ravl_foreach(provider->all_blocks, debug_verify_all_blocks_cb, &cb_args); + + assert(cb_args.num_all_blocks == stats.num_all_blocks); + assert(cb_args.num_free_blocks == stats.num_free_blocks); + assert(cb_args.sum_used == provider->used_size); + assert(cb_args.sum_blocks_size == provider->alloc_size); + assert(provider->alloc_size >= provider->used_size); + + // verify the upstream_blocks list + ravl_foreach(provider->upstream_blocks, debug_verify_upstream_blocks_cb, + &cb_args); + + assert(cb_args.sum_alloc_size == provider->alloc_size); + assert(cb_args.num_alloc_blocks == stats.num_upstream_blocks); + + return true; +} +#endif /* NDEBUG */ // end of DEBUG code + +static umf_result_t +coarse_add_upstream_block(coarse_memory_provider_t *coarse_provider, void *addr, + size_t size) { + ravl_node_t *alloc_node = NULL; + + block_t *alloc = coarse_ravl_add_new(coarse_provider->upstream_blocks, addr, + size, &alloc_node); + if (alloc == NULL) { + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + block_t *new_block = + coarse_ravl_add_new(coarse_provider->all_blocks, addr, size, NULL); + if (new_block == NULL) { + coarse_ravl_rm(coarse_provider->upstream_blocks, addr); + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + // check if the new upstream block can be merged with its neighbours + alloc_node = upstream_block_merge_with_prev(coarse_provider, alloc_node); + alloc_node = upstream_block_merge_with_next(coarse_provider, alloc_node); + + new_block->used = true; + coarse_provider->alloc_size += size; + coarse_provider->used_size += size; + + 
return UMF_RESULT_SUCCESS; +} + +static umf_result_t +coarse_memory_provider_set_name(coarse_memory_provider_t *coarse_provider) { + if (coarse_provider->upstream_memory_provider == NULL) { + // COARSE_BASE_NAME will be used + coarse_provider->name = NULL; + return UMF_RESULT_SUCCESS; + } + + const char *up_name = + umfMemoryProviderGetName(coarse_provider->upstream_memory_provider); + if (!up_name) { + return UMF_RESULT_ERROR_UNKNOWN; + } + + size_t length = + strlen(COARSE_BASE_NAME) + strlen(up_name) + 3; // + 3 for " ()" + + coarse_provider->name = umf_ba_global_alloc(length + 1); // + 1 for '\0' + if (coarse_provider->name == NULL) { + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + sprintf(coarse_provider->name, "%s (%s)", COARSE_BASE_NAME, up_name); + + return UMF_RESULT_SUCCESS; +} + +// needed for coarse_memory_provider_initialize() +static umf_result_t coarse_memory_provider_alloc(void *provider, size_t size, + size_t alignment, + void **resultPtr); + +// needed for coarse_memory_provider_initialize() +static umf_result_t coarse_memory_provider_free(void *provider, void *ptr, + size_t bytes); + +static umf_result_t coarse_memory_provider_initialize(void *params, + void **provider) { + assert(provider); + + if (params == NULL) { + LOG_ERR("coarse provider parameters are missing"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + coarse_memory_provider_params_t *coarse_params = + (coarse_memory_provider_params_t *)params; + + // check params + if (!coarse_params->upstream_memory_provider == + !coarse_params->init_buffer) { + LOG_ERR("either upstream provider or init buffer has to be provided in " + "the parameters (exactly one of them)"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + if (coarse_params->init_buffer_size == 0 && + (coarse_params->immediate_init_from_upstream || + coarse_params->init_buffer != NULL)) { + LOG_ERR("init_buffer_size has to be greater than 0 if " + "immediate_init_from_upstream or init_buffer is set"); + return 
UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + if (coarse_params->init_buffer_size != 0 && + (!coarse_params->immediate_init_from_upstream && + coarse_params->init_buffer == NULL)) { + LOG_ERR("init_buffer_size is greater than 0 but none of " + "immediate_init_from_upstream nor init_buffer is set"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + if (coarse_params->destroy_upstream_memory_provider && + !coarse_params->upstream_memory_provider) { + LOG_ERR("destroy_upstream_memory_provider is true, but an upstream " + "provider is not provided"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + coarse_memory_provider_t *coarse_provider = + umf_ba_global_alloc(sizeof(*coarse_provider)); + if (!coarse_provider) { + LOG_ERR("out of the host memory"); + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + memset(coarse_provider, 0, sizeof(*coarse_provider)); + + coarse_provider->upstream_memory_provider = + coarse_params->upstream_memory_provider; + coarse_provider->destroy_upstream_memory_provider = + coarse_params->destroy_upstream_memory_provider; + coarse_provider->allocation_strategy = coarse_params->allocation_strategy; + coarse_provider->init_buffer = coarse_params->init_buffer; + + if (coarse_provider->upstream_memory_provider) { + coarse_provider->disable_upstream_provider_free = + umfIsFreeOpDefault(coarse_provider->upstream_memory_provider); + } else { + coarse_provider->disable_upstream_provider_free = false; + } + + umf_result_t umf_result = coarse_memory_provider_set_name(coarse_provider); + if (umf_result != UMF_RESULT_SUCCESS) { + LOG_ERR("name initialization failed"); + goto err_free_coarse_provider; + } + + // most of the error handling paths below set this error + umf_result = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + + coarse_provider->upstream_blocks = + ravl_new_sized(coarse_ravl_comp, sizeof(ravl_data_t)); + if (coarse_provider->upstream_blocks == NULL) { + LOG_ERR("out of the host memory"); + goto err_free_name; + } + + 
coarse_provider->free_blocks = + ravl_new_sized(coarse_ravl_comp, sizeof(ravl_data_t)); + if (coarse_provider->free_blocks == NULL) { + LOG_ERR("out of the host memory"); + goto err_delete_ravl_upstream_blocks; + } + + coarse_provider->all_blocks = + ravl_new_sized(coarse_ravl_comp, sizeof(ravl_data_t)); + if (coarse_provider->all_blocks == NULL) { + LOG_ERR("out of the host memory"); + goto err_delete_ravl_free_blocks; + } + + coarse_provider->alloc_size = 0; + coarse_provider->used_size = 0; + + if (utils_mutex_init(&coarse_provider->lock) == NULL) { + LOG_ERR("lock initialization failed"); + umf_result = UMF_RESULT_ERROR_UNKNOWN; + goto err_delete_ravl_all_blocks; + } + + if (coarse_params->upstream_memory_provider && + coarse_params->immediate_init_from_upstream) { + // allocate and immediately deallocate memory using the upstream provider + void *init_buffer = NULL; + coarse_memory_provider_alloc( + coarse_provider, coarse_params->init_buffer_size, 0, &init_buffer); + if (init_buffer == NULL) { + goto err_destroy_mutex; + } + + coarse_memory_provider_free(coarse_provider, init_buffer, + coarse_params->init_buffer_size); + + } else if (coarse_params->init_buffer) { + umf_result = coarse_add_upstream_block(coarse_provider, + coarse_provider->init_buffer, + coarse_params->init_buffer_size); + if (umf_result != UMF_RESULT_SUCCESS) { + goto err_destroy_mutex; + } + + LOG_DEBUG("coarse_ALLOC (init_buffer) %zu used %zu alloc %zu", + coarse_params->init_buffer_size, coarse_provider->used_size, + coarse_provider->alloc_size); + + coarse_memory_provider_free(coarse_provider, + coarse_provider->init_buffer, + coarse_params->init_buffer_size); + } + + assert(coarse_provider->used_size == 0); + assert(coarse_provider->alloc_size == coarse_params->init_buffer_size); + assert(debug_check(coarse_provider)); + + *provider = coarse_provider; + + return UMF_RESULT_SUCCESS; + +err_destroy_mutex: + utils_mutex_destroy_not_free(&coarse_provider->lock); +err_delete_ravl_all_blocks: 
+ ravl_delete(coarse_provider->all_blocks); +err_delete_ravl_free_blocks: + ravl_delete(coarse_provider->free_blocks); +err_delete_ravl_upstream_blocks: + ravl_delete(coarse_provider->upstream_blocks); +err_free_name: + umf_ba_global_free(coarse_provider->name); +err_free_coarse_provider: + umf_ba_global_free(coarse_provider); + return umf_result; +} + +static void coarse_ravl_cb_rm_upstream_blocks_node(void *data, void *arg) { + assert(data); + assert(arg); + + coarse_memory_provider_t *coarse_provider = + (struct coarse_memory_provider_t *)arg; + ravl_data_t *node_data = data; + block_t *alloc = node_data->value; + assert(alloc); + + if (coarse_provider->upstream_memory_provider && + !coarse_provider->disable_upstream_provider_free) { + // We continue to deallocate alloc blocks even if the upstream provider doesn't return success. + umfMemoryProviderFree(coarse_provider->upstream_memory_provider, + alloc->data, alloc->size); + } + + assert(coarse_provider->alloc_size >= alloc->size); + coarse_provider->alloc_size -= alloc->size; + + umf_ba_global_free(alloc); +} + +static void coarse_ravl_cb_rm_all_blocks_node(void *data, void *arg) { + assert(data); + assert(arg); + + coarse_memory_provider_t *coarse_provider = + (struct coarse_memory_provider_t *)arg; + ravl_data_t *node_data = data; + block_t *block = node_data->value; + assert(block); + + if (block->used) { + assert(coarse_provider->used_size >= block->size); + coarse_provider->used_size -= block->size; + } + + if (block->free_list_ptr) { + free_blocks_rm_node(coarse_provider->free_blocks, block->free_list_ptr); + } + + umf_ba_global_free(block); +} + +static void coarse_memory_provider_finalize(void *provider) { + coarse_memory_provider_t *coarse_provider = + (struct coarse_memory_provider_t *)provider; + + utils_mutex_destroy_not_free(&coarse_provider->lock); + + ravl_foreach(coarse_provider->all_blocks, coarse_ravl_cb_rm_all_blocks_node, + coarse_provider); + assert(coarse_provider->used_size == 0); + + 
ravl_foreach(coarse_provider->upstream_blocks, + coarse_ravl_cb_rm_upstream_blocks_node, coarse_provider); + assert(coarse_provider->alloc_size == 0); + + ravl_delete(coarse_provider->upstream_blocks); + ravl_delete(coarse_provider->all_blocks); + ravl_delete(coarse_provider->free_blocks); + + umf_ba_global_free(coarse_provider->name); + + if (coarse_provider->destroy_upstream_memory_provider && + coarse_provider->upstream_memory_provider) { + umfMemoryProviderDestroy(coarse_provider->upstream_memory_provider); + } + + umf_ba_global_free(coarse_provider); +} + +static umf_result_t +create_aligned_block(coarse_memory_provider_t *coarse_provider, + size_t orig_size, size_t alignment, block_t **current) { + (void)orig_size; // unused in the Release version + int rv; + + block_t *curr = *current; + + // In case of non-zero alignment create an aligned block what would be further used. + uintptr_t orig_data = (uintptr_t)curr->data; + uintptr_t aligned_data = ALIGN_UP(orig_data, alignment); + size_t padding = aligned_data - orig_data; + if (alignment > 0 && padding > 0) { + block_t *aligned_block = coarse_ravl_add_new( + coarse_provider->all_blocks, curr->data + padding, + curr->size - padding, NULL); + if (aligned_block == NULL) { + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + curr->used = false; + curr->size = padding; + + rv = free_blocks_add(coarse_provider->free_blocks, curr); + if (rv) { + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + // use aligned block + *current = aligned_block; + assert((*current)->size >= orig_size); + } + + return UMF_RESULT_SUCCESS; +} + +// Split the current block and put the new block after the one that we use. 
+static umf_result_t +split_current_block(coarse_memory_provider_t *coarse_provider, block_t *curr, + size_t size) { + ravl_node_t *new_node = NULL; + + block_t *new_block = + coarse_ravl_add_new(coarse_provider->all_blocks, curr->data + size, + curr->size - size, &new_node); + if (new_block == NULL) { + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + new_block->used = false; + + int rv = + free_blocks_add(coarse_provider->free_blocks, get_node_block(new_node)); + if (rv) { + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + return UMF_RESULT_SUCCESS; +} + +static block_t * +find_free_block(struct ravl *free_blocks, size_t size, size_t alignment, + coarse_memory_provider_strategy_t allocation_strategy) { + block_t *block; + + switch (allocation_strategy) { + case UMF_COARSE_MEMORY_STRATEGY_FASTEST: + // Always allocate a free block of the (size + alignment) size + // and later cut out the properly aligned part leaving two remaining parts. + return free_blocks_rm_ge(free_blocks, size + alignment, 0, + CHECK_ONLY_THE_FIRST_BLOCK); + + case UMF_COARSE_MEMORY_STRATEGY_FASTEST_BUT_ONE: + // First check if the first free block of the 'size' size has the correct alignment. + block = free_blocks_rm_ge(free_blocks, size, alignment, + CHECK_ONLY_THE_FIRST_BLOCK); + if (block) { + return block; + } + + // If not, use the `UMF_COARSE_MEMORY_STRATEGY_FASTEST` strategy. + return free_blocks_rm_ge(free_blocks, size + alignment, 0, + CHECK_ONLY_THE_FIRST_BLOCK); + + case UMF_COARSE_MEMORY_STRATEGY_CHECK_ALL_SIZE: + // First look through all free blocks of the 'size' size + // and choose the first one with the correct alignment. + block = free_blocks_rm_ge(free_blocks, size, alignment, + CHECK_ALL_BLOCKS_OF_SIZE); + if (block) { + return block; + } + + // If none of them had the correct alignment, + // use the `UMF_COARSE_MEMORY_STRATEGY_FASTEST` strategy. 
+ return free_blocks_rm_ge(free_blocks, size + alignment, 0, + CHECK_ONLY_THE_FIRST_BLOCK); + + // unknown memory allocation strategy + default: + abort(); + } +} + +static umf_result_t coarse_memory_provider_alloc(void *provider, size_t size, + size_t alignment, + void **resultPtr) { + umf_result_t umf_result = UMF_RESULT_ERROR_UNKNOWN; + + if (resultPtr == NULL) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + coarse_memory_provider_t *coarse_provider = + (struct coarse_memory_provider_t *)provider; + + if (utils_mutex_lock(&coarse_provider->lock) != 0) { + LOG_ERR("locking the lock failed"); + return UMF_RESULT_ERROR_UNKNOWN; + } + + assert(debug_check(coarse_provider)); + + // Find a block with greater or equal size using the given memory allocation strategy + block_t *curr = + find_free_block(coarse_provider->free_blocks, size, alignment, + coarse_provider->allocation_strategy); + + // If the block that we want to reuse has a greater size, split it. + // Try to merge the split part with the successor if it is not used. + enum { ACTION_NONE = 0, ACTION_USE, ACTION_SPLIT } action = ACTION_NONE; + + if (curr && curr->size > size) { + action = ACTION_SPLIT; + } else if (curr && curr->size == size) { + action = ACTION_USE; + } + + if (action) { // ACTION_SPLIT or ACTION_USE + assert(curr->used == false); + + // In case of non-zero alignment create an aligned block what would be further used. + if (alignment > 0) { + umf_result = + create_aligned_block(coarse_provider, size, alignment, &curr); + if (umf_result != UMF_RESULT_SUCCESS) { + utils_mutex_unlock(&coarse_provider->lock); + return umf_result; + } + } + + if (action == ACTION_SPLIT) { + // Split the current block and put the new block after the one that we use. 
+ umf_result = split_current_block(coarse_provider, curr, size); + if (umf_result != UMF_RESULT_SUCCESS) { + utils_mutex_unlock(&coarse_provider->lock); + return umf_result; + } + + curr->size = size; + + LOG_DEBUG("coarse_ALLOC (split_block) %zu used %zu alloc %zu", size, + coarse_provider->used_size, coarse_provider->alloc_size); + + } else { // action == ACTION_USE + LOG_DEBUG("coarse_ALLOC (same_block) %zu used %zu alloc %zu", size, + coarse_provider->used_size, coarse_provider->alloc_size); + } + + curr->used = true; + *resultPtr = curr->data; + coarse_provider->used_size += size; + + assert(debug_check(coarse_provider)); + utils_mutex_unlock(&coarse_provider->lock); + + return UMF_RESULT_SUCCESS; + } + + // no suitable block found - try to get more memory from the upstream provider + umf_result = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + + if (coarse_provider->upstream_memory_provider == NULL) { + LOG_ERR("out of memory - no upstream memory provider given"); + goto err_unlock; + } + + umfMemoryProviderAlloc(coarse_provider->upstream_memory_provider, size, + alignment, resultPtr); + if (*resultPtr == NULL) { + LOG_ERR("out of memory - upstream memory provider allocation failed"); + goto err_unlock; + } + + ASSERT_IS_ALIGNED(((uintptr_t)(*resultPtr)), alignment); + + umf_result = coarse_add_upstream_block(coarse_provider, *resultPtr, size); + if (umf_result != UMF_RESULT_SUCCESS) { + if (!coarse_provider->disable_upstream_provider_free) { + umfMemoryProviderFree(coarse_provider->upstream_memory_provider, + *resultPtr, size); + } + goto err_unlock; + } + + LOG_DEBUG("coarse_ALLOC (upstream) %zu used %zu alloc %zu", size, + coarse_provider->used_size, coarse_provider->alloc_size); + + umf_result = UMF_RESULT_SUCCESS; + +err_unlock: + assert(debug_check(coarse_provider)); + utils_mutex_unlock(&coarse_provider->lock); + + return umf_result; +} + +static umf_result_t coarse_memory_provider_free(void *provider, void *ptr, + size_t bytes) { + coarse_memory_provider_t 
*coarse_provider = + (struct coarse_memory_provider_t *)provider; + + if (utils_mutex_lock(&coarse_provider->lock) != 0) { + LOG_ERR("locking the lock failed"); + return UMF_RESULT_ERROR_UNKNOWN; + } + + assert(debug_check(coarse_provider)); + + ravl_node_t *node = coarse_ravl_find_node(coarse_provider->all_blocks, ptr); + if (node == NULL) { + // the block was not found + utils_mutex_unlock(&coarse_provider->lock); + LOG_ERR("memory block not found (ptr = %p, size = %zu)", ptr, bytes); + return UMF_RESULT_ERROR_UNKNOWN; + } + + block_t *block = get_node_block(node); + if (!block->used) { + // the block is already free + utils_mutex_unlock(&coarse_provider->lock); + LOG_ERR("the block is already free"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + if (bytes > 0 && bytes != block->size) { + // wrong size of allocation + utils_mutex_unlock(&coarse_provider->lock); + LOG_ERR("wrong size of allocation"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + LOG_DEBUG("coarse_FREE (return_block_to_pool) %zu used %zu alloc %zu", + block->size, coarse_provider->used_size - block->size, + coarse_provider->alloc_size); + + assert(coarse_provider->used_size >= block->size); + coarse_provider->used_size -= block->size; + + block->used = false; + + // Merge with prev and/or next block if they are unused and have continuous data. 
+ node = free_block_merge_with_prev(coarse_provider, node); + node = free_block_merge_with_next(coarse_provider, node); + + int rv = + free_blocks_add(coarse_provider->free_blocks, get_node_block(node)); + if (rv) { + utils_mutex_unlock(&coarse_provider->lock); + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + assert(debug_check(coarse_provider)); + utils_mutex_unlock(&coarse_provider->lock); + + return UMF_RESULT_SUCCESS; +} + +static void coarse_memory_provider_get_last_native_error(void *provider, + const char **ppMessage, + int32_t *pError) { + (void)provider; // unused + + if (ppMessage == NULL || pError == NULL) { + assert(0); + return; + } + + // Nothing more is needed here, since + // there is no UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC error used. +} + +static umf_result_t coarse_memory_provider_get_min_page_size(void *provider, + void *ptr, + size_t *pageSize) { + coarse_memory_provider_t *coarse_provider = + (struct coarse_memory_provider_t *)provider; + + if (!coarse_provider->upstream_memory_provider) { + *pageSize = utils_get_page_size(); + return UMF_RESULT_SUCCESS; + } + + return umfMemoryProviderGetMinPageSize( + coarse_provider->upstream_memory_provider, ptr, pageSize); +} + +static umf_result_t +coarse_memory_provider_get_recommended_page_size(void *provider, size_t size, + size_t *pageSize) { + coarse_memory_provider_t *coarse_provider = + (struct coarse_memory_provider_t *)provider; + + if (!coarse_provider->upstream_memory_provider) { + *pageSize = utils_get_page_size(); + return UMF_RESULT_SUCCESS; + } + + return umfMemoryProviderGetRecommendedPageSize( + coarse_provider->upstream_memory_provider, size, pageSize); +} + +static const char *coarse_memory_provider_get_name(void *provider) { + coarse_memory_provider_t *coarse_provider = + (struct coarse_memory_provider_t *)provider; + + if (!coarse_provider->name) { + return COARSE_BASE_NAME; + } + + return coarse_provider->name; +} + +static void ravl_cb_count(void *data, void *arg) { + 
assert(arg); + (void)data; /* unused */ + + size_t *num_all_blocks = arg; + (*num_all_blocks)++; +} + +static void ravl_cb_count_free(void *data, void *arg) { + assert(data); + assert(arg); + + ravl_data_t *node_data = data; + assert(node_data); + ravl_free_blocks_head_t *head_node = node_data->value; + assert(head_node); + struct ravl_free_blocks_elem_t *free_block = head_node->head; + assert(free_block); + + size_t *num_all_blocks = arg; + while (free_block) { + (*num_all_blocks)++; + free_block = free_block->next; + } +} + +static umf_result_t +coarse_memory_provider_get_stats(void *provider, + coarse_memory_provider_stats_t *stats) { + coarse_memory_provider_t *coarse_provider = + (struct coarse_memory_provider_t *)provider; + + // count blocks + size_t num_upstream_blocks = 0; + ravl_foreach(coarse_provider->upstream_blocks, ravl_cb_count, + &num_upstream_blocks); + + size_t num_all_blocks = 0; + ravl_foreach(coarse_provider->all_blocks, ravl_cb_count, &num_all_blocks); + + size_t num_free_blocks = 0; + ravl_foreach(coarse_provider->free_blocks, ravl_cb_count_free, + &num_free_blocks); + + stats->alloc_size = coarse_provider->alloc_size; + stats->used_size = coarse_provider->used_size; + stats->num_upstream_blocks = num_upstream_blocks; + stats->num_all_blocks = num_all_blocks; + stats->num_free_blocks = num_free_blocks; + + return UMF_RESULT_SUCCESS; +} + +static umf_result_t coarse_memory_provider_purge_lazy(void *provider, void *ptr, + size_t size) { + coarse_memory_provider_t *coarse_provider = + (struct coarse_memory_provider_t *)provider; + if (coarse_provider->upstream_memory_provider == NULL) { + LOG_ERR("no upstream memory provider given"); + return UMF_RESULT_ERROR_NOT_SUPPORTED; + } + + return umfMemoryProviderPurgeLazy(coarse_provider->upstream_memory_provider, + ptr, size); +} + +static umf_result_t coarse_memory_provider_purge_force(void *provider, + void *ptr, size_t size) { + coarse_memory_provider_t *coarse_provider = + (struct 
coarse_memory_provider_t *)provider; + if (coarse_provider->upstream_memory_provider == NULL) { + LOG_ERR("no upstream memory provider given"); + return UMF_RESULT_ERROR_NOT_SUPPORTED; + } + + return umfMemoryProviderPurgeForce( + coarse_provider->upstream_memory_provider, ptr, size); +} + +static umf_result_t coarse_memory_provider_allocation_split(void *provider, + void *ptr, + size_t totalSize, + size_t firstSize) { + umf_result_t umf_result; + + coarse_memory_provider_t *coarse_provider = + (struct coarse_memory_provider_t *)provider; + + if (utils_mutex_lock(&coarse_provider->lock) != 0) { + LOG_ERR("locking the lock failed"); + return UMF_RESULT_ERROR_UNKNOWN; + } + + assert(debug_check(coarse_provider)); + + ravl_node_t *node = coarse_ravl_find_node(coarse_provider->all_blocks, ptr); + if (node == NULL) { + LOG_ERR("memory block not found"); + umf_result = UMF_RESULT_ERROR_INVALID_ARGUMENT; + goto err_mutex_unlock; + } + + block_t *block = get_node_block(node); + + if (block->size != totalSize) { + LOG_ERR("wrong totalSize"); + umf_result = UMF_RESULT_ERROR_INVALID_ARGUMENT; + goto err_mutex_unlock; + } + + if (!block->used) { + LOG_ERR("block is not allocated"); + umf_result = UMF_RESULT_ERROR_INVALID_ARGUMENT; + goto err_mutex_unlock; + } + + block_t *new_block = coarse_ravl_add_new(coarse_provider->all_blocks, + block->data + firstSize, + block->size - firstSize, NULL); + if (new_block == NULL) { + umf_result = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + goto err_mutex_unlock; + } + + block->size = firstSize; + new_block->used = true; + + assert(new_block->size == (totalSize - firstSize)); + + umf_result = UMF_RESULT_SUCCESS; + +err_mutex_unlock: + assert(debug_check(coarse_provider)); + utils_mutex_unlock(&coarse_provider->lock); + + return umf_result; +} + +static umf_result_t coarse_memory_provider_allocation_merge(void *provider, + void *lowPtr, + void *highPtr, + size_t totalSize) { + umf_result_t umf_result; + + coarse_memory_provider_t *coarse_provider 
= + (struct coarse_memory_provider_t *)provider; + + if (utils_mutex_lock(&coarse_provider->lock) != 0) { + LOG_ERR("locking the lock failed"); + return UMF_RESULT_ERROR_UNKNOWN; + } + + assert(debug_check(coarse_provider)); + + ravl_node_t *low_node = + coarse_ravl_find_node(coarse_provider->all_blocks, lowPtr); + if (low_node == NULL) { + LOG_ERR("the lowPtr memory block not found"); + umf_result = UMF_RESULT_ERROR_INVALID_ARGUMENT; + goto err_mutex_unlock; + } + + block_t *low_block = get_node_block(low_node); + if (!low_block->used) { + LOG_ERR("the lowPtr block is not allocated"); + umf_result = UMF_RESULT_ERROR_INVALID_ARGUMENT; + goto err_mutex_unlock; + } + + ravl_node_t *high_node = + coarse_ravl_find_node(coarse_provider->all_blocks, highPtr); + if (high_node == NULL) { + LOG_ERR("the highPtr memory block not found"); + umf_result = UMF_RESULT_ERROR_INVALID_ARGUMENT; + goto err_mutex_unlock; + } + + block_t *high_block = get_node_block(high_node); + if (!high_block->used) { + LOG_ERR("the highPtr block is not allocated"); + umf_result = UMF_RESULT_ERROR_INVALID_ARGUMENT; + goto err_mutex_unlock; + } + + if (get_node_next(low_node) != high_node) { + LOG_ERR("given pointers cannot be merged"); + umf_result = UMF_RESULT_ERROR_INVALID_ARGUMENT; + goto err_mutex_unlock; + } + + if (get_node_prev(high_node) != low_node) { + LOG_ERR("given pointers cannot be merged"); + umf_result = UMF_RESULT_ERROR_INVALID_ARGUMENT; + goto err_mutex_unlock; + } + + if (low_block->size + high_block->size != totalSize) { + LOG_ERR("wrong totalSize"); + umf_result = UMF_RESULT_ERROR_INVALID_ARGUMENT; + goto err_mutex_unlock; + } + + if ((uintptr_t)highPtr != ((uintptr_t)lowPtr + low_block->size)) { + LOG_ERR("given pointers cannot be merged"); + umf_result = UMF_RESULT_ERROR_INVALID_ARGUMENT; + goto err_mutex_unlock; + } + + ravl_node_t *merged_node = NULL; + + umf_result = user_block_merge(coarse_provider, low_node, high_node, true, + &merged_node); + if (umf_result != 
UMF_RESULT_SUCCESS) { + LOG_ERR("merging failed"); + goto err_mutex_unlock; + } + + assert(merged_node == low_node); + assert(low_block->size == totalSize); + + umf_result = UMF_RESULT_SUCCESS; + +err_mutex_unlock: + assert(debug_check(coarse_provider)); + utils_mutex_unlock(&coarse_provider->lock); + + return umf_result; +} + +umf_memory_provider_ops_t UMF_COARSE_MEMORY_PROVIDER_OPS = { + .version = UMF_VERSION_CURRENT, + .initialize = coarse_memory_provider_initialize, + .finalize = coarse_memory_provider_finalize, + .alloc = coarse_memory_provider_alloc, + .get_last_native_error = coarse_memory_provider_get_last_native_error, + .get_recommended_page_size = + coarse_memory_provider_get_recommended_page_size, + .get_min_page_size = coarse_memory_provider_get_min_page_size, + .get_name = coarse_memory_provider_get_name, + .ext.free = coarse_memory_provider_free, + .ext.purge_lazy = coarse_memory_provider_purge_lazy, + .ext.purge_force = coarse_memory_provider_purge_force, + .ext.allocation_merge = coarse_memory_provider_allocation_merge, + .ext.allocation_split = coarse_memory_provider_allocation_split, + // TODO + /* + .ipc.get_ipc_handle_size = coarse_memory_provider_get_ipc_handle_size, + .ipc.get_ipc_handle = coarse_memory_provider_get_ipc_handle, + .ipc.put_ipc_handle = coarse_memory_provider_put_ipc_handle, + .ipc.open_ipc_handle = coarse_memory_provider_open_ipc_handle, + .ipc.close_ipc_handle = coarse_memory_provider_close_ipc_handle, + */ +}; + +umf_memory_provider_ops_t *umfCoarseMemoryProviderOps(void) { + return &UMF_COARSE_MEMORY_PROVIDER_OPS; +} + +coarse_memory_provider_stats_t +umfCoarseMemoryProviderGetStats(umf_memory_provider_handle_t provider) { + coarse_memory_provider_stats_t stats = {0}; + + if (provider == NULL) { + return stats; + } + + void *priv = umfMemoryProviderGetPriv(provider); + + coarse_memory_provider_t *coarse_provider = + (struct coarse_memory_provider_t *)priv; + + if (utils_mutex_lock(&coarse_provider->lock) != 0) { + 
LOG_ERR("locking the lock failed"); + return stats; + } + + coarse_memory_provider_get_stats(priv, &stats); + + utils_mutex_unlock(&coarse_provider->lock); + + return stats; +} diff --git a/src/provider/provider_cuda.c b/src/provider/provider_cuda.c new file mode 100644 index 000000000..baccbd023 --- /dev/null +++ b/src/provider/provider_cuda.c @@ -0,0 +1,625 @@ +/* + * Copyright (C) 2024 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +*/ + +#include +#include +#include + +#include +#include + +#if defined(UMF_NO_CUDA_PROVIDER) + +umf_result_t umfCUDAMemoryProviderParamsCreate( + umf_cuda_memory_provider_params_handle_t *hParams) { + (void)hParams; + return UMF_RESULT_ERROR_NOT_SUPPORTED; +} + +umf_result_t umfCUDAMemoryProviderParamsDestroy( + umf_cuda_memory_provider_params_handle_t hParams) { + (void)hParams; + return UMF_RESULT_ERROR_NOT_SUPPORTED; +} + +umf_result_t umfCUDAMemoryProviderParamsSetContext( + umf_cuda_memory_provider_params_handle_t hParams, void *hContext) { + (void)hParams; + (void)hContext; + return UMF_RESULT_ERROR_NOT_SUPPORTED; +} + +umf_result_t umfCUDAMemoryProviderParamsSetDevice( + umf_cuda_memory_provider_params_handle_t hParams, int hDevice) { + (void)hParams; + (void)hDevice; + return UMF_RESULT_ERROR_NOT_SUPPORTED; +} + +umf_result_t umfCUDAMemoryProviderParamsSetMemoryType( + umf_cuda_memory_provider_params_handle_t hParams, + umf_usm_memory_type_t memoryType) { + (void)hParams; + (void)memoryType; + return UMF_RESULT_ERROR_NOT_SUPPORTED; +} + +umf_memory_provider_ops_t *umfCUDAMemoryProviderOps(void) { + // not supported + return NULL; +} + +#else // !defined(UMF_NO_CUDA_PROVIDER) + +// disable warning 4201: nonstandard extension used: nameless struct/union +#if defined(_MSC_VER) +#pragma warning(push) +#pragma warning(disable : 4201) +#endif // _MSC_VER + +#include "cuda.h" + +#if defined(_MSC_VER) +#pragma warning(pop) 
+#endif // _MSC_VER + +#include "base_alloc_global.h" +#include "libumf.h" +#include "utils_assert.h" +#include "utils_common.h" +#include "utils_concurrency.h" +#include "utils_load_library.h" +#include "utils_log.h" +#include "utils_sanitizers.h" + +typedef struct cu_memory_provider_t { + CUcontext context; + CUdevice device; + umf_usm_memory_type_t memory_type; + size_t min_alignment; +} cu_memory_provider_t; + +// CUDA Memory Provider settings struct +typedef struct umf_cuda_memory_provider_params_t { + void *cuda_context_handle; ///< Handle to the CUDA context + int cuda_device_handle; ///< Handle to the CUDA device + umf_usm_memory_type_t memory_type; ///< Allocation memory type +} umf_cuda_memory_provider_params_t; + +typedef struct cu_ops_t { + CUresult (*cuMemGetAllocationGranularity)( + size_t *granularity, const CUmemAllocationProp *prop, + CUmemAllocationGranularity_flags option); + CUresult (*cuMemAlloc)(CUdeviceptr *dptr, size_t bytesize); + CUresult (*cuMemAllocHost)(void **pp, size_t bytesize); + CUresult (*cuMemAllocManaged)(CUdeviceptr *dptr, size_t bytesize, + unsigned int flags); + CUresult (*cuMemFree)(CUdeviceptr dptr); + CUresult (*cuMemFreeHost)(void *p); + + CUresult (*cuGetErrorName)(CUresult error, const char **pStr); + CUresult (*cuGetErrorString)(CUresult error, const char **pStr); + CUresult (*cuCtxGetCurrent)(CUcontext *pctx); + CUresult (*cuCtxSetCurrent)(CUcontext ctx); + CUresult (*cuIpcGetMemHandle)(CUipcMemHandle *pHandle, CUdeviceptr dptr); + CUresult (*cuIpcOpenMemHandle)(CUdeviceptr *pdptr, CUipcMemHandle handle, + unsigned int Flags); + CUresult (*cuIpcCloseMemHandle)(CUdeviceptr dptr); +} cu_ops_t; + +typedef CUipcMemHandle cu_ipc_data_t; + +static cu_ops_t g_cu_ops; +static UTIL_ONCE_FLAG cu_is_initialized = UTIL_ONCE_FLAG_INIT; +static bool Init_cu_global_state_failed; + +// forward decl needed for alloc +static umf_result_t cu_memory_provider_free(void *provider, void *ptr, + size_t bytes); + +#define TLS_MSG_BUF_LEN 1024 
+ +typedef struct cu_last_native_error_t { + CUresult native_error; + char msg_buff[TLS_MSG_BUF_LEN]; +} cu_last_native_error_t; + +static __TLS cu_last_native_error_t TLS_last_native_error; + +static void cu_store_last_native_error(CUresult native_error) { + TLS_last_native_error.native_error = native_error; +} + +static umf_result_t cu2umf_result(CUresult result) { + switch (result) { + case CUDA_SUCCESS: + return UMF_RESULT_SUCCESS; + case CUDA_ERROR_OUT_OF_MEMORY: + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + case CUDA_ERROR_INVALID_VALUE: + case CUDA_ERROR_INVALID_HANDLE: + case CUDA_ERROR_INVALID_RESOURCE_TYPE: + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + default: + cu_store_last_native_error(result); + return UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC; + } +} + +static void init_cu_global_state(void) { +#ifdef _WIN32 + const char *lib_name = "nvcuda.dll"; +#else + const char *lib_name = "libcuda.so"; +#endif + // check if CUDA shared library is already loaded + // we pass 0 as a handle to search the global symbol table + + // NOTE: some symbols defined in the lib have _vX postfixes - it is + // important to load the proper version of functions + *(void **)&g_cu_ops.cuMemGetAllocationGranularity = + utils_get_symbol_addr(0, "cuMemGetAllocationGranularity", lib_name); + *(void **)&g_cu_ops.cuMemAlloc = + utils_get_symbol_addr(0, "cuMemAlloc_v2", lib_name); + *(void **)&g_cu_ops.cuMemAllocHost = + utils_get_symbol_addr(0, "cuMemAllocHost_v2", lib_name); + *(void **)&g_cu_ops.cuMemAllocManaged = + utils_get_symbol_addr(0, "cuMemAllocManaged", lib_name); + *(void **)&g_cu_ops.cuMemFree = + utils_get_symbol_addr(0, "cuMemFree_v2", lib_name); + *(void **)&g_cu_ops.cuMemFreeHost = + utils_get_symbol_addr(0, "cuMemFreeHost", lib_name); + *(void **)&g_cu_ops.cuGetErrorName = + utils_get_symbol_addr(0, "cuGetErrorName", lib_name); + *(void **)&g_cu_ops.cuGetErrorString = + utils_get_symbol_addr(0, "cuGetErrorString", lib_name); + *(void **)&g_cu_ops.cuCtxGetCurrent = 
+ utils_get_symbol_addr(0, "cuCtxGetCurrent", lib_name); + *(void **)&g_cu_ops.cuCtxSetCurrent = + utils_get_symbol_addr(0, "cuCtxSetCurrent", lib_name); + *(void **)&g_cu_ops.cuIpcGetMemHandle = + utils_get_symbol_addr(0, "cuIpcGetMemHandle", lib_name); + *(void **)&g_cu_ops.cuIpcOpenMemHandle = + utils_get_symbol_addr(0, "cuIpcOpenMemHandle_v2", lib_name); + *(void **)&g_cu_ops.cuIpcCloseMemHandle = + utils_get_symbol_addr(0, "cuIpcCloseMemHandle", lib_name); + + if (!g_cu_ops.cuMemGetAllocationGranularity || !g_cu_ops.cuMemAlloc || + !g_cu_ops.cuMemAllocHost || !g_cu_ops.cuMemAllocManaged || + !g_cu_ops.cuMemFree || !g_cu_ops.cuMemFreeHost || + !g_cu_ops.cuGetErrorName || !g_cu_ops.cuGetErrorString || + !g_cu_ops.cuCtxGetCurrent || !g_cu_ops.cuCtxSetCurrent || + !g_cu_ops.cuIpcGetMemHandle || !g_cu_ops.cuIpcOpenMemHandle || + !g_cu_ops.cuIpcCloseMemHandle) { + LOG_ERR("Required CUDA symbols not found."); + Init_cu_global_state_failed = true; + } +} + +umf_result_t umfCUDAMemoryProviderParamsCreate( + umf_cuda_memory_provider_params_handle_t *hParams) { + libumfInit(); + if (!hParams) { + LOG_ERR("CUDA Memory Provider params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + umf_cuda_memory_provider_params_handle_t params_data = + umf_ba_global_alloc(sizeof(umf_cuda_memory_provider_params_t)); + if (!params_data) { + LOG_ERR("Cannot allocate memory for CUDA Memory Provider params"); + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + params_data->cuda_context_handle = NULL; + params_data->cuda_device_handle = -1; + params_data->memory_type = UMF_MEMORY_TYPE_UNKNOWN; + + *hParams = params_data; + + return UMF_RESULT_SUCCESS; +} + +umf_result_t umfCUDAMemoryProviderParamsDestroy( + umf_cuda_memory_provider_params_handle_t hParams) { + umf_ba_global_free(hParams); + + return UMF_RESULT_SUCCESS; +} + +umf_result_t umfCUDAMemoryProviderParamsSetContext( + umf_cuda_memory_provider_params_handle_t hParams, void *hContext) { + if (!hParams) { + 
LOG_ERR("CUDA Memory Provider params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + hParams->cuda_context_handle = hContext; + + return UMF_RESULT_SUCCESS; +} + +umf_result_t umfCUDAMemoryProviderParamsSetDevice( + umf_cuda_memory_provider_params_handle_t hParams, int hDevice) { + if (!hParams) { + LOG_ERR("CUDA Memory Provider params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + hParams->cuda_device_handle = hDevice; + + return UMF_RESULT_SUCCESS; +} + +umf_result_t umfCUDAMemoryProviderParamsSetMemoryType( + umf_cuda_memory_provider_params_handle_t hParams, + umf_usm_memory_type_t memoryType) { + if (!hParams) { + LOG_ERR("CUDA Memory Provider params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + hParams->memory_type = memoryType; + + return UMF_RESULT_SUCCESS; +} + +static umf_result_t cu_memory_provider_initialize(void *params, + void **provider) { + if (params == NULL) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + umf_cuda_memory_provider_params_handle_t cu_params = + (umf_cuda_memory_provider_params_handle_t)params; + + if (cu_params->memory_type == UMF_MEMORY_TYPE_UNKNOWN || + cu_params->memory_type > UMF_MEMORY_TYPE_SHARED) { + LOG_ERR("Invalid memory type value"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + if (cu_params->cuda_context_handle == NULL) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + utils_init_once(&cu_is_initialized, init_cu_global_state); + if (Init_cu_global_state_failed) { + LOG_ERR("Loading CUDA symbols failed"); + return UMF_RESULT_ERROR_UNKNOWN; + } + + cu_memory_provider_t *cu_provider = + umf_ba_global_alloc(sizeof(cu_memory_provider_t)); + if (!cu_provider) { + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + // CUDA alloc functions doesn't allow to provide user alignment - get the + // minimum one from the driver + size_t min_alignment = 0; + CUmemAllocationProp allocProps = {0}; + allocProps.location.type = CU_MEM_LOCATION_TYPE_DEVICE; 
+ allocProps.type = CU_MEM_ALLOCATION_TYPE_PINNED; + allocProps.location.id = cu_params->cuda_device_handle; + CUresult cu_result = g_cu_ops.cuMemGetAllocationGranularity( + &min_alignment, &allocProps, CU_MEM_ALLOC_GRANULARITY_MINIMUM); + if (cu_result != CUDA_SUCCESS) { + umf_ba_global_free(cu_provider); + return cu2umf_result(cu_result); + } + + cu_provider->context = cu_params->cuda_context_handle; + cu_provider->device = cu_params->cuda_device_handle; + cu_provider->memory_type = cu_params->memory_type; + cu_provider->min_alignment = min_alignment; + + *provider = cu_provider; + + return UMF_RESULT_SUCCESS; + } + + static void cu_memory_provider_finalize(void *provider) { + umf_ba_global_free(provider); + } + + /* + * This function is used by the CUDA provider to make sure that + * the required context is set. If the current context is + * not the required one, it will be saved in restore_ctx. + */ + static inline umf_result_t set_context(CUcontext required_ctx, + CUcontext *restore_ctx) { + CUcontext current_ctx = NULL; + CUresult cu_result = g_cu_ops.cuCtxGetCurrent(&current_ctx); + if (cu_result != CUDA_SUCCESS) { + LOG_ERR("cuCtxGetCurrent() failed."); + return cu2umf_result(cu_result); + } + *restore_ctx = current_ctx; + if (current_ctx != required_ctx) { + cu_result = g_cu_ops.cuCtxSetCurrent(required_ctx); + if (cu_result != CUDA_SUCCESS) { + LOG_ERR("cuCtxSetCurrent() failed."); + return cu2umf_result(cu_result); + } + } + + return UMF_RESULT_SUCCESS; + } + + static umf_result_t cu_memory_provider_alloc(void *provider, size_t size, + size_t alignment, + void **resultPtr) { + cu_memory_provider_t *cu_provider = (cu_memory_provider_t *)provider; + + if (alignment > cu_provider->min_alignment) { + // alignment of CUDA allocations is controlled by the CUDA driver - + // currently UMF doesn't support alignment larger than default + return UMF_RESULT_ERROR_NOT_SUPPORTED; + } + + // Remember current context and set the one from the provider + CUcontext restore_ctx = NULL; + 
umf_result_t umf_result = set_context(cu_provider->context, &restore_ctx); + if (umf_result != UMF_RESULT_SUCCESS) { + LOG_ERR("Failed to set CUDA context, ret = %d", umf_result); + return umf_result; + } + + CUresult cu_result = CUDA_SUCCESS; + switch (cu_provider->memory_type) { + case UMF_MEMORY_TYPE_HOST: { + cu_result = g_cu_ops.cuMemAllocHost(resultPtr, size); + break; + } + case UMF_MEMORY_TYPE_DEVICE: { + cu_result = g_cu_ops.cuMemAlloc((CUdeviceptr *)resultPtr, size); + break; + } + case UMF_MEMORY_TYPE_SHARED: { + cu_result = g_cu_ops.cuMemAllocManaged((CUdeviceptr *)resultPtr, size, + CU_MEM_ATTACH_GLOBAL); + break; + } + default: + // this shouldn't happen as we check the memory_type settings during + // the initialization + LOG_ERR("unsupported USM memory type"); + assert(false); + return UMF_RESULT_ERROR_UNKNOWN; + } + + umf_result = set_context(restore_ctx, &restore_ctx); + if (umf_result != UMF_RESULT_SUCCESS) { + LOG_ERR("Failed to restore CUDA context, ret = %d", umf_result); + } + + umf_result = cu2umf_result(cu_result); + if (umf_result != UMF_RESULT_SUCCESS) { + LOG_ERR("Failed to allocate memory, cu_result = %d, ret = %d", + cu_result, umf_result); + return umf_result; + } + + // check the alignment + if (alignment > 0 && ((uintptr_t)(*resultPtr) % alignment) != 0) { + cu_memory_provider_free(provider, *resultPtr, size); + LOG_ERR("unsupported alignment size"); + return UMF_RESULT_ERROR_INVALID_ALIGNMENT; + } + return umf_result; +} + +static umf_result_t cu_memory_provider_free(void *provider, void *ptr, + size_t bytes) { + (void)bytes; + + if (ptr == NULL) { + return UMF_RESULT_SUCCESS; + } + + cu_memory_provider_t *cu_provider = (cu_memory_provider_t *)provider; + + CUresult cu_result = CUDA_SUCCESS; + switch (cu_provider->memory_type) { + case UMF_MEMORY_TYPE_HOST: { + cu_result = g_cu_ops.cuMemFreeHost(ptr); + break; + } + case UMF_MEMORY_TYPE_SHARED: + case UMF_MEMORY_TYPE_DEVICE: { + cu_result = g_cu_ops.cuMemFree((CUdeviceptr)ptr); + 
break; + } + default: + // this shouldn't happen as we check the memory_type settings during + // the initialization + LOG_ERR("unsupported USM memory type"); + return UMF_RESULT_ERROR_UNKNOWN; + } + + return cu2umf_result(cu_result); +} + +static void cu_memory_provider_get_last_native_error(void *provider, + const char **ppMessage, + int32_t *pError) { + (void)provider; + + if (ppMessage == NULL || pError == NULL) { + ASSERT(0); + return; + } + + const char *error_name = 0; + const char *error_string = 0; + g_cu_ops.cuGetErrorName(TLS_last_native_error.native_error, &error_name); + g_cu_ops.cuGetErrorString(TLS_last_native_error.native_error, + &error_string); + + size_t buf_size = 0; + strncpy(TLS_last_native_error.msg_buff, error_name, TLS_MSG_BUF_LEN - 1); + buf_size = strlen(TLS_last_native_error.msg_buff); + + strncat(TLS_last_native_error.msg_buff, " - ", + TLS_MSG_BUF_LEN - buf_size - 1); + buf_size = strlen(TLS_last_native_error.msg_buff); + + strncat(TLS_last_native_error.msg_buff, error_string, + TLS_MSG_BUF_LEN - buf_size - 1); + + *pError = TLS_last_native_error.native_error; + *ppMessage = TLS_last_native_error.msg_buff; +} + +static umf_result_t cu_memory_provider_get_min_page_size(void *provider, + void *ptr, + size_t *pageSize) { + (void)ptr; + + cu_memory_provider_t *cu_provider = (cu_memory_provider_t *)provider; + + CUmemAllocationProp allocProps = {0}; + allocProps.location.type = CU_MEM_LOCATION_TYPE_DEVICE; + allocProps.type = CU_MEM_ALLOCATION_TYPE_PINNED; + allocProps.location.id = cu_provider->device; + + CUresult cu_result = g_cu_ops.cuMemGetAllocationGranularity( + pageSize, &allocProps, CU_MEM_ALLOC_GRANULARITY_MINIMUM); + + return cu2umf_result(cu_result); +} + +static umf_result_t +cu_memory_provider_get_recommended_page_size(void *provider, size_t size, + size_t *pageSize) { + (void)size; + + cu_memory_provider_t *cu_provider = (cu_memory_provider_t *)provider; + + CUmemAllocationProp allocProps = {0}; + allocProps.location.type = 
CU_MEM_LOCATION_TYPE_DEVICE; + allocProps.type = CU_MEM_ALLOCATION_TYPE_PINNED; + allocProps.location.id = cu_provider->device; + + CUresult cu_result = g_cu_ops.cuMemGetAllocationGranularity( + pageSize, &allocProps, CU_MEM_ALLOC_GRANULARITY_RECOMMENDED); + + return cu2umf_result(cu_result); +} + +static const char *cu_memory_provider_get_name(void *provider) { + (void)provider; + return "CUDA"; +} + +static umf_result_t cu_memory_provider_get_ipc_handle_size(void *provider, + size_t *size) { + (void)provider; + *size = sizeof(cu_ipc_data_t); + return UMF_RESULT_SUCCESS; +} + +static umf_result_t cu_memory_provider_get_ipc_handle(void *provider, + const void *ptr, + size_t size, + void *providerIpcData) { + (void)provider; + (void)size; + + CUresult cu_result; + cu_ipc_data_t *cu_ipc_data = (cu_ipc_data_t *)providerIpcData; + + cu_result = g_cu_ops.cuIpcGetMemHandle(cu_ipc_data, (CUdeviceptr)ptr); + if (cu_result != CUDA_SUCCESS) { + LOG_ERR("cuIpcGetMemHandle() failed."); + return cu2umf_result(cu_result); + } + + return UMF_RESULT_SUCCESS; +} + +static umf_result_t cu_memory_provider_put_ipc_handle(void *provider, + void *providerIpcData) { + (void)provider; + (void)providerIpcData; + return UMF_RESULT_SUCCESS; +} + +static umf_result_t cu_memory_provider_open_ipc_handle(void *provider, + void *providerIpcData, + void **ptr) { + cu_memory_provider_t *cu_provider = (cu_memory_provider_t *)provider; + + CUresult cu_result; + cu_ipc_data_t *cu_ipc_data = (cu_ipc_data_t *)providerIpcData; + + // Remember current context and set the one from the provider + CUcontext restore_ctx = NULL; + umf_result_t umf_result = set_context(cu_provider->context, &restore_ctx); + if (umf_result != UMF_RESULT_SUCCESS) { + return umf_result; + } + + cu_result = g_cu_ops.cuIpcOpenMemHandle((CUdeviceptr *)ptr, *cu_ipc_data, + CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS); + + if (cu_result != CUDA_SUCCESS) { + LOG_ERR("cuIpcOpenMemHandle() failed."); + } + + set_context(restore_ctx, &restore_ctx); 
+ + return cu2umf_result(cu_result); +} + +static umf_result_t +cu_memory_provider_close_ipc_handle(void *provider, void *ptr, size_t size) { + (void)provider; + (void)size; + + CUresult cu_result; + + cu_result = g_cu_ops.cuIpcCloseMemHandle((CUdeviceptr)ptr); + if (cu_result != CUDA_SUCCESS) { + LOG_ERR("cuIpcCloseMemHandle() failed."); + return cu2umf_result(cu_result); + } + + return UMF_RESULT_SUCCESS; +} + +static struct umf_memory_provider_ops_t UMF_CUDA_MEMORY_PROVIDER_OPS = { + .version = UMF_VERSION_CURRENT, + .initialize = cu_memory_provider_initialize, + .finalize = cu_memory_provider_finalize, + .alloc = cu_memory_provider_alloc, + .get_last_native_error = cu_memory_provider_get_last_native_error, + .get_recommended_page_size = cu_memory_provider_get_recommended_page_size, + .get_min_page_size = cu_memory_provider_get_min_page_size, + .get_name = cu_memory_provider_get_name, + .ext.free = cu_memory_provider_free, + // TODO + /* + .ext.purge_lazy = cu_memory_provider_purge_lazy, + .ext.purge_force = cu_memory_provider_purge_force, + .ext.allocation_merge = cu_memory_provider_allocation_merge, + .ext.allocation_split = cu_memory_provider_allocation_split, + */ + .ipc.get_ipc_handle_size = cu_memory_provider_get_ipc_handle_size, + .ipc.get_ipc_handle = cu_memory_provider_get_ipc_handle, + .ipc.put_ipc_handle = cu_memory_provider_put_ipc_handle, + .ipc.open_ipc_handle = cu_memory_provider_open_ipc_handle, + .ipc.close_ipc_handle = cu_memory_provider_close_ipc_handle, +}; + +umf_memory_provider_ops_t *umfCUDAMemoryProviderOps(void) { + return &UMF_CUDA_MEMORY_PROVIDER_OPS; +} + +#endif // !defined(UMF_NO_CUDA_PROVIDER) diff --git a/src/provider/provider_devdax_memory.c b/src/provider/provider_devdax_memory.c new file mode 100644 index 000000000..32407acbb --- /dev/null +++ b/src/provider/provider_devdax_memory.c @@ -0,0 +1,659 @@ +/* + * Copyright (C) 2024 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +*/ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#if defined(_WIN32) || defined(UMF_NO_HWLOC) + +umf_memory_provider_ops_t *umfDevDaxMemoryProviderOps(void) { + // not supported + return NULL; +} + +umf_result_t umfDevDaxMemoryProviderParamsCreate( + umf_devdax_memory_provider_params_handle_t *hParams, const char *path, + size_t size) { + (void)hParams; + (void)path; + (void)size; + return UMF_RESULT_ERROR_NOT_SUPPORTED; +} + +umf_result_t umfDevDaxMemoryProviderParamsDestroy( + umf_devdax_memory_provider_params_handle_t hParams) { + (void)hParams; + return UMF_RESULT_ERROR_NOT_SUPPORTED; +} + +umf_result_t umfDevDaxMemoryProviderParamsSetDeviceDax( + umf_devdax_memory_provider_params_handle_t hParams, const char *path, + size_t size) { + (void)hParams; + (void)path; + (void)size; + return UMF_RESULT_ERROR_NOT_SUPPORTED; +} + +umf_result_t umfDevDaxMemoryProviderParamsSetProtection( + umf_devdax_memory_provider_params_handle_t hParams, unsigned protection) { + (void)hParams; + (void)protection; + return UMF_RESULT_ERROR_NOT_SUPPORTED; +} + +#else // !defined(_WIN32) && !defined(UMF_NO_HWLOC) + +#include "base_alloc_global.h" +#include "libumf.h" +#include "utils_common.h" +#include "utils_concurrency.h" +#include "utils_log.h" + +#define DEVDAX_PAGE_SIZE_2MB ((size_t)(2 * 1024 * 1024)) // == 2 MB + +#define TLS_MSG_BUF_LEN 1024 + +typedef struct devdax_memory_provider_t { + char path[PATH_MAX]; // a path to the device DAX + size_t size; // size of the file used for memory mapping + void *base; // base address of memory mapping + size_t offset; // offset in the file used for memory mapping + utils_mutex_t lock; // lock of ptr and offset + unsigned protection; // combination of OS-specific protection flags +} devdax_memory_provider_t; + +// DevDax Memory provider settings struct +typedef struct umf_devdax_memory_provider_params_t { + char *path; + 
size_t size; + unsigned protection; +} umf_devdax_memory_provider_params_t; + +typedef struct devdax_last_native_error_t { + int32_t native_error; + int errno_value; + char msg_buff[TLS_MSG_BUF_LEN]; +} devdax_last_native_error_t; + +static __TLS devdax_last_native_error_t TLS_last_native_error; + +// helper values used only in the Native_error_str array +#define _UMF_DEVDAX_RESULT_SUCCESS \ + (UMF_DEVDAX_RESULT_SUCCESS - UMF_DEVDAX_RESULT_SUCCESS) +#define _UMF_DEVDAX_RESULT_ERROR_ALLOC_FAILED \ + (UMF_DEVDAX_RESULT_ERROR_ALLOC_FAILED - UMF_DEVDAX_RESULT_SUCCESS) +#define _UMF_DEVDAX_RESULT_ERROR_ADDRESS_NOT_ALIGNED \ + (UMF_DEVDAX_RESULT_ERROR_ADDRESS_NOT_ALIGNED - UMF_DEVDAX_RESULT_SUCCESS) +#define _UMF_DEVDAX_RESULT_ERROR_FREE_FAILED \ + (UMF_DEVDAX_RESULT_ERROR_FREE_FAILED - UMF_DEVDAX_RESULT_SUCCESS) +#define _UMF_DEVDAX_RESULT_ERROR_PURGE_FORCE_FAILED \ + (UMF_DEVDAX_RESULT_ERROR_PURGE_FORCE_FAILED - UMF_DEVDAX_RESULT_SUCCESS) + +static const char *Native_error_str[] = { + [_UMF_DEVDAX_RESULT_SUCCESS] = "success", + [_UMF_DEVDAX_RESULT_ERROR_ALLOC_FAILED] = "memory allocation failed", + [_UMF_DEVDAX_RESULT_ERROR_ADDRESS_NOT_ALIGNED] = + "allocated address is not aligned", + [_UMF_DEVDAX_RESULT_ERROR_FREE_FAILED] = "memory deallocation failed", + [_UMF_DEVDAX_RESULT_ERROR_PURGE_FORCE_FAILED] = "force purging failed", +}; + +static void devdax_store_last_native_error(int32_t native_error, + int errno_value) { + TLS_last_native_error.native_error = native_error; + TLS_last_native_error.errno_value = errno_value; +} + +static umf_result_t +devdax_translate_params(umf_devdax_memory_provider_params_t *in_params, + devdax_memory_provider_t *provider) { + umf_result_t result; + + result = utils_translate_mem_protection_flags(in_params->protection, + &provider->protection); + if (result != UMF_RESULT_SUCCESS) { + LOG_ERR("incorrect memory protection flags: %u", in_params->protection); + return result; + } + + return UMF_RESULT_SUCCESS; +} + +static umf_result_t 
devdax_initialize(void *params, void **provider) { + umf_result_t ret; + + if (params == NULL) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + umf_devdax_memory_provider_params_t *in_params = + (umf_devdax_memory_provider_params_t *)params; + + if (in_params->path == NULL) { + LOG_ERR("devdax path is missing"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + if (in_params->size == 0) { + LOG_ERR("devdax size is 0"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + devdax_memory_provider_t *devdax_provider = + umf_ba_global_alloc(sizeof(*devdax_provider)); + if (!devdax_provider) { + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + memset(devdax_provider, 0, sizeof(*devdax_provider)); + + ret = devdax_translate_params(in_params, devdax_provider); + if (ret != UMF_RESULT_SUCCESS) { + goto err_free_devdax_provider; + } + + devdax_provider->size = in_params->size; + if (utils_copy_path(in_params->path, devdax_provider->path, PATH_MAX)) { + ret = UMF_RESULT_ERROR_INVALID_ARGUMENT; + goto err_free_devdax_provider; + } + + int fd = utils_devdax_open(in_params->path); + if (fd == -1) { + LOG_ERR("cannot open the device DAX: %s", in_params->path); + ret = UMF_RESULT_ERROR_INVALID_ARGUMENT; + goto err_free_devdax_provider; + } + + bool is_dax = false; + + // mmap /dev/dax with the MAP_SYNC + devdax_provider->base = utils_mmap_file( + NULL, devdax_provider->size, devdax_provider->protection, 0 /* flags */, + fd, 0 /* offset */, &is_dax); + utils_close_fd(fd); + if (devdax_provider->base == NULL) { + LOG_PDEBUG("mapping the devdax failed (path=%s, size=%zu)", + in_params->path, devdax_provider->size); + ret = UMF_RESULT_ERROR_UNKNOWN; + goto err_free_devdax_provider; + } + + if (!is_dax) { + LOG_ERR("mapping the devdax with MAP_SYNC failed: %s", in_params->path); + ret = UMF_RESULT_ERROR_UNKNOWN; + + if (devdax_provider->base) { + utils_munmap(devdax_provider->base, devdax_provider->size); + } + + goto err_free_devdax_provider; + } + + LOG_DEBUG("devdax memory mapped (path=%s, size=%zu, addr=%p)", + 
in_params->path, devdax_provider->size, devdax_provider->base); + + if (utils_mutex_init(&devdax_provider->lock) == NULL) { + LOG_ERR("lock init failed"); + ret = UMF_RESULT_ERROR_UNKNOWN; + goto err_unmap_devdax; + } + + *provider = devdax_provider; + + return UMF_RESULT_SUCCESS; + +err_unmap_devdax: + utils_munmap(devdax_provider->base, devdax_provider->size); +err_free_devdax_provider: + umf_ba_global_free(devdax_provider); + return ret; +} + +static void devdax_finalize(void *provider) { + devdax_memory_provider_t *devdax_provider = provider; + utils_mutex_destroy_not_free(&devdax_provider->lock); + utils_munmap(devdax_provider->base, devdax_provider->size); + umf_ba_global_free(devdax_provider); +} + +static int devdax_alloc_aligned(size_t length, size_t alignment, void *base, + size_t size, utils_mutex_t *lock, + void **out_addr, size_t *offset) { + assert(out_addr); + + if (utils_mutex_lock(lock)) { + LOG_ERR("locking file offset failed"); + return -1; + } + + uintptr_t ptr = (uintptr_t)base + *offset; + uintptr_t rest_of_div = alignment ? 
(ptr % alignment) : 0; + + if (alignment > 0 && rest_of_div > 0) { + ptr += alignment - rest_of_div; + } + + size_t new_offset = ptr - (uintptr_t)base + length; + + if (new_offset > size) { + utils_mutex_unlock(lock); + LOG_ERR("cannot allocate more memory than the device DAX size: %zu", + size); + return -1; + } + + *offset = new_offset; + *out_addr = (void *)ptr; + + utils_mutex_unlock(lock); + + return 0; +} + +static umf_result_t devdax_alloc(void *provider, size_t size, size_t alignment, + void **resultPtr) { + int ret; + + // alignment must be a power of two and a multiple or a divider of the page size + if (alignment && ((alignment & (alignment - 1)) || + ((alignment % DEVDAX_PAGE_SIZE_2MB) && + (DEVDAX_PAGE_SIZE_2MB % alignment)))) { + LOG_ERR("wrong alignment: %zu (not a power of 2 or a multiple or a " + "divider of the page size (%zu))", + alignment, DEVDAX_PAGE_SIZE_2MB); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + if (IS_NOT_ALIGNED(alignment, DEVDAX_PAGE_SIZE_2MB)) { + alignment = ALIGN_UP(alignment, DEVDAX_PAGE_SIZE_2MB); + } + + devdax_memory_provider_t *devdax_provider = + (devdax_memory_provider_t *)provider; + + void *addr = NULL; + errno = 0; + ret = devdax_alloc_aligned(size, alignment, devdax_provider->base, + devdax_provider->size, &devdax_provider->lock, + &addr, &devdax_provider->offset); + if (ret) { + devdax_store_last_native_error(UMF_DEVDAX_RESULT_ERROR_ALLOC_FAILED, 0); + LOG_ERR("memory allocation failed"); + return UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC; + } + + *resultPtr = addr; + + return UMF_RESULT_SUCCESS; +} + +static void devdax_get_last_native_error(void *provider, const char **ppMessage, + int32_t *pError) { + (void)provider; // unused + + if (ppMessage == NULL || pError == NULL) { + assert(0); + return; + } + + *pError = TLS_last_native_error.native_error; + if (TLS_last_native_error.errno_value == 0) { + *ppMessage = Native_error_str[*pError - UMF_DEVDAX_RESULT_SUCCESS]; + return; + } + + const char *msg; + 
size_t len; + size_t pos = 0; + + msg = Native_error_str[*pError - UMF_DEVDAX_RESULT_SUCCESS]; + len = strlen(msg); + memcpy(TLS_last_native_error.msg_buff + pos, msg, len + 1); + pos += len; + + msg = ": "; + len = strlen(msg); + memcpy(TLS_last_native_error.msg_buff + pos, msg, len + 1); + pos += len; + + utils_strerror(TLS_last_native_error.errno_value, + TLS_last_native_error.msg_buff + pos, TLS_MSG_BUF_LEN - pos); + + *ppMessage = TLS_last_native_error.msg_buff; +} + +static umf_result_t devdax_get_recommended_page_size(void *provider, + size_t size, + size_t *page_size) { + (void)provider; // unused + (void)size; // unused + + *page_size = DEVDAX_PAGE_SIZE_2MB; + + return UMF_RESULT_SUCCESS; +} + +static umf_result_t devdax_get_min_page_size(void *provider, void *ptr, + size_t *page_size) { + (void)ptr; // unused + + return devdax_get_recommended_page_size(provider, 0, page_size); +} + +static umf_result_t devdax_purge_lazy(void *provider, void *ptr, size_t size) { + (void)provider; // unused + (void)ptr; // unused + (void)size; // unused + // purge_lazy is unsupported in case of the devdax memory provider, + // because the MADV_FREE operation can be applied + // only to private anonymous pages (see madvise(2)). 
+ return UMF_RESULT_ERROR_NOT_SUPPORTED; +} + +static umf_result_t devdax_purge_force(void *provider, void *ptr, size_t size) { + (void)provider; // unused + errno = 0; + if (utils_purge(ptr, size, UMF_PURGE_FORCE)) { + devdax_store_last_native_error( + UMF_DEVDAX_RESULT_ERROR_PURGE_FORCE_FAILED, errno); + LOG_PERR("force purging failed"); + return UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC; + } + return UMF_RESULT_SUCCESS; +} + +static const char *devdax_get_name(void *provider) { + (void)provider; // unused + return "DEVDAX"; +} + +static umf_result_t devdax_allocation_split(void *provider, void *ptr, + size_t totalSize, + size_t firstSize) { + (void)provider; + (void)ptr; + (void)totalSize; + (void)firstSize; + return UMF_RESULT_SUCCESS; +} + +static umf_result_t devdax_allocation_merge(void *provider, void *lowPtr, + void *highPtr, size_t totalSize) { + (void)provider; + (void)lowPtr; + (void)highPtr; + (void)totalSize; + return UMF_RESULT_SUCCESS; +} + +typedef struct devdax_ipc_data_t { + char path[PATH_MAX]; // path to the /dev/dax + unsigned protection; // combination of OS-specific memory protection flags + // offset of the data (from the beginning of the devdax mapping) - see devdax_get_ipc_handle() + size_t offset; + size_t length; // length of the data +} devdax_ipc_data_t; + +static umf_result_t devdax_get_ipc_handle_size(void *provider, size_t *size) { + (void)provider; + + *size = sizeof(devdax_ipc_data_t); + + return UMF_RESULT_SUCCESS; +} + +static umf_result_t devdax_get_ipc_handle(void *provider, const void *ptr, + size_t size, void *providerIpcData) { + devdax_memory_provider_t *devdax_provider = + (devdax_memory_provider_t *)provider; + + devdax_ipc_data_t *devdax_ipc_data = (devdax_ipc_data_t *)providerIpcData; + strncpy(devdax_ipc_data->path, devdax_provider->path, PATH_MAX - 1); + devdax_ipc_data->path[PATH_MAX - 1] = '\0'; + devdax_ipc_data->protection = devdax_provider->protection; + devdax_ipc_data->offset = + (size_t)((uintptr_t)ptr - 
(uintptr_t)devdax_provider->base); + devdax_ipc_data->length = size; + + return UMF_RESULT_SUCCESS; +} + +static umf_result_t devdax_put_ipc_handle(void *provider, + void *providerIpcData) { + devdax_memory_provider_t *devdax_provider = + (devdax_memory_provider_t *)provider; + devdax_ipc_data_t *devdax_ipc_data = (devdax_ipc_data_t *)providerIpcData; + + // verify the path of the /dev/dax + if (strncmp(devdax_ipc_data->path, devdax_provider->path, PATH_MAX)) { + LOG_ERR("devdax path mismatch (local: %s, ipc: %s)", + devdax_provider->path, devdax_ipc_data->path); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + return UMF_RESULT_SUCCESS; +} + +static umf_result_t devdax_open_ipc_handle(void *provider, + void *providerIpcData, void **ptr) { + (void)provider; // unused + *ptr = NULL; + + devdax_ipc_data_t *devdax_ipc_data = (devdax_ipc_data_t *)providerIpcData; + + int fd = utils_devdax_open(devdax_ipc_data->path); + if (fd == -1) { + LOG_PERR("opening the devdax (%s) failed", devdax_ipc_data->path); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + // It is just a workaround for case when + // devdax_alloc() was called with the size argument + // that is not a multiplier of DEVDAX_PAGE_SIZE_2MB. 
+ size_t offset_aligned = devdax_ipc_data->offset; + size_t length_aligned = devdax_ipc_data->length; + utils_align_ptr_down_size_up((void **)&offset_aligned, &length_aligned, + DEVDAX_PAGE_SIZE_2MB); + + bool is_dax = false; + + // mmap /dev/dax with the MAP_SYNC + char *addr = + utils_mmap_file(NULL, length_aligned, devdax_ipc_data->protection, + 0 /* flags */, fd, offset_aligned, &is_dax); + (void)utils_close_fd(fd); + if (addr == NULL) { + LOG_PERR("devdax mapping failed (path: %s, size: %zu, protection: %i, " + "fd: %i, offset: %zu)", + devdax_ipc_data->path, length_aligned, + devdax_ipc_data->protection, fd, offset_aligned); + + devdax_store_last_native_error(UMF_DEVDAX_RESULT_ERROR_ALLOC_FAILED, + errno); + return UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC; + } + + if (!is_dax) { + LOG_ERR("mapping the devdax with MAP_SYNC failed: %s", + devdax_ipc_data->path); + + if (addr) { + utils_munmap(addr, length_aligned); + } + + return UMF_RESULT_ERROR_UNKNOWN; + } + + LOG_DEBUG("devdax mapped (path: %s, size: %zu, protection: %i, fd: %i, " + "offset: %zu) to address %p", + devdax_ipc_data->path, length_aligned, + devdax_ipc_data->protection, fd, offset_aligned, (void *)addr); + + *ptr = addr; + + return UMF_RESULT_SUCCESS; +} + +static umf_result_t devdax_close_ipc_handle(void *provider, void *ptr, + size_t size) { + (void)provider; // unused + size = ALIGN_UP(size, DEVDAX_PAGE_SIZE_2MB); + + errno = 0; + int ret = utils_munmap(ptr, size); + // ignore error when size == 0 + if (ret && (size > 0)) { + devdax_store_last_native_error(UMF_DEVDAX_RESULT_ERROR_FREE_FAILED, + errno); + LOG_PERR("memory unmapping failed (ptr: %p, size: %zu)", ptr, size); + + return UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC; + } + + return UMF_RESULT_SUCCESS; +} + +static umf_memory_provider_ops_t UMF_DEVDAX_MEMORY_PROVIDER_OPS = { + .version = UMF_VERSION_CURRENT, + .initialize = devdax_initialize, + .finalize = devdax_finalize, + .alloc = devdax_alloc, + .get_last_native_error = 
devdax_get_last_native_error, + .get_recommended_page_size = devdax_get_recommended_page_size, + .get_min_page_size = devdax_get_min_page_size, + .get_name = devdax_get_name, + .ext.purge_lazy = devdax_purge_lazy, + .ext.purge_force = devdax_purge_force, + .ext.allocation_merge = devdax_allocation_merge, + .ext.allocation_split = devdax_allocation_split, + .ipc.get_ipc_handle_size = devdax_get_ipc_handle_size, + .ipc.get_ipc_handle = devdax_get_ipc_handle, + .ipc.put_ipc_handle = devdax_put_ipc_handle, + .ipc.open_ipc_handle = devdax_open_ipc_handle, + .ipc.close_ipc_handle = devdax_close_ipc_handle}; + +umf_memory_provider_ops_t *umfDevDaxMemoryProviderOps(void) { + return &UMF_DEVDAX_MEMORY_PROVIDER_OPS; +} + +umf_result_t umfDevDaxMemoryProviderParamsCreate( + umf_devdax_memory_provider_params_handle_t *hParams, const char *path, + size_t size) { + libumfInit(); + if (hParams == NULL) { + LOG_ERR("DevDax Memory Provider params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + if (path == NULL) { + LOG_ERR("DevDax path is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + umf_devdax_memory_provider_params_handle_t params = + umf_ba_global_alloc(sizeof(*params)); + if (params == NULL) { + LOG_ERR( + "Allocating memory for the DevDax Memory Provider params failed"); + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + params->path = NULL; + params->size = 0; + params->protection = UMF_PROTECTION_READ | UMF_PROTECTION_WRITE; + + umf_result_t res = + umfDevDaxMemoryProviderParamsSetDeviceDax(params, path, size); + if (res != UMF_RESULT_SUCCESS) { + umf_ba_global_free(params); + return res; + } + + *hParams = params; + + return UMF_RESULT_SUCCESS; +} + +umf_result_t umfDevDaxMemoryProviderParamsDestroy( + umf_devdax_memory_provider_params_handle_t hParams) { + if (hParams != NULL) { + umf_ba_global_free(hParams->path); + umf_ba_global_free(hParams); + } + + return UMF_RESULT_SUCCESS; +} + +umf_result_t 
umfDevDaxMemoryProviderParamsSetDeviceDax( + umf_devdax_memory_provider_params_handle_t hParams, const char *path, + size_t size) { + if (hParams == NULL) { + LOG_ERR("DevDax Memory Provider params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + if (path == NULL) { + LOG_ERR("DevDax path is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + size_t path_len = strlen(path); + if (path_len == 0) { + LOG_ERR("DevDax path is empty"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + path_len += 1; // for the null terminator + char *new_path = umf_ba_global_alloc(path_len); + if (new_path == NULL) { + LOG_ERR("Allocating memory for the DevDax path failed"); + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + strncpy(new_path, path, path_len); + + umf_ba_global_free(hParams->path); + + hParams->path = new_path; + hParams->size = size; + + return UMF_RESULT_SUCCESS; +} + +umf_result_t umfDevDaxMemoryProviderParamsSetProtection( + umf_devdax_memory_provider_params_handle_t hParams, unsigned protection) { + if (hParams == NULL) { + LOG_ERR("DevDax Memory Provider params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + // verify that protection contains only valid bits set + // (UMF_PROTECTION_MAX-1) - highest possible bit + // (UMF_PROTECTION_MAX-1) << 1 - next after highest possible bit + // ((UMF_PROTECTION_MAX-1) << 1) - 1 - all valid bits set + const unsigned VALID_FLAGS_ALL = ((UMF_PROTECTION_MAX - 1) << 1) - 1; + if (protection & ~VALID_FLAGS_ALL || protection == 0) { + LOG_ERR("Incorrect memory protection flags: %u", protection); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + hParams->protection = protection; + + return UMF_RESULT_SUCCESS; +} + +#endif // !defined(_WIN32) && !defined(UMF_NO_HWLOC) diff --git a/src/provider/provider_file_memory.c b/src/provider/provider_file_memory.c new file mode 100644 index 000000000..32383a5ec --- /dev/null +++ b/src/provider/provider_file_memory.c @@ -0,0 +1,898 @@ +/* + 
* Copyright (C) 2024 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +*/ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#if defined(_WIN32) || defined(UMF_NO_HWLOC) + +umf_memory_provider_ops_t *umfFileMemoryProviderOps(void) { + // not supported + return NULL; +} + +umf_result_t umfFileMemoryProviderParamsCreate( + umf_file_memory_provider_params_handle_t *hParams, const char *path) { + (void)hParams; + (void)path; + return UMF_RESULT_ERROR_NOT_SUPPORTED; +} + +umf_result_t umfFileMemoryProviderParamsDestroy( + umf_file_memory_provider_params_handle_t hParams) { + (void)hParams; + return UMF_RESULT_ERROR_NOT_SUPPORTED; +} + +umf_result_t umfFileMemoryProviderParamsSetPath( + umf_file_memory_provider_params_handle_t hParams, const char *path) { + (void)hParams; + (void)path; + return UMF_RESULT_ERROR_NOT_SUPPORTED; +} + +umf_result_t umfFileMemoryProviderParamsSetProtection( + umf_file_memory_provider_params_handle_t hParams, unsigned protection) { + (void)hParams; + (void)protection; + return UMF_RESULT_ERROR_NOT_SUPPORTED; +} + +umf_result_t umfFileMemoryProviderParamsSetVisibility( + umf_file_memory_provider_params_handle_t hParams, + umf_memory_visibility_t visibility) { + (void)hParams; + (void)visibility; + return UMF_RESULT_ERROR_NOT_SUPPORTED; +} + +#else // !defined(_WIN32) && !defined(UMF_NO_HWLOC) + +#include "base_alloc_global.h" +#include "critnib.h" +#include "libumf.h" +#include "utils_common.h" +#include "utils_concurrency.h" +#include "utils_log.h" + +#define FSDAX_PAGE_SIZE_2MB ((size_t)(2 * 1024 * 1024)) // == 2 MB + +#define TLS_MSG_BUF_LEN 1024 + +typedef struct file_memory_provider_t { + utils_mutex_t lock; // lock for file parameters (size and offsets) + + char path[PATH_MAX]; // a path to the file + bool is_fsdax; // true if file is located on FSDAX + int fd; // file 
descriptor for memory mapping + size_t size_fd; // size of the file used for memory mappings + size_t offset_fd; // offset in the file used for memory mappings + + void *base_mmap; // base address of the current memory mapping + size_t size_mmap; // size of the current memory mapping + size_t offset_mmap; // data offset in the current memory mapping + + unsigned protection; // combination of OS-specific protection flags + unsigned visibility; // memory visibility mode + size_t page_size; // minimum page size + + // IPC is enabled only for the UMF_MEM_MAP_SHARED visibility + bool IPC_enabled; + + critnib *mmaps; // a critnib map storing mmap mappings (addr, size) + + // A critnib map storing (ptr, fd_offset + 1) pairs. We add 1 to fd_offset + // in order to be able to store fd_offset equal 0, because + // critnib_get() returns value or NULL, so a value cannot equal 0. + // It is needed mainly in the get_ipc_handle and open_ipc_handle hooks + // to mmap a specific part of a file. + critnib *fd_offset_map; +} file_memory_provider_t; + +// File Memory Provider settings struct +typedef struct umf_file_memory_provider_params_t { + char *path; + unsigned protection; + umf_memory_visibility_t visibility; +} umf_file_memory_provider_params_t; + +typedef struct file_last_native_error_t { + int32_t native_error; + int errno_value; + char msg_buff[TLS_MSG_BUF_LEN]; +} file_last_native_error_t; + +static __TLS file_last_native_error_t TLS_last_native_error; + +// helper values used only in the Native_error_str array +#define _UMF_FILE_RESULT_SUCCESS \ + (UMF_FILE_RESULT_SUCCESS - UMF_FILE_RESULT_SUCCESS) +#define _UMF_FILE_RESULT_ERROR_ALLOC_FAILED \ + (UMF_FILE_RESULT_ERROR_ALLOC_FAILED - UMF_FILE_RESULT_SUCCESS) +#define _UMF_FILE_RESULT_ERROR_FREE_FAILED \ + (UMF_FILE_RESULT_ERROR_FREE_FAILED - UMF_FILE_RESULT_SUCCESS) +#define _UMF_FILE_RESULT_ERROR_PURGE_FORCE_FAILED \ + (UMF_FILE_RESULT_ERROR_PURGE_FORCE_FAILED - UMF_FILE_RESULT_SUCCESS) + +static const char 
*Native_error_str[] = { + [_UMF_FILE_RESULT_SUCCESS] = "success", + [_UMF_FILE_RESULT_ERROR_ALLOC_FAILED] = "memory allocation failed", + [_UMF_FILE_RESULT_ERROR_FREE_FAILED] = "memory deallocation failed", + [_UMF_FILE_RESULT_ERROR_PURGE_FORCE_FAILED] = "force purging failed", +}; + +static void file_store_last_native_error(int32_t native_error, + int errno_value) { + TLS_last_native_error.native_error = native_error; + TLS_last_native_error.errno_value = errno_value; +} + +static umf_result_t +file_translate_params(umf_file_memory_provider_params_t *in_params, + file_memory_provider_t *provider) { + umf_result_t result; + + result = utils_translate_mem_protection_flags(in_params->protection, + &provider->protection); + if (result != UMF_RESULT_SUCCESS) { + LOG_ERR("incorrect memory protection flags: %u", in_params->protection); + return result; + } + + result = utils_translate_mem_visibility_flag(in_params->visibility, + &provider->visibility); + if (result != UMF_RESULT_SUCCESS) { + LOG_ERR("incorrect memory visibility flag: %u", in_params->visibility); + return result; + } + + // IPC is enabled only for the UMF_MEM_MAP_SHARED visibility + provider->IPC_enabled = (in_params->visibility == UMF_MEM_MAP_SHARED); + + return UMF_RESULT_SUCCESS; +} + +static umf_result_t file_initialize(void *params, void **provider) { + umf_result_t ret; + + if (params == NULL) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + umf_file_memory_provider_params_t *in_params = + (umf_file_memory_provider_params_t *)params; + + if (in_params->path == NULL) { + LOG_ERR("file path is missing"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + file_memory_provider_t *file_provider = + umf_ba_global_alloc(sizeof(*file_provider)); + if (!file_provider) { + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + memset(file_provider, 0, sizeof(*file_provider)); + + ret = file_translate_params(in_params, file_provider); + if (ret != UMF_RESULT_SUCCESS) { + goto err_free_file_provider; + } + + 
if (utils_copy_path(in_params->path, file_provider->path, PATH_MAX)) { + goto err_free_file_provider; + } + + file_provider->fd = utils_file_open_or_create(in_params->path); + if (file_provider->fd == -1) { + LOG_ERR("cannot open the file: %s", in_params->path); + ret = UMF_RESULT_ERROR_INVALID_ARGUMENT; + goto err_free_file_provider; + } + + if (utils_set_file_size(file_provider->fd, FSDAX_PAGE_SIZE_2MB)) { + LOG_ERR("cannot set size of the file: %s", in_params->path); + ret = UMF_RESULT_ERROR_UNKNOWN; + goto err_close_fd; + } + + file_provider->size_fd = FSDAX_PAGE_SIZE_2MB; + + LOG_DEBUG("size of the file %s is: %zu", in_params->path, + file_provider->size_fd); + + if (!(in_params->visibility & UMF_MEM_MAP_PRIVATE)) { + // check if file is located on FSDAX + void *addr = utils_mmap_file( + NULL, file_provider->size_fd, file_provider->protection, + file_provider->visibility, file_provider->fd, 0, + &file_provider->is_fsdax); + if (addr) { + utils_munmap(addr, file_provider->size_fd); + } + } + + if (file_provider->is_fsdax) { + file_provider->page_size = FSDAX_PAGE_SIZE_2MB; + } else { + file_provider->page_size = utils_get_page_size(); + } + + if (utils_mutex_init(&file_provider->lock) == NULL) { + LOG_ERR("lock init failed"); + ret = UMF_RESULT_ERROR_UNKNOWN; + goto err_close_fd; + } + + file_provider->fd_offset_map = critnib_new(); + if (!file_provider->fd_offset_map) { + LOG_ERR("creating the map of file descriptor offsets failed"); + ret = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + goto err_mutex_destroy_not_free; + } + + file_provider->mmaps = critnib_new(); + if (!file_provider->mmaps) { + LOG_ERR("creating the map of memory mappings failed"); + ret = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + goto err_delete_fd_offset_map; + } + + *provider = file_provider; + + return UMF_RESULT_SUCCESS; + +err_delete_fd_offset_map: + critnib_delete(file_provider->fd_offset_map); +err_mutex_destroy_not_free: + utils_mutex_destroy_not_free(&file_provider->lock); +err_close_fd: + 
utils_close_fd(file_provider->fd); +err_free_file_provider: + umf_ba_global_free(file_provider); + return ret; +} + +static void file_finalize(void *provider) { + file_memory_provider_t *file_provider = provider; + + uintptr_t key = 0; + uintptr_t rkey = 0; + void *rvalue = NULL; + while (1 == + critnib_find(file_provider->mmaps, key, FIND_G, &rkey, &rvalue)) { + utils_munmap((void *)rkey, (size_t)rvalue); + critnib_remove(file_provider->mmaps, rkey); + key = rkey; + } + + utils_mutex_destroy_not_free(&file_provider->lock); + utils_close_fd(file_provider->fd); + critnib_delete(file_provider->fd_offset_map); + critnib_delete(file_provider->mmaps); + umf_ba_global_free(file_provider); +} + +static umf_result_t file_mmap_aligned(file_memory_provider_t *file_provider, + size_t size, size_t alignment) { + int prot = file_provider->protection; + int flag = file_provider->visibility; + int fd = file_provider->fd; + size_t size_fd = file_provider->size_fd; + size_t offset_fd = file_provider->offset_fd; + size_t page_size = file_provider->page_size; + + assert(fd > 0); + + // We have to increase size by alignment to be able to "cut out" + // the correctly aligned part of the memory + size_t extended_size = size + alignment; + if (extended_size < size) { + LOG_ERR("invalid size of allocation"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; // arithmetic overflow + } + + size_t rest = extended_size & (page_size - 1); + if (rest) { + extended_size += page_size - rest; + } + if (extended_size < size) { + LOG_ERR("invalid size of allocation"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; // arithmetic overflow + } + + // offset_fd has to be also page-aligned since it is the offset of mmap() + size_t aligned_offset_fd = offset_fd; + rest = aligned_offset_fd & (page_size - 1); + if (rest) { + aligned_offset_fd += page_size - rest; + } + if (aligned_offset_fd < offset_fd) { + LOG_ERR("arithmetic overflow of file offset"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; // arithmetic 
overflow + } + + if (aligned_offset_fd + extended_size > size_fd) { + size_t new_size_fd = aligned_offset_fd + extended_size; + if (utils_fallocate(fd, size_fd, new_size_fd - size_fd)) { + LOG_ERR("cannot grow the file size from %zu to %zu", size_fd, + new_size_fd); + return UMF_RESULT_ERROR_UNKNOWN; + } + + LOG_DEBUG("file size grown from %zu to %zu", size_fd, new_size_fd); + file_provider->size_fd = new_size_fd; + } + + if (aligned_offset_fd > offset_fd) { + file_provider->offset_fd = aligned_offset_fd; + } + + ASSERT_IS_ALIGNED(extended_size, page_size); + ASSERT_IS_ALIGNED(aligned_offset_fd, page_size); + + void *ptr = utils_mmap_file(NULL, extended_size, prot, flag, fd, + aligned_offset_fd, NULL); + if (ptr == NULL) { + LOG_PERR("memory mapping failed"); + return UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC; + } + + int ret = critnib_insert(file_provider->mmaps, (uintptr_t)ptr, + (void *)(uintptr_t)extended_size, 0 /* update */); + if (ret) { + LOG_ERR("inserting a value to the map of memory mapping failed " + "(addr=%p, size=%zu)", + ptr, extended_size); + } + + LOG_DEBUG( + "inserted a value to the map of memory mapping (addr=%p, size=%zu)", + ptr, extended_size); + + file_provider->base_mmap = ptr; + file_provider->size_mmap = extended_size; + file_provider->offset_mmap = 0; + + return UMF_RESULT_SUCCESS; +} + +static umf_result_t file_alloc_aligned(file_memory_provider_t *file_provider, + size_t size, size_t alignment, + void **out_addr, + size_t *alloc_offset_fd) { + assert(alloc_offset_fd); + assert(out_addr); + + umf_result_t umf_result; + + if (utils_mutex_lock(&file_provider->lock)) { + LOG_ERR("locking file data failed"); + return UMF_RESULT_ERROR_UNKNOWN; + } + + if (file_provider->size_mmap - file_provider->offset_mmap < size) { + umf_result = file_mmap_aligned(file_provider, size, alignment); + if (umf_result != UMF_RESULT_SUCCESS) { + utils_mutex_unlock(&file_provider->lock); + return umf_result; + } + } + + void *base_mmap = 
file_provider->base_mmap; + assert(base_mmap); + + uintptr_t new_aligned_ptr = + (uintptr_t)base_mmap + file_provider->offset_mmap; + if (alignment) { + uintptr_t rest = new_aligned_ptr & (alignment - 1); + if (rest) { + new_aligned_ptr += alignment - rest; + } + ASSERT_IS_ALIGNED(new_aligned_ptr, alignment); + } + + size_t new_offset_mmap = new_aligned_ptr - (uintptr_t)base_mmap; + size_t new_offset_fd = + file_provider->offset_fd + new_offset_mmap - file_provider->offset_mmap; + + if (file_provider->size_mmap - new_offset_mmap < size) { + umf_result = file_mmap_aligned(file_provider, size, alignment); + if (umf_result != UMF_RESULT_SUCCESS) { + utils_mutex_unlock(&file_provider->lock); + return umf_result; + } + + assert(file_provider->base_mmap); + + // file_provider-> base_mmap, offset_mmap, offset_fd + // were updated by file_mmap_aligned(): + new_aligned_ptr = (uintptr_t)file_provider->base_mmap; + new_offset_mmap = 0; // == file_provider->offset_mmap + new_offset_fd = file_provider->offset_fd; + + ASSERT_IS_ALIGNED(new_aligned_ptr, alignment); + } + + *alloc_offset_fd = new_offset_fd; + + file_provider->offset_fd = new_offset_fd + size; + file_provider->offset_mmap = new_offset_mmap + size; + + *out_addr = (void *)new_aligned_ptr; + + utils_mutex_unlock(&file_provider->lock); + + return UMF_RESULT_SUCCESS; +} + +static umf_result_t file_alloc(void *provider, size_t size, size_t alignment, + void **resultPtr) { + umf_result_t umf_result; + int ret; + + file_memory_provider_t *file_provider = (file_memory_provider_t *)provider; + + // alignment must be a power of two and a multiple or a divider of the page size + if (alignment && ((alignment & (alignment - 1)) || + ((alignment % file_provider->page_size) && + (file_provider->page_size % alignment)))) { + LOG_ERR("wrong alignment: %zu (not a power of 2 or a multiple or a " + "divider of the page size (%zu))", + alignment, file_provider->page_size); + return UMF_RESULT_ERROR_INVALID_ALIGNMENT; + } + + if 
(IS_NOT_ALIGNED(alignment, file_provider->page_size)) { + alignment = ALIGN_UP(alignment, file_provider->page_size); + } + + void *addr = NULL; + size_t alloc_offset_fd; // needed for critnib_insert() + umf_result = file_alloc_aligned(file_provider, size, alignment, &addr, + &alloc_offset_fd); + if (umf_result != UMF_RESULT_SUCCESS) { + file_store_last_native_error(UMF_FILE_RESULT_ERROR_ALLOC_FAILED, 0); + LOG_ERR("memory allocation failed"); + return umf_result; + } + + // store (offset_fd + 1) to be able to store offset_fd == 0 + ret = critnib_insert(file_provider->fd_offset_map, (uintptr_t)addr, + (void *)(uintptr_t)(alloc_offset_fd + 1), + 0 /* update */); + if (ret) { + LOG_ERR("inserting a value to the file descriptor offset map failed " + "(addr=%p, offset=%zu)", + addr, alloc_offset_fd); + } + + *resultPtr = addr; + + return UMF_RESULT_SUCCESS; +} + +static void file_get_last_native_error(void *provider, const char **ppMessage, + int32_t *pError) { + (void)provider; // unused + + if (ppMessage == NULL || pError == NULL) { + assert(0); + return; + } + + *pError = TLS_last_native_error.native_error; + if (TLS_last_native_error.errno_value == 0) { + *ppMessage = Native_error_str[*pError - UMF_FILE_RESULT_SUCCESS]; + return; + } + + const char *msg; + size_t len; + size_t pos = 0; + + msg = Native_error_str[*pError - UMF_FILE_RESULT_SUCCESS]; + len = strlen(msg); + memcpy(TLS_last_native_error.msg_buff + pos, msg, len + 1); + pos += len; + + msg = ": "; + len = strlen(msg); + memcpy(TLS_last_native_error.msg_buff + pos, msg, len + 1); + pos += len; + + utils_strerror(TLS_last_native_error.errno_value, + TLS_last_native_error.msg_buff + pos, TLS_MSG_BUF_LEN - pos); + + *ppMessage = TLS_last_native_error.msg_buff; +} + +static umf_result_t file_get_recommended_page_size(void *provider, size_t size, + size_t *page_size) { + (void)provider; // unused + (void)size; // unused + + file_memory_provider_t *file_provider = (file_memory_provider_t *)provider; + *page_size 
= file_provider->page_size; + + return UMF_RESULT_SUCCESS; +} + +static umf_result_t file_get_min_page_size(void *provider, void *ptr, + size_t *page_size) { + (void)ptr; // unused + + return file_get_recommended_page_size(provider, 0, page_size); +} + +static umf_result_t file_purge_lazy(void *provider, void *ptr, size_t size) { + (void)provider; // unused + (void)ptr; // unused + (void)size; // unused + // purge_lazy is unsupported in case of the file memory provider, + // because the MADV_FREE operation can be applied + // only to private anonymous pages (see madvise(2)). + return UMF_RESULT_ERROR_NOT_SUPPORTED; +} + +static umf_result_t file_purge_force(void *provider, void *ptr, size_t size) { + (void)provider; // unused + + errno = 0; + if (utils_purge(ptr, size, UMF_PURGE_FORCE)) { + file_store_last_native_error(UMF_FILE_RESULT_ERROR_PURGE_FORCE_FAILED, + errno); + LOG_PERR("force purging failed"); + return UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC; + } + return UMF_RESULT_SUCCESS; +} + +static const char *file_get_name(void *provider) { + (void)provider; // unused + return "FILE"; +} + +// This function is supposed to be thread-safe, so it should NOT be called concurrently +// with file_allocation_merge() with the same pointer. 
+static umf_result_t file_allocation_split(void *provider, void *ptr, + size_t totalSize, size_t firstSize) { + (void)totalSize; + + file_memory_provider_t *file_provider = (file_memory_provider_t *)provider; + if (file_provider->fd <= 0) { + return UMF_RESULT_SUCCESS; + } + + void *value = critnib_get(file_provider->fd_offset_map, (uintptr_t)ptr); + if (value == NULL) { + LOG_ERR("file_allocation_split(): getting a value from the file " + "descriptor offset map failed (addr=%p)", + ptr); + return UMF_RESULT_ERROR_UNKNOWN; + } + + uintptr_t new_key = (uintptr_t)ptr + firstSize; + void *new_value = (void *)((uintptr_t)value + firstSize); + int ret = critnib_insert(file_provider->fd_offset_map, new_key, new_value, + 0 /* update */); + if (ret) { + LOG_ERR("file_allocation_split(): inserting a value to the file " + "descriptor offset map failed (addr=%p, offset=%zu)", + (void *)new_key, (size_t)new_value - 1); + return UMF_RESULT_ERROR_UNKNOWN; + } + + return UMF_RESULT_SUCCESS; +} + +// It should NOT be called concurrently with file_allocation_split() with the same pointer. 
+static umf_result_t file_allocation_merge(void *provider, void *lowPtr, + void *highPtr, size_t totalSize) { + (void)lowPtr; + (void)totalSize; + + file_memory_provider_t *file_provider = (file_memory_provider_t *)provider; + if (file_provider->fd <= 0) { + return UMF_RESULT_SUCCESS; + } + + void *value = + critnib_remove(file_provider->fd_offset_map, (uintptr_t)highPtr); + if (value == NULL) { + LOG_ERR("file_allocation_merge(): removing a value from the file " + "descriptor offset map failed (addr=%p)", + highPtr); + return UMF_RESULT_ERROR_UNKNOWN; + } + + return UMF_RESULT_SUCCESS; +} + +typedef struct file_ipc_data_t { + char path[PATH_MAX]; + size_t offset_fd; + size_t size; + unsigned protection; // combination of OS-specific protection flags + unsigned visibility; // memory visibility mode +} file_ipc_data_t; + +static umf_result_t file_get_ipc_handle_size(void *provider, size_t *size) { + file_memory_provider_t *file_provider = (file_memory_provider_t *)provider; + if (!file_provider->IPC_enabled) { + LOG_ERR("memory visibility mode is not UMF_MEM_MAP_SHARED") + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + *size = sizeof(file_ipc_data_t); + + return UMF_RESULT_SUCCESS; +} + +static umf_result_t file_get_ipc_handle(void *provider, const void *ptr, + size_t size, void *providerIpcData) { + file_memory_provider_t *file_provider = (file_memory_provider_t *)provider; + if (!file_provider->IPC_enabled) { + LOG_ERR("memory visibility mode is not UMF_MEM_MAP_SHARED") + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + void *value = critnib_get(file_provider->fd_offset_map, (uintptr_t)ptr); + if (value == NULL) { + LOG_ERR("file_get_ipc_handle(): getting a value from the IPC cache " + "failed (addr=%p)", + ptr); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + file_ipc_data_t *file_ipc_data = (file_ipc_data_t *)providerIpcData; + file_ipc_data->offset_fd = (size_t)value - 1; + file_ipc_data->size = size; + strncpy(file_ipc_data->path, file_provider->path, 
PATH_MAX - 1); + file_ipc_data->path[PATH_MAX - 1] = '\0'; + file_ipc_data->protection = file_provider->protection; + file_ipc_data->visibility = file_provider->visibility; + + return UMF_RESULT_SUCCESS; +} + +static umf_result_t file_put_ipc_handle(void *provider, void *providerIpcData) { + file_memory_provider_t *file_provider = (file_memory_provider_t *)provider; + if (!file_provider->IPC_enabled) { + LOG_ERR("memory visibility mode is not UMF_MEM_MAP_SHARED") + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + file_ipc_data_t *file_ipc_data = (file_ipc_data_t *)providerIpcData; + + if (strncmp(file_ipc_data->path, file_provider->path, PATH_MAX)) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + return UMF_RESULT_SUCCESS; +} + +static umf_result_t file_open_ipc_handle(void *provider, void *providerIpcData, + void **ptr) { + file_memory_provider_t *file_provider = (file_memory_provider_t *)provider; + if (!file_provider->IPC_enabled) { + LOG_ERR("memory visibility mode is not UMF_MEM_MAP_SHARED") + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + file_ipc_data_t *file_ipc_data = (file_ipc_data_t *)providerIpcData; + umf_result_t ret = UMF_RESULT_SUCCESS; + int fd; + + size_t offset_aligned = file_ipc_data->offset_fd; + size_t size_aligned = file_ipc_data->size; + + if (file_provider->is_fsdax) { + // It is just a workaround for case when + // file_alloc() was called with the size argument + // that is not a multiplier of FSDAX_PAGE_SIZE_2MB. 
+ utils_align_ptr_down_size_up((void **)&offset_aligned, &size_aligned, + FSDAX_PAGE_SIZE_2MB); + } + + fd = utils_file_open(file_ipc_data->path); + if (fd == -1) { + LOG_PERR("opening the file to be mapped (%s) failed", + file_ipc_data->path); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + char *addr = + utils_mmap_file(NULL, size_aligned, file_ipc_data->protection, + file_ipc_data->visibility, fd, offset_aligned, NULL); + (void)utils_close_fd(fd); + if (addr == NULL) { + file_store_last_native_error(UMF_FILE_RESULT_ERROR_ALLOC_FAILED, errno); + LOG_PERR("file mapping failed (path: %s, size: %zu, protection: %u, " + "visibility: %u, fd: %i, offset: %zu)", + file_ipc_data->path, size_aligned, file_ipc_data->protection, + file_ipc_data->visibility, fd, offset_aligned); + return UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC; + } + + LOG_DEBUG("file mapped (path: %s, size: %zu, protection: %u, visibility: " + "%u, fd: %i, offset: %zu) at address %p", + file_ipc_data->path, size_aligned, file_ipc_data->protection, + file_ipc_data->visibility, fd, offset_aligned, (void *)addr); + + *ptr = addr; + + return ret; +} + +static umf_result_t file_close_ipc_handle(void *provider, void *ptr, + size_t size) { + file_memory_provider_t *file_provider = (file_memory_provider_t *)provider; + if (!file_provider->IPC_enabled) { + LOG_ERR("memory visibility mode is not UMF_MEM_MAP_SHARED") + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + if (file_provider->is_fsdax) { + // It is just a workaround for case when + // file_alloc() was called with the size argument + // that is not a multiplier of FSDAX_PAGE_SIZE_2MB. 
+ utils_align_ptr_down_size_up(&ptr, &size, FSDAX_PAGE_SIZE_2MB); + } + + errno = 0; + int ret = utils_munmap(ptr, size); + // ignore error when size == 0 + if (ret && (size > 0)) { + file_store_last_native_error(UMF_FILE_RESULT_ERROR_FREE_FAILED, errno); + LOG_PERR("memory unmapping failed"); + + return UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC; + } + + return UMF_RESULT_SUCCESS; +} + +static umf_memory_provider_ops_t UMF_FILE_MEMORY_PROVIDER_OPS = { + .version = UMF_VERSION_CURRENT, + .initialize = file_initialize, + .finalize = file_finalize, + .alloc = file_alloc, + .get_last_native_error = file_get_last_native_error, + .get_recommended_page_size = file_get_recommended_page_size, + .get_min_page_size = file_get_min_page_size, + .get_name = file_get_name, + .ext.purge_lazy = file_purge_lazy, + .ext.purge_force = file_purge_force, + .ext.allocation_merge = file_allocation_merge, + .ext.allocation_split = file_allocation_split, + .ipc.get_ipc_handle_size = file_get_ipc_handle_size, + .ipc.get_ipc_handle = file_get_ipc_handle, + .ipc.put_ipc_handle = file_put_ipc_handle, + .ipc.open_ipc_handle = file_open_ipc_handle, + .ipc.close_ipc_handle = file_close_ipc_handle}; + +umf_memory_provider_ops_t *umfFileMemoryProviderOps(void) { + return &UMF_FILE_MEMORY_PROVIDER_OPS; +} + +umf_result_t umfFileMemoryProviderParamsCreate( + umf_file_memory_provider_params_handle_t *hParams, const char *path) { + libumfInit(); + if (hParams == NULL) { + LOG_ERR("File Memory Provider params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + if (path == NULL) { + LOG_ERR("File path is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + umf_file_memory_provider_params_handle_t params = + umf_ba_global_alloc(sizeof(*params)); + if (params == NULL) { + LOG_ERR("allocating memory for File Memory Provider params failed"); + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + params->path = NULL; + params->protection = UMF_PROTECTION_READ | UMF_PROTECTION_WRITE; + 
params->visibility = UMF_MEM_MAP_PRIVATE; + + umf_result_t res = umfFileMemoryProviderParamsSetPath(params, path); + if (res != UMF_RESULT_SUCCESS) { + umf_ba_global_free(params); + return res; + } + + *hParams = params; + + return UMF_RESULT_SUCCESS; +} + +umf_result_t umfFileMemoryProviderParamsDestroy( + umf_file_memory_provider_params_handle_t hParams) { + if (hParams != NULL) { + umf_ba_global_free(hParams->path); + umf_ba_global_free(hParams); + } + + return UMF_RESULT_SUCCESS; +} + +umf_result_t umfFileMemoryProviderParamsSetPath( + umf_file_memory_provider_params_handle_t hParams, const char *path) { + if (hParams == NULL) { + LOG_ERR("File Memory Provider params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + if (path == NULL) { + LOG_ERR("File path is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + size_t len = strlen(path); + if (len == 0) { + LOG_ERR("File path is empty"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + len += 1; // for the null terminator + char *new_path = NULL; + new_path = umf_ba_global_alloc(len); + if (new_path == NULL) { + LOG_ERR("allocating memory for the file path failed"); + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + strncpy(new_path, path, len); + + umf_ba_global_free(hParams->path); + hParams->path = new_path; + + return UMF_RESULT_SUCCESS; +} + +umf_result_t umfFileMemoryProviderParamsSetProtection( + umf_file_memory_provider_params_handle_t hParams, unsigned protection) { + if (hParams == NULL) { + LOG_ERR("File Memory Provider params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + hParams->protection = protection; + + return UMF_RESULT_SUCCESS; +} + +umf_result_t umfFileMemoryProviderParamsSetVisibility( + umf_file_memory_provider_params_handle_t hParams, + umf_memory_visibility_t visibility) { + if (hParams == NULL) { + LOG_ERR("File Memory Provider params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + hParams->visibility = 
visibility; + + return UMF_RESULT_SUCCESS; +} + +#endif // !defined(_WIN32) && !defined(UMF_NO_HWLOC) diff --git a/src/provider/provider_level_zero.c b/src/provider/provider_level_zero.c index 3f7340556..f4a3e97c2 100644 --- a/src/provider/provider_level_zero.c +++ b/src/provider/provider_level_zero.c @@ -14,7 +14,62 @@ #include #include +#if defined(UMF_NO_LEVEL_ZERO_PROVIDER) + +umf_result_t umfLevelZeroMemoryProviderParamsCreate( + umf_level_zero_memory_provider_params_handle_t *hParams) { + (void)hParams; + return UMF_RESULT_ERROR_NOT_SUPPORTED; +} + +umf_result_t umfLevelZeroMemoryProviderParamsDestroy( + umf_level_zero_memory_provider_params_handle_t hParams) { + (void)hParams; + return UMF_RESULT_ERROR_NOT_SUPPORTED; +} + +umf_result_t umfLevelZeroMemoryProviderParamsSetContext( + umf_level_zero_memory_provider_params_handle_t hParams, + ze_context_handle_t hContext) { + (void)hParams; + (void)hContext; + return UMF_RESULT_ERROR_NOT_SUPPORTED; +} + +umf_result_t umfLevelZeroMemoryProviderParamsSetDevice( + umf_level_zero_memory_provider_params_handle_t hParams, + ze_device_handle_t hDevice) { + (void)hParams; + (void)hDevice; + return UMF_RESULT_ERROR_NOT_SUPPORTED; +} + +umf_result_t umfLevelZeroMemoryProviderParamsSetMemoryType( + umf_level_zero_memory_provider_params_handle_t hParams, + umf_usm_memory_type_t memoryType) { + (void)hParams; + (void)memoryType; + return UMF_RESULT_ERROR_NOT_SUPPORTED; +} + +umf_result_t umfLevelZeroMemoryProviderParamsSetResidentDevices( + umf_level_zero_memory_provider_params_handle_t hParams, + ze_device_handle_t *hDevices, uint32_t deviceCount) { + (void)hParams; + (void)hDevices; + (void)deviceCount; + return UMF_RESULT_ERROR_NOT_SUPPORTED; +} + +umf_memory_provider_ops_t *umfLevelZeroMemoryProviderOps(void) { + // not supported + return NULL; +} + +#else // !defined(UMF_NO_LEVEL_ZERO_PROVIDER) + #include "base_alloc_global.h" +#include "libumf.h" #include "utils_assert.h" #include "utils_common.h" #include 
"utils_concurrency.h" @@ -23,10 +78,30 @@ #include "utils_sanitizers.h" #include "ze_api.h" +// Level Zero Memory Provider settings struct +typedef struct umf_level_zero_memory_provider_params_t { + ze_context_handle_t + level_zero_context_handle; ///< Handle to the Level Zero context + ze_device_handle_t + level_zero_device_handle; ///< Handle to the Level Zero device + + umf_usm_memory_type_t memory_type; ///< Allocation memory type + + ze_device_handle_t * + resident_device_handles; ///< Array of devices for which the memory should be made resident + uint32_t + resident_device_count; ///< Number of devices for which the memory should be made resident +} umf_level_zero_memory_provider_params_t; + typedef struct ze_memory_provider_t { ze_context_handle_t context; ze_device_handle_t device; ze_memory_type_t memory_type; + + ze_device_handle_t *resident_device_handles; + uint32_t resident_device_count; + + ze_device_properties_t device_properties; } ze_memory_provider_t; typedef struct ze_ops_t { @@ -48,11 +123,35 @@ typedef struct ze_ops_t { ze_ipc_mem_handle_t, ze_ipc_memory_flags_t, void **); ze_result_t (*zeMemCloseIpcHandle)(ze_context_handle_t, void *); + ze_result_t (*zeContextMakeMemoryResident)(ze_context_handle_t, + ze_device_handle_t, void *, + size_t); + ze_result_t (*zeDeviceGetProperties)(ze_device_handle_t, + ze_device_properties_t *); } ze_ops_t; static ze_ops_t g_ze_ops; static UTIL_ONCE_FLAG ze_is_initialized = UTIL_ONCE_FLAG_INIT; static bool Init_ze_global_state_failed; +static __TLS ze_result_t TLS_last_native_error; + +static void store_last_native_error(int32_t native_error) { + TLS_last_native_error = native_error; +} + +static umf_result_t ze2umf_result(ze_result_t result) { + switch (result) { + case ZE_RESULT_SUCCESS: + return UMF_RESULT_SUCCESS; + case ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY: + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + case ZE_RESULT_ERROR_INVALID_ARGUMENT: + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + default: + 
store_last_native_error(result); + return UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC; + } +} static void init_ze_global_state(void) { #ifdef _WIN32 @@ -63,26 +162,32 @@ static void init_ze_global_state(void) { // check if Level Zero shared library is already loaded // we pass 0 as a handle to search the global symbol table *(void **)&g_ze_ops.zeMemAllocHost = - util_get_symbol_addr(0, "zeMemAllocHost", lib_name); + utils_get_symbol_addr(0, "zeMemAllocHost", lib_name); *(void **)&g_ze_ops.zeMemAllocDevice = - util_get_symbol_addr(0, "zeMemAllocDevice", lib_name); + utils_get_symbol_addr(0, "zeMemAllocDevice", lib_name); *(void **)&g_ze_ops.zeMemAllocShared = - util_get_symbol_addr(0, "zeMemAllocShared", lib_name); + utils_get_symbol_addr(0, "zeMemAllocShared", lib_name); *(void **)&g_ze_ops.zeMemFree = - util_get_symbol_addr(0, "zeMemFree", lib_name); + utils_get_symbol_addr(0, "zeMemFree", lib_name); *(void **)&g_ze_ops.zeMemGetIpcHandle = - util_get_symbol_addr(0, "zeMemGetIpcHandle", lib_name); + utils_get_symbol_addr(0, "zeMemGetIpcHandle", lib_name); *(void **)&g_ze_ops.zeMemPutIpcHandle = - util_get_symbol_addr(0, "zeMemPutIpcHandle", lib_name); + utils_get_symbol_addr(0, "zeMemPutIpcHandle", lib_name); *(void **)&g_ze_ops.zeMemOpenIpcHandle = - util_get_symbol_addr(0, "zeMemOpenIpcHandle", lib_name); + utils_get_symbol_addr(0, "zeMemOpenIpcHandle", lib_name); *(void **)&g_ze_ops.zeMemCloseIpcHandle = - util_get_symbol_addr(0, "zeMemCloseIpcHandle", lib_name); + utils_get_symbol_addr(0, "zeMemCloseIpcHandle", lib_name); + *(void **)&g_ze_ops.zeContextMakeMemoryResident = + utils_get_symbol_addr(0, "zeContextMakeMemoryResident", lib_name); + *(void **)&g_ze_ops.zeDeviceGetProperties = + utils_get_symbol_addr(0, "zeDeviceGetProperties", lib_name); if (!g_ze_ops.zeMemAllocHost || !g_ze_ops.zeMemAllocDevice || !g_ze_ops.zeMemAllocShared || !g_ze_ops.zeMemFree || !g_ze_ops.zeMemGetIpcHandle || !g_ze_ops.zeMemOpenIpcHandle || - !g_ze_ops.zeMemCloseIpcHandle) { + 
!g_ze_ops.zeMemCloseIpcHandle || + !g_ze_ops.zeContextMakeMemoryResident || + !g_ze_ops.zeDeviceGetProperties) { // g_ze_ops.zeMemPutIpcHandle can be NULL because it was introduced // starting from Level Zero 1.6 LOG_ERR("Required Level Zero symbols not found."); @@ -90,15 +195,131 @@ static void init_ze_global_state(void) { } } -umf_result_t ze_memory_provider_initialize(void *params, void **provider) { - if (provider == NULL || params == NULL) { +umf_result_t umfLevelZeroMemoryProviderParamsCreate( + umf_level_zero_memory_provider_params_handle_t *hParams) { + libumfInit(); + if (!hParams) { + LOG_ERR("Level zero memory provider params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + umf_level_zero_memory_provider_params_t *params = + umf_ba_global_alloc(sizeof(umf_level_zero_memory_provider_params_t)); + if (!params) { + LOG_ERR("Cannot allocate memory for Level Zero memory provider params"); + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + // Assign default values + params->level_zero_context_handle = NULL; + params->level_zero_device_handle = NULL; + params->memory_type = UMF_MEMORY_TYPE_UNKNOWN; + params->resident_device_handles = NULL; + params->resident_device_count = 0; + + *hParams = params; + + return UMF_RESULT_SUCCESS; +} + +umf_result_t umfLevelZeroMemoryProviderParamsDestroy( + umf_level_zero_memory_provider_params_handle_t hParams) { + umf_ba_global_free(hParams); + + return UMF_RESULT_SUCCESS; +} + +umf_result_t umfLevelZeroMemoryProviderParamsSetContext( + umf_level_zero_memory_provider_params_handle_t hParams, + ze_context_handle_t hContext) { + if (!hParams) { + LOG_ERR("Level zero memory provider params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + if (!hContext) { + LOG_ERR("Level zero context handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + hParams->level_zero_context_handle = hContext; + + return UMF_RESULT_SUCCESS; +} + +umf_result_t 
umfLevelZeroMemoryProviderParamsSetDevice( + umf_level_zero_memory_provider_params_handle_t hParams, + ze_device_handle_t hDevice) { + if (!hParams) { + LOG_ERR("Level zero memory provider params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + hParams->level_zero_device_handle = hDevice; + + return UMF_RESULT_SUCCESS; +} + +umf_result_t umfLevelZeroMemoryProviderParamsSetMemoryType( + umf_level_zero_memory_provider_params_handle_t hParams, + umf_usm_memory_type_t memoryType) { + if (!hParams) { + LOG_ERR("Level zero memory provider params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + hParams->memory_type = memoryType; + + return UMF_RESULT_SUCCESS; +} + +umf_result_t umfLevelZeroMemoryProviderParamsSetResidentDevices( + umf_level_zero_memory_provider_params_handle_t hParams, + ze_device_handle_t *hDevices, uint32_t deviceCount) { + if (!hParams) { + LOG_ERR("Level zero memory provider params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + if (deviceCount && !hDevices) { + LOG_ERR("Resident devices array is NULL, but deviceCount is not zero"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + hParams->resident_device_handles = hDevices; + hParams->resident_device_count = deviceCount; + + return UMF_RESULT_SUCCESS; +} + +static umf_result_t ze_memory_provider_initialize(void *params, + void **provider) { + if (params == NULL) { return UMF_RESULT_ERROR_INVALID_ARGUMENT; } - level_zero_memory_provider_params_t *ze_params = - (level_zero_memory_provider_params_t *)params; + umf_level_zero_memory_provider_params_handle_t ze_params = + (umf_level_zero_memory_provider_params_handle_t)params; - util_init_once(&ze_is_initialized, init_ze_global_state); + if (!ze_params->level_zero_context_handle) { + LOG_ERR("Level Zero context handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + if ((ze_params->memory_type == UMF_MEMORY_TYPE_HOST) == + (ze_params->level_zero_device_handle != NULL)) { + 
LOG_ERR("Level Zero device handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + if ((bool)ze_params->resident_device_count && + (ze_params->resident_device_handles == NULL)) { + LOG_ERR("Resident devices handles array is NULL, but device_count is " + "not zero"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + utils_init_once(&ze_is_initialized, init_ze_global_state); if (Init_ze_global_state_failed) { LOG_ERR("Loading Level Zero symbols failed"); return UMF_RESULT_ERROR_UNKNOWN; @@ -107,6 +328,7 @@ umf_result_t ze_memory_provider_initialize(void *params, void **provider) { ze_memory_provider_t *ze_provider = umf_ba_global_alloc(sizeof(ze_memory_provider_t)); if (!ze_provider) { + LOG_ERR("Cannot allocate memory for Level Zero Memory Provider"); return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; } @@ -114,28 +336,69 @@ umf_result_t ze_memory_provider_initialize(void *params, void **provider) { ze_provider->device = ze_params->level_zero_device_handle; ze_provider->memory_type = (ze_memory_type_t)ze_params->memory_type; + if (ze_provider->device) { + umf_result_t ret = ze2umf_result(g_ze_ops.zeDeviceGetProperties( + ze_provider->device, &ze_provider->device_properties)); + + if (ret != UMF_RESULT_SUCCESS) { + LOG_ERR("Cannot get device properties"); + umf_ba_global_free(ze_provider); + return ret; + } + } else { + memset(&ze_provider->device_properties, 0, + sizeof(ze_provider->device_properties)); + } + + if (ze_params->resident_device_count) { + ze_provider->resident_device_handles = umf_ba_global_alloc( + sizeof(ze_device_handle_t) * ze_params->resident_device_count); + if (!ze_provider->resident_device_handles) { + LOG_ERR("Cannot allocate memory for resident devices"); + umf_ba_global_free(ze_provider); + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + ze_provider->resident_device_count = ze_params->resident_device_count; + + for (uint32_t i = 0; i < ze_provider->resident_device_count; i++) { + ze_provider->resident_device_handles[i] = + 
ze_params->resident_device_handles[i]; + } + } else { + ze_provider->resident_device_handles = NULL; + ze_provider->resident_device_count = 0; + } + *provider = ze_provider; return UMF_RESULT_SUCCESS; } -void ze_memory_provider_finalize(void *provider) { - assert(provider); +static void ze_memory_provider_finalize(void *provider) { + ze_memory_provider_t *ze_provider = (ze_memory_provider_t *)provider; + umf_ba_global_free(ze_provider->resident_device_handles); - util_init_once(&ze_is_initialized, init_ze_global_state); umf_ba_global_free(provider); +} + +static bool use_relaxed_allocation(ze_memory_provider_t *ze_provider, + size_t size) { + assert(ze_provider); + assert(ze_provider->device); + assert(ze_provider->device_properties.maxMemAllocSize > 0); - // portable version of "ze_is_initialized = UTIL_ONCE_FLAG_INIT;" - static UTIL_ONCE_FLAG is_initialized = UTIL_ONCE_FLAG_INIT; - memcpy(&ze_is_initialized, &is_initialized, sizeof(ze_is_initialized)); + return size > ze_provider->device_properties.maxMemAllocSize; } +static ze_relaxed_allocation_limits_exp_desc_t relaxed_device_allocation_desc = + {.stype = ZE_STRUCTURE_TYPE_RELAXED_ALLOCATION_LIMITS_EXP_DESC, + .pNext = NULL, + .flags = ZE_RELAXED_ALLOCATION_LIMITS_EXP_FLAG_MAX_SIZE}; + static umf_result_t ze_memory_provider_alloc(void *provider, size_t size, size_t alignment, void **resultPtr) { - assert(provider); - assert(resultPtr); - ze_memory_provider_t *ze_provider = (ze_memory_provider_t *)provider; ze_result_t ze_result = ZE_RESULT_SUCCESS; @@ -151,8 +414,10 @@ static umf_result_t ze_memory_provider_alloc(void *provider, size_t size, } case UMF_MEMORY_TYPE_DEVICE: { ze_device_mem_alloc_desc_t dev_desc = { - .stype = ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC, - .pNext = NULL, + .stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC, + .pNext = use_relaxed_allocation(ze_provider, size) + ? 
&relaxed_device_allocation_desc + : NULL, .flags = 0, .ordinal = 0 // TODO }; @@ -168,7 +433,9 @@ static umf_result_t ze_memory_provider_alloc(void *provider, size_t size, .flags = 0}; ze_device_mem_alloc_desc_t dev_desc = { .stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC, - .pNext = NULL, + .pNext = use_relaxed_allocation(ze_provider, size) + ? &relaxed_device_allocation_desc + : NULL, .flags = 0, .ordinal = 0 // TODO }; @@ -178,38 +445,52 @@ static umf_result_t ze_memory_provider_alloc(void *provider, size_t size, break; } default: - return UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC; + // this shouldn't happen as we check the memory_type settings during + // the initialization + LOG_ERR("unsupported USM memory type"); + return UMF_RESULT_ERROR_UNKNOWN; + } + + if (ze_result != ZE_RESULT_SUCCESS) { + return ze2umf_result(ze_result); + } + + for (uint32_t i = 0; i < ze_provider->resident_device_count; i++) { + ze_result = g_ze_ops.zeContextMakeMemoryResident( + ze_provider->context, ze_provider->resident_device_handles[i], + *resultPtr, size); + if (ze_result != ZE_RESULT_SUCCESS) { + return ze2umf_result(ze_result); + } } - // TODO add error reporting - return (ze_result == ZE_RESULT_SUCCESS) - ? UMF_RESULT_SUCCESS - : UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC; + return ze2umf_result(ze_result); } static umf_result_t ze_memory_provider_free(void *provider, void *ptr, size_t bytes) { (void)bytes; - assert(provider); + if (ptr == NULL) { + return UMF_RESULT_SUCCESS; + } + ze_memory_provider_t *ze_provider = (ze_memory_provider_t *)provider; ze_result_t ze_result = g_ze_ops.zeMemFree(ze_provider->context, ptr); - - // TODO add error reporting - return (ze_result == ZE_RESULT_SUCCESS) - ? 
UMF_RESULT_SUCCESS - : UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC; + return ze2umf_result(ze_result); } -void ze_memory_provider_get_last_native_error(void *provider, - const char **ppMessage, - int32_t *pError) { +static void ze_memory_provider_get_last_native_error(void *provider, + const char **ppMessage, + int32_t *pError) { (void)provider; - (void)ppMessage; - // TODO - assert(pError); - *pError = 0; + if (ppMessage == NULL || pError == NULL) { + ASSERT(0); + return; + } + + *pError = TLS_last_native_error; } static umf_result_t ze_memory_provider_get_min_page_size(void *provider, @@ -254,7 +535,7 @@ ze_memory_provider_get_recommended_page_size(void *provider, size_t size, return UMF_RESULT_SUCCESS; } -const char *ze_memory_provider_get_name(void *provider) { +static const char *ze_memory_provider_get_name(void *provider) { (void)provider; return "LEVEL_ZERO"; } @@ -293,7 +574,7 @@ typedef struct ze_ipc_data_t { static umf_result_t ze_memory_provider_get_ipc_handle_size(void *provider, size_t *size) { (void)provider; - ASSERT(size != NULL); + *size = sizeof(ze_ipc_data_t); return UMF_RESULT_SUCCESS; } @@ -302,9 +583,8 @@ static umf_result_t ze_memory_provider_get_ipc_handle(void *provider, const void *ptr, size_t size, void *providerIpcData) { - ASSERT(ptr != NULL); - ASSERT(providerIpcData != NULL); (void)size; + ze_result_t ze_result; ze_ipc_data_t *ze_ipc_data = (ze_ipc_data_t *)providerIpcData; struct ze_memory_provider_t *ze_provider = @@ -314,7 +594,7 @@ static umf_result_t ze_memory_provider_get_ipc_handle(void *provider, &ze_ipc_data->ze_handle); if (ze_result != ZE_RESULT_SUCCESS) { LOG_ERR("zeMemGetIpcHandle() failed."); - return UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC; + return ze2umf_result(ze_result); } ze_ipc_data->pid = utils_getpid(); @@ -324,8 +604,6 @@ static umf_result_t ze_memory_provider_get_ipc_handle(void *provider, static umf_result_t ze_memory_provider_put_ipc_handle(void *provider, void *providerIpcData) { - ASSERT(provider != NULL); 
- ASSERT(providerIpcData != NULL); ze_result_t ze_result; struct ze_memory_provider_t *ze_provider = (struct ze_memory_provider_t *)provider; @@ -342,7 +620,7 @@ static umf_result_t ze_memory_provider_put_ipc_handle(void *provider, ze_ipc_data->ze_handle); if (ze_result != ZE_RESULT_SUCCESS) { LOG_ERR("zeMemPutIpcHandle() failed."); - return UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC; + return ze2umf_result(ze_result); } return UMF_RESULT_SUCCESS; } @@ -350,9 +628,6 @@ static umf_result_t ze_memory_provider_put_ipc_handle(void *provider, static umf_result_t ze_memory_provider_open_ipc_handle(void *provider, void *providerIpcData, void **ptr) { - ASSERT(provider != NULL); - ASSERT(providerIpcData != NULL); - ASSERT(ptr != NULL); ze_result_t ze_result; ze_ipc_data_t *ze_ipc_data = (ze_ipc_data_t *)providerIpcData; struct ze_memory_provider_t *ze_provider = @@ -379,7 +654,7 @@ static umf_result_t ze_memory_provider_open_ipc_handle(void *provider, } if (ze_result != ZE_RESULT_SUCCESS) { LOG_ERR("zeMemOpenIpcHandle() failed."); - return UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC; + return ze2umf_result(ze_result); } return UMF_RESULT_SUCCESS; @@ -387,9 +662,8 @@ static umf_result_t ze_memory_provider_open_ipc_handle(void *provider, static umf_result_t ze_memory_provider_close_ipc_handle(void *provider, void *ptr, size_t size) { - ASSERT(provider != NULL); - ASSERT(ptr != NULL); (void)size; + ze_result_t ze_result; struct ze_memory_provider_t *ze_provider = (struct ze_memory_provider_t *)provider; @@ -397,7 +671,7 @@ ze_memory_provider_close_ipc_handle(void *provider, void *ptr, size_t size) { ze_result = g_ze_ops.zeMemCloseIpcHandle(ze_provider->context, ptr); if (ze_result != ZE_RESULT_SUCCESS) { LOG_ERR("zeMemCloseIpcHandle() failed."); - return UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC; + return ze2umf_result(ze_result); } return UMF_RESULT_SUCCESS; @@ -408,11 +682,11 @@ static struct umf_memory_provider_ops_t UMF_LEVEL_ZERO_MEMORY_PROVIDER_OPS = { .initialize = 
ze_memory_provider_initialize, .finalize = ze_memory_provider_finalize, .alloc = ze_memory_provider_alloc, - .free = ze_memory_provider_free, .get_last_native_error = ze_memory_provider_get_last_native_error, .get_recommended_page_size = ze_memory_provider_get_recommended_page_size, .get_min_page_size = ze_memory_provider_get_min_page_size, .get_name = ze_memory_provider_get_name, + .ext.free = ze_memory_provider_free, .ext.purge_lazy = ze_memory_provider_purge_lazy, .ext.purge_force = ze_memory_provider_purge_force, .ext.allocation_merge = ze_memory_provider_allocation_merge, @@ -427,3 +701,5 @@ static struct umf_memory_provider_ops_t UMF_LEVEL_ZERO_MEMORY_PROVIDER_OPS = { umf_memory_provider_ops_t *umfLevelZeroMemoryProviderOps(void) { return &UMF_LEVEL_ZERO_MEMORY_PROVIDER_OPS; } + +#endif // !defined(UMF_NO_LEVEL_ZERO_PROVIDER) diff --git a/src/provider/provider_os_memory.c b/src/provider/provider_os_memory.c index 00251e53b..4c19944a9 100644 --- a/src/provider/provider_os_memory.c +++ b/src/provider/provider_os_memory.c @@ -13,21 +13,121 @@ #include #include +#include +#include +#include + +// OS Memory Provider requires HWLOC +#if defined(UMF_NO_HWLOC) + +umf_memory_provider_ops_t *umfOsMemoryProviderOps(void) { return NULL; } + +umf_result_t umfOsMemoryProviderParamsCreate( + umf_os_memory_provider_params_handle_t *hParams) { + (void)hParams; + return UMF_RESULT_ERROR_NOT_SUPPORTED; +} + +umf_result_t umfOsMemoryProviderParamsDestroy( + umf_os_memory_provider_params_handle_t hParams) { + (void)hParams; + return UMF_RESULT_ERROR_NOT_SUPPORTED; +} + +umf_result_t umfOsMemoryProviderParamsSetProtection( + umf_os_memory_provider_params_handle_t hParams, unsigned protection) { + (void)hParams; + (void)protection; + return UMF_RESULT_ERROR_NOT_SUPPORTED; +} + +umf_result_t umfOsMemoryProviderParamsSetVisibility( + umf_os_memory_provider_params_handle_t hParams, + umf_memory_visibility_t visibility) { + (void)hParams; + (void)visibility; + return 
UMF_RESULT_ERROR_NOT_SUPPORTED; +} + +umf_result_t umfOsMemoryProviderParamsSetShmName( + umf_os_memory_provider_params_handle_t hParams, const char *shm_name) { + (void)hParams; + (void)shm_name; + return UMF_RESULT_ERROR_NOT_SUPPORTED; +} + +umf_result_t umfOsMemoryProviderParamsSetNumaList( + umf_os_memory_provider_params_handle_t hParams, unsigned *numa_list, + unsigned numa_list_len) { + (void)hParams; + (void)numa_list; + (void)numa_list_len; + return UMF_RESULT_ERROR_NOT_SUPPORTED; +} + +umf_result_t umfOsMemoryProviderParamsSetNumaMode( + umf_os_memory_provider_params_handle_t hParams, umf_numa_mode_t numa_mode) { + (void)hParams; + (void)numa_mode; + return UMF_RESULT_ERROR_NOT_SUPPORTED; +} + +umf_result_t umfOsMemoryProviderParamsSetPartSize( + umf_os_memory_provider_params_handle_t hParams, size_t part_size) { + (void)hParams; + (void)part_size; + return UMF_RESULT_ERROR_NOT_SUPPORTED; +} + +umf_result_t umfOsMemoryProviderParamsSetPartitions( + umf_os_memory_provider_params_handle_t hParams, + umf_numa_split_partition_t *partitions, unsigned partitions_len) { + (void)hParams; + (void)partitions; + (void)partitions_len; + return UMF_RESULT_ERROR_NOT_SUPPORTED; +} + +#else // !defined(UMF_NO_HWLOC) + #include "base_alloc_global.h" #include "critnib.h" +#include "libumf.h" #include "provider_os_memory_internal.h" #include "utils_common.h" #include "utils_concurrency.h" #include "utils_log.h" -#include -#include -#include - #define NODESET_STR_BUF_LEN 1024 #define TLS_MSG_BUF_LEN 1024 +typedef struct umf_os_memory_provider_params_t { + // Combination of 'umf_mem_protection_flags_t' flags + unsigned protection; + /// memory visibility mode + umf_memory_visibility_t visibility; + /// (optional) a name of a shared memory file (valid only in case of the shared memory visibility) + char *shm_name; + + // NUMA config + /// ordered list of numa nodes + unsigned *numa_list; + /// length of numa_list + unsigned numa_list_len; + + /// Describes how node list is 
interpreted + umf_numa_mode_t numa_mode; + /// part size for interleave mode - 0 means default (system specific) + /// It might be rounded up because of HW constraints + size_t part_size; + + /// ordered list of the partitions for the split mode + umf_numa_split_partition_t *partitions; + /// len of the partitions array + unsigned partitions_len; +} umf_os_memory_provider_params_t; + typedef struct os_last_native_error_t { int32_t native_error; int errno_value; @@ -134,31 +234,6 @@ static umf_result_t initialize_nodeset(os_memory_provider_t *os_provider, return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; } -umf_result_t os_translate_flags(unsigned in_flags, unsigned max, - umf_result_t (*translate_flag)(unsigned, - unsigned *), - unsigned *out_flags) { - unsigned out_f = 0; - for (unsigned n = 1; n < max; n <<= 1) { - if (in_flags & n) { - unsigned flag; - umf_result_t result = translate_flag(n, &flag); - if (result != UMF_RESULT_SUCCESS) { - return result; - } - out_f |= flag; - in_flags &= ~n; // clear this bit - } - } - - if (in_flags != 0) { - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - *out_flags = out_f; - return UMF_RESULT_SUCCESS; -} - static umf_result_t validate_numa_mode(umf_numa_mode_t mode, int nodemaskEmpty) { switch (mode) { @@ -289,7 +364,7 @@ create_fd_for_mmap(umf_os_memory_provider_params_t *in_params, /* create a new shared memory file */ provider->fd = - os_shm_create(in_params->shm_name, provider->max_size_fd); + utils_shm_create(in_params->shm_name, provider->max_size_fd); if (provider->fd == -1) { LOG_ERR("creating a shared memory file /dev/shm/%s of size %zu for " "memory mapping failed", @@ -304,14 +379,14 @@ create_fd_for_mmap(umf_os_memory_provider_params_t *in_params, return UMF_RESULT_SUCCESS; } - provider->fd = os_create_anonymous_fd(); + provider->fd = utils_create_anonymous_fd(); if (provider->fd <= 0) { LOG_ERR( "creating an anonymous file descriptor for memory mapping failed"); return UMF_RESULT_ERROR_UNKNOWN; } - int ret = 
os_set_file_size(provider->fd, provider->max_size_fd); + int ret = utils_set_file_size(provider->fd, provider->max_size_fd); if (ret) { LOG_ERR("setting size %zu of an anonymous file failed", provider->max_size_fd); @@ -359,6 +434,22 @@ validatePartitions(umf_os_memory_provider_params_t *params) { return UMF_RESULT_SUCCESS; } +static umf_result_t os_get_min_page_size(void *provider, void *ptr, + size_t *page_size); + +static umf_result_t validatePartSize(os_memory_provider_t *provider, + umf_os_memory_provider_params_t *params) { + size_t page_size; + os_get_min_page_size(provider, NULL, &page_size); + if (ALIGN_UP(params->part_size, page_size) < params->part_size) { + LOG_ERR("partition size (%zu) is too big, cannot align with a page " + "size (%zu)", + params->part_size, page_size); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + return UMF_RESULT_SUCCESS; +} + static void free_bitmaps(os_memory_provider_t *provider) { for (unsigned i = 0; i < provider->nodeset_len; i++) { hwloc_bitmap_free(provider->nodeset[i]); @@ -413,20 +504,23 @@ static umf_result_t translate_params(umf_os_memory_provider_params_t *in_params, os_memory_provider_t *provider) { umf_result_t result; - result = os_translate_mem_protection_flags(in_params->protection, - &provider->protection); + result = utils_translate_mem_protection_flags(in_params->protection, + &provider->protection); if (result != UMF_RESULT_SUCCESS) { LOG_ERR("incorrect memory protection flags: %u", in_params->protection); return result; } - result = os_translate_mem_visibility_flag(in_params->visibility, - &provider->visibility); + result = utils_translate_mem_visibility_flag(in_params->visibility, + &provider->visibility); if (result != UMF_RESULT_SUCCESS) { LOG_ERR("incorrect memory visibility flag: %u", in_params->visibility); return result; } + // IPC API requires in_params->visibility == UMF_MEM_MAP_SHARED + provider->IPC_enabled = (in_params->visibility == UMF_MEM_MAP_SHARED); + // NUMA config int emptyNodeset = 
in_params->numa_list_len == 0; result = validate_numa_mode(in_params->numa_mode, emptyNodeset); @@ -442,6 +536,14 @@ static umf_result_t translate_params(umf_os_memory_provider_params_t *in_params, return result; } + if (in_params->numa_mode == UMF_NUMA_MODE_INTERLEAVE) { + result = validatePartSize(provider, in_params); + if (result != UMF_RESULT_SUCCESS) { + LOG_ERR("incorrect partition size: %zu", in_params->part_size); + return result; + } + } + int is_dedicated_node_bind = dedicated_node_bind(in_params); provider->numa_policy = translate_numa_mode(in_params->numa_mode, is_dedicated_node_bind); @@ -469,7 +571,7 @@ static umf_result_t translate_params(umf_os_memory_provider_params_t *in_params, static umf_result_t os_initialize(void *params, void **provider) { umf_result_t ret; - if (provider == NULL || params == NULL) { + if (params == NULL) { return UMF_RESULT_ERROR_INVALID_ARGUMENT; } @@ -526,7 +628,7 @@ static umf_result_t os_initialize(void *params, void **provider) { } if (os_provider->fd > 0) { - if (util_mutex_init(&os_provider->lock_fd) == NULL) { + if (utils_mutex_init(&os_provider->lock_fd) == NULL) { LOG_ERR("initializing the file size lock failed"); ret = UMF_RESULT_ERROR_UNKNOWN; goto err_destroy_bitmaps; @@ -563,15 +665,10 @@ static umf_result_t os_initialize(void *params, void **provider) { } static void os_finalize(void *provider) { - if (provider == NULL) { - assert(0); - return; - } - os_memory_provider_t *os_provider = provider; if (os_provider->fd > 0) { - util_mutex_destroy_not_free(&os_provider->lock_fd); + utils_mutex_destroy_not_free(&os_provider->lock_fd); } critnib_delete(os_provider->fd_offset_map); @@ -589,9 +686,6 @@ static void os_finalize(void *provider) { umf_ba_global_free(os_provider); } -static umf_result_t os_get_min_page_size(void *provider, void *ptr, - size_t *page_size); - // TODO: this function should be re-enabled when CTL is implemented #if 0 static void print_numa_nodes(os_memory_provider_t *os_provider, void *addr, 
@@ -634,11 +728,11 @@ static inline void assert_is_page_aligned(uintptr_t ptr, size_t page_size) { (void)page_size; // unused in Release build } -static int os_mmap_aligned(void *hint_addr, size_t length, size_t alignment, - size_t page_size, int prot, int flag, int fd, - size_t max_fd_size, os_mutex_t *lock_fd, - void **out_addr, size_t *fd_size, - size_t *fd_offset) { +static int utils_mmap_aligned(void *hint_addr, size_t length, size_t alignment, + size_t page_size, int prot, int flag, int fd, + size_t max_fd_size, utils_mutex_t *lock_fd, + void **out_addr, size_t *fd_size, + size_t *fd_offset) { assert(out_addr); size_t extended_length = length; @@ -654,23 +748,24 @@ static int os_mmap_aligned(void *hint_addr, size_t length, size_t alignment, *fd_offset = 0; if (fd > 0) { - if (util_mutex_lock(lock_fd)) { + if (utils_mutex_lock(lock_fd)) { LOG_ERR("locking file size failed"); return -1; } if (*fd_size + extended_length > max_fd_size) { - util_mutex_unlock(lock_fd); + utils_mutex_unlock(lock_fd); LOG_ERR("cannot grow a file size beyond %zu", max_fd_size); return -1; } *fd_offset = *fd_size; *fd_size += extended_length; - util_mutex_unlock(lock_fd); + utils_mutex_unlock(lock_fd); } - void *ptr = os_mmap(hint_addr, extended_length, prot, flag, fd, *fd_offset); + void *ptr = + utils_mmap(hint_addr, extended_length, prot, flag, fd, *fd_offset); if (ptr == NULL) { LOG_PDEBUG("memory mapping failed"); return -1; @@ -689,7 +784,7 @@ static int os_mmap_aligned(void *hint_addr, size_t length, size_t alignment, size_t head_len = aligned_addr - addr; if (head_len > 0) { - os_munmap(ptr, head_len); + utils_munmap(ptr, head_len); } // tail address has to page-aligned @@ -703,7 +798,7 @@ static int os_mmap_aligned(void *hint_addr, size_t length, size_t alignment, size_t tail_len = (addr + extended_length) - tail; if (tail_len > 0) { - os_munmap((void *)tail, tail_len); + utils_munmap((void *)tail, tail_len); } *out_addr = (void *)aligned_addr; @@ -827,19 +922,19 @@ static 
membind_t membindFirst(os_memory_provider_t *provider, void *addr, membind_t membind; memset(&membind, 0, sizeof(membind)); - membind.alloc_size = ALIGN_UP(size, page_size); + membind.alloc_size = size; membind.page_size = page_size; membind.addr = addr; membind.pages = membind.alloc_size / membind.page_size; if (provider->nodeset_len == 1) { - membind.bind_size = ALIGN_UP(size, membind.page_size); + membind.bind_size = size; membind.bitmap = provider->nodeset[0]; return membind; } if (provider->mode == UMF_NUMA_MODE_INTERLEAVE) { assert(provider->part_size != 0); - size_t s = util_fetch_and_add64(&provider->alloc_sum, size); + size_t s = utils_fetch_and_add64(&provider->alloc_sum, size); membind.node = (s / provider->part_size) % provider->nodeset_len; membind.bitmap = provider->nodeset[membind.node]; membind.bind_size = ALIGN_UP(provider->part_size, membind.page_size); @@ -893,10 +988,6 @@ static umf_result_t os_alloc(void *provider, size_t size, size_t alignment, void **resultPtr) { int ret; - if (provider == NULL || resultPtr == NULL) { - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - os_memory_provider_t *os_provider = (os_memory_provider_t *)provider; size_t page_size; @@ -917,7 +1008,7 @@ static umf_result_t os_alloc(void *provider, size_t size, size_t alignment, void *addr = NULL; errno = 0; - ret = os_mmap_aligned( + ret = utils_mmap_aligned( NULL, size, alignment, page_size, os_provider->protection, os_provider->visibility, os_provider->fd, os_provider->max_size_fd, &os_provider->lock_fd, &addr, &os_provider->size_fd, &fd_offset); @@ -938,7 +1029,15 @@ static umf_result_t os_alloc(void *provider, size_t size, size_t alignment, // Bind memory to NUMA nodes if numa_policy is other than DEFAULT if (os_provider->numa_policy != HWLOC_MEMBIND_DEFAULT) { - membind_t membind = membindFirst(os_provider, addr, size, page_size); + size_t first_size = ALIGN_UP_SAFE(size, page_size); + if (first_size == 0) { + LOG_ERR("size is too big, page align failed"); + 
(void)utils_munmap(addr, size); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + membind_t membind = + membindFirst(os_provider, addr, first_size, page_size); if (membind.bitmap == NULL) { goto err_unmap; } @@ -984,15 +1083,11 @@ static umf_result_t os_alloc(void *provider, size_t size, size_t alignment, return UMF_RESULT_SUCCESS; err_unmap: - (void)os_munmap(addr, size); + (void)utils_munmap(addr, size); return UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC; } static umf_result_t os_free(void *provider, void *ptr, size_t size) { - if (provider == NULL) { - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - if (ptr == NULL) { return UMF_RESULT_SUCCESS; } @@ -1004,7 +1099,7 @@ static umf_result_t os_free(void *provider, void *ptr, size_t size) { } errno = 0; - int ret = os_munmap(ptr, size); + int ret = utils_munmap(ptr, size); if (ret) { os_store_last_native_error(UMF_OS_RESULT_ERROR_FREE_FAILED, errno); LOG_PERR("memory deallocation failed"); @@ -1044,21 +1139,18 @@ static void os_get_last_native_error(void *provider, const char **ppMessage, memcpy(TLS_last_native_error.msg_buff + pos, msg, len + 1); pos += len; - os_strerror(TLS_last_native_error.errno_value, - TLS_last_native_error.msg_buff + pos, TLS_MSG_BUF_LEN - pos); + utils_strerror(TLS_last_native_error.errno_value, + TLS_last_native_error.msg_buff + pos, TLS_MSG_BUF_LEN - pos); *ppMessage = TLS_last_native_error.msg_buff; } static umf_result_t os_get_recommended_page_size(void *provider, size_t size, size_t *page_size) { - (void)size; // unused - - if (provider == NULL || page_size == NULL) { - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } + (void)provider; // unused + (void)size; // unused - *page_size = os_get_page_size(); + *page_size = utils_get_page_size(); return UMF_RESULT_SUCCESS; } @@ -1071,12 +1163,10 @@ static umf_result_t os_get_min_page_size(void *provider, void *ptr, } static umf_result_t os_purge_lazy(void *provider, void *ptr, size_t size) { - if (provider == NULL || ptr == NULL) { - return 
UMF_RESULT_ERROR_INVALID_ARGUMENT; - } + (void)provider; // unused errno = 0; - if (os_purge(ptr, size, UMF_PURGE_LAZY)) { + if (utils_purge(ptr, size, UMF_PURGE_LAZY)) { os_store_last_native_error(UMF_OS_RESULT_ERROR_PURGE_LAZY_FAILED, errno); LOG_PERR("lazy purging failed"); @@ -1087,12 +1177,10 @@ static umf_result_t os_purge_lazy(void *provider, void *ptr, size_t size) { } static umf_result_t os_purge_force(void *provider, void *ptr, size_t size) { - if (provider == NULL || ptr == NULL) { - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } + (void)provider; // unused errno = 0; - if (os_purge(ptr, size, UMF_PURGE_FORCE)) { + if (utils_purge(ptr, size, UMF_PURGE_FORCE)) { os_store_last_native_error(UMF_OS_RESULT_ERROR_PURGE_FORCE_FAILED, errno); LOG_PERR("force purging failed"); @@ -1113,7 +1201,7 @@ static umf_result_t os_allocation_split(void *provider, void *ptr, (void)totalSize; os_memory_provider_t *os_provider = (os_memory_provider_t *)provider; - if (os_provider->fd <= 0) { + if (os_provider->fd < 0) { return UMF_RESULT_SUCCESS; } @@ -1146,7 +1234,7 @@ static umf_result_t os_allocation_merge(void *provider, void *lowPtr, (void)totalSize; os_memory_provider_t *os_provider = (os_memory_provider_t *)provider; - if (os_provider->fd <= 0) { + if (os_provider->fd < 0) { return UMF_RESULT_SUCCESS; } @@ -1167,35 +1255,32 @@ typedef struct os_ipc_data_t { int fd; size_t fd_offset; size_t size; - char shm_name[NAME_MAX]; // optional - can be used or not (see below) + unsigned protection; // combination of OS-specific protection flags + unsigned visibility; // memory visibility mode + // shm_name is a Flexible Array Member because it is optional and its size + // varies on the Shared Memory object name + size_t shm_name_len; + char shm_name[]; } os_ipc_data_t; static umf_result_t os_get_ipc_handle_size(void *provider, size_t *size) { - if (provider == NULL || size == NULL) { + os_memory_provider_t *os_provider = (os_memory_provider_t *)provider; + if 
(!os_provider->IPC_enabled) { + LOG_ERR("memory visibility mode is not UMF_MEM_MAP_SHARED") return UMF_RESULT_ERROR_INVALID_ARGUMENT; } - os_memory_provider_t *os_provider = (os_memory_provider_t *)provider; - - if (os_provider->shm_name[0]) { - // os_ipc_data_t->shm_name will be used - *size = sizeof(os_ipc_data_t); - } else { - // os_ipc_data_t->shm_name will NOT be used - *size = sizeof(os_ipc_data_t) - NAME_MAX; - } + // NOTE: +1 for '\0' at the end of the string + *size = sizeof(os_ipc_data_t) + strlen(os_provider->shm_name) + 1; return UMF_RESULT_SUCCESS; } static umf_result_t os_get_ipc_handle(void *provider, const void *ptr, size_t size, void *providerIpcData) { - if (provider == NULL || ptr == NULL || providerIpcData == NULL) { - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - os_memory_provider_t *os_provider = (os_memory_provider_t *)provider; - if (os_provider->fd <= 0) { + if (!os_provider->IPC_enabled) { + LOG_ERR("memory visibility mode is not UMF_MEM_MAP_SHARED") return UMF_RESULT_ERROR_INVALID_ARGUMENT; } @@ -1211,9 +1296,13 @@ static umf_result_t os_get_ipc_handle(void *provider, const void *ptr, os_ipc_data->pid = utils_getpid(); os_ipc_data->fd_offset = (size_t)value - 1; os_ipc_data->size = size; - if (os_provider->shm_name[0]) { - strncpy(os_ipc_data->shm_name, os_provider->shm_name, NAME_MAX - 1); - os_ipc_data->shm_name[NAME_MAX - 1] = '\0'; + os_ipc_data->protection = os_provider->protection; + os_ipc_data->visibility = os_provider->visibility; + os_ipc_data->shm_name_len = strlen(os_provider->shm_name); + if (os_ipc_data->shm_name_len > 0) { + // NOTE: +1 for '\0' at the end of the string + strncpy(os_ipc_data->shm_name, os_provider->shm_name, + os_ipc_data->shm_name_len + 1); } else { os_ipc_data->fd = os_provider->fd; } @@ -1222,19 +1311,24 @@ static umf_result_t os_get_ipc_handle(void *provider, const void *ptr, } static umf_result_t os_put_ipc_handle(void *provider, void *providerIpcData) { - if (provider == NULL || providerIpcData == 
NULL) { + os_memory_provider_t *os_provider = (os_memory_provider_t *)provider; + if (!os_provider->IPC_enabled) { + LOG_ERR("memory visibility mode is not UMF_MEM_MAP_SHARED") return UMF_RESULT_ERROR_INVALID_ARGUMENT; } - os_memory_provider_t *os_provider = (os_memory_provider_t *)provider; os_ipc_data_t *os_ipc_data = (os_ipc_data_t *)providerIpcData; if (os_ipc_data->pid != utils_getpid()) { return UMF_RESULT_ERROR_INVALID_ARGUMENT; } - if (os_provider->shm_name[0]) { - if (strncmp(os_ipc_data->shm_name, os_provider->shm_name, NAME_MAX)) { + size_t shm_name_len = strlen(os_provider->shm_name); + if (shm_name_len > 0) { + if (os_ipc_data->shm_name_len != shm_name_len) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } else if (strncmp(os_ipc_data->shm_name, os_provider->shm_name, + shm_name_len)) { return UMF_RESULT_ERROR_INVALID_ARGUMENT; } } else { @@ -1248,23 +1342,24 @@ static umf_result_t os_put_ipc_handle(void *provider, void *providerIpcData) { static umf_result_t os_open_ipc_handle(void *provider, void *providerIpcData, void **ptr) { - if (provider == NULL || providerIpcData == NULL || ptr == NULL) { + os_memory_provider_t *os_provider = (os_memory_provider_t *)provider; + if (!os_provider->IPC_enabled) { + LOG_ERR("memory visibility mode is not UMF_MEM_MAP_SHARED") return UMF_RESULT_ERROR_INVALID_ARGUMENT; } - os_memory_provider_t *os_provider = (os_memory_provider_t *)provider; os_ipc_data_t *os_ipc_data = (os_ipc_data_t *)providerIpcData; umf_result_t ret = UMF_RESULT_SUCCESS; int fd; - if (os_provider->shm_name[0]) { - fd = os_shm_open(os_provider->shm_name); + if (os_ipc_data->shm_name_len) { + fd = utils_shm_open(os_ipc_data->shm_name); if (fd <= 0) { LOG_PERR("opening a shared memory file (%s) failed", - os_provider->shm_name); + os_ipc_data->shm_name); return UMF_RESULT_ERROR_UNKNOWN; } - (void)os_shm_unlink(os_provider->shm_name); + (void)utils_shm_unlink(os_ipc_data->shm_name); } else { umf_result_t umf_result = 
utils_duplicate_fd(os_ipc_data->pid, os_ipc_data->fd, &fd); @@ -1274,8 +1369,8 @@ static umf_result_t os_open_ipc_handle(void *provider, void *providerIpcData, } } - *ptr = os_mmap(NULL, os_ipc_data->size, os_provider->protection, - os_provider->visibility, fd, os_ipc_data->fd_offset); + *ptr = utils_mmap(NULL, os_ipc_data->size, os_ipc_data->protection, + os_ipc_data->visibility, fd, os_ipc_data->fd_offset); if (*ptr == NULL) { os_store_last_native_error(UMF_OS_RESULT_ERROR_ALLOC_FAILED, errno); LOG_PERR("memory mapping failed"); @@ -1289,12 +1384,14 @@ static umf_result_t os_open_ipc_handle(void *provider, void *providerIpcData, static umf_result_t os_close_ipc_handle(void *provider, void *ptr, size_t size) { - if (provider == NULL || ptr == NULL) { + os_memory_provider_t *os_provider = (os_memory_provider_t *)provider; + if (!os_provider->IPC_enabled) { + LOG_ERR("memory visibility mode is not UMF_MEM_MAP_SHARED") return UMF_RESULT_ERROR_INVALID_ARGUMENT; } errno = 0; - int ret = os_munmap(ptr, size); + int ret = utils_munmap(ptr, size); // ignore error when size == 0 if (ret && (size > 0)) { os_store_last_native_error(UMF_OS_RESULT_ERROR_FREE_FAILED, errno); @@ -1311,11 +1408,11 @@ static umf_memory_provider_ops_t UMF_OS_MEMORY_PROVIDER_OPS = { .initialize = os_initialize, .finalize = os_finalize, .alloc = os_alloc, - .free = os_free, .get_last_native_error = os_get_last_native_error, .get_recommended_page_size = os_get_recommended_page_size, .get_min_page_size = os_get_min_page_size, .get_name = os_get_name, + .ext.free = os_free, .ext.purge_lazy = os_purge_lazy, .ext.purge_force = os_purge_force, .ext.allocation_merge = os_allocation_merge, @@ -1329,3 +1426,185 @@ static umf_memory_provider_ops_t UMF_OS_MEMORY_PROVIDER_OPS = { umf_memory_provider_ops_t *umfOsMemoryProviderOps(void) { return &UMF_OS_MEMORY_PROVIDER_OPS; } + +umf_result_t umfOsMemoryProviderParamsCreate( + umf_os_memory_provider_params_handle_t *hParams) { + libumfInit(); + if (hParams == NULL) 
{ + LOG_ERR("OS memory provider params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + umf_os_memory_provider_params_handle_t params = + umf_ba_global_alloc(sizeof(*params)); + if (params == NULL) { + LOG_ERR("allocating memory for OS memory provider params failed"); + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + params->protection = UMF_PROTECTION_READ | UMF_PROTECTION_WRITE; + params->visibility = UMF_MEM_MAP_PRIVATE; + params->shm_name = NULL; + params->numa_list = NULL; + params->numa_list_len = 0; + params->numa_mode = UMF_NUMA_MODE_DEFAULT; + params->part_size = 0; + params->partitions = NULL; + params->partitions_len = 0; + + *hParams = params; + + return UMF_RESULT_SUCCESS; +} + +umf_result_t umfOsMemoryProviderParamsDestroy( + umf_os_memory_provider_params_handle_t hParams) { + if (hParams != NULL) { + umf_ba_global_free(hParams->shm_name); + umf_ba_global_free(hParams->numa_list); + umf_ba_global_free(hParams->partitions); + } + + umf_ba_global_free(hParams); + + return UMF_RESULT_SUCCESS; +} + +umf_result_t umfOsMemoryProviderParamsSetProtection( + umf_os_memory_provider_params_handle_t hParams, unsigned protection) { + if (hParams == NULL) { + LOG_ERR("OS memory provider params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + hParams->protection = protection; + + return UMF_RESULT_SUCCESS; +} + +umf_result_t umfOsMemoryProviderParamsSetVisibility( + umf_os_memory_provider_params_handle_t hParams, + umf_memory_visibility_t visibility) { + if (hParams == NULL) { + LOG_ERR("OS memory provider params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + hParams->visibility = visibility; + + return UMF_RESULT_SUCCESS; +} + +umf_result_t umfOsMemoryProviderParamsSetShmName( + umf_os_memory_provider_params_handle_t hParams, const char *shm_name) { + if (hParams == NULL) { + LOG_ERR("OS memory provider params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + char *name = NULL; 
+ if (shm_name) { + size_t len = strlen(shm_name) + 1; + name = umf_ba_global_alloc(len); + if (name == NULL) { + LOG_ERR("allocating memory for the shared memory name failed"); + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + strncpy(name, shm_name, len); + } + umf_ba_global_free(hParams->shm_name); + hParams->shm_name = name; + + return UMF_RESULT_SUCCESS; +} + +umf_result_t umfOsMemoryProviderParamsSetNumaList( + umf_os_memory_provider_params_handle_t hParams, unsigned *numa_list, + unsigned numa_list_len) { + if (hParams == NULL) { + LOG_ERR("OS memory provider params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + if (numa_list_len && !numa_list) { + LOG_ERR("numa_list_len is not 0, but numa_list is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + unsigned *new_list = NULL; + if (numa_list_len) { + new_list = umf_ba_global_alloc(numa_list_len * sizeof(*new_list)); + if (new_list == NULL) { + LOG_ERR("allocating memory for the NUMA list failed"); + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + memcpy(new_list, numa_list, numa_list_len * sizeof(*new_list)); + } + + umf_ba_global_free(hParams->numa_list); + hParams->numa_list = new_list; + hParams->numa_list_len = numa_list_len; + + return UMF_RESULT_SUCCESS; +} + +umf_result_t umfOsMemoryProviderParamsSetNumaMode( + umf_os_memory_provider_params_handle_t hParams, umf_numa_mode_t numa_mode) { + if (hParams == NULL) { + LOG_ERR("OS memory provider params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + hParams->numa_mode = numa_mode; + + return UMF_RESULT_SUCCESS; +} + +umf_result_t umfOsMemoryProviderParamsSetPartSize( + umf_os_memory_provider_params_handle_t hParams, size_t part_size) { + if (hParams == NULL) { + LOG_ERR("OS memory provider params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + hParams->part_size = part_size; + + return UMF_RESULT_SUCCESS; +} + +umf_result_t umfOsMemoryProviderParamsSetPartitions( + 
umf_os_memory_provider_params_handle_t hParams, + umf_numa_split_partition_t *partitions, unsigned partitions_len) { + if (hParams == NULL) { + LOG_ERR("OS memory provider params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + if (partitions_len && !partitions) { + LOG_ERR("partitions_len is not 0, but partitions is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + umf_numa_split_partition_t *new_partitions = NULL; + if (partitions_len) { + new_partitions = + umf_ba_global_alloc(partitions_len * sizeof(*new_partitions)); + if (new_partitions == NULL) { + LOG_ERR("allocating memory for the partitions failed"); + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + memcpy(new_partitions, partitions, + partitions_len * sizeof(*new_partitions)); + } + + umf_ba_global_free(hParams->partitions); + hParams->partitions = new_partitions; + hParams->partitions_len = partitions_len; + + return UMF_RESULT_SUCCESS; +} + +#endif // !defined(UMF_NO_HWLOC) diff --git a/src/provider/provider_os_memory_internal.h b/src/provider/provider_os_memory_internal.h index 81d729d27..faf0de247 100644 --- a/src/provider/provider_os_memory_internal.h +++ b/src/provider/provider_os_memory_internal.h @@ -8,6 +8,14 @@ #ifndef UMF_OS_MEMORY_PROVIDER_INTERNAL_H #define UMF_OS_MEMORY_PROVIDER_INTERNAL_H +#include +#include + +#if defined(_WIN32) && !defined(NAME_MAX) +#include +#define NAME_MAX _MAX_FNAME +#endif /* defined(_WIN32) && !defined(NAME_MAX) */ + #include #include "critnib.h" @@ -19,22 +27,20 @@ extern "C" { #endif -typedef enum umf_purge_advise_t { - UMF_PURGE_LAZY, - UMF_PURGE_FORCE, -} umf_purge_advise_t; - -#define NAME_MAX 255 - typedef struct os_memory_provider_t { unsigned protection; // combination of OS-specific protection flags unsigned visibility; // memory visibility mode + + // IPC is enabled only if (in_params->visibility == UMF_MEM_MAP_SHARED) + bool IPC_enabled; + // a name of a shared memory file (valid only in case of the shared memory 
visibility) char shm_name[NAME_MAX]; - int fd; // file descriptor for memory mapping - size_t size_fd; // size of file used for memory mapping - size_t max_size_fd; // maximum size of file used for memory mapping - os_mutex_t lock_fd; // lock for updating file size + + int fd; // file descriptor for memory mapping + size_t size_fd; // size of file used for memory mapping + size_t max_size_fd; // maximum size of file used for memory mapping + utils_mutex_t lock_fd; // lock for updating file size // A critnib map storing (ptr, fd_offset + 1) pairs. We add 1 to fd_offset // in order to be able to store fd_offset equal 0, because @@ -64,42 +70,6 @@ typedef struct os_memory_provider_t { hwloc_topology_t topo; } os_memory_provider_t; -umf_result_t os_translate_flags(unsigned in_flags, unsigned max, - umf_result_t (*translate_flag)(unsigned, - unsigned *), - unsigned *out_flags); - -umf_result_t os_translate_mem_protection_flags(unsigned in_protection, - unsigned *out_protection); - -umf_result_t os_translate_mem_visibility_flag(umf_memory_visibility_t in_flag, - unsigned *out_flag); - -int os_create_anonymous_fd(void); - -int os_shm_create(const char *shm_name, size_t size); - -int os_shm_open(const char *shm_name); - -int os_shm_unlink(const char *shm_name); - -size_t get_max_file_size(void); - -int os_get_file_size(int fd, size_t *size); - -int os_set_file_size(int fd, size_t size); - -void *os_mmap(void *hint_addr, size_t length, int prot, int flag, int fd, - size_t fd_offset); - -int os_munmap(void *addr, size_t length); - -int os_purge(void *addr, size_t length, int advice); - -size_t os_get_page_size(void); - -void os_strerror(int errnum, char *buf, size_t buflen); - #ifdef __cplusplus } #endif diff --git a/src/provider/provider_os_memory_linux.c b/src/provider/provider_os_memory_linux.c deleted file mode 100644 index fd0e3ab97..000000000 --- a/src/provider/provider_os_memory_linux.c +++ /dev/null @@ -1,154 +0,0 @@ -/* - * Copyright (C) 2023-2024 Intel Corporation 
- * - * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. - * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -*/ - -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "provider_os_memory_internal.h" -#include "utils_log.h" - -umf_result_t os_translate_mem_visibility_flag(umf_memory_visibility_t in_flag, - unsigned *out_flag) { - switch (in_flag) { - case UMF_MEM_MAP_PRIVATE: - *out_flag = MAP_PRIVATE; - return UMF_RESULT_SUCCESS; - case UMF_MEM_MAP_SHARED: - *out_flag = MAP_SHARED; - return UMF_RESULT_SUCCESS; - } - return UMF_RESULT_ERROR_INVALID_ARGUMENT; -} - -// create a shared memory file -int os_shm_create(const char *shm_name, size_t size) { - if (shm_name == NULL) { - LOG_ERR("empty name of a shared memory file"); - return -1; - } - - (void)shm_unlink(shm_name); - - int fd = shm_open(shm_name, O_RDWR | O_CREAT | O_EXCL, 0600); - if (fd == -1) { - LOG_PERR("cannot create a shared memory file /dev/shm/%s", shm_name); - return fd; - } - - int ret = os_set_file_size(fd, size); - if (ret) { - LOG_ERR("setting size (%zu) of a file /dev/shm/%s failed", size, - shm_name); - close(fd); - (void)shm_unlink(shm_name); - return -1; - } - - return fd; -} - -// open a shared memory file -int os_shm_open(const char *shm_name) { - if (shm_name == NULL) { - LOG_ERR("empty name of a shared memory file"); - return -1; - } - - int fd = shm_open(shm_name, O_RDWR, 0600); - if (fd == -1) { - LOG_PERR("cannot open a shared memory file /dev/shm/%s", shm_name); - } - - return fd; -} - -// unlink a shared memory file -int os_shm_unlink(const char *shm_name) { return shm_unlink(shm_name); } - -static int syscall_memfd_secret(void) { - int fd = -1; -#ifdef __NR_memfd_secret - // SYS_memfd_secret is supported since Linux 5.14 - fd = syscall(SYS_memfd_secret, 0); - if (fd == -1) { - LOG_PERR("memfd_secret() failed"); - } - if (fd > 0) { - LOG_DEBUG("anonymous file descriptor created using memfd_secret()"); - } -#endif 
/* __NR_memfd_secret */ - return fd; -} - -static int syscall_memfd_create(void) { - int fd = -1; -#ifdef __NR_memfd_create - // SYS_memfd_create is supported since Linux 3.17, glibc 2.27 - fd = syscall(SYS_memfd_create, "anon_fd_name", 0); - if (fd == -1) { - LOG_PERR("memfd_create() failed"); - } - if (fd > 0) { - LOG_DEBUG("anonymous file descriptor created using memfd_create()"); - } -#endif /* __NR_memfd_create */ - return fd; -} - -// create an anonymous file descriptor -int os_create_anonymous_fd(void) { - int fd = -1; - - if (!util_env_var_has_str("UMF_MEM_FD_FUNC", "memfd_create")) { - fd = syscall_memfd_secret(); - if (fd > 0) { - return fd; - } - } - - // The SYS_memfd_secret syscall can fail with errno == ENOTSYS (function not implemented). - // We should try to call the SYS_memfd_create syscall in this case. - - fd = syscall_memfd_create(); - -#if !(defined __NR_memfd_secret) && !(defined __NR_memfd_create) - if (fd == -1) { - LOG_ERR("cannot create an anonymous file descriptor - neither " - "memfd_secret() nor memfd_create() are defined"); - } -#endif /* !(defined __NR_memfd_secret) && !(defined __NR_memfd_create) */ - - return fd; -} - -int os_get_file_size(int fd, size_t *size) { - struct stat statbuf; - int ret = fstat(fd, &statbuf); - if (ret) { - LOG_PERR("fstat(%i) failed", fd); - return ret; - } - - *size = statbuf.st_size; - return 0; -} - -int os_set_file_size(int fd, size_t size) { - errno = 0; - int ret = ftruncate(fd, size); - if (ret) { - LOG_PERR("ftruncate(%i, %zu) failed", fd, size); - } - return ret; -} diff --git a/src/provider/provider_os_memory_posix.c b/src/provider/provider_os_memory_posix.c deleted file mode 100644 index 9308f6a18..000000000 --- a/src/provider/provider_os_memory_posix.c +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (C) 2023-2024 Intel Corporation - * - * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -*/ - -#include -#include -#include -#include -#include -#include - -#include - -#include "provider_os_memory_internal.h" -#include "utils_log.h" -#include "utils_sanitizers.h" - -// maximum value of the off_t type -#define OFF_T_MAX \ - (sizeof(off_t) == sizeof(long long) \ - ? LLONG_MAX \ - : (sizeof(off_t) == sizeof(long) ? LONG_MAX : INT_MAX)) - -umf_result_t os_translate_mem_protection_one_flag(unsigned in_protection, - unsigned *out_protection) { - switch (in_protection) { - case UMF_PROTECTION_NONE: - *out_protection = PROT_NONE; - return UMF_RESULT_SUCCESS; - case UMF_PROTECTION_READ: - *out_protection = PROT_READ; - return UMF_RESULT_SUCCESS; - case UMF_PROTECTION_WRITE: - *out_protection = PROT_WRITE; - return UMF_RESULT_SUCCESS; - case UMF_PROTECTION_EXEC: - *out_protection = PROT_EXEC; - return UMF_RESULT_SUCCESS; - } - return UMF_RESULT_ERROR_INVALID_ARGUMENT; -} - -size_t get_max_file_size(void) { return OFF_T_MAX; } - -umf_result_t os_translate_mem_protection_flags(unsigned in_protection, - unsigned *out_protection) { - // translate protection - combination of 'umf_mem_protection_flags_t' flags - return os_translate_flags(in_protection, UMF_PROTECTION_MAX, - os_translate_mem_protection_one_flag, - out_protection); -} - -static int os_translate_purge_advise(umf_purge_advise_t advise) { - switch (advise) { - case UMF_PURGE_LAZY: - return MADV_FREE; - case UMF_PURGE_FORCE: - return MADV_DONTNEED; - } - return -1; -} - -void *os_mmap(void *hint_addr, size_t length, int prot, int flag, int fd, - size_t fd_offset) { - fd = (fd == 0) ? 
-1 : fd; - if (fd == -1) { - // MAP_ANONYMOUS - the mapping is not backed by any file - flag |= MAP_ANONYMOUS; - } - - void *ptr = mmap(hint_addr, length, prot, flag, fd, fd_offset); - if (ptr == MAP_FAILED) { - return NULL; - } - // this should be unnecessary but pairs of mmap/munmap do not reset - // asan's user-poisoning flags, leading to invalid error reports - // Bug 81619: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81619 - utils_annotate_memory_defined(ptr, length); - return ptr; -} - -int os_munmap(void *addr, size_t length) { - // this should be unnecessary but pairs of mmap/munmap do not reset - // asan's user-poisoning flags, leading to invalid error reports - // Bug 81619: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81619 - utils_annotate_memory_defined(addr, length); - return munmap(addr, length); -} - -size_t os_get_page_size(void) { return sysconf(_SC_PAGE_SIZE); } - -int os_purge(void *addr, size_t length, int advice) { - return madvise(addr, length, os_translate_purge_advise(advice)); -} - -void os_strerror(int errnum, char *buf, size_t buflen) { -// 'strerror_r' implementation is XSI-compliant (returns 0 on success) -#if (_POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600) && !_GNU_SOURCE - if (strerror_r(errnum, buf, buflen)) { -#else // 'strerror_r' implementation is GNU-specific (returns pointer on success) - if (!strerror_r(errnum, buf, buflen)) { -#endif - LOG_PERR("Retrieving error code description failed"); - } -} diff --git a/src/provider/provider_os_memory_windows.c b/src/provider/provider_os_memory_windows.c deleted file mode 100644 index 994f4d53c..000000000 --- a/src/provider/provider_os_memory_windows.c +++ /dev/null @@ -1,153 +0,0 @@ -/* - * Copyright (C) 2024 Intel Corporation - * - * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -*/ - -#include - -#include -#include -#include -#include - -#include - -#include "utils_concurrency.h" -#include "utils_log.h" - -static UTIL_ONCE_FLAG Page_size_is_initialized = UTIL_ONCE_FLAG_INIT; -static size_t Page_size; - -umf_result_t os_translate_mem_protection_flags(unsigned in_protection, - unsigned *out_protection) { - switch (in_protection) { - case UMF_PROTECTION_NONE: - *out_protection = PAGE_NOACCESS; - return UMF_RESULT_SUCCESS; - case UMF_PROTECTION_EXEC: - *out_protection = PAGE_EXECUTE; - return UMF_RESULT_SUCCESS; - case (UMF_PROTECTION_EXEC | UMF_PROTECTION_READ): - *out_protection = PAGE_EXECUTE_READ; - return UMF_RESULT_SUCCESS; - case (UMF_PROTECTION_EXEC | UMF_PROTECTION_READ | UMF_PROTECTION_WRITE): - *out_protection = PAGE_EXECUTE_READWRITE; - return UMF_RESULT_SUCCESS; - case (UMF_PROTECTION_EXEC | UMF_PROTECTION_WRITE): - *out_protection = PAGE_EXECUTE_WRITECOPY; - return UMF_RESULT_SUCCESS; - case UMF_PROTECTION_READ: - *out_protection = PAGE_READONLY; - return UMF_RESULT_SUCCESS; - case (UMF_PROTECTION_READ | UMF_PROTECTION_WRITE): - *out_protection = PAGE_READWRITE; - return UMF_RESULT_SUCCESS; - case UMF_PROTECTION_WRITE: - *out_protection = PAGE_WRITECOPY; - return UMF_RESULT_SUCCESS; - } - LOG_ERR("os_translate_mem_protection_flags(): unsupported protection flag: " - "%u", - in_protection); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; -} - -umf_result_t os_translate_mem_visibility_flag(umf_memory_visibility_t in_flag, - unsigned *out_flag) { - switch (in_flag) { - case UMF_MEM_MAP_PRIVATE: - *out_flag = 0; // ignored on Windows - return UMF_RESULT_SUCCESS; - case UMF_MEM_MAP_SHARED: - return UMF_RESULT_ERROR_NOT_SUPPORTED; // not supported on Windows yet - } - return UMF_RESULT_ERROR_INVALID_ARGUMENT; -} - -// create a shared memory file -int os_shm_create(const char *shm_name, size_t size) { - (void)shm_name; // unused - (void)size; // unused - return 0; // ignored on 
Windows -} - -// open a shared memory file -int os_shm_open(const char *shm_name) { - (void)shm_name; // unused - return 0; // ignored on Windows -} - -// unlink a shared memory file -int os_shm_unlink(const char *shm_name) { - (void)shm_name; // unused - return 0; // ignored on Windows -} - -int os_create_anonymous_fd(void) { - return 0; // ignored on Windows -} - -size_t get_max_file_size(void) { return SIZE_MAX; } - -int os_get_file_size(int fd, size_t *size) { - (void)fd; // unused - (void)size; // unused - return -1; // not supported on Windows -} - -int os_set_file_size(int fd, size_t size) { - (void)fd; // unused - (void)size; // unused - return 0; // ignored on Windows -} - -void *os_mmap(void *hint_addr, size_t length, int prot, int flag, int fd, - size_t fd_offset) { - (void)flag; // ignored on Windows - (void)fd; // ignored on Windows - (void)fd_offset; // ignored on Windows - return VirtualAlloc(hint_addr, length, MEM_RESERVE | MEM_COMMIT, prot); -} - -int os_munmap(void *addr, size_t length) { - // If VirtualFree() succeeds, the return value is nonzero. - // If VirtualFree() fails, the return value is 0 (zero). - (void)length; // unused - return (VirtualFree(addr, 0, MEM_RELEASE) == 0); -} - -int os_purge(void *addr, size_t length, int advice) { - // If VirtualFree() succeeds, the return value is nonzero. - // If VirtualFree() fails, the return value is 0 (zero). 
- (void)advice; // unused - - // temporarily disable the C6250 warning as we intentionally use the - // MEM_DECOMMIT flag only -#if defined(_MSC_VER) -#pragma warning(push) -#pragma warning(disable : 6250) -#endif // _MSC_VER - - return (VirtualFree(addr, length, MEM_DECOMMIT) == 0); - -#if defined(_MSC_VER) -#pragma warning(pop) -#endif // _MSC_VER -} - -static void _os_get_page_size(void) { - SYSTEM_INFO SystemInfo; - GetSystemInfo(&SystemInfo); - Page_size = SystemInfo.dwPageSize; -} - -size_t os_get_page_size(void) { - util_init_once(&Page_size_is_initialized, _os_get_page_size); - return Page_size; -} - -void os_strerror(int errnum, char *buf, size_t buflen) { - strerror_s(buf, buflen, errnum); -} diff --git a/src/provider/provider_tracking.c b/src/provider/provider_tracking.c index adbe3515b..e726feefb 100644 --- a/src/provider/provider_tracking.c +++ b/src/provider/provider_tracking.c @@ -10,6 +10,7 @@ #include "provider_tracking.h" #include "base_alloc_global.h" #include "critnib.h" +#include "ipc_cache.h" #include "ipc_internal.h" #include "utils_common.h" #include "utils_concurrency.h" @@ -25,6 +26,8 @@ #include #include +uint64_t IPC_HANDLE_ID = 0; + typedef struct tracker_value_t { umf_memory_pool_handle_t pool; size_t size; @@ -48,13 +51,14 @@ static umf_result_t umfMemoryTrackerAdd(umf_memory_tracker_handle_t hTracker, int ret = critnib_insert(hTracker->map, (uintptr_t)ptr, value, 0); if (ret == 0) { - LOG_DEBUG("memory region is added, tracker=%p, ptr=%p, size=%zu", - (void *)hTracker, ptr, size); + LOG_DEBUG( + "memory region is added, tracker=%p, ptr=%p, pool=%p, size=%zu", + (void *)hTracker, ptr, (void *)pool, size); return UMF_RESULT_SUCCESS; } - LOG_ERR("failed to insert tracker value, ret=%d, ptr=%p, size=%zu", ret, - ptr, size); + LOG_ERR("failed to insert tracker value, ret=%d, ptr=%p, pool=%p, size=%zu", + ret, ptr, (void *)pool, size); umf_ba_free(hTracker->tracker_allocator, value); @@ -80,6 +84,11 @@ static umf_result_t 
umfMemoryTrackerRemove(umf_memory_tracker_handle_t hTracker, return UMF_RESULT_ERROR_UNKNOWN; } + tracker_value_t *v = value; + + LOG_DEBUG("memory region removed: tracker=%p, ptr=%p, size=%zu", + (void *)hTracker, ptr, v->size); + umf_ba_free(hTracker->tracker_allocator, value); return UMF_RESULT_SUCCESS; @@ -97,16 +106,19 @@ umf_memory_pool_handle_t umfMemoryTrackerGetPool(const void *ptr) { umf_result_t umfMemoryTrackerGetAllocInfo(const void *ptr, umf_alloc_info_t *pAllocInfo) { - assert(ptr); assert(pAllocInfo); + if (ptr == NULL) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + if (TRACKER == NULL) { - LOG_ERR("tracker is not created"); + LOG_ERR("tracker does not exist"); return UMF_RESULT_ERROR_NOT_SUPPORTED; } if (TRACKER->map == NULL) { - LOG_ERR("tracker's map is not created"); + LOG_ERR("tracker's map does not exist"); return UMF_RESULT_ERROR_NOT_SUPPORTED; } @@ -115,9 +127,8 @@ umf_result_t umfMemoryTrackerGetAllocInfo(const void *ptr, int found = critnib_find(TRACKER->map, (uintptr_t)ptr, FIND_LE, (void *)&rkey, (void **)&rvalue); if (!found || (uintptr_t)ptr >= rkey + rvalue->size) { - LOG_WARN("pointer %p not found in the " - "tracker, TRACKER=%p", - ptr, (void *)TRACKER); + LOG_DEBUG("pointer %p not found in the tracker, TRACKER=%p", ptr, + (void *)TRACKER); return UMF_RESULT_ERROR_INVALID_ARGUMENT; } @@ -132,6 +143,7 @@ umf_result_t umfMemoryTrackerGetAllocInfo(const void *ptr, // providerIpcData is a Flexible Array Member because its size varies // depending on the provider. 
typedef struct ipc_cache_value_t { + uint64_t handle_id; uint64_t ipcDataSize; char providerIpcData[]; } ipc_cache_value_t; @@ -141,6 +153,10 @@ typedef struct umf_tracking_memory_provider_t { umf_memory_tracker_handle_t hTracker; umf_memory_pool_handle_t pool; critnib *ipcCache; + ipc_mapped_handle_cache_handle_t hIpcMappedCache; + + // the upstream provider does not support the free() operation + bool upstreamDoesNotFree; } umf_tracking_memory_provider_t; typedef struct umf_tracking_memory_provider_t umf_tracking_memory_provider_t; @@ -184,7 +200,7 @@ static umf_result_t trackingAllocationSplit(void *hProvider, void *ptr, splitValue->pool = provider->pool; splitValue->size = firstSize; - int r = util_mutex_lock(&provider->hTracker->splitMergeMutex); + int r = utils_mutex_lock(&provider->hTracker->splitMergeMutex); if (r) { goto err_lock; } @@ -235,12 +251,12 @@ static umf_result_t trackingAllocationSplit(void *hProvider, void *ptr, // free the original value umf_ba_free(provider->hTracker->tracker_allocator, value); - util_mutex_unlock(&provider->hTracker->splitMergeMutex); + utils_mutex_unlock(&provider->hTracker->splitMergeMutex); return UMF_RESULT_SUCCESS; err: - util_mutex_unlock(&provider->hTracker->splitMergeMutex); + utils_mutex_unlock(&provider->hTracker->splitMergeMutex); err_lock: umf_ba_free(provider->hTracker->tracker_allocator, splitValue); return ret; @@ -262,7 +278,7 @@ static umf_result_t trackingAllocationMerge(void *hProvider, void *lowPtr, mergedValue->pool = provider->pool; mergedValue->size = totalSize; - int r = util_mutex_lock(&provider->hTracker->splitMergeMutex); + int r = utils_mutex_lock(&provider->hTracker->splitMergeMutex); if (r) { goto err_lock; } @@ -295,8 +311,8 @@ static umf_result_t trackingAllocationMerge(void *hProvider, void *lowPtr, ret = umfMemoryProviderAllocationMerge(provider->hUpstream, lowPtr, highPtr, totalSize); if (ret != UMF_RESULT_SUCCESS) { - LOG_ERR("upstream provider failed to merge regions"); - goto err; + 
LOG_WARN("upstream provider failed to merge regions"); + goto not_merged; } // We'll have a duplicate entry for the range [highPtr, highValue->size] but this is fine, @@ -316,12 +332,16 @@ static umf_result_t trackingAllocationMerge(void *hProvider, void *lowPtr, umf_ba_free(provider->hTracker->tracker_allocator, erasedhighValue); - util_mutex_unlock(&provider->hTracker->splitMergeMutex); + utils_mutex_unlock(&provider->hTracker->splitMergeMutex); return UMF_RESULT_SUCCESS; err: - util_mutex_unlock(&provider->hTracker->splitMergeMutex); + assert(0); + +not_merged: + utils_mutex_unlock(&provider->hTracker->splitMergeMutex); + err_lock: umf_ba_free(provider->hTracker->tracker_allocator, mergedValue); return ret; @@ -329,6 +349,7 @@ static umf_result_t trackingAllocationMerge(void *hProvider, void *lowPtr, static umf_result_t trackingFree(void *hProvider, void *ptr, size_t size) { umf_result_t ret; + umf_result_t ret_remove = UMF_RESULT_ERROR_UNKNOWN; umf_tracking_memory_provider_t *p = (umf_tracking_memory_provider_t *)hProvider; @@ -337,13 +358,13 @@ static umf_result_t trackingFree(void *hProvider, void *ptr, size_t size) { // could allocate the memory at address `ptr` before a call to umfMemoryTrackerRemove // resulting in inconsistent state. if (ptr) { - ret = umfMemoryTrackerRemove(p->hTracker, ptr); - if (ret != UMF_RESULT_SUCCESS) { + ret_remove = umfMemoryTrackerRemove(p->hTracker, ptr); + if (ret_remove != UMF_RESULT_SUCCESS) { // DO NOT return an error here, because the tracking provider // cannot change behaviour of the upstream provider. 
LOG_ERR("failed to remove the region from the tracker, ptr=%p, " "size=%zu, ret = %d", - ptr, size, ret); + ptr, size, ret_remove); } } @@ -353,7 +374,7 @@ static umf_result_t trackingFree(void *hProvider, void *ptr, size_t size) { ret = umfMemoryProviderPutIPCHandle(p->hUpstream, cache_value->providerIpcData); if (ret != UMF_RESULT_SUCCESS) { - LOG_ERR("upstream provider is failed to put IPC handle, ptr=%p, " + LOG_ERR("upstream provider failed to put IPC handle, ptr=%p, " "size=%zu, ret = %d", ptr, size, ret); } @@ -362,7 +383,13 @@ static umf_result_t trackingFree(void *hProvider, void *ptr, size_t size) { ret = umfMemoryProviderFree(p->hUpstream, ptr, size); if (ret != UMF_RESULT_SUCCESS) { - LOG_ERR("upstream provider is failed to free the memory"); + LOG_ERR("upstream provider failed to free the memory"); + // Do not add memory back to the tracker, + // if it had not been removed. + if (ret_remove != UMF_RESULT_SUCCESS) { + return ret; + } + if (umfMemoryTrackerAdd(p->hTracker, p->pool, ptr, size) != UMF_RESULT_SUCCESS) { LOG_ERR( @@ -392,9 +419,11 @@ static umf_result_t trackingInitialize(void *params, void **ret) { return UMF_RESULT_SUCCESS; } -#ifndef NDEBUG -static void check_if_tracker_is_empty(umf_memory_tracker_handle_t hTracker, - umf_memory_pool_handle_t pool) { +// TODO clearing the tracker is a temporary solution and should be removed. +// The tracker should be cleared using the provider's free() operation. 
+static void clear_tracker_for_the_pool(umf_memory_tracker_handle_t hTracker, + umf_memory_pool_handle_t pool, + bool upstreamDoesNotFree) { uintptr_t rkey; void *rvalue; size_t n_items = 0; @@ -403,39 +432,57 @@ static void check_if_tracker_is_empty(umf_memory_tracker_handle_t hTracker, while (1 == critnib_find((critnib *)hTracker->map, last_key, FIND_G, &rkey, &rvalue)) { tracker_value_t *value = (tracker_value_t *)rvalue; - if (value->pool == pool || pool == NULL) { - n_items++; + if (value->pool != pool && pool != NULL) { + last_key = rkey; + continue; } + n_items++; + + void *removed_value = critnib_remove(hTracker->map, rkey); + assert(removed_value == rvalue); + umf_ba_free(hTracker->tracker_allocator, removed_value); + last_key = rkey; } - if (n_items) { - // Do not assert if we are running in the proxy library, - // because it may need those resources till - // the very end of exiting the application. - if (!util_is_running_in_proxy_lib()) { - if (pool) { - LOG_ERR("tracking provider of pool %p is not empty! " - "(%zu items left)", - (void *)pool, n_items); - } else { - LOG_ERR("tracking provider is not empty! (%zu items " - "left)", - n_items); - } +#ifndef NDEBUG + // print error messages only if provider supports the free() operation + if (n_items && !upstreamDoesNotFree) { + if (pool) { + LOG_ERR( + "tracking provider of pool %p is not empty! (%zu items left)", + (void *)pool, n_items); + } else { + LOG_ERR("tracking provider is not empty! 
(%zu items left)", + n_items); } } +#else /* DEBUG */ + (void)upstreamDoesNotFree; // unused in DEBUG build + (void)n_items; // unused in DEBUG build +#endif /* DEBUG */ +} + +static void clear_tracker(umf_memory_tracker_handle_t hTracker) { + clear_tracker_for_the_pool(hTracker, NULL, false); } -#endif /* NDEBUG */ static void trackingFinalize(void *provider) { umf_tracking_memory_provider_t *p = (umf_tracking_memory_provider_t *)provider; + + umfIpcHandleMappedCacheDestroy(p->hIpcMappedCache); + critnib_delete(p->ipcCache); -#ifndef NDEBUG - check_if_tracker_is_empty(p->hTracker, p->pool); -#endif /* NDEBUG */ + + // Do not clear the tracker if we are running in the proxy library, + // because it may need those resources till + // the very end of exiting the application. + if (!utils_is_running_in_proxy_lib()) { + clear_tracker_for_the_pool(p->hTracker, p->pool, + p->upstreamDoesNotFree); + } umf_ba_global_free(provider); } @@ -486,6 +533,11 @@ static umf_result_t trackingGetIpcHandleSize(void *provider, size_t *size) { return umfMemoryProviderGetIPCHandleSize(p->hUpstream, size); } +static inline umf_ipc_data_t *getIpcDataFromIpcHandle(void *providerIpcData) { + return (umf_ipc_data_t *)((uint8_t *)providerIpcData - + sizeof(umf_ipc_data_t)); +} + static umf_result_t trackingGetIpcHandle(void *provider, const void *ptr, size_t size, void *providerIpcData) { umf_tracking_memory_provider_t *p = @@ -493,47 +545,39 @@ static umf_result_t trackingGetIpcHandle(void *provider, const void *ptr, umf_result_t ret = UMF_RESULT_SUCCESS; size_t ipcDataSize = 0; int cached = 0; + ipc_cache_value_t *cache_value = NULL; + umf_ipc_data_t *ipcUmfData = getIpcDataFromIpcHandle(providerIpcData); + do { void *value = critnib_get(p->ipcCache, (uintptr_t)ptr); if (value) { //cache hit - ipc_cache_value_t *cache_value = (ipc_cache_value_t *)value; - memcpy(providerIpcData, cache_value->providerIpcData, - cache_value->ipcDataSize); + cache_value = (ipc_cache_value_t *)value; cached = 1; 
- } else { - ret = umfMemoryProviderGetIPCHandle(p->hUpstream, ptr, size, - providerIpcData); - if (ret != UMF_RESULT_SUCCESS) { - LOG_ERR("upstream provider is failed to get IPC handle"); - return ret; - } - + } else { //cache miss ret = umfMemoryProviderGetIPCHandleSize(p->hUpstream, &ipcDataSize); if (ret != UMF_RESULT_SUCCESS) { - LOG_ERR("upstream provider is failed to get the size of IPC " + LOG_ERR("upstream provider failed to get the size of IPC " "handle"); - ret = umfMemoryProviderPutIPCHandle(p->hUpstream, - providerIpcData); - if (ret != UMF_RESULT_SUCCESS) { - LOG_ERR("upstream provider is failed to put IPC handle"); - } return ret; } size_t value_size = sizeof(ipc_cache_value_t) + ipcDataSize; - ipc_cache_value_t *cache_value = umf_ba_global_alloc(value_size); + cache_value = umf_ba_global_alloc(value_size); if (!cache_value) { LOG_ERR("failed to allocate cache_value"); - ret = umfMemoryProviderPutIPCHandle(p->hUpstream, - providerIpcData); - if (ret != UMF_RESULT_SUCCESS) { - LOG_ERR("upstream provider is failed to put IPC handle"); - } return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; } + ret = umfMemoryProviderGetIPCHandle(p->hUpstream, ptr, size, + cache_value->providerIpcData); + if (ret != UMF_RESULT_SUCCESS) { + LOG_ERR("upstream provider failed to get IPC handle"); + umf_ba_global_free(cache_value); + return ret; + } + + cache_value->handle_id = utils_atomic_increment(&IPC_HANDLE_ID); cache_value->ipcDataSize = ipcDataSize; - memcpy(cache_value->providerIpcData, providerIpcData, ipcDataSize); int insRes = critnib_insert(p->ipcCache, (uintptr_t)ptr, (void *)cache_value, 0 /*update*/); @@ -549,11 +593,11 @@ static umf_result_t trackingGetIpcHandle(void *provider, const void *ptr, // But this case should be rare enough. // 2. critnib failed to allocate memory internally. We need // to cleanup and return corresponding error. 
+ ret = umfMemoryProviderPutIPCHandle( + p->hUpstream, cache_value->providerIpcData); umf_ba_global_free(cache_value); - ret = umfMemoryProviderPutIPCHandle(p->hUpstream, - providerIpcData); if (ret != UMF_RESULT_SUCCESS) { - LOG_ERR("upstream provider is failed to put IPC handle"); + LOG_ERR("upstream provider failed to put IPC handle"); return ret; } if (insRes == ENOMEM) { @@ -564,6 +608,10 @@ static umf_result_t trackingGetIpcHandle(void *provider, const void *ptr, } } while (!cached); + memcpy(providerIpcData, cache_value->providerIpcData, + cache_value->ipcDataSize); + ipcUmfData->handle_id = cache_value->handle_id; + return ret; } @@ -576,17 +624,65 @@ static umf_result_t trackingPutIpcHandle(void *provider, return UMF_RESULT_SUCCESS; } -static size_t getDataSizeFromIpcHandle(const void *providerIpcData) { - // This is hack to get size of memory pointed by IPC handle. - // tracking memory provider gets only provider-specific data - // pointed by providerIpcData, but the size of allocation tracked - // by umf_ipc_data_t. We use this trick to get pointer to - // umf_ipc_data_t data because the providerIpcData is - // the Flexible Array Member of umf_ipc_data_t. - const umf_ipc_data_t *ipcUmfData = - (const umf_ipc_data_t *)((const uint8_t *)providerIpcData - - sizeof(umf_ipc_data_t)); - return ipcUmfData->baseSize; +static void +ipcMappedCacheEvictionCallback(const ipc_mapped_handle_cache_key_t *key, + const ipc_mapped_handle_cache_value_t *value) { + umf_tracking_memory_provider_t *p = + (umf_tracking_memory_provider_t *)key->local_provider; + // umfMemoryTrackerRemove should be called before umfMemoryProviderCloseIPCHandle + // to avoid a race condition. If the order would be different, other thread + // could allocate the memory at address `ptr` before a call to umfMemoryTrackerRemove + // resulting in inconsistent state. 
+ if (value->mapped_base_ptr) { + umf_result_t ret = + umfMemoryTrackerRemove(p->hTracker, value->mapped_base_ptr); + if (ret != UMF_RESULT_SUCCESS) { + // DO NOT return an error here, because the tracking provider + // cannot change behaviour of the upstream provider. + LOG_ERR("failed to remove the region from the tracker, ptr=%p, " + "size=%zu, ret = %d", + value->mapped_base_ptr, value->mapped_size, ret); + } + } + umf_result_t ret = umfMemoryProviderCloseIPCHandle( + p->hUpstream, value->mapped_base_ptr, value->mapped_size); + if (ret != UMF_RESULT_SUCCESS) { + LOG_ERR("provider failed to close IPC handle, ptr=%p, size=%zu", + value->mapped_base_ptr, value->mapped_size); + } +} + +static umf_result_t upstreamOpenIPCHandle(umf_tracking_memory_provider_t *p, + void *providerIpcData, + size_t bufferSize, void **ptr) { + void *mapped_ptr = NULL; + assert(p != NULL); + assert(ptr != NULL); + umf_result_t ret = umfMemoryProviderOpenIPCHandle( + p->hUpstream, providerIpcData, &mapped_ptr); + if (ret != UMF_RESULT_SUCCESS) { + LOG_ERR("upstream provider failed to open IPC handle"); + return ret; + } + assert(mapped_ptr != NULL); + + ret = umfMemoryTrackerAdd(p->hTracker, p->pool, mapped_ptr, bufferSize); + if (ret != UMF_RESULT_SUCCESS) { + LOG_ERR("failed to add IPC region to the tracker, ptr=%p, " + "size=%zu, " + "ret = %d", + mapped_ptr, bufferSize, ret); + if (umfMemoryProviderCloseIPCHandle(p->hUpstream, mapped_ptr, + bufferSize)) { + LOG_ERR("upstream provider failed to close IPC handle, " + "ptr=%p, size=%zu", + mapped_ptr, bufferSize); + } + return ret; + } + + *ptr = mapped_ptr; + return UMF_RESULT_SUCCESS; } static umf_result_t trackingOpenIpcHandle(void *provider, void *providerIpcData, @@ -596,47 +692,67 @@ static umf_result_t trackingOpenIpcHandle(void *provider, void *providerIpcData, umf_result_t ret = UMF_RESULT_SUCCESS; assert(p->hUpstream); + assert(p->hIpcMappedCache); - ret = umfMemoryProviderOpenIPCHandle(p->hUpstream, providerIpcData, ptr); + 
umf_ipc_data_t *ipcUmfData = getIpcDataFromIpcHandle(providerIpcData); + + // Compiler may add paddings to the ipc_mapped_handle_cache_key_t structure + // so we need to zero it out to avoid false cache miss. + ipc_mapped_handle_cache_key_t key = {0}; + key.remote_base_ptr = ipcUmfData->base; + key.local_provider = provider; + key.remote_pid = ipcUmfData->pid; + + ipc_mapped_handle_cache_value_t *cache_entry = NULL; + ret = umfIpcHandleMappedCacheGet(p->hIpcMappedCache, &key, + ipcUmfData->handle_id, &cache_entry); if (ret != UMF_RESULT_SUCCESS) { - LOG_ERR("upstream provider is failed to open IPC handle"); + LOG_ERR("failed to get cache entry"); return ret; } - size_t bufferSize = getDataSizeFromIpcHandle(providerIpcData); - ret = umfMemoryTrackerAdd(p->hTracker, p->pool, *ptr, bufferSize); - if (ret != UMF_RESULT_SUCCESS) { - LOG_ERR("failed to add IPC region to the tracker, ptr=%p, size=%zu, " - "ret = %d", - *ptr, bufferSize, ret); - if (umfMemoryProviderCloseIPCHandle(p->hUpstream, *ptr, bufferSize)) { - LOG_ERR("upstream provider is failed to close IPC handle, ptr=%p, " - "size=%zu", - *ptr, bufferSize); + + assert(cache_entry != NULL); + + void *mapped_ptr = NULL; + utils_atomic_load_acquire(&(cache_entry->mapped_base_ptr), &mapped_ptr); + if (mapped_ptr == NULL) { + utils_mutex_lock(&(cache_entry->mmap_lock)); + utils_atomic_load_acquire(&(cache_entry->mapped_base_ptr), &mapped_ptr); + if (mapped_ptr == NULL) { + ret = upstreamOpenIPCHandle(p, providerIpcData, + ipcUmfData->baseSize, &mapped_ptr); + if (ret == UMF_RESULT_SUCCESS) { + // Put to the cache + cache_entry->mapped_size = ipcUmfData->baseSize; + utils_atomic_store_release(&(cache_entry->mapped_base_ptr), + mapped_ptr); + } } + utils_mutex_unlock(&(cache_entry->mmap_lock)); } + + if (ret == UMF_RESULT_SUCCESS) { + *ptr = mapped_ptr; + } + return ret; } static umf_result_t trackingCloseIpcHandle(void *provider, void *ptr, size_t size) { - umf_tracking_memory_provider_t *p = - 
(umf_tracking_memory_provider_t *)provider; - - // umfMemoryTrackerRemove should be called before umfMemoryProviderFree - // to avoid a race condition. If the order would be different, other thread - // could allocate the memory at address `ptr` before a call to umfMemoryTrackerRemove - // resulting in inconsistent state. - if (ptr) { - umf_result_t ret = umfMemoryTrackerRemove(p->hTracker, ptr); - if (ret != UMF_RESULT_SUCCESS) { - // DO NOT return an error here, because the tracking provider - // cannot change behaviour of the upstream provider. - LOG_ERR("failed to remove the region from the tracker, ptr=%p, " - "size=%zu, ret = %d", - ptr, size, ret); - } - } - return umfMemoryProviderCloseIPCHandle(p->hUpstream, ptr, size); + (void)provider; + (void)ptr; + (void)size; + // We keep opened IPC handles in the p->hIpcMappedCache. + // IPC handle is closed when it is evicted from the cache + // or when cache is destroyed. + // + // TODO: today the size of the IPC cache is infinite. + // When the threshold for the cache size is implemented + // we need to introduce a reference counting mechanism. + // The trackingOpenIpcHandle will increment the refcount for the corresponding entry. + // The trackingCloseIpcHandle will decrement the refcount for the corresponding cache entry. 
+ return UMF_RESULT_SUCCESS; } umf_memory_provider_ops_t UMF_TRACKING_MEMORY_PROVIDER_OPS = { @@ -644,11 +760,11 @@ umf_memory_provider_ops_t UMF_TRACKING_MEMORY_PROVIDER_OPS = { .initialize = trackingInitialize, .finalize = trackingFinalize, .alloc = trackingAlloc, - .free = trackingFree, .get_last_native_error = trackingGetLastError, .get_min_page_size = trackingGetMinPageSize, .get_recommended_page_size = trackingGetRecommendedPageSize, .get_name = trackingName, + .ext.free = trackingFree, .ext.purge_force = trackingPurgeForce, .ext.purge_lazy = trackingPurgeLazy, .ext.allocation_split = trackingAllocationSplit, @@ -661,10 +777,11 @@ umf_memory_provider_ops_t UMF_TRACKING_MEMORY_PROVIDER_OPS = { umf_result_t umfTrackingMemoryProviderCreate( umf_memory_provider_handle_t hUpstream, umf_memory_pool_handle_t hPool, - umf_memory_provider_handle_t *hTrackingProvider) { + umf_memory_provider_handle_t *hTrackingProvider, bool upstreamDoesNotFree) { umf_tracking_memory_provider_t params; params.hUpstream = hUpstream; + params.upstreamDoesNotFree = upstreamDoesNotFree; params.hTracker = TRACKER; if (!params.hTracker) { LOG_ERR("failed, TRACKER is NULL"); @@ -677,10 +794,14 @@ umf_result_t umfTrackingMemoryProviderCreate( return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; } + params.hIpcMappedCache = + umfIpcHandleMappedCacheCreate(ipcMappedCacheEvictionCallback); + LOG_DEBUG("upstream=%p, tracker=%p, " - "pool=%p, ipcCache=%p", + "pool=%p, ipcCache=%p, hIpcMappedCache=%p", (void *)params.hUpstream, (void *)params.hTracker, - (void *)params.pool, (void *)params.ipcCache); + (void *)params.pool, (void *)params.ipcCache, + (void *)params.hIpcMappedCache); return umfMemoryProviderCreate(&UMF_TRACKING_MEMORY_PROVIDER_OPS, ¶ms, hTrackingProvider); @@ -710,7 +831,7 @@ umf_memory_tracker_handle_t umfMemoryTrackerCreate(void) { handle->tracker_allocator = tracker_allocator; - void *mutex_ptr = util_mutex_init(&handle->splitMergeMutex); + void *mutex_ptr = 
utils_mutex_init(&handle->splitMergeMutex); if (!mutex_ptr) { goto err_destroy_tracker_allocator; } @@ -726,7 +847,7 @@ umf_memory_tracker_handle_t umfMemoryTrackerCreate(void) { return handle; err_destroy_mutex: - util_mutex_destroy_not_free(&handle->splitMergeMutex); + utils_mutex_destroy_not_free(&handle->splitMergeMutex); err_destroy_tracker_allocator: umf_ba_destroy(tracker_allocator); err_free_handle: @@ -739,23 +860,21 @@ void umfMemoryTrackerDestroy(umf_memory_tracker_handle_t handle) { return; } - // Do not destroy if we are running in the proxy library, + // Do not destroy the tracker if we are running in the proxy library, // because it may need those resources till // the very end of exiting the application. - if (util_is_running_in_proxy_lib()) { + if (utils_is_running_in_proxy_lib()) { return; } -#ifndef NDEBUG - check_if_tracker_is_empty(handle, NULL); -#endif /* NDEBUG */ + clear_tracker(handle); // We have to zero all inner pointers, // because the tracker handle can be copied // and used in many places. critnib_delete(handle->map); handle->map = NULL; - util_mutex_destroy_not_free(&handle->splitMergeMutex); + utils_mutex_destroy_not_free(&handle->splitMergeMutex); umf_ba_destroy(handle->tracker_allocator); handle->tracker_allocator = NULL; umf_ba_global_free(handle); diff --git a/src/provider/provider_tracking.h b/src/provider/provider_tracking.h index 585b4fe5c..9444ee475 100644 --- a/src/provider/provider_tracking.h +++ b/src/provider/provider_tracking.h @@ -11,6 +11,7 @@ #define UMF_MEMORY_TRACKER_INTERNAL_H 1 #include +#include #include #include @@ -28,7 +29,7 @@ extern "C" { struct umf_memory_tracker_t { umf_ba_pool_t *tracker_allocator; critnib *map; - os_mutex_t splitMergeMutex; + utils_mutex_t splitMergeMutex; }; typedef struct umf_memory_tracker_t *umf_memory_tracker_handle_t; @@ -53,7 +54,7 @@ umf_result_t umfMemoryTrackerGetAllocInfo(const void *ptr, // forwards all requests to hUpstream memory Provider. 
hUpstream lifetime should be managed by the user of this function. umf_result_t umfTrackingMemoryProviderCreate( umf_memory_provider_handle_t hUpstream, umf_memory_pool_handle_t hPool, - umf_memory_provider_handle_t *hTrackingProvider); + umf_memory_provider_handle_t *hTrackingProvider, bool upstreamDoesNotFree); void umfTrackingMemoryProviderGetUpstreamProvider( umf_memory_provider_handle_t hTrackingProvider, diff --git a/src/proxy_lib/proxy_lib.c b/src/proxy_lib/proxy_lib.c index 6c3ffa272..f8bae304d 100644 --- a/src/proxy_lib/proxy_lib.c +++ b/src/proxy_lib/proxy_lib.c @@ -27,6 +27,12 @@ * - _aligned_offset_recalloc() */ +#ifndef _WIN32 +#define _GNU_SOURCE // for RTLD_NEXT +#include +#undef _GNU_SOURCE +#endif /* _WIN32 */ + #if (defined PROXY_LIB_USES_JEMALLOC_POOL) #include #define umfPoolManagerOps umfJemallocPoolOps @@ -38,6 +44,7 @@ #endif #include +#include #include #include @@ -47,6 +54,7 @@ #include "base_alloc_linear.h" #include "proxy_lib.h" #include "utils_common.h" +#include "utils_load_library.h" #include "utils_log.h" #ifdef _WIN32 /* Windows ***************************************/ @@ -58,7 +66,7 @@ #define UTIL_ONCE_FLAG INIT_ONCE #define UTIL_ONCE_FLAG_INIT INIT_ONCE_STATIC_INIT -void util_init_once(UTIL_ONCE_FLAG *flag, void (*onceCb)(void)); +void utils_init_once(UTIL_ONCE_FLAG *flag, void (*onceCb)(void)); #else /* Linux *************************************************/ @@ -93,6 +101,24 @@ void util_init_once(UTIL_ONCE_FLAG *flag, void (*onceCb)(void)); * of a UMF pool to allocate memory needed by an application. It should be freed * by an application. 
*/ +#ifndef _WIN32 +typedef void *(*system_aligned_alloc_t)(size_t alignment, size_t size); +typedef void *(*system_calloc_t)(size_t nmemb, size_t size); +typedef void (*system_free_t)(void *ptr); +typedef void *(*system_malloc_t)(size_t size); +typedef size_t (*system_malloc_usable_size_t)(void *ptr); +typedef void *(*system_realloc_t)(void *ptr, size_t size); + +// pointers to the default system allocator's API +static system_aligned_alloc_t System_aligned_alloc; +static system_calloc_t System_calloc; +static system_free_t System_free; +static system_malloc_t System_malloc; +static system_malloc_usable_size_t System_malloc_usable_size; +static system_realloc_t System_realloc; + +static size_t Size_threshold_value = 0; +#endif /* _WIN32 */ static UTIL_ONCE_FLAG Base_alloc_leak_initialized = UTIL_ONCE_FLAG_INIT; static umf_ba_linear_pool_t *Base_alloc_leak = NULL; @@ -102,67 +128,140 @@ static umf_memory_pool_handle_t Proxy_pool = NULL; // it protects us from recursion in umfPool*() static __TLS int was_called_from_umfPool = 0; +// This WA for the issue: +// https://github.com/oneapi-src/unified-memory-framework/issues/894 +// It protects us from a recursion in malloc_usable_size() +// when the JEMALLOC proxy_lib_pool is used. +// TODO remove this WA when the issue is fixed. 
+static __TLS int was_called_from_malloc_usable_size = 0; + /*****************************************************************************/ /*** The constructor and destructor of the proxy library *********************/ /*****************************************************************************/ +#ifndef _WIN32 +static size_t get_size_threshold(void) { + char *str_threshold = utils_env_var_get_str("UMF_PROXY", "size.threshold="); + LOG_DEBUG("UMF_PROXY[size.threshold] = %s", str_threshold); + long threshold = utils_get_size_threshold(str_threshold); + if (threshold < 0) { + LOG_ERR("incorrect size threshold: %s", str_threshold); + exit(-1); + } + + return (size_t)threshold; +} + +static int get_system_allocator_symbols(void) { + *((void **)(&System_aligned_alloc)) = + utils_get_symbol_addr(RTLD_NEXT, "aligned_alloc", NULL); + *((void **)(&System_calloc)) = + utils_get_symbol_addr(RTLD_NEXT, "calloc", NULL); + *((void **)(&System_free)) = utils_get_symbol_addr(RTLD_NEXT, "free", NULL); + *((void **)(&System_malloc)) = + utils_get_symbol_addr(RTLD_NEXT, "malloc", NULL); + *((void **)(&System_malloc_usable_size)) = + utils_get_symbol_addr(RTLD_NEXT, "malloc_usable_size", NULL); + *((void **)(&System_realloc)) = + utils_get_symbol_addr(RTLD_NEXT, "realloc", NULL); + + if (System_aligned_alloc && System_calloc && System_free && System_malloc && + System_malloc_usable_size && System_realloc) { + return 0; + } + + return -1; +} +#endif /* _WIN32 */ + void proxy_lib_create_common(void) { - util_log_init(); - umf_os_memory_provider_params_t os_params = - umfOsMemoryProviderParamsDefault(); + utils_log_init(); + umf_os_memory_provider_params_handle_t os_params = NULL; umf_result_t umf_result; + umf_result = umfOsMemoryProviderParamsCreate(&os_params); + if (umf_result != UMF_RESULT_SUCCESS) { + LOG_ERR("creating OS memory provider params failed"); + exit(-1); + } + #ifndef _WIN32 -#define NAME_MAX 255 - char shm_name[NAME_MAX]; + size_t _threshold = get_size_threshold(); 
+ if (_threshold > 0) { + if (get_system_allocator_symbols()) { + LOG_ERR("initialization of the system allocator failed!"); + exit(-1); + } - if (util_env_var_has_str("UMF_PROXY", "page.disposition=shared-fd")) { - LOG_DEBUG("proxy_lib: using the MAP_SHARED visibility mode with the " - "file descriptor duplication"); - os_params.visibility = UMF_MEM_MAP_SHARED; - os_params.shm_name = NULL; + Size_threshold_value = _threshold; + LOG_INFO("system allocator initialized, size threshold value = %zu", + Size_threshold_value); + } - } else if (util_env_var_has_str("UMF_PROXY", - "page.disposition=shared-shm")) { - LOG_DEBUG("proxy_lib: using the MAP_SHARED visibility mode with the " - "named shared memory"); - os_params.visibility = UMF_MEM_MAP_SHARED; + if (utils_env_var_has_str("UMF_PROXY", "page.disposition=shared-fd")) { + LOG_INFO("proxy_lib: using the MAP_SHARED visibility mode with the " + "file descriptor duplication"); + umf_result = umfOsMemoryProviderParamsSetVisibility(os_params, + UMF_MEM_MAP_SHARED); + if (umf_result != UMF_RESULT_SUCCESS) { + LOG_ERR("setting visibility mode failed"); + exit(-1); + } + umf_result = umfOsMemoryProviderParamsSetShmName(os_params, NULL); + if (umf_result != UMF_RESULT_SUCCESS) { + LOG_ERR("setting shared memory name failed"); + exit(-1); + } + } else if (utils_env_var_has_str("UMF_PROXY", + "page.disposition=shared-shm")) { + umf_result = umfOsMemoryProviderParamsSetVisibility(os_params, + UMF_MEM_MAP_SHARED); + if (umf_result != UMF_RESULT_SUCCESS) { + LOG_ERR("setting visibility mode failed"); + exit(-1); + } + char shm_name[NAME_MAX]; memset(shm_name, 0, NAME_MAX); sprintf(shm_name, "umf_proxy_lib_shm_pid_%i", utils_getpid()); - os_params.shm_name = shm_name; + umf_result = umfOsMemoryProviderParamsSetShmName(os_params, shm_name); + if (umf_result != UMF_RESULT_SUCCESS) { + LOG_ERR("setting shared memory name failed"); + exit(-1); + } - LOG_DEBUG("proxy_lib: using the MAP_SHARED visibility mode with the " - "named shared 
memory: %s", - os_params.shm_name); + LOG_INFO("proxy_lib: using the MAP_SHARED visibility mode with the " + "named shared memory: %s", + shm_name); } -#undef NAME_MAX -#endif +#endif /* _WIN32 */ - umf_result = umfMemoryProviderCreate(umfOsMemoryProviderOps(), &os_params, + umf_result = umfMemoryProviderCreate(umfOsMemoryProviderOps(), os_params, &OS_memory_provider); + umfOsMemoryProviderParamsDestroy(os_params); if (umf_result != UMF_RESULT_SUCCESS) { LOG_ERR("creating OS memory provider failed"); exit(-1); } - umf_result = - umfPoolCreate(umfPoolManagerOps(), OS_memory_provider, NULL, - UMF_POOL_CREATE_FLAG_DISABLE_TRACKING, &Proxy_pool); + umf_result = umfPoolCreate(umfPoolManagerOps(), OS_memory_provider, NULL, 0, + &Proxy_pool); if (umf_result != UMF_RESULT_SUCCESS) { LOG_ERR("creating UMF pool manager failed"); exit(-1); } + // The UMF pool has just been created (Proxy_pool != NULL). Stop using // the linear allocator and start using the UMF pool allocator from now on. + LOG_DEBUG("proxy library initialized"); } void proxy_lib_destroy_common(void) { - if (util_is_running_in_proxy_lib()) { + if (utils_is_running_in_proxy_lib()) { // We cannot destroy 'Base_alloc_leak' nor 'Proxy_pool' nor 'OS_memory_provider', // because it could lead to use-after-free in the program's unloader // (for example _dl_fini() on Linux). 
- return; + goto fini_proxy_lib_destroy_common; } umf_memory_pool_handle_t pool = Proxy_pool; @@ -172,6 +271,10 @@ void proxy_lib_destroy_common(void) { umf_memory_provider_handle_t provider = OS_memory_provider; OS_memory_provider = NULL; umfMemoryProviderDestroy(provider); + LOG_DEBUG("proxy library destroyed"); + +fini_proxy_lib_destroy_common: + LOG_DEBUG("proxy library finalized"); } /*****************************************************************************/ @@ -210,18 +313,24 @@ static void ba_leak_create(void) { Base_alloc_leak = umf_ba_linear_create(0); } // it does not implement destroy(), because we cannot destroy non-freed memory static void ba_leak_init_once(void) { - util_init_once(&Base_alloc_leak_initialized, ba_leak_create); + utils_init_once(&Base_alloc_leak_initialized, ba_leak_create); } -static inline void *ba_leak_malloc(size_t size) { +static inline void *ba_leak_aligned_alloc(size_t alignment, size_t size) { ba_leak_init_once(); - return umf_ba_linear_alloc(Base_alloc_leak, size); + void *ptr = umf_ba_linear_alloc(Base_alloc_leak, size + alignment); + return (void *)ALIGN_UP_SAFE((uintptr_t)ptr, alignment); +} + +static inline void *ba_leak_malloc(size_t size) { + return ba_leak_aligned_alloc(0, size); } static inline void *ba_leak_calloc(size_t nmemb, size_t size) { ba_leak_init_once(); // umf_ba_linear_alloc() returns zeroed memory - return umf_ba_linear_alloc(Base_alloc_leak, nmemb * size); + // so ba_leak_aligned_alloc() does too + return ba_leak_aligned_alloc(0, nmemb * size); } static inline void *ba_leak_realloc(void *ptr, size_t size, size_t max_size) { @@ -229,12 +338,6 @@ static inline void *ba_leak_realloc(void *ptr, size_t size, size_t max_size) { return ba_generic_realloc(Base_alloc_leak, ptr, size, max_size); } -static inline void *ba_leak_aligned_alloc(size_t alignment, size_t size) { - ba_leak_init_once(); - void *ptr = umf_ba_linear_alloc(Base_alloc_leak, size + alignment); - return (void *)ALIGN_UP((uintptr_t)ptr, 
alignment); -} - static inline int ba_leak_free(void *ptr) { ba_leak_init_once(); return umf_ba_linear_free(Base_alloc_leak, ptr); @@ -250,6 +353,12 @@ static inline size_t ba_leak_pool_contains_pointer(void *ptr) { /*****************************************************************************/ void *malloc(size_t size) { +#ifndef _WIN32 + if (size < Size_threshold_value) { + return System_malloc(size); + } +#endif /* _WIN32 */ + if (!was_called_from_umfPool && Proxy_pool) { was_called_from_umfPool = 1; void *ptr = umfPoolMalloc(Proxy_pool, size); @@ -261,6 +370,12 @@ void *malloc(size_t size) { } void *calloc(size_t nmemb, size_t size) { +#ifndef _WIN32 + if ((nmemb * size) < Size_threshold_value) { + return System_calloc(nmemb, size); + } +#endif /* _WIN32 */ + if (!was_called_from_umfPool && Proxy_pool) { was_called_from_umfPool = 1; void *ptr = umfPoolCalloc(Proxy_pool, nmemb, size); @@ -280,15 +395,22 @@ void free(void *ptr) { return; } - if (Proxy_pool) { + if (Proxy_pool && (umfPoolByPtr(ptr) == Proxy_pool)) { if (umfPoolFree(Proxy_pool, ptr) != UMF_RESULT_SUCCESS) { LOG_ERR("umfPoolFree() failed"); - assert(0); } return; } - assert(0); +#ifndef _WIN32 + if (Size_threshold_value) { + System_free(ptr); + return; + } +#endif /* _WIN32 */ + + LOG_ERR("free() failed: %p", ptr); + return; } @@ -307,18 +429,31 @@ void *realloc(void *ptr, size_t size) { return ba_leak_realloc(ptr, size, leak_pool_contains_pointer); } - if (Proxy_pool) { + if (Proxy_pool && (umfPoolByPtr(ptr) == Proxy_pool)) { was_called_from_umfPool = 1; void *new_ptr = umfPoolRealloc(Proxy_pool, ptr, size); was_called_from_umfPool = 0; return new_ptr; } - assert(0); +#ifndef _WIN32 + if (Size_threshold_value) { + return System_realloc(ptr, size); + } +#endif /* _WIN32 */ + + LOG_ERR("realloc() failed: %p", ptr); + return NULL; } void *aligned_alloc(size_t alignment, size_t size) { +#ifndef _WIN32 + if (size < Size_threshold_value) { + return System_aligned_alloc(alignment, size); + } +#endif /* 
_WIN32 */ + if (!was_called_from_umfPool && Proxy_pool) { was_called_from_umfPool = 1; void *ptr = umfPoolAlignedMalloc(Proxy_pool, size, alignment); @@ -334,19 +469,33 @@ size_t _msize(void *ptr) { #else size_t malloc_usable_size(void *ptr) { #endif - - // a check to verify we are running the proxy library + // a check to verify if we are running the proxy library if (ptr == (void *)0x01) { return 0xDEADBEEF; } - if (!was_called_from_umfPool && Proxy_pool) { + if (ba_leak_pool_contains_pointer(ptr)) { + return 0; // unsupported in case of the ba_leak allocator + } + + if (!was_called_from_malloc_usable_size && Proxy_pool && + (umfPoolByPtr(ptr) == Proxy_pool)) { + was_called_from_malloc_usable_size = 1; was_called_from_umfPool = 1; size_t size = umfPoolMallocUsableSize(Proxy_pool, ptr); was_called_from_umfPool = 0; + was_called_from_malloc_usable_size = 0; return size; } +#ifndef _WIN32 + if (!was_called_from_malloc_usable_size && Size_threshold_value) { + return System_malloc_usable_size(ptr); + } +#endif /* _WIN32 */ + + LOG_ERR("malloc_usable_size() failed: %p", ptr); + return 0; // unsupported in this case } diff --git a/src/ravl/ravl.c b/src/ravl/ravl.c new file mode 100644 index 000000000..dd6c17b03 --- /dev/null +++ b/src/ravl/ravl.c @@ -0,0 +1,557 @@ +/* + * + * Copyright (C) 2018-2024 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + */ + +/* + * ravl.c -- implementation of a RAVL tree + * https://sidsen.azurewebsites.net//papers/ravl-trees-journal.pdf + */ + +#include "ravl.h" +#include "../src/utils/utils_common.h" +#include "../src/utils/utils_concurrency.h" +#include "assert.h" +#include "base_alloc_global.h" + +#include +#include +#include +#include + +#define RAVL_DEFAULT_DATA_SIZE (sizeof(void *)) + +enum ravl_slot_type { + RAVL_LEFT, + RAVL_RIGHT, + + MAX_SLOTS, + + RAVL_ROOT +}; + +struct ravl_node { + struct ravl_node *parent; + struct ravl_node *slots[MAX_SLOTS]; + int32_t rank; /* cannot be greater than height of the subtree */ + int32_t pointer_based; + char data[]; +}; + +struct ravl { + struct ravl_node *root; + ravl_compare *compare; + size_t data_size; +}; + +/* + * ravl_new -- creates a new ravl tree instance + */ +struct ravl *ravl_new_sized(ravl_compare *compare, size_t data_size) { + struct ravl *r = umf_ba_global_alloc(sizeof(*r)); + if (r == NULL) { + return NULL; + } + + r->compare = compare; + r->root = NULL; + r->data_size = data_size; + + return r; +} + +/* + * ravl_new -- creates a new tree that stores data pointers + */ +struct ravl *ravl_new(ravl_compare *compare) { + return ravl_new_sized(compare, RAVL_DEFAULT_DATA_SIZE); +} + +/* + * ravl_clear_node -- (internal) recursively clears the given subtree, + * calls callback in an in-order fashion. Optionally frees the given node. 
+ */ +static void ravl_foreach_node(struct ravl_node *n, ravl_cb cb, void *arg, + int free_node) { + if (n == NULL) { + return; + } + + ravl_foreach_node(n->slots[RAVL_LEFT], cb, arg, free_node); + if (cb) { + cb((void *)n->data, arg); + } + ravl_foreach_node(n->slots[RAVL_RIGHT], cb, arg, free_node); + + if (free_node) { + umf_ba_global_free(n); + } +} + +/* + * ravl_clear -- clears the entire tree, starting from the root + */ +void ravl_clear(struct ravl *ravl) { + ravl_foreach_node(ravl->root, NULL, NULL, 1); + ravl->root = NULL; +} + +/* + * ravl_delete_cb -- clears and deletes the given ravl instance, calls callback + */ +void ravl_delete_cb(struct ravl *ravl, ravl_cb cb, void *arg) { + ravl_foreach_node(ravl->root, cb, arg, 1); + umf_ba_global_free(ravl); +} + +/* + * ravl_delete -- clears and deletes the given ravl instance + */ +void ravl_delete(struct ravl *ravl) { ravl_delete_cb(ravl, NULL, NULL); } + +/* + * ravl_foreach -- traverses the entire tree, calling callback for every node + */ +void ravl_foreach(struct ravl *ravl, ravl_cb cb, void *arg) { + ravl_foreach_node(ravl->root, cb, arg, 0); +} + +/* + * ravl_empty -- checks whether the given tree is empty + */ +int ravl_empty(struct ravl *ravl) { return ravl->root == NULL; } + +/* + * ravl_node_insert_constructor -- node data constructor for ravl_insert + */ +static void ravl_node_insert_constructor(void *data, size_t data_size, + const void *arg) { + /* suppress unused-parameter errors */ + (void)data_size; + + /* copy only the 'arg' pointer */ + memcpy(data, &arg, sizeof(arg)); +} + +/* + * ravl_node_copy_constructor -- node data constructor for ravl_emplace_copy + */ +static void ravl_node_copy_constructor(void *data, size_t data_size, + const void *arg) { + memcpy(data, arg, data_size); +} + +/* + * ravl_new_node -- (internal) allocates and initializes a new node + */ +static struct ravl_node *ravl_new_node(struct ravl *ravl, ravl_constr constr, + const void *arg) { + struct ravl_node *n = 
umf_ba_global_alloc(sizeof(*n) + ravl->data_size); + if (n == NULL) { + return NULL; + } + + n->parent = NULL; + n->slots[RAVL_LEFT] = NULL; + n->slots[RAVL_RIGHT] = NULL; + n->rank = 0; + n->pointer_based = constr == ravl_node_insert_constructor; + constr(n->data, ravl->data_size, arg); + + return n; +} + +/* + * ravl_slot_opposite -- (internal) returns the opposite slot type, cannot be + * called for root type + */ +static enum ravl_slot_type ravl_slot_opposite(enum ravl_slot_type t) { + assert(t != RAVL_ROOT); + + return t == RAVL_LEFT ? RAVL_RIGHT : RAVL_LEFT; +} + +/* + * ravl_node_slot_type -- (internal) returns the type of the given node: + * left child, right child or root + */ +static enum ravl_slot_type ravl_node_slot_type(struct ravl_node *n) { + if (n->parent == NULL) { + return RAVL_ROOT; + } + + return n->parent->slots[RAVL_LEFT] == n ? RAVL_LEFT : RAVL_RIGHT; +} + +/* + * ravl_node_sibling -- (internal) returns the sibling of the given node, + * NULL if the node is root (has no parent) + */ +static struct ravl_node *ravl_node_sibling(struct ravl_node *n) { + enum ravl_slot_type t = ravl_node_slot_type(n); + if (t == RAVL_ROOT) { + return NULL; + } + + return n->parent->slots[t == RAVL_LEFT ? RAVL_RIGHT : RAVL_LEFT]; +} + +/* + * ravl_node_ref -- (internal) returns the pointer to the memory location in + * which the given node resides + */ +static struct ravl_node **ravl_node_ref(struct ravl *ravl, + struct ravl_node *n) { + enum ravl_slot_type t = ravl_node_slot_type(n); + + return t == RAVL_ROOT ? &ravl->root : &n->parent->slots[t]; +} + +/* + * ravl_rotate -- (internal) performs a rotation around a given node + * + * The node n swaps place with its parent. If n is right child, parent becomes + * the left child of n, otherwise parent becomes right child of n. 
+ */ +static void ravl_rotate(struct ravl *ravl, struct ravl_node *n) { + assert(n->parent != NULL); + struct ravl_node *p = n->parent; + struct ravl_node **pref = ravl_node_ref(ravl, p); + + enum ravl_slot_type t = ravl_node_slot_type(n); + enum ravl_slot_type t_opposite = ravl_slot_opposite(t); + + n->parent = p->parent; + p->parent = n; + *pref = n; + + if ((p->slots[t] = n->slots[t_opposite]) != NULL) { + p->slots[t]->parent = p; + } + n->slots[t_opposite] = p; +} + +/* + * ravl_node_rank -- (internal) returns the rank of the node + * + * For the purpose of balancing, NULL nodes have rank -1. + */ +static int ravl_node_rank(struct ravl_node *n) { + return n == NULL ? -1 : n->rank; +} + +/* + * ravl_node_rank_difference_parent -- (internal) returns the rank different + * between parent node p and its child n + * + * Every rank difference must be positive. + * + * Either of these can be NULL. + */ +static int ravl_node_rank_difference_parent(struct ravl_node *p, + struct ravl_node *n) { + int rv = ravl_node_rank(p) - ravl_node_rank(n); + // assert to check integer overflow + // ravl_node_rank(x) is >= -1 + assert(rv <= ravl_node_rank(p) + 1); + return rv; +} + +/* + * ravl_node_rank_difference - (internal) returns the rank difference between + * parent and its child + * + * Can be used to check if a given node is an i-child. 
+ */ +static int ravl_node_rank_difference(struct ravl_node *n) { + return ravl_node_rank_difference_parent(n->parent, n); +} + +/* + * ravl_node_is_i_j -- (internal) checks if a given node is strictly i,j-node + */ +static int ravl_node_is_i_j(struct ravl_node *n, int i, int j) { + return (ravl_node_rank_difference_parent(n, n->slots[RAVL_LEFT]) == i && + ravl_node_rank_difference_parent(n, n->slots[RAVL_RIGHT]) == j); +} + +/* + * ravl_node_is -- (internal) checks if a given node is i,j-node or j,i-node + */ +static int ravl_node_is(struct ravl_node *n, int i, int j) { + return ravl_node_is_i_j(n, i, j) || ravl_node_is_i_j(n, j, i); +} + +/* + * ravl_node_promote -- promotes a given node by increasing its rank + */ +static void ravl_node_promote(struct ravl_node *n) { n->rank += 1; } + +/* + * ravl_node_promote -- demotes a given node by increasing its rank + */ +static void ravl_node_demote(struct ravl_node *n) { + assert(n->rank > 0); + n->rank -= 1; +} + +/* + * ravl_balance -- balances the tree after insert + * + * This function must restore the invariant that every rank + * difference is positive. + */ +static void ravl_balance(struct ravl *ravl, struct ravl_node *n) { + /* walk up the tree, promoting nodes */ + while (n->parent && ravl_node_is(n->parent, 0, 1)) { + ravl_node_promote(n->parent); + n = n->parent; + } + + /* + * Either the rank rule holds or n is a 0-child whose sibling is an + * i-child with i > 1. 
+ */ + struct ravl_node *s = ravl_node_sibling(n); + if (!(ravl_node_rank_difference(n) == 0 && + ravl_node_rank_difference_parent(n->parent, s) > 1)) { + return; + } + + struct ravl_node *y = n->parent; + /* if n is a left child, let z be n's right child and vice versa */ + enum ravl_slot_type t = ravl_slot_opposite(ravl_node_slot_type(n)); + struct ravl_node *z = n->slots[t]; + + if (z == NULL || ravl_node_rank_difference(z) == 2) { + ravl_rotate(ravl, n); + ravl_node_demote(y); + } else if (ravl_node_rank_difference(z) == 1) { + ravl_rotate(ravl, z); + ravl_rotate(ravl, z); + ravl_node_promote(z); + ravl_node_demote(n); + assert(y != NULL); + ravl_node_demote(y); + } +} + +/* + * ravl_insert -- insert data into the tree + */ +int ravl_insert(struct ravl *ravl, const void *data) { + return ravl_emplace(ravl, ravl_node_insert_constructor, data); +} + +/* + * ravl_insert -- copy construct data inside of a new tree node + */ +int ravl_emplace_copy(struct ravl *ravl, const void *data) { + return ravl_emplace(ravl, ravl_node_copy_constructor, data); +} + +/* + * ravl_emplace -- construct data inside of a new tree node + */ +int ravl_emplace(struct ravl *ravl, ravl_constr constr, const void *arg) { + struct ravl_node *n = ravl_new_node(ravl, constr, arg); + if (n == NULL) { + return -1; + } + + /* walk down the tree and insert the new node into a missing slot */ + struct ravl_node **dstp = &ravl->root; + struct ravl_node *dst = NULL; + while (*dstp != NULL) { + dst = (*dstp); + int cmp_result = ravl->compare(ravl_data(n), ravl_data(dst)); + if (cmp_result == 0) { + goto error_duplicate; + } + + dstp = &dst->slots[cmp_result > 0]; + } + n->parent = dst; + *dstp = n; + + ravl_balance(ravl, n); + + return 0; + +error_duplicate: + errno = EEXIST; + umf_ba_global_free(n); + return -1; +} + +/* + * ravl_node_type_most -- (internal) returns left-most or right-most node in + * the subtree + */ +static struct ravl_node *ravl_node_type_most(struct ravl_node *n, + enum 
ravl_slot_type t) { + while (n->slots[t] != NULL) { + n = n->slots[t]; + } + + return n; +} + +/* + * ravl_node_cessor -- (internal) returns the successor or predecessor of the + * node + */ +static struct ravl_node *ravl_node_cessor(struct ravl_node *n, + enum ravl_slot_type t) { + /* + * If t child is present, we are looking for t-opposite-most node + * in t child subtree + */ + if (n->slots[t]) { + return ravl_node_type_most(n->slots[t], ravl_slot_opposite(t)); + } + + /* otherwise get the first parent on the t path */ + while (n->parent != NULL && n == n->parent->slots[t]) { + n = n->parent; + } + + return n->parent; +} + +/* + * ravl_node_successor -- returns node's successor + * + * It's the first node larger than n. + */ +struct ravl_node *ravl_node_successor(struct ravl_node *n) { + return ravl_node_cessor(n, RAVL_RIGHT); +} + +/* + * ravl_node_predecessor -- returns node's successor + * + * It's the first node smaller than n. + */ +struct ravl_node *ravl_node_predecessor(struct ravl_node *n) { + return ravl_node_cessor(n, RAVL_LEFT); +} + +/* + * ravl_predicate_holds -- (internal) verifies the given predicate for + * the current node in the search path + * + * If the predicate holds for the given node or a node that can be directly + * derived from it, returns 1. Otherwise returns 0. 
+ */ +static int ravl_predicate_holds(int result, struct ravl_node **ret, + struct ravl_node *n, + enum ravl_predicate flags) { + if (flags & RAVL_PREDICATE_EQUAL) { + if (result == 0) { + *ret = n; + return 1; + } + } + if (flags & RAVL_PREDICATE_GREATER) { + if (result < 0) { /* data < n->data */ + *ret = n; + return 0; + } else if (result == 0) { + *ret = ravl_node_successor(n); + return 1; + } + } + if (flags & RAVL_PREDICATE_LESS) { + if (result > 0) { /* data > n->data */ + *ret = n; + return 0; + } else if (result == 0) { + *ret = ravl_node_predecessor(n); + return 1; + } + } + + return 0; +} + +/* + * ravl_find -- searches for the node in the tree + */ +struct ravl_node *ravl_find(struct ravl *ravl, const void *data, + enum ravl_predicate flags) { + struct ravl_node *r = NULL; + struct ravl_node *n = ravl->root; + while (n) { + int result = ravl->compare(data, ravl_data(n)); + if (ravl_predicate_holds(result, &r, n, flags)) { + return r; + } + + n = n->slots[result > 0]; + } + + return r; +} + +/* + * ravl_remove -- removes the given node from the tree + */ +void ravl_remove(struct ravl *ravl, struct ravl_node *n) { + if (n->slots[RAVL_LEFT] != NULL && n->slots[RAVL_RIGHT] != NULL) { + /* if both children are present, remove the successor instead */ + struct ravl_node *s = ravl_node_successor(n); + memcpy(n->data, s->data, ravl->data_size); + + ravl_remove(ravl, s); + } else { + /* swap n with the child that may exist */ + struct ravl_node *r = + n->slots[RAVL_LEFT] ? 
n->slots[RAVL_LEFT] : n->slots[RAVL_RIGHT]; + if (r != NULL) { + r->parent = n->parent; + } + + *ravl_node_ref(ravl, n) = r; + umf_ba_global_free(n); + } +} + +/* + * ravl_data -- returns the data contained within the node + */ +void *ravl_data(struct ravl_node *node) { + if (node->pointer_based) { + void *data; + memcpy(&data, node->data, sizeof(void *)); + return data; + } else { + return (void *)node->data; + } +} + +/* + * ravl_first -- returns first (left-most) node in the tree + */ +struct ravl_node *ravl_first(struct ravl *ravl) { + if (ravl->root) { + return ravl_node_type_most(ravl->root, RAVL_LEFT); + } + + return NULL; +} + +/* + * ravl_last -- returns last (right-most) node in the tree + */ +struct ravl_node *ravl_last(struct ravl *ravl) { + if (ravl->root) { + return ravl_node_type_most(ravl->root, RAVL_RIGHT); + } + + return NULL; +} diff --git a/src/ravl/ravl.h b/src/ravl/ravl.h new file mode 100644 index 000000000..ae84d5a56 --- /dev/null +++ b/src/ravl/ravl.h @@ -0,0 +1,63 @@ +/* + * + * Copyright (C) 2018-2024 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + */ + +/* + * ravl.h -- internal definitions for ravl tree + */ + +#ifndef UMF_RAVL_H +#define UMF_RAVL_H 1 + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +struct ravl; +struct ravl_node; + +enum ravl_predicate { + RAVL_PREDICATE_EQUAL = 1 << 0, + RAVL_PREDICATE_GREATER = 1 << 1, + RAVL_PREDICATE_LESS = 1 << 2, + RAVL_PREDICATE_LESS_EQUAL = RAVL_PREDICATE_EQUAL | RAVL_PREDICATE_LESS, + RAVL_PREDICATE_GREATER_EQUAL = + RAVL_PREDICATE_EQUAL | RAVL_PREDICATE_GREATER, +}; + +typedef int ravl_compare(const void *lhs, const void *rhs); +typedef void ravl_cb(void *data, void *arg); +typedef void ravl_constr(void *data, size_t data_size, const void *arg); + +struct ravl *ravl_new(ravl_compare *compare); +struct ravl *ravl_new_sized(ravl_compare *compare, size_t data_size); +void ravl_delete(struct ravl *ravl); +void ravl_delete_cb(struct ravl *ravl, ravl_cb cb, void *arg); +void ravl_foreach(struct ravl *ravl, ravl_cb cb, void *arg); +int ravl_empty(struct ravl *ravl); +void ravl_clear(struct ravl *ravl); +int ravl_insert(struct ravl *ravl, const void *data); +int ravl_emplace(struct ravl *ravl, ravl_constr constr, const void *arg); +int ravl_emplace_copy(struct ravl *ravl, const void *data); + +struct ravl_node *ravl_find(struct ravl *ravl, const void *data, + enum ravl_predicate predicate_flags); +struct ravl_node *ravl_first(struct ravl *ravl); +struct ravl_node *ravl_last(struct ravl *ravl); +void *ravl_data(struct ravl_node *node); +void ravl_remove(struct ravl *ravl, struct ravl_node *node); +struct ravl_node *ravl_node_successor(struct ravl_node *n); +struct ravl_node *ravl_node_predecessor(struct ravl_node *n); + +#ifdef __cplusplus +} +#endif + +#endif /* UMF_RAVL_H */ diff --git a/src/topology.c b/src/topology.c index 79caffdb6..eab7992ce 100644 --- a/src/topology.c +++ b/src/topology.c @@ -41,6 +41,6 @@ static void umfCreateTopology(void) { } hwloc_topology_t umfGetTopology(void) { - 
util_init_once(&topology_initialized, umfCreateTopology); + utils_init_once(&topology_initialized, umfCreateTopology); return topology; } diff --git a/src/uthash/uthash.h b/src/uthash/uthash.h new file mode 100644 index 000000000..6058e638e --- /dev/null +++ b/src/uthash/uthash.h @@ -0,0 +1,1261 @@ +/* +Copyright (C) 2024 Intel Corporation +Copyright (c) 2003-2022, Troy D. Hanson https://troydhanson.github.io/uthash/ +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER +OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/* +Modifications by Intel: +- define uthash_malloc unconditional as a umf_ba_global_alloc +- define uthash_free unconditional as a umf_ba_global_free +*/ + +#ifndef UTHASH_H +#define UTHASH_H + +#define UTHASH_VERSION 2.3.0 + +#include /* ptrdiff_t */ +#include /* exit */ +#include /* memcmp, memset, strlen */ + +#include "base_alloc_global.h" + +#if defined(HASH_DEFINE_OWN_STDINT) && HASH_DEFINE_OWN_STDINT +/* This codepath is provided for backward compatibility, but I plan to remove it. 
*/ +#warning \ + "HASH_DEFINE_OWN_STDINT is deprecated; please use HASH_NO_STDINT instead" +typedef unsigned int uint32_t; +typedef unsigned char uint8_t; +#elif defined(HASH_NO_STDINT) && HASH_NO_STDINT +#else +#include /* uint8_t, uint32_t */ +#endif + +/* These macros use decltype or the earlier __typeof GNU extension. + As decltype is only available in newer compilers (VS2010 or gcc 4.3+ + when compiling c++ source) this code uses whatever method is needed + or, for VS2008 where neither is available, uses casting workarounds. */ +#if !defined(DECLTYPE) && !defined(NO_DECLTYPE) +#if defined(_MSC_VER) /* MS compiler */ +#if _MSC_VER >= 1600 && defined(__cplusplus) /* VS2010 or newer in C++ mode */ +#define DECLTYPE(x) (decltype(x)) +#else /* VS2008 or older (or VS2010 in C mode) */ +#define NO_DECLTYPE +#endif +#elif defined(__MCST__) /* Elbrus C Compiler */ +#define DECLTYPE(x) (__typeof(x)) +#elif defined(__BORLANDC__) || defined(__ICCARM__) || defined(__LCC__) || \ + defined(__WATCOMC__) +#define NO_DECLTYPE +#else /* GNU, Sun and other compilers */ +#define DECLTYPE(x) (__typeof(x)) +#endif +#endif + +#ifdef NO_DECLTYPE +#define DECLTYPE(x) +#define DECLTYPE_ASSIGN(dst, src) \ + do { \ + char **_da_dst = (char **)(&(dst)); \ + *_da_dst = (char *)(src); \ + } while (0) +#else +#define DECLTYPE_ASSIGN(dst, src) \ + do { \ + (dst) = DECLTYPE(dst)(src); \ + } while (0) +#endif + +#define uthash_malloc(sz) umf_ba_global_alloc(sz) /* malloc fcn */ +#define uthash_free(ptr, sz) umf_ba_global_free(ptr) /* free fcn */ + +#ifndef uthash_bzero +#define uthash_bzero(a, n) memset(a, '\0', n) +#endif +#ifndef uthash_strlen +#define uthash_strlen(s) strlen(s) +#endif + +#ifndef HASH_FUNCTION +#define HASH_FUNCTION(keyptr, keylen, hashv) HASH_JEN(keyptr, keylen, hashv) +#endif + +#ifndef HASH_KEYCMP +#define HASH_KEYCMP(a, b, n) memcmp(a, b, n) +#endif + +#ifndef uthash_noexpand_fyi +#define uthash_noexpand_fyi(tbl) /* can be defined to log noexpand */ +#endif +#ifndef 
uthash_expand_fyi +#define uthash_expand_fyi(tbl) /* can be defined to log expands */ +#endif + +#ifndef HASH_NONFATAL_OOM +#define HASH_NONFATAL_OOM 0 +#endif + +#if HASH_NONFATAL_OOM +/* malloc failures can be recovered from */ + +#ifndef uthash_nonfatal_oom +#define uthash_nonfatal_oom(obj) \ + do { \ + } while (0) /* non-fatal OOM error */ +#endif + +#define HASH_RECORD_OOM(oomed) \ + do { \ + (oomed) = 1; \ + } while (0) +#define IF_HASH_NONFATAL_OOM(x) x + +#else +/* malloc failures result in lost memory, hash tables are unusable */ + +#ifndef uthash_fatal +#define uthash_fatal(msg) exit(-1) /* fatal OOM error */ +#endif + +#define HASH_RECORD_OOM(oomed) uthash_fatal("out of memory") +#define IF_HASH_NONFATAL_OOM(x) + +#endif + +/* initial number of buckets */ +#define HASH_INITIAL_NUM_BUCKETS 32U /* initial number of buckets */ +#define HASH_INITIAL_NUM_BUCKETS_LOG2 5U /* lg2 of initial number of buckets */ +#define HASH_BKT_CAPACITY_THRESH 10U /* expand when bucket count reaches */ + +/* calculate the element whose hash handle address is hhp */ +#define ELMT_FROM_HH(tbl, hhp) ((void *)(((char *)(hhp)) - ((tbl)->hho))) +/* calculate the hash handle from element address elp */ +#define HH_FROM_ELMT(tbl, elp) \ + ((UT_hash_handle *)(void *)(((char *)(elp)) + ((tbl)->hho))) + +#define HASH_ROLLBACK_BKT(hh, head, itemptrhh) \ + do { \ + struct UT_hash_handle *_hd_hh_item = (itemptrhh); \ + unsigned _hd_bkt; \ + HASH_TO_BKT(_hd_hh_item->hashv, (head)->hh.tbl->num_buckets, _hd_bkt); \ + (head)->hh.tbl->buckets[_hd_bkt].count++; \ + _hd_hh_item->hh_next = NULL; \ + _hd_hh_item->hh_prev = NULL; \ + } while (0) + +#define HASH_VALUE(keyptr, keylen, hashv) \ + do { \ + HASH_FUNCTION(keyptr, keylen, hashv); \ + } while (0) + +#define HASH_FIND_BYHASHVALUE(hh, head, keyptr, keylen, hashval, out) \ + do { \ + (out) = NULL; \ + if (head) { \ + unsigned _hf_bkt; \ + HASH_TO_BKT(hashval, (head)->hh.tbl->num_buckets, _hf_bkt); \ + if (HASH_BLOOM_TEST((head)->hh.tbl, 
hashval)) { \ + HASH_FIND_IN_BKT((head)->hh.tbl, hh, \ + (head)->hh.tbl->buckets[_hf_bkt], keyptr, \ + keylen, hashval, out); \ + } \ + } \ + } while (0) + +#define HASH_FIND(hh, head, keyptr, keylen, out) \ + do { \ + (out) = NULL; \ + if (head) { \ + unsigned _hf_hashv; \ + HASH_VALUE(keyptr, keylen, _hf_hashv); \ + HASH_FIND_BYHASHVALUE(hh, head, keyptr, keylen, _hf_hashv, out); \ + } \ + } while (0) + +#ifdef HASH_BLOOM +#define HASH_BLOOM_BITLEN (1UL << HASH_BLOOM) +#define HASH_BLOOM_BYTELEN \ + (HASH_BLOOM_BITLEN / 8UL) + (((HASH_BLOOM_BITLEN % 8UL) != 0UL) ? 1UL : 0UL) +#define HASH_BLOOM_MAKE(tbl, oomed) \ + do { \ + (tbl)->bloom_nbits = HASH_BLOOM; \ + (tbl)->bloom_bv = (uint8_t *)uthash_malloc(HASH_BLOOM_BYTELEN); \ + if (!(tbl)->bloom_bv) { \ + HASH_RECORD_OOM(oomed); \ + } else { \ + uthash_bzero((tbl)->bloom_bv, HASH_BLOOM_BYTELEN); \ + (tbl)->bloom_sig = HASH_BLOOM_SIGNATURE; \ + } \ + } while (0) + +#define HASH_BLOOM_FREE(tbl) \ + do { \ + uthash_free((tbl)->bloom_bv, HASH_BLOOM_BYTELEN); \ + } while (0) + +#define HASH_BLOOM_BITSET(bv, idx) (bv[(idx) / 8U] |= (1U << ((idx) % 8U))) +#define HASH_BLOOM_BITTEST(bv, idx) \ + ((bv[(idx) / 8U] & (1U << ((idx) % 8U))) != 0) + +#define HASH_BLOOM_ADD(tbl, hashv) \ + HASH_BLOOM_BITSET( \ + (tbl)->bloom_bv, \ + ((hashv) & (uint32_t)((1UL << (tbl)->bloom_nbits) - 1U))) + +#define HASH_BLOOM_TEST(tbl, hashv) \ + HASH_BLOOM_BITTEST( \ + (tbl)->bloom_bv, \ + ((hashv) & (uint32_t)((1UL << (tbl)->bloom_nbits) - 1U))) + +#else +#define HASH_BLOOM_MAKE(tbl, oomed) +#define HASH_BLOOM_FREE(tbl) +#define HASH_BLOOM_ADD(tbl, hashv) +#define HASH_BLOOM_TEST(tbl, hashv) 1 +#define HASH_BLOOM_BYTELEN 0U +#endif + +#define HASH_MAKE_TABLE(hh, head, oomed) \ + do { \ + (head)->hh.tbl = \ + (UT_hash_table *)uthash_malloc(sizeof(UT_hash_table)); \ + if (!(head)->hh.tbl) { \ + HASH_RECORD_OOM(oomed); \ + } else { \ + uthash_bzero((head)->hh.tbl, sizeof(UT_hash_table)); \ + (head)->hh.tbl->tail = &((head)->hh); \ + 
(head)->hh.tbl->num_buckets = HASH_INITIAL_NUM_BUCKETS; \ + (head)->hh.tbl->log2_num_buckets = HASH_INITIAL_NUM_BUCKETS_LOG2; \ + (head)->hh.tbl->hho = (char *)(&(head)->hh) - (char *)(head); \ + (head)->hh.tbl->buckets = (UT_hash_bucket *)uthash_malloc( \ + HASH_INITIAL_NUM_BUCKETS * sizeof(struct UT_hash_bucket)); \ + (head)->hh.tbl->signature = HASH_SIGNATURE; \ + if (!(head)->hh.tbl->buckets) { \ + HASH_RECORD_OOM(oomed); \ + uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \ + } else { \ + uthash_bzero((head)->hh.tbl->buckets, \ + HASH_INITIAL_NUM_BUCKETS * \ + sizeof(struct UT_hash_bucket)); \ + HASH_BLOOM_MAKE((head)->hh.tbl, oomed); \ + IF_HASH_NONFATAL_OOM(if (oomed) { \ + uthash_free((head)->hh.tbl->buckets, \ + HASH_INITIAL_NUM_BUCKETS * \ + sizeof(struct UT_hash_bucket)); \ + uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \ + }) \ + } \ + } \ + } while (0) + +#define HASH_REPLACE_BYHASHVALUE_INORDER(hh, head, fieldname, keylen_in, \ + hashval, add, replaced, cmpfcn) \ + do { \ + (replaced) = NULL; \ + HASH_FIND_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in, \ + hashval, replaced); \ + if (replaced) { \ + HASH_DELETE(hh, head, replaced); \ + } \ + HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh, head, &((add)->fieldname), \ + keylen_in, hashval, add, cmpfcn); \ + } while (0) + +#define HASH_REPLACE_BYHASHVALUE(hh, head, fieldname, keylen_in, hashval, add, \ + replaced) \ + do { \ + (replaced) = NULL; \ + HASH_FIND_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in, \ + hashval, replaced); \ + if (replaced) { \ + HASH_DELETE(hh, head, replaced); \ + } \ + HASH_ADD_KEYPTR_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in, \ + hashval, add); \ + } while (0) + +#define HASH_REPLACE(hh, head, fieldname, keylen_in, add, replaced) \ + do { \ + unsigned _hr_hashv; \ + HASH_VALUE(&((add)->fieldname), keylen_in, _hr_hashv); \ + HASH_REPLACE_BYHASHVALUE(hh, head, fieldname, keylen_in, _hr_hashv, \ + add, replaced); \ + } while (0) + +#define 
HASH_REPLACE_INORDER(hh, head, fieldname, keylen_in, add, replaced, \ + cmpfcn) \ + do { \ + unsigned _hr_hashv; \ + HASH_VALUE(&((add)->fieldname), keylen_in, _hr_hashv); \ + HASH_REPLACE_BYHASHVALUE_INORDER(hh, head, fieldname, keylen_in, \ + _hr_hashv, add, replaced, cmpfcn); \ + } while (0) + +#define HASH_APPEND_LIST(hh, head, add) \ + do { \ + (add)->hh.next = NULL; \ + (add)->hh.prev = ELMT_FROM_HH((head)->hh.tbl, (head)->hh.tbl->tail); \ + (head)->hh.tbl->tail->next = (add); \ + (head)->hh.tbl->tail = &((add)->hh); \ + } while (0) + +#define HASH_AKBI_INNER_LOOP(hh, head, add, cmpfcn) \ + do { \ + do { \ + if (cmpfcn(DECLTYPE(head)(_hs_iter), add) > 0) { \ + break; \ + } \ + } while ((_hs_iter = HH_FROM_ELMT((head)->hh.tbl, _hs_iter)->next)); \ + } while (0) + +#ifdef NO_DECLTYPE +#undef HASH_AKBI_INNER_LOOP +#define HASH_AKBI_INNER_LOOP(hh, head, add, cmpfcn) \ + do { \ + char *_hs_saved_head = (char *)(head); \ + do { \ + DECLTYPE_ASSIGN(head, _hs_iter); \ + if (cmpfcn(head, add) > 0) { \ + DECLTYPE_ASSIGN(head, _hs_saved_head); \ + break; \ + } \ + DECLTYPE_ASSIGN(head, _hs_saved_head); \ + } while ((_hs_iter = HH_FROM_ELMT((head)->hh.tbl, _hs_iter)->next)); \ + } while (0) +#endif + +#if HASH_NONFATAL_OOM + +#define HASH_ADD_TO_TABLE(hh, head, keyptr, keylen_in, hashval, add, oomed) \ + do { \ + if (!(oomed)) { \ + unsigned _ha_bkt; \ + (head)->hh.tbl->num_items++; \ + HASH_TO_BKT(hashval, (head)->hh.tbl->num_buckets, _ha_bkt); \ + HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt], hh, &(add)->hh, \ + oomed); \ + if (oomed) { \ + HASH_ROLLBACK_BKT(hh, head, &(add)->hh); \ + HASH_DELETE_HH(hh, head, &(add)->hh); \ + (add)->hh.tbl = NULL; \ + uthash_nonfatal_oom(add); \ + } else { \ + HASH_BLOOM_ADD((head)->hh.tbl, hashval); \ + HASH_EMIT_KEY(hh, head, keyptr, keylen_in); \ + } \ + } else { \ + (add)->hh.tbl = NULL; \ + uthash_nonfatal_oom(add); \ + } \ + } while (0) + +#else + +#define HASH_ADD_TO_TABLE(hh, head, keyptr, keylen_in, hashval, add, oomed) \ 
+ do { \ + unsigned _ha_bkt; \ + (head)->hh.tbl->num_items++; \ + HASH_TO_BKT(hashval, (head)->hh.tbl->num_buckets, _ha_bkt); \ + HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt], hh, &(add)->hh, \ + oomed); \ + HASH_BLOOM_ADD((head)->hh.tbl, hashval); \ + HASH_EMIT_KEY(hh, head, keyptr, keylen_in); \ + } while (0) + +#endif + +#define HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh, head, keyptr, keylen_in, \ + hashval, add, cmpfcn) \ + do { \ + IF_HASH_NONFATAL_OOM(int _ha_oomed = 0;) \ + (add)->hh.hashv = (hashval); \ + (add)->hh.key = (char *)(keyptr); \ + (add)->hh.keylen = (unsigned)(keylen_in); \ + if (!(head)) { \ + (add)->hh.next = NULL; \ + (add)->hh.prev = NULL; \ + HASH_MAKE_TABLE(hh, add, _ha_oomed); \ + IF_HASH_NONFATAL_OOM(if (!_ha_oomed) { ) \ + (head) = (add); \ + IF_HASH_NONFATAL_OOM( \ + }) \ + } else { \ + void *_hs_iter = (head); \ + (add)->hh.tbl = (head)->hh.tbl; \ + HASH_AKBI_INNER_LOOP(hh, head, add, cmpfcn); \ + if (_hs_iter) { \ + (add)->hh.next = _hs_iter; \ + if (((add)->hh.prev = \ + HH_FROM_ELMT((head)->hh.tbl, _hs_iter)->prev)) { \ + HH_FROM_ELMT((head)->hh.tbl, (add)->hh.prev)->next = \ + (add); \ + } else { \ + (head) = (add); \ + } \ + HH_FROM_ELMT((head)->hh.tbl, _hs_iter)->prev = (add); \ + } else { \ + HASH_APPEND_LIST(hh, head, add); \ + } \ + } \ + HASH_ADD_TO_TABLE(hh, head, keyptr, keylen_in, hashval, add, \ + _ha_oomed); \ + HASH_FSCK(hh, head, "HASH_ADD_KEYPTR_BYHASHVALUE_INORDER"); \ + } while (0) + +#define HASH_ADD_KEYPTR_INORDER(hh, head, keyptr, keylen_in, add, cmpfcn) \ + do { \ + unsigned _hs_hashv; \ + HASH_VALUE(keyptr, keylen_in, _hs_hashv); \ + HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh, head, keyptr, keylen_in, \ + _hs_hashv, add, cmpfcn); \ + } while (0) + +#define HASH_ADD_BYHASHVALUE_INORDER(hh, head, fieldname, keylen_in, hashval, \ + add, cmpfcn) \ + HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh, head, &((add)->fieldname), \ + keylen_in, hashval, add, cmpfcn) + +#define HASH_ADD_INORDER(hh, head, fieldname, keylen_in, add, 
cmpfcn) \ + HASH_ADD_KEYPTR_INORDER(hh, head, &((add)->fieldname), keylen_in, add, \ + cmpfcn) + +#define HASH_ADD_KEYPTR_BYHASHVALUE(hh, head, keyptr, keylen_in, hashval, add) \ + do { \ + IF_HASH_NONFATAL_OOM(int _ha_oomed = 0;) \ + (add)->hh.hashv = (hashval); \ + (add)->hh.key = (const void *)(keyptr); \ + (add)->hh.keylen = (unsigned)(keylen_in); \ + if (!(head)) { \ + (add)->hh.next = NULL; \ + (add)->hh.prev = NULL; \ + HASH_MAKE_TABLE(hh, add, _ha_oomed); \ + IF_HASH_NONFATAL_OOM(if (!_ha_oomed) { ) \ + (head) = (add); \ + IF_HASH_NONFATAL_OOM( \ + }) \ + } else { \ + (add)->hh.tbl = (head)->hh.tbl; \ + HASH_APPEND_LIST(hh, head, add); \ + } \ + HASH_ADD_TO_TABLE(hh, head, keyptr, keylen_in, hashval, add, \ + _ha_oomed); \ + HASH_FSCK(hh, head, "HASH_ADD_KEYPTR_BYHASHVALUE"); \ + } while (0) + +#define HASH_ADD_KEYPTR(hh, head, keyptr, keylen_in, add) \ + do { \ + unsigned _ha_hashv; \ + HASH_VALUE(keyptr, keylen_in, _ha_hashv); \ + HASH_ADD_KEYPTR_BYHASHVALUE(hh, head, keyptr, keylen_in, _ha_hashv, \ + add); \ + } while (0) + +#define HASH_ADD_BYHASHVALUE(hh, head, fieldname, keylen_in, hashval, add) \ + HASH_ADD_KEYPTR_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in, \ + hashval, add) + +#define HASH_ADD(hh, head, fieldname, keylen_in, add) \ + HASH_ADD_KEYPTR(hh, head, &((add)->fieldname), keylen_in, add) + +#define HASH_TO_BKT(hashv, num_bkts, bkt) \ + do { \ + bkt = ((hashv) & ((num_bkts)-1U)); \ + } while (0) + +/* delete "delptr" from the hash table. + * "the usual" patch-up process for the app-order doubly-linked-list. + * The use of _hd_hh_del below deserves special explanation. + * These used to be expressed using (delptr) but that led to a bug + * if someone used the same symbol for the head and deletee, like + * HASH_DELETE(hh,users,users); + * We want that to work, but by changing the head (users) below + * we were forfeiting our ability to further refer to the deletee (users) + * in the patch-up process. 
Solution: use scratch space to + * copy the deletee pointer, then the latter references are via that + * scratch pointer rather than through the repointed (users) symbol. + */ +#define HASH_DELETE(hh, head, delptr) HASH_DELETE_HH(hh, head, &(delptr)->hh) + +#define HASH_DELETE_HH(hh, head, delptrhh) \ + do { \ + const struct UT_hash_handle *_hd_hh_del = (delptrhh); \ + if ((_hd_hh_del->prev == NULL) && (_hd_hh_del->next == NULL)) { \ + HASH_BLOOM_FREE((head)->hh.tbl); \ + uthash_free((head)->hh.tbl->buckets, \ + (head)->hh.tbl->num_buckets * \ + sizeof(struct UT_hash_bucket)); \ + uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \ + (head) = NULL; \ + } else { \ + unsigned _hd_bkt; \ + if (_hd_hh_del == (head)->hh.tbl->tail) { \ + (head)->hh.tbl->tail = \ + HH_FROM_ELMT((head)->hh.tbl, _hd_hh_del->prev); \ + } \ + if (_hd_hh_del->prev != NULL) { \ + HH_FROM_ELMT((head)->hh.tbl, _hd_hh_del->prev)->next = \ + _hd_hh_del->next; \ + } else { \ + DECLTYPE_ASSIGN(head, _hd_hh_del->next); \ + } \ + if (_hd_hh_del->next != NULL) { \ + HH_FROM_ELMT((head)->hh.tbl, _hd_hh_del->next)->prev = \ + _hd_hh_del->prev; \ + } \ + HASH_TO_BKT(_hd_hh_del->hashv, (head)->hh.tbl->num_buckets, \ + _hd_bkt); \ + HASH_DEL_IN_BKT((head)->hh.tbl->buckets[_hd_bkt], _hd_hh_del); \ + (head)->hh.tbl->num_items--; \ + } \ + HASH_FSCK(hh, head, "HASH_DELETE_HH"); \ + } while (0) + +/* convenience forms of HASH_FIND/HASH_ADD/HASH_DEL */ +#define HASH_FIND_STR(head, findstr, out) \ + do { \ + unsigned _uthash_hfstr_keylen = (unsigned)uthash_strlen(findstr); \ + HASH_FIND(hh, head, findstr, _uthash_hfstr_keylen, out); \ + } while (0) +#define HASH_ADD_STR(head, strfield, add) \ + do { \ + unsigned _uthash_hastr_keylen = \ + (unsigned)uthash_strlen((add)->strfield); \ + HASH_ADD(hh, head, strfield[0], _uthash_hastr_keylen, add); \ + } while (0) +#define HASH_REPLACE_STR(head, strfield, add, replaced) \ + do { \ + unsigned _uthash_hrstr_keylen = \ + (unsigned)uthash_strlen((add)->strfield); \ + 
HASH_REPLACE(hh, head, strfield[0], _uthash_hrstr_keylen, add, \ + replaced); \ + } while (0) +#define HASH_FIND_INT(head, findint, out) \ + HASH_FIND(hh, head, findint, sizeof(int), out) +#define HASH_ADD_INT(head, intfield, add) \ + HASH_ADD(hh, head, intfield, sizeof(int), add) +#define HASH_REPLACE_INT(head, intfield, add, replaced) \ + HASH_REPLACE(hh, head, intfield, sizeof(int), add, replaced) +#define HASH_FIND_PTR(head, findptr, out) \ + HASH_FIND(hh, head, findptr, sizeof(void *), out) +#define HASH_ADD_PTR(head, ptrfield, add) \ + HASH_ADD(hh, head, ptrfield, sizeof(void *), add) +#define HASH_REPLACE_PTR(head, ptrfield, add, replaced) \ + HASH_REPLACE(hh, head, ptrfield, sizeof(void *), add, replaced) +#define HASH_DEL(head, delptr) HASH_DELETE(hh, head, delptr) + +/* HASH_FSCK checks hash integrity on every add/delete when HASH_DEBUG is defined. + * This is for uthash developer only; it compiles away if HASH_DEBUG isn't defined. + */ +#ifdef HASH_DEBUG +#include /* fprintf, stderr */ +#define HASH_OOPS(...) 
\ + do { \ + fprintf(stderr, __VA_ARGS__); \ + exit(-1); \ + } while (0) +#define HASH_FSCK(hh, head, where) \ + do { \ + struct UT_hash_handle *_thh; \ + if (head) { \ + unsigned _bkt_i; \ + unsigned _count = 0; \ + char *_prev; \ + for (_bkt_i = 0; _bkt_i < (head)->hh.tbl->num_buckets; ++_bkt_i) { \ + unsigned _bkt_count = 0; \ + _thh = (head)->hh.tbl->buckets[_bkt_i].hh_head; \ + _prev = NULL; \ + while (_thh) { \ + if (_prev != (char *)(_thh->hh_prev)) { \ + HASH_OOPS("%s: invalid hh_prev %p, actual %p\n", \ + (where), (void *)_thh->hh_prev, \ + (void *)_prev); \ + } \ + _bkt_count++; \ + _prev = (char *)(_thh); \ + _thh = _thh->hh_next; \ + } \ + _count += _bkt_count; \ + if ((head)->hh.tbl->buckets[_bkt_i].count != _bkt_count) { \ + HASH_OOPS("%s: invalid bucket count %u, actual %u\n", \ + (where), (head)->hh.tbl->buckets[_bkt_i].count, \ + _bkt_count); \ + } \ + } \ + if (_count != (head)->hh.tbl->num_items) { \ + HASH_OOPS("%s: invalid hh item count %u, actual %u\n", \ + (where), (head)->hh.tbl->num_items, _count); \ + } \ + _count = 0; \ + _prev = NULL; \ + _thh = &(head)->hh; \ + while (_thh) { \ + _count++; \ + if (_prev != (char *)_thh->prev) { \ + HASH_OOPS("%s: invalid prev %p, actual %p\n", (where), \ + (void *)_thh->prev, (void *)_prev); \ + } \ + _prev = (char *)ELMT_FROM_HH((head)->hh.tbl, _thh); \ + _thh = (_thh->next ? HH_FROM_ELMT((head)->hh.tbl, _thh->next) \ + : NULL); \ + } \ + if (_count != (head)->hh.tbl->num_items) { \ + HASH_OOPS("%s: invalid app item count %u, actual %u\n", \ + (where), (head)->hh.tbl->num_items, _count); \ + } \ + } \ + } while (0) +#else +#define HASH_FSCK(hh, head, where) +#endif + +/* When compiled with -DHASH_EMIT_KEYS, length-prefixed keys are emitted to + * the descriptor to which this macro is defined for tuning the hash function. + * The app can #include to get the prototype for write(2). 
*/ +#ifdef HASH_EMIT_KEYS +#define HASH_EMIT_KEY(hh, head, keyptr, fieldlen) \ + do { \ + unsigned _klen = fieldlen; \ + write(HASH_EMIT_KEYS, &_klen, sizeof(_klen)); \ + write(HASH_EMIT_KEYS, keyptr, (unsigned long)fieldlen); \ + } while (0) +#else +#define HASH_EMIT_KEY(hh, head, keyptr, fieldlen) +#endif + +/* The Bernstein hash function, used in Perl prior to v5.6. Note (x<<5+x)=x*33. */ +#define HASH_BER(key, keylen, hashv) \ + do { \ + unsigned _hb_keylen = (unsigned)keylen; \ + const unsigned char *_hb_key = (const unsigned char *)(key); \ + (hashv) = 0; \ + while (_hb_keylen-- != 0U) { \ + (hashv) = (((hashv) << 5) + (hashv)) + *_hb_key++; \ + } \ + } while (0) + +/* SAX/FNV/OAT/JEN hash functions are macro variants of those listed at + * http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx + * (archive link: https://archive.is/Ivcan ) + */ +#define HASH_SAX(key, keylen, hashv) \ + do { \ + unsigned _sx_i; \ + const unsigned char *_hs_key = (const unsigned char *)(key); \ + hashv = 0; \ + for (_sx_i = 0; _sx_i < keylen; _sx_i++) { \ + hashv ^= (hashv << 5) + (hashv >> 2) + _hs_key[_sx_i]; \ + } \ + } while (0) +/* FNV-1a variation */ +#define HASH_FNV(key, keylen, hashv) \ + do { \ + unsigned _fn_i; \ + const unsigned char *_hf_key = (const unsigned char *)(key); \ + (hashv) = 2166136261U; \ + for (_fn_i = 0; _fn_i < keylen; _fn_i++) { \ + hashv = hashv ^ _hf_key[_fn_i]; \ + hashv = hashv * 16777619U; \ + } \ + } while (0) + +#define HASH_OAT(key, keylen, hashv) \ + do { \ + unsigned _ho_i; \ + const unsigned char *_ho_key = (const unsigned char *)(key); \ + hashv = 0; \ + for (_ho_i = 0; _ho_i < keylen; _ho_i++) { \ + hashv += _ho_key[_ho_i]; \ + hashv += (hashv << 10); \ + hashv ^= (hashv >> 6); \ + } \ + hashv += (hashv << 3); \ + hashv ^= (hashv >> 11); \ + hashv += (hashv << 15); \ + } while (0) + +#define HASH_JEN_MIX(a, b, c) \ + do { \ + a -= b; \ + a -= c; \ + a ^= (c >> 13); \ + b -= c; \ + b -= a; \ + b ^= (a << 8); \ + c -= a; \ + 
c -= b; \ + c ^= (b >> 13); \ + a -= b; \ + a -= c; \ + a ^= (c >> 12); \ + b -= c; \ + b -= a; \ + b ^= (a << 16); \ + c -= a; \ + c -= b; \ + c ^= (b >> 5); \ + a -= b; \ + a -= c; \ + a ^= (c >> 3); \ + b -= c; \ + b -= a; \ + b ^= (a << 10); \ + c -= a; \ + c -= b; \ + c ^= (b >> 15); \ + } while (0) + +#define HASH_JEN(key, keylen, hashv) \ + do { \ + unsigned _hj_i, _hj_j, _hj_k; \ + unsigned const char *_hj_key = (unsigned const char *)(key); \ + hashv = 0xfeedbeefu; \ + _hj_i = _hj_j = 0x9e3779b9u; \ + _hj_k = (unsigned)(keylen); \ + while (_hj_k >= 12U) { \ + _hj_i += \ + (_hj_key[0] + ((unsigned)_hj_key[1] << 8) + \ + ((unsigned)_hj_key[2] << 16) + ((unsigned)_hj_key[3] << 24)); \ + _hj_j += \ + (_hj_key[4] + ((unsigned)_hj_key[5] << 8) + \ + ((unsigned)_hj_key[6] << 16) + ((unsigned)_hj_key[7] << 24)); \ + hashv += (_hj_key[8] + ((unsigned)_hj_key[9] << 8) + \ + ((unsigned)_hj_key[10] << 16) + \ + ((unsigned)_hj_key[11] << 24)); \ + \ + HASH_JEN_MIX(_hj_i, _hj_j, hashv); \ + \ + _hj_key += 12; \ + _hj_k -= 12U; \ + } \ + hashv += (unsigned)(keylen); \ + switch (_hj_k) { \ + case 11: \ + hashv += ((unsigned)_hj_key[10] << 24); /* FALLTHROUGH */ \ + case 10: \ + hashv += ((unsigned)_hj_key[9] << 16); /* FALLTHROUGH */ \ + case 9: \ + hashv += ((unsigned)_hj_key[8] << 8); /* FALLTHROUGH */ \ + case 8: \ + _hj_j += ((unsigned)_hj_key[7] << 24); /* FALLTHROUGH */ \ + case 7: \ + _hj_j += ((unsigned)_hj_key[6] << 16); /* FALLTHROUGH */ \ + case 6: \ + _hj_j += ((unsigned)_hj_key[5] << 8); /* FALLTHROUGH */ \ + case 5: \ + _hj_j += _hj_key[4]; /* FALLTHROUGH */ \ + case 4: \ + _hj_i += ((unsigned)_hj_key[3] << 24); /* FALLTHROUGH */ \ + case 3: \ + _hj_i += ((unsigned)_hj_key[2] << 16); /* FALLTHROUGH */ \ + case 2: \ + _hj_i += ((unsigned)_hj_key[1] << 8); /* FALLTHROUGH */ \ + case 1: \ + _hj_i += _hj_key[0]; /* FALLTHROUGH */ \ + default:; \ + } \ + HASH_JEN_MIX(_hj_i, _hj_j, hashv); \ + } while (0) + +/* The Paul Hsieh hash function */ +#undef get16bits 
+#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) || \ + defined(_MSC_VER) || defined(__BORLANDC__) || defined(__TURBOC__) +#define get16bits(d) (*((const uint16_t *)(d))) +#endif + +#if !defined(get16bits) +#define get16bits(d) \ + ((((uint32_t)(((const uint8_t *)(d))[1])) << 8) + \ + (uint32_t)(((const uint8_t *)(d))[0])) +#endif +#define HASH_SFH(key, keylen, hashv) \ + do { \ + unsigned const char *_sfh_key = (unsigned const char *)(key); \ + uint32_t _sfh_tmp, _sfh_len = (uint32_t)keylen; \ + \ + unsigned _sfh_rem = _sfh_len & 3U; \ + _sfh_len >>= 2; \ + hashv = 0xcafebabeu; \ + \ + /* Main loop */ \ + for (; _sfh_len > 0U; _sfh_len--) { \ + hashv += get16bits(_sfh_key); \ + _sfh_tmp = ((uint32_t)(get16bits(_sfh_key + 2)) << 11) ^ hashv; \ + hashv = (hashv << 16) ^ _sfh_tmp; \ + _sfh_key += 2U * sizeof(uint16_t); \ + hashv += hashv >> 11; \ + } \ + \ + /* Handle end cases */ \ + switch (_sfh_rem) { \ + case 3: \ + hashv += get16bits(_sfh_key); \ + hashv ^= hashv << 16; \ + hashv ^= (uint32_t)(_sfh_key[sizeof(uint16_t)]) << 18; \ + hashv += hashv >> 11; \ + break; \ + case 2: \ + hashv += get16bits(_sfh_key); \ + hashv ^= hashv << 11; \ + hashv += hashv >> 17; \ + break; \ + case 1: \ + hashv += *_sfh_key; \ + hashv ^= hashv << 10; \ + hashv += hashv >> 1; \ + break; \ + default:; \ + } \ + \ + /* Force "avalanching" of final 127 bits */ \ + hashv ^= hashv << 3; \ + hashv += hashv >> 5; \ + hashv ^= hashv << 4; \ + hashv += hashv >> 17; \ + hashv ^= hashv << 25; \ + hashv += hashv >> 6; \ + } while (0) + +/* iterate over items in a known bucket to find desired item */ +#define HASH_FIND_IN_BKT(tbl, hh, head, keyptr, keylen_in, hashval, out) \ + do { \ + if ((head).hh_head != NULL) { \ + DECLTYPE_ASSIGN(out, ELMT_FROM_HH(tbl, (head).hh_head)); \ + } else { \ + (out) = NULL; \ + } \ + while ((out) != NULL) { \ + if ((out)->hh.hashv == (hashval) && \ + (out)->hh.keylen == (keylen_in)) { \ + if (HASH_KEYCMP((out)->hh.key, keyptr, keylen_in) == 
0) { \ + break; \ + } \ + } \ + if ((out)->hh.hh_next != NULL) { \ + DECLTYPE_ASSIGN(out, ELMT_FROM_HH(tbl, (out)->hh.hh_next)); \ + } else { \ + (out) = NULL; \ + } \ + } \ + } while (0) + +/* add an item to a bucket */ +#define HASH_ADD_TO_BKT(head, hh, addhh, oomed) \ + do { \ + UT_hash_bucket *_ha_head = &(head); \ + _ha_head->count++; \ + (addhh)->hh_next = _ha_head->hh_head; \ + (addhh)->hh_prev = NULL; \ + if (_ha_head->hh_head != NULL) { \ + _ha_head->hh_head->hh_prev = (addhh); \ + } \ + _ha_head->hh_head = (addhh); \ + if ((_ha_head->count >= \ + ((_ha_head->expand_mult + 1U) * HASH_BKT_CAPACITY_THRESH)) && \ + !(addhh)->tbl->noexpand) { \ + HASH_EXPAND_BUCKETS(addhh, (addhh)->tbl, oomed); \ + IF_HASH_NONFATAL_OOM(if (oomed) { HASH_DEL_IN_BKT(head, addhh); }) \ + } \ + } while (0) + +/* remove an item from a given bucket */ +#define HASH_DEL_IN_BKT(head, delhh) \ + do { \ + UT_hash_bucket *_hd_head = &(head); \ + _hd_head->count--; \ + if (_hd_head->hh_head == (delhh)) { \ + _hd_head->hh_head = (delhh)->hh_next; \ + } \ + if ((delhh)->hh_prev) { \ + (delhh)->hh_prev->hh_next = (delhh)->hh_next; \ + } \ + if ((delhh)->hh_next) { \ + (delhh)->hh_next->hh_prev = (delhh)->hh_prev; \ + } \ + } while (0) + +/* Bucket expansion has the effect of doubling the number of buckets + * and redistributing the items into the new buckets. Ideally the + * items will distribute more or less evenly into the new buckets + * (the extent to which this is true is a measure of the quality of + * the hash function as it applies to the key domain). + * + * With the items distributed into more buckets, the chain length + * (item count) in each bucket is reduced. Thus by expanding buckets + * the hash keeps a bound on the chain length. This bounded chain + * length is the essence of how a hash provides constant time lookup. + * + * The calculation of tbl->ideal_chain_maxlen below deserves some + * explanation. 
First, keep in mind that we're calculating the ideal + * maximum chain length based on the *new* (doubled) bucket count. + * In fractions this is just n/b (n=number of items,b=new num buckets). + * Since the ideal chain length is an integer, we want to calculate + * ceil(n/b). We don't depend on floating point arithmetic in this + * hash, so to calculate ceil(n/b) with integers we could write + * + * ceil(n/b) = (n/b) + ((n%b)?1:0) + * + * and in fact a previous version of this hash did just that. + * But now we have improved things a bit by recognizing that b is + * always a power of two. We keep its base 2 log handy (call it lb), + * so now we can write this with a bit shift and logical AND: + * + * ceil(n/b) = (n>>lb) + ( (n & (b-1)) ? 1:0) + * + */ +#define HASH_EXPAND_BUCKETS(hh, tbl, oomed) \ + do { \ + unsigned _he_bkt; \ + unsigned _he_bkt_i; \ + struct UT_hash_handle *_he_thh, *_he_hh_nxt; \ + UT_hash_bucket *_he_new_buckets, *_he_newbkt; \ + _he_new_buckets = (UT_hash_bucket *)uthash_malloc( \ + sizeof(struct UT_hash_bucket) * (tbl)->num_buckets * 2U); \ + if (!_he_new_buckets) { \ + HASH_RECORD_OOM(oomed); \ + } else { \ + uthash_bzero(_he_new_buckets, sizeof(struct UT_hash_bucket) * \ + (tbl)->num_buckets * 2U); \ + (tbl)->ideal_chain_maxlen = \ + ((tbl)->num_items >> ((tbl)->log2_num_buckets + 1U)) + \ + ((((tbl)->num_items & (((tbl)->num_buckets * 2U) - 1U)) != 0U) \ + ? 
1U \ + : 0U); \ + (tbl)->nonideal_items = 0; \ + for (_he_bkt_i = 0; _he_bkt_i < (tbl)->num_buckets; _he_bkt_i++) { \ + _he_thh = (tbl)->buckets[_he_bkt_i].hh_head; \ + while (_he_thh != NULL) { \ + _he_hh_nxt = _he_thh->hh_next; \ + HASH_TO_BKT(_he_thh->hashv, (tbl)->num_buckets * 2U, \ + _he_bkt); \ + _he_newbkt = &(_he_new_buckets[_he_bkt]); \ + if (++(_he_newbkt->count) > (tbl)->ideal_chain_maxlen) { \ + (tbl)->nonideal_items++; \ + if (_he_newbkt->count > \ + _he_newbkt->expand_mult * \ + (tbl)->ideal_chain_maxlen) { \ + _he_newbkt->expand_mult++; \ + } \ + } \ + _he_thh->hh_prev = NULL; \ + _he_thh->hh_next = _he_newbkt->hh_head; \ + if (_he_newbkt->hh_head != NULL) { \ + _he_newbkt->hh_head->hh_prev = _he_thh; \ + } \ + _he_newbkt->hh_head = _he_thh; \ + _he_thh = _he_hh_nxt; \ + } \ + } \ + uthash_free((tbl)->buckets, \ + (tbl)->num_buckets * sizeof(struct UT_hash_bucket)); \ + (tbl)->num_buckets *= 2U; \ + (tbl)->log2_num_buckets++; \ + (tbl)->buckets = _he_new_buckets; \ + (tbl)->ineff_expands = \ + ((tbl)->nonideal_items > ((tbl)->num_items >> 1)) \ + ? ((tbl)->ineff_expands + 1U) \ + : 0U; \ + if ((tbl)->ineff_expands > 1U) { \ + (tbl)->noexpand = 1; \ + uthash_noexpand_fyi(tbl); \ + } \ + uthash_expand_fyi(tbl); \ + } \ + } while (0) + +/* This is an adaptation of Simon Tatham's O(n log(n)) mergesort */ +/* Note that HASH_SORT assumes the hash handle name to be hh. + * HASH_SRT was added to allow the hash handle name to be passed in. 
*/ +#define HASH_SORT(head, cmpfcn) HASH_SRT(hh, head, cmpfcn) +#define HASH_SRT(hh, head, cmpfcn) \ + do { \ + unsigned _hs_i; \ + unsigned _hs_looping, _hs_nmerges, _hs_insize, _hs_psize, _hs_qsize; \ + struct UT_hash_handle *_hs_p, *_hs_q, *_hs_e, *_hs_list, *_hs_tail; \ + if (head != NULL) { \ + _hs_insize = 1; \ + _hs_looping = 1; \ + _hs_list = &((head)->hh); \ + while (_hs_looping != 0U) { \ + _hs_p = _hs_list; \ + _hs_list = NULL; \ + _hs_tail = NULL; \ + _hs_nmerges = 0; \ + while (_hs_p != NULL) { \ + _hs_nmerges++; \ + _hs_q = _hs_p; \ + _hs_psize = 0; \ + for (_hs_i = 0; _hs_i < _hs_insize; ++_hs_i) { \ + _hs_psize++; \ + _hs_q = \ + ((_hs_q->next != NULL) \ + ? HH_FROM_ELMT((head)->hh.tbl, _hs_q->next) \ + : NULL); \ + if (_hs_q == NULL) { \ + break; \ + } \ + } \ + _hs_qsize = _hs_insize; \ + while ((_hs_psize != 0U) || \ + ((_hs_qsize != 0U) && (_hs_q != NULL))) { \ + if (_hs_psize == 0U) { \ + _hs_e = _hs_q; \ + _hs_q = ((_hs_q->next != NULL) \ + ? HH_FROM_ELMT((head)->hh.tbl, \ + _hs_q->next) \ + : NULL); \ + _hs_qsize--; \ + } else if ((_hs_qsize == 0U) || (_hs_q == NULL)) { \ + _hs_e = _hs_p; \ + if (_hs_p != NULL) { \ + _hs_p = ((_hs_p->next != NULL) \ + ? HH_FROM_ELMT((head)->hh.tbl, \ + _hs_p->next) \ + : NULL); \ + } \ + _hs_psize--; \ + } else if ((cmpfcn(DECLTYPE(head)(ELMT_FROM_HH( \ + (head)->hh.tbl, _hs_p)), \ + DECLTYPE(head)(ELMT_FROM_HH( \ + (head)->hh.tbl, _hs_q)))) <= \ + 0) { \ + _hs_e = _hs_p; \ + if (_hs_p != NULL) { \ + _hs_p = ((_hs_p->next != NULL) \ + ? HH_FROM_ELMT((head)->hh.tbl, \ + _hs_p->next) \ + : NULL); \ + } \ + _hs_psize--; \ + } else { \ + _hs_e = _hs_q; \ + _hs_q = ((_hs_q->next != NULL) \ + ? HH_FROM_ELMT((head)->hh.tbl, \ + _hs_q->next) \ + : NULL); \ + _hs_qsize--; \ + } \ + if (_hs_tail != NULL) { \ + _hs_tail->next = \ + ((_hs_e != NULL) \ + ? ELMT_FROM_HH((head)->hh.tbl, _hs_e) \ + : NULL); \ + } else { \ + _hs_list = _hs_e; \ + } \ + if (_hs_e != NULL) { \ + _hs_e->prev = \ + ((_hs_tail != NULL) \ + ? 
ELMT_FROM_HH((head)->hh.tbl, _hs_tail) \ + : NULL); \ + } \ + _hs_tail = _hs_e; \ + } \ + _hs_p = _hs_q; \ + } \ + if (_hs_tail != NULL) { \ + _hs_tail->next = NULL; \ + } \ + if (_hs_nmerges <= 1U) { \ + _hs_looping = 0; \ + (head)->hh.tbl->tail = _hs_tail; \ + DECLTYPE_ASSIGN(head, \ + ELMT_FROM_HH((head)->hh.tbl, _hs_list)); \ + } \ + _hs_insize *= 2U; \ + } \ + HASH_FSCK(hh, head, "HASH_SRT"); \ + } \ + } while (0) + +/* This function selects items from one hash into another hash. + * The end result is that the selected items have dual presence + * in both hashes. There is no copy of the items made; rather + * they are added into the new hash through a secondary hash + * hash handle that must be present in the structure. */ +#define HASH_SELECT(hh_dst, dst, hh_src, src, cond) \ + do { \ + unsigned _src_bkt, _dst_bkt; \ + void *_last_elt = NULL, *_elt; \ + UT_hash_handle *_src_hh, *_dst_hh, *_last_elt_hh = NULL; \ + ptrdiff_t _dst_hho = ((char *)(&(dst)->hh_dst) - (char *)(dst)); \ + if ((src) != NULL) { \ + for (_src_bkt = 0; _src_bkt < (src)->hh_src.tbl->num_buckets; \ + _src_bkt++) { \ + for (_src_hh = (src)->hh_src.tbl->buckets[_src_bkt].hh_head; \ + _src_hh != NULL; _src_hh = _src_hh->hh_next) { \ + _elt = ELMT_FROM_HH((src)->hh_src.tbl, _src_hh); \ + if (cond(_elt)) { \ + IF_HASH_NONFATAL_OOM(int _hs_oomed = 0;) \ + _dst_hh = (UT_hash_handle *)(void *)(((char *)_elt) + \ + _dst_hho); \ + _dst_hh->key = _src_hh->key; \ + _dst_hh->keylen = _src_hh->keylen; \ + _dst_hh->hashv = _src_hh->hashv; \ + _dst_hh->prev = _last_elt; \ + _dst_hh->next = NULL; \ + if (_last_elt_hh != NULL) { \ + _last_elt_hh->next = _elt; \ + } \ + if ((dst) == NULL) { \ + DECLTYPE_ASSIGN(dst, _elt); \ + HASH_MAKE_TABLE(hh_dst, dst, _hs_oomed); \ + IF_HASH_NONFATAL_OOM(if (_hs_oomed) { \ + uthash_nonfatal_oom(_elt); \ + (dst) = NULL; \ + continue; \ + }) \ + } else { \ + _dst_hh->tbl = (dst)->hh_dst.tbl; \ + } \ + HASH_TO_BKT(_dst_hh->hashv, _dst_hh->tbl->num_buckets, \ + _dst_bkt); \ + 
HASH_ADD_TO_BKT(_dst_hh->tbl->buckets[_dst_bkt], \ + hh_dst, _dst_hh, _hs_oomed); \ + (dst)->hh_dst.tbl->num_items++; \ + IF_HASH_NONFATAL_OOM(if (_hs_oomed) { \ + HASH_ROLLBACK_BKT(hh_dst, dst, _dst_hh); \ + HASH_DELETE_HH(hh_dst, dst, _dst_hh); \ + _dst_hh->tbl = NULL; \ + uthash_nonfatal_oom(_elt); \ + continue; \ + }) \ + HASH_BLOOM_ADD(_dst_hh->tbl, _dst_hh->hashv); \ + _last_elt = _elt; \ + _last_elt_hh = _dst_hh; \ + } \ + } \ + } \ + } \ + HASH_FSCK(hh_dst, dst, "HASH_SELECT"); \ + } while (0) + +#define HASH_CLEAR(hh, head) \ + do { \ + if ((head) != NULL) { \ + HASH_BLOOM_FREE((head)->hh.tbl); \ + uthash_free((head)->hh.tbl->buckets, \ + (head)->hh.tbl->num_buckets * \ + sizeof(struct UT_hash_bucket)); \ + uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \ + (head) = NULL; \ + } \ + } while (0) + +#define HASH_OVERHEAD(hh, head) \ + (((head) != NULL) \ + ? ((size_t)(((head)->hh.tbl->num_items * sizeof(UT_hash_handle)) + \ + ((head)->hh.tbl->num_buckets * sizeof(UT_hash_bucket)) + \ + sizeof(UT_hash_table) + (HASH_BLOOM_BYTELEN))) \ + : 0U) + +#ifdef NO_DECLTYPE +#define HASH_ITER(hh, head, el, tmp) \ + for (((el) = (head)), \ + ((*(char **)(&(tmp))) = \ + (char *)((head != NULL) ? (head)->hh.next : NULL)); \ + (el) != NULL; ((el) = (tmp)), \ + ((*(char **)(&(tmp))) = \ + (char *)((tmp != NULL) ? (tmp)->hh.next : NULL))) +#else +#define HASH_ITER(hh, head, el, tmp) \ + for (((el) = (head)), \ + ((tmp) = DECLTYPE(el)((head != NULL) ? (head)->hh.next : NULL)); \ + (el) != NULL; \ + ((el) = (tmp)), \ + ((tmp) = DECLTYPE(el)((tmp != NULL) ? (tmp)->hh.next : NULL))) +#endif + +/* obtain a count of items in the hash */ +#define HASH_COUNT(head) HASH_CNT(hh, head) +#define HASH_CNT(hh, head) ((head != NULL) ? ((head)->hh.tbl->num_items) : 0U) + +typedef struct UT_hash_bucket { + struct UT_hash_handle *hh_head; + unsigned count; + + /* expand_mult is normally set to 0. 
In this situation, the max chain length + * threshold is enforced at its default value, HASH_BKT_CAPACITY_THRESH. (If + * the bucket's chain exceeds this length, bucket expansion is triggered). + * However, setting expand_mult to a non-zero value delays bucket expansion + * (that would be triggered by additions to this particular bucket) + * until its chain length reaches a *multiple* of HASH_BKT_CAPACITY_THRESH. + * (The multiplier is simply expand_mult+1). The whole idea of this + * multiplier is to reduce bucket expansions, since they are expensive, in + * situations where we know that a particular bucket tends to be overused. + * It is better to let its chain length grow to a longer yet-still-bounded + * value, than to do an O(n) bucket expansion too often. + */ + unsigned expand_mult; + +} UT_hash_bucket; + +/* random signature used only to find hash tables in external analysis */ +#define HASH_SIGNATURE 0xa0111fe1u +#define HASH_BLOOM_SIGNATURE 0xb12220f2u + +typedef struct UT_hash_table { + UT_hash_bucket *buckets; + unsigned num_buckets, log2_num_buckets; + unsigned num_items; + struct UT_hash_handle *tail; /* tail hh in app order, for fast append */ + ptrdiff_t hho; /* hash handle offset (byte pos of hash handle in element */ + + /* in an ideal situation (all buckets used equally), no bucket would have + * more than ceil(#items/#buckets) items. that's the ideal chain length. */ + unsigned ideal_chain_maxlen; + + /* nonideal_items is the number of items in the hash whose chain position + * exceeds the ideal chain maxlen. these items pay the penalty for an uneven + * hash distribution; reaching them in a chain traversal takes >ideal steps */ + unsigned nonideal_items; + + /* ineffective expands occur when a bucket doubling was performed, but + * afterward, more than half the items in the hash had nonideal chain + * positions. 
If this happens on two consecutive expansions we inhibit any + * further expansion, as it's not helping; this happens when the hash + * function isn't a good fit for the key domain. When expansion is inhibited + * the hash will still work, albeit no longer in constant time. */ + unsigned ineff_expands, noexpand; + + uint32_t signature; /* used only to find hash tables in external analysis */ +#ifdef HASH_BLOOM + uint32_t + bloom_sig; /* used only to test bloom exists in external analysis */ + uint8_t *bloom_bv; + uint8_t bloom_nbits; +#endif + +} UT_hash_table; + +typedef struct UT_hash_handle { + struct UT_hash_table *tbl; + void *prev; /* prev element in app order */ + void *next; /* next element in app order */ + struct UT_hash_handle *hh_prev; /* previous hh in bucket order */ + struct UT_hash_handle *hh_next; /* next hh in bucket order */ + const void *key; /* ptr to enclosing struct's key */ + unsigned keylen; /* enclosing struct's key len */ + unsigned hashv; /* result of hash-fcn(key) */ +} UT_hash_handle; + +#endif /* UTHASH_H */ diff --git a/src/uthash/utlist.h b/src/uthash/utlist.h new file mode 100644 index 000000000..ed4660b49 --- /dev/null +++ b/src/uthash/utlist.h @@ -0,0 +1,1148 @@ +/* +Copyright (c) 2007-2022, Troy D. Hanson https://troydhanson.github.io/uthash/ +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER +OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef UTLIST_H +#define UTLIST_H + +#define UTLIST_VERSION 2.3.0 + +#include + +/* + * This file contains macros to manipulate singly and doubly-linked lists. + * + * 1. LL_ macros: singly-linked lists. + * 2. DL_ macros: doubly-linked lists. + * 3. CDL_ macros: circular doubly-linked lists. + * + * To use singly-linked lists, your structure must have a "next" pointer. + * To use doubly-linked lists, your structure must "prev" and "next" pointers. + * Either way, the pointer to the head of the list must be initialized to NULL. + * + * ----------------.EXAMPLE ------------------------- + * struct item { + * int id; + * struct item *prev, *next; + * } + * + * struct item *list = NULL: + * + * int main() { + * struct item *item; + * ... allocate and populate item ... + * DL_APPEND(list, item); + * } + * -------------------------------------------------- + * + * For doubly-linked lists, the append and delete macros are O(1) + * For singly-linked lists, append and delete are O(n) but prepend is O(1) + * The sort macro is O(n log(n)) for all types of single/double/circular lists. + */ + +/* These macros use decltype or the earlier __typeof GNU extension. + As decltype is only available in newer compilers (VS2010 or gcc 4.3+ + when compiling c++ source) this code uses whatever method is needed + or, for VS2008 where neither is available, uses casting workarounds. 
*/ +#if !defined(LDECLTYPE) && !defined(NO_DECLTYPE) +#if defined(_MSC_VER) /* MS compiler */ +#if _MSC_VER >= 1600 && defined(__cplusplus) /* VS2010 or newer in C++ mode */ +#define LDECLTYPE(x) decltype(x) +#else /* VS2008 or older (or VS2010 in C mode) */ +#define NO_DECLTYPE +#endif +#elif defined(__MCST__) /* Elbrus C Compiler */ +#define LDECLTYPE(x) __typeof(x) +#elif defined(__BORLANDC__) || defined(__ICCARM__) || defined(__LCC__) || \ + defined(__WATCOMC__) +#define NO_DECLTYPE +#else /* GNU, Sun and other compilers */ +#define LDECLTYPE(x) __typeof(x) +#endif +#endif + +/* for VS2008 we use some workarounds to get around the lack of decltype, + * namely, we always reassign our tmp variable to the list head if we need + * to dereference its prev/next pointers, and save/restore the real head.*/ +#ifdef NO_DECLTYPE +#define IF_NO_DECLTYPE(x) x +#define LDECLTYPE(x) char * +#define UTLIST_SV(elt, list) \ + _tmp = (char *)(list); \ + { \ + char **_alias = (char **)&(list); \ + *_alias = (elt); \ + } +#define UTLIST_NEXT(elt, list, next) ((char *)((list)->next)) +#define UTLIST_NEXTASGN(elt, list, to, next) \ + { \ + char **_alias = (char **)&((list)->next); \ + *_alias = (char *)(to); \ + } +/* #define UTLIST_PREV(elt,list,prev) ((char*)((list)->prev)) */ +#define UTLIST_PREVASGN(elt, list, to, prev) \ + { \ + char **_alias = (char **)&((list)->prev); \ + *_alias = (char *)(to); \ + } +#define UTLIST_RS(list) \ + { \ + char **_alias = (char **)&(list); \ + *_alias = _tmp; \ + } +#define UTLIST_CASTASGN(a, b) \ + { \ + char **_alias = (char **)&(a); \ + *_alias = (char *)(b); \ + } +#else +#define IF_NO_DECLTYPE(x) +#define UTLIST_SV(elt, list) +#define UTLIST_NEXT(elt, list, next) ((elt)->next) +#define UTLIST_NEXTASGN(elt, list, to, next) ((elt)->next) = (to) +/* #define UTLIST_PREV(elt,list,prev) ((elt)->prev) */ +#define UTLIST_PREVASGN(elt, list, to, prev) ((elt)->prev) = (to) +#define UTLIST_RS(list) +#define UTLIST_CASTASGN(a, b) (a) = (b) +#endif + 
+/****************************************************************************** + * The sort macro is an adaptation of Simon Tatham's O(n log(n)) mergesort * + * Unwieldy variable names used here to avoid shadowing passed-in variables. * + *****************************************************************************/ +#define LL_SORT(list, cmp) LL_SORT2(list, cmp, next) + +#define LL_SORT2(list, cmp, next) \ + do { \ + LDECLTYPE(list) _ls_p; \ + LDECLTYPE(list) _ls_q; \ + LDECLTYPE(list) _ls_e; \ + LDECLTYPE(list) _ls_tail; \ + IF_NO_DECLTYPE(LDECLTYPE(list) _tmp;) \ + int _ls_insize, _ls_nmerges, _ls_psize, _ls_qsize, _ls_i, _ls_looping; \ + if (list) { \ + _ls_insize = 1; \ + _ls_looping = 1; \ + while (_ls_looping) { \ + UTLIST_CASTASGN(_ls_p, list); \ + (list) = NULL; \ + _ls_tail = NULL; \ + _ls_nmerges = 0; \ + while (_ls_p) { \ + _ls_nmerges++; \ + _ls_q = _ls_p; \ + _ls_psize = 0; \ + for (_ls_i = 0; _ls_i < _ls_insize; _ls_i++) { \ + _ls_psize++; \ + UTLIST_SV(_ls_q, list); \ + _ls_q = UTLIST_NEXT(_ls_q, list, next); \ + UTLIST_RS(list); \ + if (!_ls_q) \ + break; \ + } \ + _ls_qsize = _ls_insize; \ + while (_ls_psize > 0 || (_ls_qsize > 0 && _ls_q)) { \ + if (_ls_psize == 0) { \ + _ls_e = _ls_q; \ + UTLIST_SV(_ls_q, list); \ + _ls_q = UTLIST_NEXT(_ls_q, list, next); \ + UTLIST_RS(list); \ + _ls_qsize--; \ + } else if (_ls_qsize == 0 || !_ls_q) { \ + _ls_e = _ls_p; \ + UTLIST_SV(_ls_p, list); \ + _ls_p = UTLIST_NEXT(_ls_p, list, next); \ + UTLIST_RS(list); \ + _ls_psize--; \ + } else if (cmp(_ls_p, _ls_q) <= 0) { \ + _ls_e = _ls_p; \ + UTLIST_SV(_ls_p, list); \ + _ls_p = UTLIST_NEXT(_ls_p, list, next); \ + UTLIST_RS(list); \ + _ls_psize--; \ + } else { \ + _ls_e = _ls_q; \ + UTLIST_SV(_ls_q, list); \ + _ls_q = UTLIST_NEXT(_ls_q, list, next); \ + UTLIST_RS(list); \ + _ls_qsize--; \ + } \ + if (_ls_tail) { \ + UTLIST_SV(_ls_tail, list); \ + UTLIST_NEXTASGN(_ls_tail, list, _ls_e, next); \ + UTLIST_RS(list); \ + } else { \ + UTLIST_CASTASGN(list, _ls_e); \ + 
} \ + _ls_tail = _ls_e; \ + } \ + _ls_p = _ls_q; \ + } \ + if (_ls_tail) { \ + UTLIST_SV(_ls_tail, list); \ + UTLIST_NEXTASGN(_ls_tail, list, NULL, next); \ + UTLIST_RS(list); \ + } \ + if (_ls_nmerges <= 1) { \ + _ls_looping = 0; \ + } \ + _ls_insize *= 2; \ + } \ + } \ + } while (0) + +#define DL_SORT(list, cmp) DL_SORT2(list, cmp, prev, next) + +#define DL_SORT2(list, cmp, prev, next) \ + do { \ + LDECLTYPE(list) _ls_p; \ + LDECLTYPE(list) _ls_q; \ + LDECLTYPE(list) _ls_e; \ + LDECLTYPE(list) _ls_tail; \ + IF_NO_DECLTYPE(LDECLTYPE(list) _tmp;) \ + int _ls_insize, _ls_nmerges, _ls_psize, _ls_qsize, _ls_i, _ls_looping; \ + if (list) { \ + _ls_insize = 1; \ + _ls_looping = 1; \ + while (_ls_looping) { \ + UTLIST_CASTASGN(_ls_p, list); \ + (list) = NULL; \ + _ls_tail = NULL; \ + _ls_nmerges = 0; \ + while (_ls_p) { \ + _ls_nmerges++; \ + _ls_q = _ls_p; \ + _ls_psize = 0; \ + for (_ls_i = 0; _ls_i < _ls_insize; _ls_i++) { \ + _ls_psize++; \ + UTLIST_SV(_ls_q, list); \ + _ls_q = UTLIST_NEXT(_ls_q, list, next); \ + UTLIST_RS(list); \ + if (!_ls_q) \ + break; \ + } \ + _ls_qsize = _ls_insize; \ + while ((_ls_psize > 0) || ((_ls_qsize > 0) && _ls_q)) { \ + if (_ls_psize == 0) { \ + _ls_e = _ls_q; \ + UTLIST_SV(_ls_q, list); \ + _ls_q = UTLIST_NEXT(_ls_q, list, next); \ + UTLIST_RS(list); \ + _ls_qsize--; \ + } else if ((_ls_qsize == 0) || (!_ls_q)) { \ + _ls_e = _ls_p; \ + UTLIST_SV(_ls_p, list); \ + _ls_p = UTLIST_NEXT(_ls_p, list, next); \ + UTLIST_RS(list); \ + _ls_psize--; \ + } else if (cmp(_ls_p, _ls_q) <= 0) { \ + _ls_e = _ls_p; \ + UTLIST_SV(_ls_p, list); \ + _ls_p = UTLIST_NEXT(_ls_p, list, next); \ + UTLIST_RS(list); \ + _ls_psize--; \ + } else { \ + _ls_e = _ls_q; \ + UTLIST_SV(_ls_q, list); \ + _ls_q = UTLIST_NEXT(_ls_q, list, next); \ + UTLIST_RS(list); \ + _ls_qsize--; \ + } \ + if (_ls_tail) { \ + UTLIST_SV(_ls_tail, list); \ + UTLIST_NEXTASGN(_ls_tail, list, _ls_e, next); \ + UTLIST_RS(list); \ + } else { \ + UTLIST_CASTASGN(list, _ls_e); \ + } \ + 
UTLIST_SV(_ls_e, list); \ + UTLIST_PREVASGN(_ls_e, list, _ls_tail, prev); \ + UTLIST_RS(list); \ + _ls_tail = _ls_e; \ + } \ + _ls_p = _ls_q; \ + } \ + UTLIST_CASTASGN((list)->prev, _ls_tail); \ + UTLIST_SV(_ls_tail, list); \ + UTLIST_NEXTASGN(_ls_tail, list, NULL, next); \ + UTLIST_RS(list); \ + if (_ls_nmerges <= 1) { \ + _ls_looping = 0; \ + } \ + _ls_insize *= 2; \ + } \ + } \ + } while (0) + +#define CDL_SORT(list, cmp) CDL_SORT2(list, cmp, prev, next) + +#define CDL_SORT2(list, cmp, prev, next) \ + do { \ + LDECLTYPE(list) _ls_p; \ + LDECLTYPE(list) _ls_q; \ + LDECLTYPE(list) _ls_e; \ + LDECLTYPE(list) _ls_tail; \ + LDECLTYPE(list) _ls_oldhead; \ + LDECLTYPE(list) _tmp; \ + int _ls_insize, _ls_nmerges, _ls_psize, _ls_qsize, _ls_i, _ls_looping; \ + if (list) { \ + _ls_insize = 1; \ + _ls_looping = 1; \ + while (_ls_looping) { \ + UTLIST_CASTASGN(_ls_p, list); \ + UTLIST_CASTASGN(_ls_oldhead, list); \ + (list) = NULL; \ + _ls_tail = NULL; \ + _ls_nmerges = 0; \ + while (_ls_p) { \ + _ls_nmerges++; \ + _ls_q = _ls_p; \ + _ls_psize = 0; \ + for (_ls_i = 0; _ls_i < _ls_insize; _ls_i++) { \ + _ls_psize++; \ + UTLIST_SV(_ls_q, list); \ + if (UTLIST_NEXT(_ls_q, list, next) == _ls_oldhead) { \ + _ls_q = NULL; \ + } else { \ + _ls_q = UTLIST_NEXT(_ls_q, list, next); \ + } \ + UTLIST_RS(list); \ + if (!_ls_q) \ + break; \ + } \ + _ls_qsize = _ls_insize; \ + while (_ls_psize > 0 || (_ls_qsize > 0 && _ls_q)) { \ + if (_ls_psize == 0) { \ + _ls_e = _ls_q; \ + UTLIST_SV(_ls_q, list); \ + _ls_q = UTLIST_NEXT(_ls_q, list, next); \ + UTLIST_RS(list); \ + _ls_qsize--; \ + if (_ls_q == _ls_oldhead) { \ + _ls_q = NULL; \ + } \ + } else if (_ls_qsize == 0 || !_ls_q) { \ + _ls_e = _ls_p; \ + UTLIST_SV(_ls_p, list); \ + _ls_p = UTLIST_NEXT(_ls_p, list, next); \ + UTLIST_RS(list); \ + _ls_psize--; \ + if (_ls_p == _ls_oldhead) { \ + _ls_p = NULL; \ + } \ + } else if (cmp(_ls_p, _ls_q) <= 0) { \ + _ls_e = _ls_p; \ + UTLIST_SV(_ls_p, list); \ + _ls_p = UTLIST_NEXT(_ls_p, list, next); \ 
+ UTLIST_RS(list); \ + _ls_psize--; \ + if (_ls_p == _ls_oldhead) { \ + _ls_p = NULL; \ + } \ + } else { \ + _ls_e = _ls_q; \ + UTLIST_SV(_ls_q, list); \ + _ls_q = UTLIST_NEXT(_ls_q, list, next); \ + UTLIST_RS(list); \ + _ls_qsize--; \ + if (_ls_q == _ls_oldhead) { \ + _ls_q = NULL; \ + } \ + } \ + if (_ls_tail) { \ + UTLIST_SV(_ls_tail, list); \ + UTLIST_NEXTASGN(_ls_tail, list, _ls_e, next); \ + UTLIST_RS(list); \ + } else { \ + UTLIST_CASTASGN(list, _ls_e); \ + } \ + UTLIST_SV(_ls_e, list); \ + UTLIST_PREVASGN(_ls_e, list, _ls_tail, prev); \ + UTLIST_RS(list); \ + _ls_tail = _ls_e; \ + } \ + _ls_p = _ls_q; \ + } \ + UTLIST_CASTASGN((list)->prev, _ls_tail); \ + UTLIST_CASTASGN(_tmp, list); \ + UTLIST_SV(_ls_tail, list); \ + UTLIST_NEXTASGN(_ls_tail, list, _tmp, next); \ + UTLIST_RS(list); \ + if (_ls_nmerges <= 1) { \ + _ls_looping = 0; \ + } \ + _ls_insize *= 2; \ + } \ + } \ + } while (0) + +/****************************************************************************** + * singly linked list macros (non-circular) * + *****************************************************************************/ +#define LL_PREPEND(head, add) LL_PREPEND2(head, add, next) + +#define LL_PREPEND2(head, add, next) \ + do { \ + (add)->next = (head); \ + (head) = (add); \ + } while (0) + +#define LL_CONCAT(head1, head2) LL_CONCAT2(head1, head2, next) + +#define LL_CONCAT2(head1, head2, next) \ + do { \ + LDECLTYPE(head1) _tmp; \ + if (head1) { \ + _tmp = (head1); \ + while (_tmp->next) { \ + _tmp = _tmp->next; \ + } \ + _tmp->next = (head2); \ + } else { \ + (head1) = (head2); \ + } \ + } while (0) + +#define LL_APPEND(head, add) LL_APPEND2(head, add, next) + +#define LL_APPEND2(head, add, next) \ + do { \ + LDECLTYPE(head) _tmp; \ + (add)->next = NULL; \ + if (head) { \ + _tmp = (head); \ + while (_tmp->next) { \ + _tmp = _tmp->next; \ + } \ + _tmp->next = (add); \ + } else { \ + (head) = (add); \ + } \ + } while (0) + +#define LL_INSERT_INORDER(head, add, cmp) \ + 
LL_INSERT_INORDER2(head, add, cmp, next) + +#define LL_INSERT_INORDER2(head, add, cmp, next) \ + do { \ + LDECLTYPE(head) _tmp; \ + if (head) { \ + LL_LOWER_BOUND2(head, _tmp, add, cmp, next); \ + LL_APPEND_ELEM2(head, _tmp, add, next); \ + } else { \ + (head) = (add); \ + (head)->next = NULL; \ + } \ + } while (0) + +#define LL_LOWER_BOUND(head, elt, like, cmp) \ + LL_LOWER_BOUND2(head, elt, like, cmp, next) + +#define LL_LOWER_BOUND2(head, elt, like, cmp, next) \ + do { \ + if ((head) == NULL || (cmp(head, like)) >= 0) { \ + (elt) = NULL; \ + } else { \ + for ((elt) = (head); (elt)->next != NULL; (elt) = (elt)->next) { \ + if (cmp((elt)->next, like) >= 0) { \ + break; \ + } \ + } \ + } \ + } while (0) + +#define LL_DELETE(head, del) LL_DELETE2(head, del, next) + +#define LL_DELETE2(head, del, next) \ + do { \ + LDECLTYPE(head) _tmp; \ + if ((head) == (del)) { \ + (head) = (head)->next; \ + } else { \ + _tmp = (head); \ + while (_tmp->next && (_tmp->next != (del))) { \ + _tmp = _tmp->next; \ + } \ + if (_tmp->next) { \ + _tmp->next = (del)->next; \ + } \ + } \ + } while (0) + +#define LL_COUNT(head, el, counter) LL_COUNT2(head, el, counter, next) + +#define LL_COUNT2(head, el, counter, next) \ + do { \ + (counter) = 0; \ + LL_FOREACH2(head, el, next) { ++(counter); } \ + } while (0) + +#define LL_FOREACH(head, el) LL_FOREACH2(head, el, next) + +#define LL_FOREACH2(head, el, next) for ((el) = (head); el; (el) = (el)->next) + +#define LL_FOREACH_SAFE(head, el, tmp) LL_FOREACH_SAFE2(head, el, tmp, next) + +#define LL_FOREACH_SAFE2(head, el, tmp, next) \ + for ((el) = (head); (el) && ((tmp) = (el)->next, 1); (el) = (tmp)) + +#define LL_SEARCH_SCALAR(head, out, field, val) \ + LL_SEARCH_SCALAR2(head, out, field, val, next) + +#define LL_SEARCH_SCALAR2(head, out, field, val, next) \ + do { \ + LL_FOREACH2(head, out, next) { \ + if ((out)->field == (val)) \ + break; \ + } \ + } while (0) + +#define LL_SEARCH(head, out, elt, cmp) LL_SEARCH2(head, out, elt, cmp, next) + 
+#define LL_SEARCH2(head, out, elt, cmp, next) \ + do { \ + LL_FOREACH2(head, out, next) { \ + if ((cmp(out, elt)) == 0) \ + break; \ + } \ + } while (0) + +#define LL_REPLACE_ELEM2(head, el, add, next) \ + do { \ + LDECLTYPE(head) _tmp; \ + assert((head) != NULL); \ + assert((el) != NULL); \ + assert((add) != NULL); \ + (add)->next = (el)->next; \ + if ((head) == (el)) { \ + (head) = (add); \ + } else { \ + _tmp = (head); \ + while (_tmp->next && (_tmp->next != (el))) { \ + _tmp = _tmp->next; \ + } \ + if (_tmp->next) { \ + _tmp->next = (add); \ + } \ + } \ + } while (0) + +#define LL_REPLACE_ELEM(head, el, add) LL_REPLACE_ELEM2(head, el, add, next) + +#define LL_PREPEND_ELEM2(head, el, add, next) \ + do { \ + if (el) { \ + LDECLTYPE(head) _tmp; \ + assert((head) != NULL); \ + assert((add) != NULL); \ + (add)->next = (el); \ + if ((head) == (el)) { \ + (head) = (add); \ + } else { \ + _tmp = (head); \ + while (_tmp->next && (_tmp->next != (el))) { \ + _tmp = _tmp->next; \ + } \ + if (_tmp->next) { \ + _tmp->next = (add); \ + } \ + } \ + } else { \ + LL_APPEND2(head, add, next); \ + } \ + } while (0) + +#define LL_PREPEND_ELEM(head, el, add) LL_PREPEND_ELEM2(head, el, add, next) + +#define LL_APPEND_ELEM2(head, el, add, next) \ + do { \ + if (el) { \ + assert((head) != NULL); \ + assert((add) != NULL); \ + (add)->next = (el)->next; \ + (el)->next = (add); \ + } else { \ + LL_PREPEND2(head, add, next); \ + } \ + } while (0) + +#define LL_APPEND_ELEM(head, el, add) LL_APPEND_ELEM2(head, el, add, next) + +#ifdef NO_DECLTYPE +/* Here are VS2008 / NO_DECLTYPE replacements for a few functions */ + +#undef LL_CONCAT2 +#define LL_CONCAT2(head1, head2, next) \ + do { \ + char *_tmp; \ + if (head1) { \ + _tmp = (char *)(head1); \ + while ((head1)->next) { \ + (head1) = (head1)->next; \ + } \ + (head1)->next = (head2); \ + UTLIST_RS(head1); \ + } else { \ + (head1) = (head2); \ + } \ + } while (0) + +#undef LL_APPEND2 +#define LL_APPEND2(head, add, next) \ + do { \ + if 
(head) { \ + (add)->next = head; /* use add->next as a temp variable */ \ + while ((add)->next->next) { \ + (add)->next = (add)->next->next; \ + } \ + (add)->next->next = (add); \ + } else { \ + (head) = (add); \ + } \ + (add)->next = NULL; \ + } while (0) + +#undef LL_INSERT_INORDER2 +#define LL_INSERT_INORDER2(head, add, cmp, next) \ + do { \ + if ((head) == NULL || (cmp(head, add)) >= 0) { \ + (add)->next = (head); \ + (head) = (add); \ + } else { \ + char *_tmp = (char *)(head); \ + while ((head)->next != NULL && (cmp((head)->next, add)) < 0) { \ + (head) = (head)->next; \ + } \ + (add)->next = (head)->next; \ + (head)->next = (add); \ + UTLIST_RS(head); \ + } \ + } while (0) + +#undef LL_DELETE2 +#define LL_DELETE2(head, del, next) \ + do { \ + if ((head) == (del)) { \ + (head) = (head)->next; \ + } else { \ + char *_tmp = (char *)(head); \ + while ((head)->next && ((head)->next != (del))) { \ + (head) = (head)->next; \ + } \ + if ((head)->next) { \ + (head)->next = ((del)->next); \ + } \ + UTLIST_RS(head); \ + } \ + } while (0) + +#undef LL_REPLACE_ELEM2 +#define LL_REPLACE_ELEM2(head, el, add, next) \ + do { \ + assert((head) != NULL); \ + assert((el) != NULL); \ + assert((add) != NULL); \ + if ((head) == (el)) { \ + (head) = (add); \ + } else { \ + (add)->next = head; \ + while ((add)->next->next && ((add)->next->next != (el))) { \ + (add)->next = (add)->next->next; \ + } \ + if ((add)->next->next) { \ + (add)->next->next = (add); \ + } \ + } \ + (add)->next = (el)->next; \ + } while (0) + +#undef LL_PREPEND_ELEM2 +#define LL_PREPEND_ELEM2(head, el, add, next) \ + do { \ + if (el) { \ + assert((head) != NULL); \ + assert((add) != NULL); \ + if ((head) == (el)) { \ + (head) = (add); \ + } else { \ + (add)->next = (head); \ + while ((add)->next->next && ((add)->next->next != (el))) { \ + (add)->next = (add)->next->next; \ + } \ + if ((add)->next->next) { \ + (add)->next->next = (add); \ + } \ + } \ + (add)->next = (el); \ + } else { \ + LL_APPEND2(head, add, 
next); \ + } \ + } while (0) + +#endif /* NO_DECLTYPE */ + +/****************************************************************************** + * doubly linked list macros (non-circular) * + *****************************************************************************/ +#define DL_PREPEND(head, add) DL_PREPEND2(head, add, prev, next) + +#define DL_PREPEND2(head, add, prev, next) \ + do { \ + (add)->next = (head); \ + if (head) { \ + (add)->prev = (head)->prev; \ + (head)->prev = (add); \ + } else { \ + (add)->prev = (add); \ + } \ + (head) = (add); \ + } while (0) + +#define DL_APPEND(head, add) DL_APPEND2(head, add, prev, next) + +#define DL_APPEND2(head, add, prev, next) \ + do { \ + if (head) { \ + (add)->prev = (head)->prev; \ + (head)->prev->next = (add); \ + (head)->prev = (add); \ + (add)->next = NULL; \ + } else { \ + (head) = (add); \ + (head)->prev = (head); \ + (head)->next = NULL; \ + } \ + } while (0) + +#define DL_INSERT_INORDER(head, add, cmp) \ + DL_INSERT_INORDER2(head, add, cmp, prev, next) + +#define DL_INSERT_INORDER2(head, add, cmp, prev, next) \ + do { \ + LDECLTYPE(head) _tmp; \ + if (head) { \ + DL_LOWER_BOUND2(head, _tmp, add, cmp, next); \ + DL_APPEND_ELEM2(head, _tmp, add, prev, next); \ + } else { \ + (head) = (add); \ + (head)->prev = (head); \ + (head)->next = NULL; \ + } \ + } while (0) + +#define DL_LOWER_BOUND(head, elt, like, cmp) \ + DL_LOWER_BOUND2(head, elt, like, cmp, next) + +#define DL_LOWER_BOUND2(head, elt, like, cmp, next) \ + do { \ + if ((head) == NULL || (cmp(head, like)) >= 0) { \ + (elt) = NULL; \ + } else { \ + for ((elt) = (head); (elt)->next != NULL; (elt) = (elt)->next) { \ + if ((cmp((elt)->next, like)) >= 0) { \ + break; \ + } \ + } \ + } \ + } while (0) + +#define DL_CONCAT(head1, head2) DL_CONCAT2(head1, head2, prev, next) + +#define DL_CONCAT2(head1, head2, prev, next) \ + do { \ + LDECLTYPE(head1) _tmp; \ + if (head2) { \ + if (head1) { \ + UTLIST_CASTASGN(_tmp, (head2)->prev); \ + (head2)->prev = 
(head1)->prev; \ + (head1)->prev->next = (head2); \ + UTLIST_CASTASGN((head1)->prev, _tmp); \ + } else { \ + (head1) = (head2); \ + } \ + } \ + } while (0) + +#define DL_DELETE(head, del) DL_DELETE2(head, del, prev, next) + +#define DL_DELETE2(head, del, prev, next) \ + do { \ + assert((head) != NULL); \ + assert((del)->prev != NULL); \ + if ((del)->prev == (del)) { \ + (head) = NULL; \ + } else if ((del) == (head)) { \ + assert((del)->next != NULL); \ + (del)->next->prev = (del)->prev; \ + (head) = (del)->next; \ + } else { \ + (del)->prev->next = (del)->next; \ + if ((del)->next) { \ + (del)->next->prev = (del)->prev; \ + } else { \ + (head)->prev = (del)->prev; \ + } \ + } \ + } while (0) + +#define DL_COUNT(head, el, counter) DL_COUNT2(head, el, counter, next) + +#define DL_COUNT2(head, el, counter, next) \ + do { \ + (counter) = 0; \ + DL_FOREACH2(head, el, next) { ++(counter); } \ + } while (0) + +#define DL_FOREACH(head, el) DL_FOREACH2(head, el, next) + +#define DL_FOREACH2(head, el, next) for ((el) = (head); el; (el) = (el)->next) + +/* this version is safe for deleting the elements during iteration */ +#define DL_FOREACH_SAFE(head, el, tmp) DL_FOREACH_SAFE2(head, el, tmp, next) + +#define DL_FOREACH_SAFE2(head, el, tmp, next) \ + for ((el) = (head); (el) && ((tmp) = (el)->next, 1); (el) = (tmp)) + +/* these are identical to their singly-linked list counterparts */ +#define DL_SEARCH_SCALAR LL_SEARCH_SCALAR +#define DL_SEARCH LL_SEARCH +#define DL_SEARCH_SCALAR2 LL_SEARCH_SCALAR2 +#define DL_SEARCH2 LL_SEARCH2 + +#define DL_REPLACE_ELEM2(head, el, add, prev, next) \ + do { \ + assert((head) != NULL); \ + assert((el) != NULL); \ + assert((add) != NULL); \ + if ((head) == (el)) { \ + (head) = (add); \ + (add)->next = (el)->next; \ + if ((el)->next == NULL) { \ + (add)->prev = (add); \ + } else { \ + (add)->prev = (el)->prev; \ + (add)->next->prev = (add); \ + } \ + } else { \ + (add)->next = (el)->next; \ + (add)->prev = (el)->prev; \ + (add)->prev->next = 
(add); \ + if ((el)->next == NULL) { \ + (head)->prev = (add); \ + } else { \ + (add)->next->prev = (add); \ + } \ + } \ + } while (0) + +#define DL_REPLACE_ELEM(head, el, add) \ + DL_REPLACE_ELEM2(head, el, add, prev, next) + +#define DL_PREPEND_ELEM2(head, el, add, prev, next) \ + do { \ + if (el) { \ + assert((head) != NULL); \ + assert((add) != NULL); \ + (add)->next = (el); \ + (add)->prev = (el)->prev; \ + (el)->prev = (add); \ + if ((head) == (el)) { \ + (head) = (add); \ + } else { \ + (add)->prev->next = (add); \ + } \ + } else { \ + DL_APPEND2(head, add, prev, next); \ + } \ + } while (0) + +#define DL_PREPEND_ELEM(head, el, add) \ + DL_PREPEND_ELEM2(head, el, add, prev, next) + +#define DL_APPEND_ELEM2(head, el, add, prev, next) \ + do { \ + if (el) { \ + assert((head) != NULL); \ + assert((add) != NULL); \ + (add)->next = (el)->next; \ + (add)->prev = (el); \ + (el)->next = (add); \ + if ((add)->next) { \ + (add)->next->prev = (add); \ + } else { \ + (head)->prev = (add); \ + } \ + } else { \ + DL_PREPEND2(head, add, prev, next); \ + } \ + } while (0) + +#define DL_APPEND_ELEM(head, el, add) DL_APPEND_ELEM2(head, el, add, prev, next) + +#ifdef NO_DECLTYPE +/* Here are VS2008 / NO_DECLTYPE replacements for a few functions */ + +#undef DL_INSERT_INORDER2 +#define DL_INSERT_INORDER2(head, add, cmp, prev, next) \ + do { \ + if ((head) == NULL) { \ + (add)->prev = (add); \ + (add)->next = NULL; \ + (head) = (add); \ + } else if ((cmp(head, add)) >= 0) { \ + (add)->prev = (head)->prev; \ + (add)->next = (head); \ + (head)->prev = (add); \ + (head) = (add); \ + } else { \ + char *_tmp = (char *)(head); \ + while ((head)->next && (cmp((head)->next, add)) < 0) { \ + (head) = (head)->next; \ + } \ + (add)->prev = (head); \ + (add)->next = (head)->next; \ + (head)->next = (add); \ + UTLIST_RS(head); \ + if ((add)->next) { \ + (add)->next->prev = (add); \ + } else { \ + (head)->prev = (add); \ + } \ + } \ + } while (0) +#endif /* NO_DECLTYPE */ + 
+/****************************************************************************** + * circular doubly linked list macros * + *****************************************************************************/ +#define CDL_APPEND(head, add) CDL_APPEND2(head, add, prev, next) + +#define CDL_APPEND2(head, add, prev, next) \ + do { \ + if (head) { \ + (add)->prev = (head)->prev; \ + (add)->next = (head); \ + (head)->prev = (add); \ + (add)->prev->next = (add); \ + } else { \ + (add)->prev = (add); \ + (add)->next = (add); \ + (head) = (add); \ + } \ + } while (0) + +#define CDL_PREPEND(head, add) CDL_PREPEND2(head, add, prev, next) + +#define CDL_PREPEND2(head, add, prev, next) \ + do { \ + if (head) { \ + (add)->prev = (head)->prev; \ + (add)->next = (head); \ + (head)->prev = (add); \ + (add)->prev->next = (add); \ + } else { \ + (add)->prev = (add); \ + (add)->next = (add); \ + } \ + (head) = (add); \ + } while (0) + +#define CDL_INSERT_INORDER(head, add, cmp) \ + CDL_INSERT_INORDER2(head, add, cmp, prev, next) + +#define CDL_INSERT_INORDER2(head, add, cmp, prev, next) \ + do { \ + LDECLTYPE(head) _tmp; \ + if (head) { \ + CDL_LOWER_BOUND2(head, _tmp, add, cmp, next); \ + CDL_APPEND_ELEM2(head, _tmp, add, prev, next); \ + } else { \ + (head) = (add); \ + (head)->next = (head); \ + (head)->prev = (head); \ + } \ + } while (0) + +#define CDL_LOWER_BOUND(head, elt, like, cmp) \ + CDL_LOWER_BOUND2(head, elt, like, cmp, next) + +#define CDL_LOWER_BOUND2(head, elt, like, cmp, next) \ + do { \ + if ((head) == NULL || (cmp(head, like)) >= 0) { \ + (elt) = NULL; \ + } else { \ + for ((elt) = (head); (elt)->next != (head); (elt) = (elt)->next) { \ + if ((cmp((elt)->next, like)) >= 0) { \ + break; \ + } \ + } \ + } \ + } while (0) + +#define CDL_DELETE(head, del) CDL_DELETE2(head, del, prev, next) + +#define CDL_DELETE2(head, del, prev, next) \ + do { \ + if (((head) == (del)) && ((head)->next == (head))) { \ + (head) = NULL; \ + } else { \ + (del)->next->prev = (del)->prev; \ + 
(del)->prev->next = (del)->next; \ + if ((del) == (head)) \ + (head) = (del)->next; \ + } \ + } while (0) + +#define CDL_COUNT(head, el, counter) CDL_COUNT2(head, el, counter, next) + +#define CDL_COUNT2(head, el, counter, next) \ + do { \ + (counter) = 0; \ + CDL_FOREACH2(head, el, next) { ++(counter); } \ + } while (0) + +#define CDL_FOREACH(head, el) CDL_FOREACH2(head, el, next) + +#define CDL_FOREACH2(head, el, next) \ + for ((el) = (head); el; (el) = (((el)->next == (head)) ? NULL : (el)->next)) + +#define CDL_FOREACH_SAFE(head, el, tmp1, tmp2) \ + CDL_FOREACH_SAFE2(head, el, tmp1, tmp2, prev, next) + +#define CDL_FOREACH_SAFE2(head, el, tmp1, tmp2, prev, next) \ + for ((el) = (head), (tmp1) = (head) ? (head)->prev : NULL; \ + (el) && ((tmp2) = (el)->next, 1); \ + (el) = ((el) == (tmp1) ? NULL : (tmp2))) + +#define CDL_SEARCH_SCALAR(head, out, field, val) \ + CDL_SEARCH_SCALAR2(head, out, field, val, next) + +#define CDL_SEARCH_SCALAR2(head, out, field, val, next) \ + do { \ + CDL_FOREACH2(head, out, next) { \ + if ((out)->field == (val)) \ + break; \ + } \ + } while (0) + +#define CDL_SEARCH(head, out, elt, cmp) CDL_SEARCH2(head, out, elt, cmp, next) + +#define CDL_SEARCH2(head, out, elt, cmp, next) \ + do { \ + CDL_FOREACH2(head, out, next) { \ + if ((cmp(out, elt)) == 0) \ + break; \ + } \ + } while (0) + +#define CDL_REPLACE_ELEM2(head, el, add, prev, next) \ + do { \ + assert((head) != NULL); \ + assert((el) != NULL); \ + assert((add) != NULL); \ + if ((el)->next == (el)) { \ + (add)->next = (add); \ + (add)->prev = (add); \ + (head) = (add); \ + } else { \ + (add)->next = (el)->next; \ + (add)->prev = (el)->prev; \ + (add)->next->prev = (add); \ + (add)->prev->next = (add); \ + if ((head) == (el)) { \ + (head) = (add); \ + } \ + } \ + } while (0) + +#define CDL_REPLACE_ELEM(head, el, add) \ + CDL_REPLACE_ELEM2(head, el, add, prev, next) + +#define CDL_PREPEND_ELEM2(head, el, add, prev, next) \ + do { \ + if (el) { \ + assert((head) != NULL); \ + 
assert((add) != NULL); \ + (add)->next = (el); \ + (add)->prev = (el)->prev; \ + (el)->prev = (add); \ + (add)->prev->next = (add); \ + if ((head) == (el)) { \ + (head) = (add); \ + } \ + } else { \ + CDL_APPEND2(head, add, prev, next); \ + } \ + } while (0) + +#define CDL_PREPEND_ELEM(head, el, add) \ + CDL_PREPEND_ELEM2(head, el, add, prev, next) + +#define CDL_APPEND_ELEM2(head, el, add, prev, next) \ + do { \ + if (el) { \ + assert((head) != NULL); \ + assert((add) != NULL); \ + (add)->next = (el)->next; \ + (add)->prev = (el); \ + (el)->next = (add); \ + (add)->next->prev = (add); \ + } else { \ + CDL_PREPEND2(head, add, prev, next); \ + } \ + } while (0) + +#define CDL_APPEND_ELEM(head, el, add) \ + CDL_APPEND_ELEM2(head, el, add, prev, next) + +#ifdef NO_DECLTYPE +/* Here are VS2008 / NO_DECLTYPE replacements for a few functions */ + +#undef CDL_INSERT_INORDER2 +#define CDL_INSERT_INORDER2(head, add, cmp, prev, next) \ + do { \ + if ((head) == NULL) { \ + (add)->prev = (add); \ + (add)->next = (add); \ + (head) = (add); \ + } else if ((cmp(head, add)) >= 0) { \ + (add)->prev = (head)->prev; \ + (add)->next = (head); \ + (add)->prev->next = (add); \ + (head)->prev = (add); \ + (head) = (add); \ + } else { \ + char *_tmp = (char *)(head); \ + while ((char *)(head)->next != _tmp && \ + (cmp((head)->next, add)) < 0) { \ + (head) = (head)->next; \ + } \ + (add)->prev = (head); \ + (add)->next = (head)->next; \ + (add)->next->prev = (add); \ + (head)->next = (add); \ + UTLIST_RS(head); \ + } \ + } while (0) +#endif /* NO_DECLTYPE */ + +#endif /* UTLIST_H */ diff --git a/src/utils/CMakeLists.txt b/src/utils/CMakeLists.txt index c7a285ce2..a0bff39fd 100644 --- a/src/utils/CMakeLists.txt +++ b/src/utils/CMakeLists.txt @@ -10,6 +10,10 @@ set(UMF_UTILS_SOURCES_COMMON utils_common.c utils_log.c utils_load_library.c) set(UMF_UTILS_SOURCES_POSIX utils_posix_common.c utils_posix_concurrency.c utils_posix_math.c) +set(UMF_UTILS_SOURCES_LINUX utils_linux_common.c) + 
+set(UMF_UTILS_SOURCES_MACOSX utils_macosx_common.c) + set(UMF_UTILS_SOURCES_WINDOWS utils_windows_common.c utils_windows_concurrency.c utils_windows_math.c) @@ -33,6 +37,13 @@ if(LINUX OR MACOSX) set(UMF_UTILS_SOURCES ${UMF_UTILS_SOURCES_COMMON} ${UMF_UTILS_SOURCES_POSIX}) set(UMF_UTILS_LIBS dl) + + if(LINUX) + set(UMF_UTILS_SOURCES ${UMF_UTILS_SOURCES} ${UMF_UTILS_SOURCES_LINUX}) + set(UMF_UTILS_LIBS ${UMF_UTILS_LIBS} rt) # librt for shm_open() + elseif(MACOSX) + set(UMF_UTILS_SOURCES ${UMF_UTILS_SOURCES} ${UMF_UTILS_SOURCES_MACOSX}) + endif() elseif(WINDOWS) set(UMF_UTILS_SOURCES ${UMF_UTILS_SOURCES_COMMON} ${UMF_UTILS_SOURCES_WINDOWS}) diff --git a/src/utils/utils_common.c b/src/utils/utils_common.c index fc6bd58ef..bffc9f355 100644 --- a/src/utils/utils_common.c +++ b/src/utils/utils_common.c @@ -12,41 +12,54 @@ #include "utils_assert.h" #include "utils_common.h" -// align a pointer and a size -void util_align_ptr_size(void **ptr, size_t *size, size_t alignment) { +// align a pointer up and a size down +void utils_align_ptr_up_size_down(void **ptr, size_t *size, size_t alignment) { uintptr_t p = (uintptr_t)*ptr; size_t s = *size; - // align pointer to 'alignment' bytes and adjust the size + // align the pointer up to 'alignment' bytes and adjust the size down size_t rest = p & (alignment - 1); if (rest) { - p += alignment - rest; + p = ALIGN_UP(p, alignment); s -= alignment - rest; } - ASSERT((p & (alignment - 1)) == 0); - ASSERT((s & (alignment - 1)) == 0); + ASSERT(IS_ALIGNED(p, alignment)); + ASSERT(IS_ALIGNED(s, alignment)); *ptr = (void *)p; *size = s; } -int util_env_var_has_str(const char *envvar, const char *str) { - char *value = getenv(envvar); - if (value && strstr(value, str)) { - return 1; +// align a pointer down and a size up (for mmap()/munmap()) +void utils_align_ptr_down_size_up(void **ptr, size_t *size, size_t alignment) { + uintptr_t p = (uintptr_t)*ptr; + size_t s = *size; + + // align the pointer down to 'alignment' bytes and adjust the 
size up + size_t rest = p & (alignment - 1); + if (rest) { + p = ALIGN_DOWN(p, alignment); + s += rest; } - return 0; + // align the size up to 'alignment' bytes + s = ALIGN_UP(s, alignment); + + ASSERT(IS_ALIGNED(p, alignment)); + ASSERT(IS_ALIGNED(s, alignment)); + + *ptr = (void *)p; + *size = s; } -// check if we are running in the proxy library -int util_is_running_in_proxy_lib(void) { - return util_env_var_has_str("LD_PRELOAD", "libumf_proxy.so"); +char *utils_env_var_get_str(const char *envvar, const char *str) { + char *value = getenv(envvar); + return value ? strstr(value, str) : NULL; } -const char *util_parse_var(const char *var, const char *option, - const char **extraArg) { +const char *utils_parse_var(const char *var, const char *option, + const char **extraArg) { const char *found = strstr(var, option); // ensure that found string is first on list or it's a separating semicolon if (!found) { @@ -75,3 +88,44 @@ const char *util_parse_var(const char *var, const char *option, return found; } + +int utils_copy_path(const char *in_path, char out_path[], size_t path_max) { + // (- 1) because there should be a room for the terminating null byte ('\0') + size_t max_len = path_max - 1; + + if (strlen(in_path) > max_len) { + LOG_ERR("path of the %s file is longer than %zu bytes", in_path, + max_len); + return -1; + } + + strncpy(out_path, in_path, max_len); + out_path[path_max - 1] = '\0'; // the terminating null byte + + return 0; +} + +umf_result_t utils_translate_flags(unsigned in_flags, unsigned max, + umf_result_t (*translate_flag)(unsigned, + unsigned *), + unsigned *out_flags) { + unsigned out_f = 0; + for (unsigned n = 1; n < max; n <<= 1) { + if (in_flags & n) { + unsigned flag; + umf_result_t result = translate_flag(n, &flag); + if (result != UMF_RESULT_SUCCESS) { + return result; + } + out_f |= flag; + in_flags &= ~n; // clear this bit + } + } + + if (in_flags != 0) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + *out_flags = out_f; + return 
UMF_RESULT_SUCCESS; +} diff --git a/src/utils/utils_common.h b/src/utils/utils_common.h index 28579eb00..9ef2b3cf1 100644 --- a/src/utils/utils_common.h +++ b/src/utils/utils_common.h @@ -11,15 +11,24 @@ #define UMF_COMMON_H 1 #include +#include #include #include #include +#include #ifdef __cplusplus extern "C" { #endif +typedef enum umf_purge_advise_t { + UMF_PURGE_LAZY = 1, + UMF_PURGE_FORCE, + + UMF_PURGE_MAX, // must be the last one +} umf_purge_advise_t; + #define DO_WHILE_EMPTY \ do { \ } while (0) @@ -29,8 +38,18 @@ extern "C" { expression; \ } while (0) +#define IS_ALIGNED(value, align) \ + ((align == 0 || (((value) & ((align)-1)) == 0))) +#define IS_NOT_ALIGNED(value, align) \ + ((align != 0 && (((value) & ((align)-1)) != 0))) #define ALIGN_UP(value, align) (((value) + (align)-1) & ~((align)-1)) +#define ALIGN_UP_SAFE(value, align) \ + (((align) == 0) \ + ? (value) \ + : (((value) + (align)-1) < (value) ? 0 : ALIGN_UP((value), (align)))) #define ALIGN_DOWN(value, align) ((value) & ~((align)-1)) +#define ASSERT_IS_ALIGNED(value, align) \ + DO_WHILE_EXPRS(assert(IS_ALIGNED(value, align))) #define VALGRIND_ANNOTATE_NEW_MEMORY(p, s) DO_WHILE_EMPTY #define VALGRIND_HG_DRD_DISABLE_CHECKING(p, s) DO_WHILE_EMPTY @@ -45,10 +64,28 @@ extern "C" { #endif /* _WIN32 */ +// get the address of the given string in the environment variable (or NULL) +char *utils_env_var_get_str(const char *envvar, const char *str); + // Check if the environment variable contains the given string. -int util_env_var_has_str(const char *envvar, const char *str); +static inline int utils_env_var_has_str(const char *envvar, const char *str) { + return utils_env_var_get_str(envvar, str) ? 1 : 0; +} + +// check if we are running in the proxy library +static inline int utils_is_running_in_proxy_lib(void) { + return utils_env_var_get_str("LD_PRELOAD", "libumf_proxy.so") ? 
1 : 0; +} + +// check if we are running in the proxy library with a size threshold +static inline int utils_is_running_in_proxy_lib_with_size_threshold(void) { + return (utils_env_var_get_str("LD_PRELOAD", "libumf_proxy.so") && + utils_env_var_get_str("UMF_PROXY", "size.threshold=")) + ? 1 + : 0; +} -// util_parse_var - Parses var for a prefix, +// utils_parse_var - Parses var for a prefix, // optionally identifying a following argument. // Parameters: // - var: String to parse in "option1;option2,arg2;..." format, with options @@ -62,16 +99,16 @@ int util_env_var_has_str(const char *envvar, const char *str); // // IMPORTANT: Both extraArg and return values are pointers within var, // and are not null-terminated. -const char *util_parse_var(const char *var, const char *option, - const char **extraArg); +const char *utils_parse_var(const char *var, const char *option, + const char **extraArg); -// check if we are running in the proxy library -int util_is_running_in_proxy_lib(void); +size_t utils_get_page_size(void); -size_t util_get_page_size(void); +// align a pointer up and a size down +void utils_align_ptr_up_size_down(void **ptr, size_t *size, size_t alignment); -// align a pointer and a size -void util_align_ptr_size(void **ptr, size_t *size, size_t alignment); +// align a pointer down and a size up (for mmap()/munmap()) +void utils_align_ptr_down_size_up(void **ptr, size_t *size, size_t alignment); // get the current process ID int utils_getpid(void); @@ -82,9 +119,63 @@ int utils_gettid(void); // close file descriptor int utils_close_fd(int fd); +umf_result_t utils_errno_to_umf_result(int err); + // obtain a duplicate of another process's file descriptor umf_result_t utils_duplicate_fd(int pid, int fd_in, int *fd_out); +int utils_copy_path(const char *in_path, char out_path[], size_t path_max); + +umf_result_t utils_translate_flags(unsigned in_flags, unsigned max, + umf_result_t (*translate_flag)(unsigned, + unsigned *), + unsigned *out_flags); + 
+umf_result_t utils_translate_mem_protection_flags(unsigned in_protection, + unsigned *out_protection); + +int utils_translate_purge_advise(umf_purge_advise_t advise); + +umf_result_t +utils_translate_mem_visibility_flag(umf_memory_visibility_t in_flag, + unsigned *out_flag); + +int utils_create_anonymous_fd(void); + +int utils_shm_create(const char *shm_name, size_t size); + +int utils_shm_open(const char *shm_name); + +int utils_shm_unlink(const char *shm_name); + +size_t get_max_file_size(void); + +int utils_get_file_size(int fd, size_t *size); + +int utils_set_file_size(int fd, size_t size); + +void *utils_mmap(void *hint_addr, size_t length, int prot, int flag, int fd, + size_t fd_offset); + +void *utils_mmap_file(void *hint_addr, size_t length, int prot, int flags, + int fd, size_t fd_offset, bool *map_sync); + +int utils_munmap(void *addr, size_t length); + +int utils_purge(void *addr, size_t length, int advice); + +void utils_strerror(int errnum, char *buf, size_t buflen); + +int utils_devdax_open(const char *path); + +int utils_file_open(const char *path); + +int utils_file_open_or_create(const char *path); + +int utils_fallocate(int fd, long offset, long len); + +long utils_get_size_threshold(char *str_threshold); + #ifdef __cplusplus } #endif diff --git a/src/utils/utils_concurrency.h b/src/utils/utils_concurrency.h index dcc67dc42..155184cc4 100644 --- a/src/utils/utils_concurrency.h +++ b/src/utils/utils_concurrency.h @@ -36,19 +36,19 @@ extern "C" { #endif -typedef struct os_mutex_t { +typedef struct utils_mutex_t { #ifdef _WIN32 CRITICAL_SECTION lock; #else pthread_mutex_t lock; #endif -} os_mutex_t; +} utils_mutex_t; -size_t util_mutex_get_size(void); -os_mutex_t *util_mutex_init(void *ptr); -void util_mutex_destroy_not_free(os_mutex_t *m); -int util_mutex_lock(os_mutex_t *mutex); -int util_mutex_unlock(os_mutex_t *mutex); +size_t utils_mutex_get_size(void); +utils_mutex_t *utils_mutex_init(void *ptr); +void 
utils_mutex_destroy_not_free(utils_mutex_t *m); +int utils_mutex_lock(utils_mutex_t *mutex); +int utils_mutex_unlock(utils_mutex_t *mutex); #if defined(_WIN32) #define UTIL_ONCE_FLAG INIT_ONCE @@ -58,50 +58,56 @@ int util_mutex_unlock(os_mutex_t *mutex); #define UTIL_ONCE_FLAG_INIT PTHREAD_ONCE_INIT #endif -void util_init_once(UTIL_ONCE_FLAG *flag, void (*onceCb)(void)); +void utils_init_once(UTIL_ONCE_FLAG *flag, void (*onceCb)(void)); #if defined(_WIN32) -static __inline unsigned char util_lssb_index(long long value) { +static __inline unsigned char utils_lssb_index(long long value) { unsigned long ret; _BitScanForward64(&ret, value); return (unsigned char)ret; } -static __inline unsigned char util_mssb_index(long long value) { +static __inline unsigned char utils_mssb_index(long long value) { unsigned long ret; _BitScanReverse64(&ret, value); return (unsigned char)ret; } // There is no good way to do atomic_load on windows... -#define util_atomic_load_acquire(object, dest) \ +#define utils_atomic_load_acquire(object, dest) \ do { \ - *dest = InterlockedOr64Acquire((LONG64 volatile *)object, 0); \ + *(LONG64 *)dest = \ + InterlockedOr64Acquire((LONG64 volatile *)object, 0); \ } while (0) -#define util_atomic_store_release(object, desired) \ +#define utils_atomic_store_release(object, desired) \ InterlockedExchange64((LONG64 volatile *)object, (LONG64)desired) -#define util_atomic_increment(object) \ +#define utils_atomic_increment(object) \ InterlockedIncrement64((LONG64 volatile *)object) -#define util_fetch_and_add64(ptr, value) \ +#define utils_atomic_decrement(object) \ + InterlockedDecrement64((LONG64 volatile *)object) +#define utils_fetch_and_add64(ptr, value) \ InterlockedExchangeAdd64((LONG64 *)(ptr), value) #else -#define util_lssb_index(x) ((unsigned char)__builtin_ctzll(x)) -#define util_mssb_index(x) ((unsigned char)(63 - __builtin_clzll(x))) -#define util_atomic_load_acquire(object, dest) \ +#define utils_lssb_index(x) ((unsigned 
char)__builtin_ctzll(x)) +#define utils_mssb_index(x) ((unsigned char)(63 - __builtin_clzll(x))) +#define utils_atomic_load_acquire(object, dest) \ do { \ utils_annotate_acquire((void *)object); \ __atomic_load(object, dest, memory_order_acquire); \ } while (0) -#define util_atomic_store_release(object, desired) \ +#define utils_atomic_store_release(object, desired) \ do { \ __atomic_store_n(object, desired, memory_order_release); \ utils_annotate_release((void *)object); \ } while (0) -#define util_atomic_increment(object) \ +#define utils_atomic_increment(object) \ __atomic_add_fetch(object, 1, __ATOMIC_ACQ_REL) -#define util_fetch_and_add64 __sync_fetch_and_add +#define utils_atomic_decrement(object) \ + __atomic_sub_fetch(object, 1, __ATOMIC_ACQ_REL) +#define utils_fetch_and_add64 __sync_fetch_and_add + #endif #ifdef __cplusplus diff --git a/test/providers/level_zero_helpers.cpp b/src/utils/utils_level_zero.cpp similarity index 76% rename from test/providers/level_zero_helpers.cpp rename to src/utils/utils_level_zero.cpp index 067bf6a15..833047dd7 100644 --- a/test/providers/level_zero_helpers.cpp +++ b/src/utils/utils_level_zero.cpp @@ -5,7 +5,7 @@ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception */ -#include "level_zero_helpers.h" +#include "utils_level_zero.h" #include #include @@ -63,10 +63,69 @@ struct libze_ops { } libze_ops; #if USE_DLOPEN +// Generic no-op stub function for all callbacks +template ze_result_t noop_stub(Args &&...) { + return ZE_RESULT_SUCCESS; // Always return ZE_RESULT_SUCCESS +} + struct DlHandleCloser { void operator()(void *dlHandle) { if (dlHandle) { - util_close_library(dlHandle); + // Reset all function pointers to no-op stubs in case the library + // but some other global object still try to call Level Zero functions. + libze_ops.zeInit = [](auto... args) { return noop_stub(args...); }; + libze_ops.zeDriverGet = [](auto... args) { + return noop_stub(args...); + }; + libze_ops.zeDeviceGet = [](auto... 
args) { + return noop_stub(args...); + }; + libze_ops.zeDeviceGetProperties = [](auto... args) { + return noop_stub(args...); + }; + libze_ops.zeContextCreate = [](auto... args) { + return noop_stub(args...); + }; + libze_ops.zeContextDestroy = [](auto... args) { + return noop_stub(args...); + }; + libze_ops.zeCommandQueueCreate = [](auto... args) { + return noop_stub(args...); + }; + libze_ops.zeCommandQueueDestroy = [](auto... args) { + return noop_stub(args...); + }; + libze_ops.zeCommandQueueExecuteCommandLists = [](auto... args) { + return noop_stub(args...); + }; + libze_ops.zeCommandQueueSynchronize = [](auto... args) { + return noop_stub(args...); + }; + libze_ops.zeCommandListCreate = [](auto... args) { + return noop_stub(args...); + }; + libze_ops.zeCommandListDestroy = [](auto... args) { + return noop_stub(args...); + }; + libze_ops.zeCommandListClose = [](auto... args) { + return noop_stub(args...); + }; + libze_ops.zeCommandListAppendMemoryCopy = [](auto... args) { + return noop_stub(args...); + }; + libze_ops.zeCommandListAppendMemoryFill = [](auto... args) { + return noop_stub(args...); + }; + libze_ops.zeMemGetAllocProperties = [](auto... args) { + return noop_stub(args...); + }; + libze_ops.zeMemAllocDevice = [](auto... args) { + return noop_stub(args...); + }; + libze_ops.zeMemFree = [](auto... args) { + return noop_stub(args...); + }; + utils_close_library(dlHandle); } } }; @@ -82,26 +141,26 @@ int InitLevelZeroOps() { // NOTE that we use UMF_UTIL_OPEN_LIBRARY_GLOBAL which add all loaded symbols to the // global symbol table. 
zeDlHandle = std::unique_ptr( - util_open_library(lib_name, UMF_UTIL_OPEN_LIBRARY_GLOBAL)); + utils_open_library(lib_name, UMF_UTIL_OPEN_LIBRARY_GLOBAL)); *(void **)&libze_ops.zeInit = - util_get_symbol_addr(zeDlHandle.get(), "zeInit", lib_name); + utils_get_symbol_addr(zeDlHandle.get(), "zeInit", lib_name); if (libze_ops.zeInit == nullptr) { fprintf(stderr, "zeInit symbol not found in %s\n", lib_name); return -1; } *(void **)&libze_ops.zeDriverGet = - util_get_symbol_addr(zeDlHandle.get(), "zeDriverGet", lib_name); + utils_get_symbol_addr(zeDlHandle.get(), "zeDriverGet", lib_name); if (libze_ops.zeDriverGet == nullptr) { fprintf(stderr, "zeDriverGet symbol not found in %s\n", lib_name); return -1; } *(void **)&libze_ops.zeDeviceGet = - util_get_symbol_addr(zeDlHandle.get(), "zeDeviceGet", lib_name); + utils_get_symbol_addr(zeDlHandle.get(), "zeDeviceGet", lib_name); if (libze_ops.zeDeviceGet == nullptr) { fprintf(stderr, "zeDeviceGet symbol not found in %s\n", lib_name); return -1; } - *(void **)&libze_ops.zeDeviceGetProperties = util_get_symbol_addr( + *(void **)&libze_ops.zeDeviceGetProperties = utils_get_symbol_addr( zeDlHandle.get(), "zeDeviceGetProperties", lib_name); if (libze_ops.zeDeviceGetProperties == nullptr) { fprintf(stderr, "zeDeviceGetProperties symbol not found in %s\n", @@ -109,25 +168,25 @@ int InitLevelZeroOps() { return -1; } *(void **)&libze_ops.zeContextCreate = - util_get_symbol_addr(zeDlHandle.get(), "zeContextCreate", lib_name); + utils_get_symbol_addr(zeDlHandle.get(), "zeContextCreate", lib_name); if (libze_ops.zeContextCreate == nullptr) { fprintf(stderr, "zeContextCreate symbol not found in %s\n", lib_name); return -1; } *(void **)&libze_ops.zeContextDestroy = - util_get_symbol_addr(zeDlHandle.get(), "zeContextDestroy", lib_name); + utils_get_symbol_addr(zeDlHandle.get(), "zeContextDestroy", lib_name); if (libze_ops.zeContextDestroy == nullptr) { fprintf(stderr, "zeContextDestroy symbol not found in %s\n", lib_name); return -1; } - 
*(void **)&libze_ops.zeCommandQueueCreate = util_get_symbol_addr( + *(void **)&libze_ops.zeCommandQueueCreate = utils_get_symbol_addr( zeDlHandle.get(), "zeCommandQueueCreate", lib_name); if (libze_ops.zeCommandQueueCreate == nullptr) { fprintf(stderr, "zeCommandQueueCreate symbol not found in %s\n", lib_name); return -1; } - *(void **)&libze_ops.zeCommandQueueDestroy = util_get_symbol_addr( + *(void **)&libze_ops.zeCommandQueueDestroy = utils_get_symbol_addr( zeDlHandle.get(), "zeCommandQueueDestroy", lib_name); if (libze_ops.zeCommandQueueDestroy == nullptr) { fprintf(stderr, "zeCommandQueueDestroy symbol not found in %s\n", @@ -135,29 +194,29 @@ int InitLevelZeroOps() { return -1; } *(void **)&libze_ops.zeCommandQueueExecuteCommandLists = - util_get_symbol_addr(zeDlHandle.get(), - "zeCommandQueueExecuteCommandLists", lib_name); + utils_get_symbol_addr(zeDlHandle.get(), + "zeCommandQueueExecuteCommandLists", lib_name); if (libze_ops.zeCommandQueueExecuteCommandLists == nullptr) { fprintf(stderr, "zeCommandQueueExecuteCommandLists symbol not found in %s\n", lib_name); return -1; } - *(void **)&libze_ops.zeCommandQueueSynchronize = util_get_symbol_addr( + *(void **)&libze_ops.zeCommandQueueSynchronize = utils_get_symbol_addr( zeDlHandle.get(), "zeCommandQueueSynchronize", lib_name); if (libze_ops.zeCommandQueueSynchronize == nullptr) { fprintf(stderr, "zeCommandQueueSynchronize symbol not found in %s\n", lib_name); return -1; } - *(void **)&libze_ops.zeCommandListCreate = - util_get_symbol_addr(zeDlHandle.get(), "zeCommandListCreate", lib_name); + *(void **)&libze_ops.zeCommandListCreate = utils_get_symbol_addr( + zeDlHandle.get(), "zeCommandListCreate", lib_name); if (libze_ops.zeCommandListCreate == nullptr) { fprintf(stderr, "zeCommandListCreate symbol not found in %s\n", lib_name); return -1; } - *(void **)&libze_ops.zeCommandListDestroy = util_get_symbol_addr( + *(void **)&libze_ops.zeCommandListDestroy = utils_get_symbol_addr( zeDlHandle.get(), 
"zeCommandListDestroy", lib_name); if (libze_ops.zeCommandListDestroy == nullptr) { fprintf(stderr, "zeCommandListDestroy symbol not found in %s\n", @@ -165,13 +224,13 @@ int InitLevelZeroOps() { return -1; } *(void **)&libze_ops.zeCommandListClose = - util_get_symbol_addr(zeDlHandle.get(), "zeCommandListClose", lib_name); + utils_get_symbol_addr(zeDlHandle.get(), "zeCommandListClose", lib_name); if (libze_ops.zeCommandListClose == nullptr) { fprintf(stderr, "zeCommandListClose symbol not found in %s\n", lib_name); return -1; } - *(void **)&libze_ops.zeCommandListAppendMemoryCopy = util_get_symbol_addr( + *(void **)&libze_ops.zeCommandListAppendMemoryCopy = utils_get_symbol_addr( zeDlHandle.get(), "zeCommandListAppendMemoryCopy", lib_name); if (libze_ops.zeCommandListAppendMemoryCopy == nullptr) { fprintf(stderr, @@ -179,7 +238,7 @@ int InitLevelZeroOps() { lib_name); return -1; } - *(void **)&libze_ops.zeCommandListAppendMemoryFill = util_get_symbol_addr( + *(void **)&libze_ops.zeCommandListAppendMemoryFill = utils_get_symbol_addr( zeDlHandle.get(), "zeCommandListAppendMemoryFill", lib_name); if (libze_ops.zeCommandListAppendMemoryFill == nullptr) { fprintf(stderr, @@ -187,7 +246,7 @@ int InitLevelZeroOps() { lib_name); return -1; } - *(void **)&libze_ops.zeMemGetAllocProperties = util_get_symbol_addr( + *(void **)&libze_ops.zeMemGetAllocProperties = utils_get_symbol_addr( zeDlHandle.get(), "zeMemGetAllocProperties", lib_name); if (libze_ops.zeMemGetAllocProperties == nullptr) { fprintf(stderr, "zeMemGetAllocProperties symbol not found in %s\n", @@ -195,13 +254,13 @@ int InitLevelZeroOps() { return -1; } *(void **)&libze_ops.zeMemAllocDevice = - util_get_symbol_addr(zeDlHandle.get(), "zeMemAllocDevice", lib_name); + utils_get_symbol_addr(zeDlHandle.get(), "zeMemAllocDevice", lib_name); if (libze_ops.zeMemAllocDevice == nullptr) { fprintf(stderr, "zeMemAllocDevice symbol not found in %s\n", lib_name); return -1; } *(void **)&libze_ops.zeMemFree = - 
util_get_symbol_addr(zeDlHandle.get(), "zeMemFree", lib_name); + utils_get_symbol_addr(zeDlHandle.get(), "zeMemFree", lib_name); if (libze_ops.zeMemFree == nullptr) { fprintf(stderr, "zeMemFree symbol not found in %s\n", lib_name); return -1; @@ -238,7 +297,7 @@ int InitLevelZeroOps() { } #endif // USE_DLOPEN -static int init_level_zero_lib(void) { +static int utils_ze_init_level_zero_lib(void) { ze_init_flag_t flags = ZE_INIT_FLAG_GPU_ONLY; ze_result_t result = libze_ops.zeInit(flags); if (result != ZE_RESULT_SUCCESS) { @@ -247,12 +306,36 @@ static int init_level_zero_lib(void) { return 0; } -int get_drivers(uint32_t *drivers_num_, ze_driver_handle_t **drivers_) { +static UTIL_ONCE_FLAG level_zero_init_flag = UTIL_ONCE_FLAG_INIT; +static int InitResult; + +static void utils_ze_init_level_zero_once(void) { + InitResult = InitLevelZeroOps(); + if (InitResult != 0) { + return; + } + InitResult = utils_ze_init_level_zero_lib(); +} + +int utils_ze_init_level_zero(void) { + utils_init_once(&level_zero_init_flag, utils_ze_init_level_zero_once); + + return InitResult; +} + +int utils_ze_get_drivers(uint32_t *drivers_num_, + ze_driver_handle_t **drivers_) { int ret = 0; ze_result_t ze_result; ze_driver_handle_t *drivers = NULL; uint32_t drivers_num = 0; + ret = utils_ze_init_level_zero(); + if (ret != 0) { + fprintf(stderr, "utils_ze_init_level_zero() failed!\n"); + goto init_fail; + } + ze_result = libze_ops.zeDriverGet(&drivers_num, NULL); if (ze_result != ZE_RESULT_SUCCESS) { fprintf(stderr, "zeDriverGet() failed!\n"); @@ -288,16 +371,24 @@ int get_drivers(uint32_t *drivers_num_, ze_driver_handle_t **drivers_) { free(drivers); *drivers_ = NULL; } + +init_fail: return ret; } -int get_devices(ze_driver_handle_t driver, uint32_t *devices_num_, - ze_device_handle_t **devices_) { +int utils_ze_get_devices(ze_driver_handle_t driver, uint32_t *devices_num_, + ze_device_handle_t **devices_) { ze_result_t ze_result; int ret = 0; uint32_t devices_num = 0; ze_device_handle_t 
*devices = NULL; + ret = utils_ze_init_level_zero(); + if (ret != 0) { + fprintf(stderr, "utils_ze_init_level_zero() failed!\n"); + goto init_fail; + } + ze_result = libze_ops.zeDeviceGet(driver, &devices_num, NULL); if (ze_result != ZE_RESULT_SUCCESS) { fprintf(stderr, "zeDeviceGet() failed!\n"); @@ -333,10 +424,12 @@ int get_devices(ze_driver_handle_t driver, uint32_t *devices_num_, free(devices); devices = NULL; } +init_fail: return ret; } -int find_driver_with_gpu(uint32_t *driver_idx, ze_driver_handle_t *driver_) { +int utils_ze_find_driver_with_gpu(uint32_t *driver_idx, + ze_driver_handle_t *driver_) { int ret = 0; ze_result_t ze_result; uint32_t drivers_num = 0; @@ -344,7 +437,7 @@ int find_driver_with_gpu(uint32_t *driver_idx, ze_driver_handle_t *driver_) { ze_driver_handle_t *drivers = NULL; ze_driver_handle_t driver_with_gpus = NULL; - ret = get_drivers(&drivers_num, &drivers); + ret = utils_ze_get_drivers(&drivers_num, &drivers); if (ret) { goto fn_fail; } @@ -354,7 +447,7 @@ int find_driver_with_gpu(uint32_t *driver_idx, ze_driver_handle_t *driver_) { uint32_t devices_num = 0; ze_driver_handle_t driver = drivers[i]; - ret = get_devices(driver, &devices_num, &devices); + ret = utils_ze_get_devices(driver, &devices_num, &devices); if (ret) { goto fn_fail; } @@ -403,13 +496,14 @@ int find_driver_with_gpu(uint32_t *driver_idx, ze_driver_handle_t *driver_) { return ret; } -int find_gpu_device(ze_driver_handle_t driver, ze_device_handle_t *device_) { +int utils_ze_find_gpu_device(ze_driver_handle_t driver, + ze_device_handle_t *device_) { int ret = -1; uint32_t devices_num = 0; ze_device_handle_t *devices = NULL; ze_device_handle_t device; - ret = get_devices(driver, &devices_num, &devices); + ret = utils_ze_get_devices(driver, &devices_num, &devices); if (ret) { return ret; } @@ -441,9 +535,9 @@ int find_gpu_device(ze_driver_handle_t driver, ze_device_handle_t *device_) { return ret; } -int level_zero_fill(ze_context_handle_t context, ze_device_handle_t 
device, - void *ptr, size_t size, const void *pattern, - size_t pattern_size) { +int utils_ze_level_zero_fill(ze_context_handle_t context, + ze_device_handle_t device, void *ptr, size_t size, + const void *pattern, size_t pattern_size) { int ret = 0; ze_command_queue_desc_t commandQueueDesc = { @@ -527,8 +621,9 @@ int level_zero_fill(ze_context_handle_t context, ze_device_handle_t device, return ret; } -int level_zero_copy(ze_context_handle_t context, ze_device_handle_t device, - void *dst_ptr, const void *src_ptr, size_t size) { +int utils_ze_level_zero_copy(ze_context_handle_t context, + ze_device_handle_t device, void *dst_ptr, + const void *src_ptr, size_t size) { int ret = 0; ze_command_queue_desc_t commandQueueDesc = { ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC, @@ -610,7 +705,8 @@ int level_zero_copy(ze_context_handle_t context, ze_device_handle_t device, return ret; } -int create_context(ze_driver_handle_t driver, ze_context_handle_t *context) { +int utils_ze_create_context(ze_driver_handle_t driver, + ze_context_handle_t *context) { ze_result_t ze_result; ze_context_desc_t ctxtDesc; ctxtDesc.stype = ZE_STRUCTURE_TYPE_CONTEXT_DESC; @@ -626,7 +722,7 @@ int create_context(ze_driver_handle_t driver, ze_context_handle_t *context) { return 0; } -int destroy_context(ze_context_handle_t context) { +int utils_ze_destroy_context(ze_context_handle_t context) { ze_result_t ze_result; ze_result = libze_ops.zeContextDestroy(context); if (ze_result != ZE_RESULT_SUCCESS) { @@ -637,7 +733,7 @@ int destroy_context(ze_context_handle_t context) { return 0; } -ze_memory_type_t get_mem_type(ze_context_handle_t context, void *ptr) { +ze_memory_type_t utils_ze_get_mem_type(ze_context_handle_t context, void *ptr) { ze_device_handle_t device = NULL; ze_memory_allocation_properties_t alloc_props; alloc_props.stype = ZE_STRUCTURE_TYPE_MEMORY_ALLOCATION_PROPERTIES; @@ -649,60 +745,3 @@ ze_memory_type_t get_mem_type(ze_context_handle_t context, void *ptr) { 
libze_ops.zeMemGetAllocProperties(context, ptr, &alloc_props, &device); return alloc_props.type; } - -UTIL_ONCE_FLAG level_zero_init_flag; -int InitResult; -void init_level_zero_once() { - InitResult = InitLevelZeroOps(); - if (InitResult != 0) { - return; - } - InitResult = init_level_zero_lib(); -} - -int init_level_zero() { - util_init_once(&level_zero_init_flag, init_level_zero_once); - - return InitResult; -} - -level_zero_memory_provider_params_t -create_level_zero_prov_params(umf_usm_memory_type_t memory_type) { - level_zero_memory_provider_params_t params = {NULL, NULL, - UMF_MEMORY_TYPE_UNKNOWN}; - uint32_t driver_idx = 0; - ze_driver_handle_t hDriver; - ze_device_handle_t hDevice; - ze_context_handle_t hContext; - int ret = -1; - - ret = init_level_zero(); - if (ret != 0) { - // Return empty params. Test will be skipped. - return params; - } - - ret = find_driver_with_gpu(&driver_idx, &hDriver); - if (ret != 0 || hDriver == NULL) { - // Return empty params. Test will be skipped. - return params; - } - - ret = find_gpu_device(hDriver, &hDevice); - if (ret != 0 || hDevice == NULL) { - // Return empty params. Test will be skipped. - return params; - } - - ret = create_context(hDriver, &hContext); - if (ret != 0) { - // Return empty params. Test will be skipped. - return params; - } - - params.level_zero_context_handle = hContext; - params.level_zero_device_handle = hDevice; - params.memory_type = memory_type; - - return params; -} \ No newline at end of file diff --git a/src/utils/utils_level_zero.h b/src/utils/utils_level_zero.h new file mode 100644 index 000000000..b29a4dc43 --- /dev/null +++ b/src/utils/utils_level_zero.h @@ -0,0 +1,52 @@ +/* + * Copyright (C) 2024 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + */ + +#ifndef UMF_UTILS_LEVEL_ZERO_H +#define UMF_UTILS_LEVEL_ZERO_H + +#include + +#include "ze_api.h" + +#ifdef __cplusplus +extern "C" { +#endif + +int utils_ze_init_level_zero(void); +int utils_ze_init_level_zero(void); + +int utils_ze_get_drivers(uint32_t *drivers_num_, ze_driver_handle_t **drivers_); + +int utils_ze_get_devices(ze_driver_handle_t driver, uint32_t *devices_num_, + ze_device_handle_t **devices_); + +int utils_ze_find_driver_with_gpu(uint32_t *driver_idx, + ze_driver_handle_t *driver_); + +int utils_ze_find_gpu_device(ze_driver_handle_t driver, + ze_device_handle_t *device_); + +int utils_ze_level_zero_fill(ze_context_handle_t context, + ze_device_handle_t device, void *ptr, size_t size, + const void *pattern, size_t pattern_size); + +int utils_ze_level_zero_copy(ze_context_handle_t context, + ze_device_handle_t device, void *dst_ptr, + const void *src_ptr, size_t size); + +int utils_ze_create_context(ze_driver_handle_t driver, + ze_context_handle_t *context); + +int utils_ze_destroy_context(ze_context_handle_t context); + +ze_memory_type_t utils_ze_get_mem_type(ze_context_handle_t context, void *ptr); + +#ifdef __cplusplus +} +#endif + +#endif // UMF_UTILS_LEVEL_ZERO_H diff --git a/src/utils/utils_linux_common.c b/src/utils/utils_linux_common.c new file mode 100644 index 000000000..cd0fefd2a --- /dev/null +++ b/src/utils/utils_linux_common.c @@ -0,0 +1,242 @@ +/* + * + * Copyright (C) 2023-2024 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "utils_common.h" +#include "utils_log.h" + +umf_result_t +utils_translate_mem_visibility_flag(umf_memory_visibility_t in_flag, + unsigned *out_flag) { + switch (in_flag) { + case UMF_MEM_MAP_PRIVATE: + *out_flag = MAP_PRIVATE; + return UMF_RESULT_SUCCESS; + case UMF_MEM_MAP_SHARED: + *out_flag = MAP_SHARED; + return UMF_RESULT_SUCCESS; + } + return UMF_RESULT_ERROR_INVALID_ARGUMENT; +} + +/* + * Map given file into memory. + * If (flags & MAP_PRIVATE) it uses just mmap. Otherwise, it tries to mmap + * with (flags | MAP_SHARED_VALIDATE | MAP_SYNC) which allows flushing + * from the user-space. If MAP_SYNC fails and if the user did not specify + * this flag by himself, it falls back to the mmap with (flags | MAP_SHARED). + */ +void *utils_mmap_file(void *hint_addr, size_t length, int prot, int flags, + int fd, size_t fd_offset, bool *map_sync) { + void *addr; + + if (map_sync) { + *map_sync = false; + } + + /* + * MAP_PRIVATE and MAP_SHARED are mutually exclusive, + * therefore mmap with MAP_PRIVATE is executed separately. 
+ */ + if (flags & MAP_PRIVATE) { + addr = utils_mmap(hint_addr, length, prot, flags, fd, fd_offset); + if (addr == NULL) { + LOG_PERR("mapping file with the MAP_PRIVATE flag failed (fd=%i, " + "offset=%zu, length=%zu, flags=%i)", + fd, fd_offset, length, flags); + return NULL; + } + + LOG_DEBUG("file mapped with the MAP_PRIVATE flag (fd=%i, offset=%zu, " + "length=%zu, flags=%i)", + fd, fd_offset, length, flags); + + return addr; + } + + errno = 0; + + /* try to mmap with MAP_SYNC flag */ + const int sync_flags = flags | MAP_SHARED_VALIDATE | MAP_SYNC; + addr = utils_mmap(hint_addr, length, prot, sync_flags, fd, fd_offset); + if (addr) { + LOG_DEBUG("file mapped with the MAP_SYNC flag (fd=%i, offset=%zu, " + "length=%zu, flags=%i)", + fd, fd_offset, length, sync_flags); + if (map_sync) { + *map_sync = true; + } + return addr; + } + + /* try to mmap with MAP_SHARED flag (without MAP_SYNC) */ + if (errno == EINVAL || errno == ENOTSUP || errno == EOPNOTSUPP) { + const int shared_flags = flags | MAP_SHARED; + addr = utils_mmap(hint_addr, length, prot, shared_flags, fd, fd_offset); + if (addr) { + LOG_DEBUG("file mapped with the MAP_SHARED flag (fd=%i, " + "offset=%zu, length=%zu, flags=%i)", + fd, fd_offset, length, shared_flags); + return addr; + } + + LOG_PERR("mapping file with the MAP_SHARED flag failed (fd=%i, " + "offset=%zu, length=%zu, flags=%i)", + fd, fd_offset, length, shared_flags); + } else { + LOG_PERR( + "mapping file with the MAP_SYNC flag failed (fd=%i, offset=%zu, " + "length=%zu, flags=%i)", + fd, fd_offset, length, sync_flags); + } + + return NULL; +} + +int utils_get_file_size(int fd, size_t *size) { + struct stat statbuf; + int ret = fstat(fd, &statbuf); + if (ret) { + LOG_PERR("fstat(%i) failed", fd); + return ret; + } + + *size = statbuf.st_size; + return 0; +} + +int utils_set_file_size(int fd, size_t size) { + errno = 0; + int ret = ftruncate(fd, size); + if (ret) { + LOG_PERR("setting size %zu of a file failed", size); + } else { + 
LOG_DEBUG("set size of a file to %zu bytes", size); + } + + return ret; +} + +int utils_fallocate(int fd, long offset, long len) { + return posix_fallocate(fd, offset, len); +} + +// create a shared memory file +int utils_shm_create(const char *shm_name, size_t size) { + if (shm_name == NULL) { + LOG_ERR("empty name of a shared memory file"); + return -1; + } + + (void)shm_unlink(shm_name); + + int fd = shm_open(shm_name, O_RDWR | O_CREAT | O_EXCL, 0600); + if (fd == -1) { + LOG_PERR("cannot create a shared memory file /dev/shm/%s", shm_name); + return fd; + } + + int ret = utils_set_file_size(fd, size); + if (ret) { + LOG_ERR("setting size (%zu) of a file /dev/shm/%s failed", size, + shm_name); + close(fd); + (void)shm_unlink(shm_name); + return -1; + } + + return fd; +} + +// open a shared memory file +int utils_shm_open(const char *shm_name) { + if (shm_name == NULL) { + LOG_ERR("empty name of a shared memory file"); + return -1; + } + + int fd = shm_open(shm_name, O_RDWR, 0600); + if (fd == -1) { + LOG_PERR("cannot open a shared memory file /dev/shm/%s", shm_name); + } + + return fd; +} + +// unlink a shared memory file +int utils_shm_unlink(const char *shm_name) { return shm_unlink(shm_name); } + +static int syscall_memfd_secret(void) { + int fd = -1; +#ifdef __NR_memfd_secret + // SYS_memfd_secret is supported since Linux 5.14 + // not using SYS_memfd_secret as SLES does not define it + fd = syscall(__NR_memfd_secret, 0); + if (fd == -1) { + LOG_PERR("memfd_secret() failed"); + } + if (fd > 0) { + LOG_DEBUG("anonymous file descriptor created using memfd_secret()"); + } +#endif /* __NR_memfd_secret */ + return fd; +} + +static int syscall_memfd_create(void) { + int fd = -1; +#ifdef __NR_memfd_create + // SYS_memfd_create is supported since Linux 3.17, glibc 2.27 + // not using SYS_memfd_create for consistency with syscall_memfd_secret + fd = syscall(__NR_memfd_create, "anon_fd_name", 0); + if (fd == -1) { + LOG_PERR("memfd_create() failed"); + } + if (fd > 0) 
{ + LOG_DEBUG("anonymous file descriptor created using memfd_create()"); + } +#endif /* __NR_memfd_create */ + return fd; +} + +// create an anonymous file descriptor +int utils_create_anonymous_fd(void) { + int fd = -1; + + if (!utils_env_var_has_str("UMF_MEM_FD_FUNC", "memfd_create")) { + fd = syscall_memfd_secret(); + if (fd > 0) { + return fd; + } + } + + // The SYS_memfd_secret syscall can fail with errno == ENOTSYS (function not implemented). + // We should try to call the SYS_memfd_create syscall in this case. + + fd = syscall_memfd_create(); + +#if !(defined __NR_memfd_secret) && !(defined __NR_memfd_create) + if (fd == -1) { + LOG_ERR("cannot create an anonymous file descriptor - neither " + "memfd_secret() nor memfd_create() are defined"); + } +#endif /* !(defined __NR_memfd_secret) && !(defined __NR_memfd_create) */ + + return fd; +} diff --git a/src/utils/utils_load_library.c b/src/utils/utils_load_library.c index cc5fe29fc..ef0da450b 100644 --- a/src/utils/utils_load_library.c +++ b/src/utils/utils_load_library.c @@ -16,59 +16,80 @@ #include // clang-format on -#else +#else // _WIN32 #define _GNU_SOURCE 1 #include // forces linking with libdl on Linux -#endif +#endif // !_WIN32 + +#include #include "utils_load_library.h" +#include "utils_log.h" #ifdef _WIN32 -void *util_open_library(const char *filename, int userFlags) { +void *utils_open_library(const char *filename, int userFlags) { (void)userFlags; //unused for win return LoadLibrary(TEXT(filename)); } -int util_close_library(void *handle) { +int utils_close_library(void *handle) { // If the FreeLibrary function succeeds, the return value is nonzero. // If the FreeLibrary function fails, the return value is zero. 
return (FreeLibrary((HMODULE)handle) == 0); } -void *util_get_symbol_addr(void *handle, const char *symbol, - const char *libname) { +void *utils_get_symbol_addr(void *handle, const char *symbol, + const char *libname) { if (!handle) { if (libname == NULL) { return NULL; } handle = GetModuleHandle(libname); } - return (void *)GetProcAddress((HMODULE)handle, symbol); + + void *addr = (void *)GetProcAddress((HMODULE)handle, symbol); + if (addr == NULL) { + LOG_ERR("Required symbol not found: %s", symbol); + } + + return addr; } #else /* Linux */ -void *util_open_library(const char *filename, int userFlags) { +void *utils_open_library(const char *filename, int userFlags) { int dlopenFlags = RTLD_LAZY; if (userFlags & UMF_UTIL_OPEN_LIBRARY_GLOBAL) { dlopenFlags |= RTLD_GLOBAL; } - return dlopen(filename, dlopenFlags); + + void *handle = dlopen(filename, dlopenFlags); + if (handle == NULL) { + LOG_FATAL("dlopen(%s) failed with error: %s", filename, dlerror()); + } + + return handle; } -int util_close_library(void *handle) { return dlclose(handle); } +int utils_close_library(void *handle) { return dlclose(handle); } -void *util_get_symbol_addr(void *handle, const char *symbol, - const char *libname) { +void *utils_get_symbol_addr(void *handle, const char *symbol, + const char *libname) { (void)libname; //unused if (!handle) { handle = RTLD_DEFAULT; } - return dlsym(handle, symbol); + + void *addr = dlsym(handle, symbol); + if (addr == NULL) { + LOG_ERR("required symbol not found: %s (error: %s)", symbol, dlerror()); + } + + return addr; } #endif diff --git a/src/utils/utils_load_library.h b/src/utils/utils_load_library.h index c066b548f..3206183f5 100644 --- a/src/utils/utils_load_library.h +++ b/src/utils/utils_load_library.h @@ -19,10 +19,10 @@ extern "C" { #endif #define UMF_UTIL_OPEN_LIBRARY_GLOBAL 1 -void *util_open_library(const char *filename, int userFlags); -int util_close_library(void *handle); -void *util_get_symbol_addr(void *handle, const char *symbol, - 
const char *libname); +void *utils_open_library(const char *filename, int userFlags); +int utils_close_library(void *handle); +void *utils_get_symbol_addr(void *handle, const char *symbol, + const char *libname); #ifdef __cplusplus } diff --git a/src/utils/utils_log.c b/src/utils/utils_log.c index ca16014f0..bdb9ce823 100644 --- a/src/utils/utils_log.c +++ b/src/utils/utils_log.c @@ -40,14 +40,14 @@ typedef struct { int timestamp; int pid; - util_log_level_t level; - util_log_level_t flushLevel; + utils_log_level_t level; + utils_log_level_t flushLevel; FILE *output; -} util_log_config_t; +} utils_log_config_t; -util_log_config_t loggerConfig = {0, 0, LOG_ERROR, LOG_ERROR, NULL}; +utils_log_config_t loggerConfig = {0, 0, LOG_ERROR, LOG_ERROR, NULL}; -static const char *level_to_str(util_log_level_t l) { +static const char *level_to_str(utils_log_level_t l) { switch (l) { case LOG_DEBUG: return "DEBUG"; @@ -73,9 +73,9 @@ static const char *level_to_str(util_log_level_t l) { #pragma warning(disable : 6262) #endif // _MSC_VER -static void util_log_internal(util_log_level_t level, int perror, - const char *func, const char *format, - va_list args) { +static void utils_log_internal(utils_log_level_t level, int perror, + const char *func, const char *format, + va_list args) { if (!loggerConfig.output && level != LOG_FATAL) { return; //logger not enabled } @@ -203,25 +203,25 @@ static void util_log_internal(util_log_level_t level, int perror, #pragma warning(pop) #endif // _MSC_VER -void util_log(util_log_level_t level, const char *func, const char *format, - ...) { +void utils_log(utils_log_level_t level, const char *func, const char *format, + ...) { va_list args; va_start(args, format); - util_log_internal(level, 0, func, format, args); + utils_log_internal(level, 0, func, format, args); va_end(args); } -void util_plog(util_log_level_t level, const char *func, const char *format, - ...) 
{ +void utils_plog(utils_log_level_t level, const char *func, const char *format, + ...) { va_list args; va_start(args, format); - util_log_internal(level, 1, func, format, args); + utils_log_internal(level, 1, func, format, args); va_end(args); } static const char *bool_to_str(int b) { return b ? "yes" : "no"; } -void util_log_init(void) { +void utils_log_init(void) { const char *envVar = getenv("UMF_LOG"); if (!envVar) { @@ -229,11 +229,11 @@ void util_log_init(void) { } const char *arg; - if (util_parse_var(envVar, "output:stdout", NULL)) { + if (utils_parse_var(envVar, "output:stdout", NULL)) { loggerConfig.output = stdout; - } else if (util_parse_var(envVar, "output:stderr", NULL)) { + } else if (utils_parse_var(envVar, "output:stderr", NULL)) { loggerConfig.output = stderr; - } else if (util_parse_var(envVar, "output:file", &arg)) { + } else if (utils_parse_var(envVar, "output:file", &arg)) { loggerConfig.output = NULL; const char *argEnd = strstr(arg, ";"); char file[MAX_FILE_PATH + 1]; @@ -269,39 +269,39 @@ void util_log_init(void) { return; } - if (util_parse_var(envVar, "timestamp:yes", NULL)) { + if (utils_parse_var(envVar, "timestamp:yes", NULL)) { loggerConfig.timestamp = 1; - } else if (util_parse_var(envVar, "timestamp:no", NULL)) { + } else if (utils_parse_var(envVar, "timestamp:no", NULL)) { loggerConfig.timestamp = 0; } - if (util_parse_var(envVar, "pid:yes", NULL)) { + if (utils_parse_var(envVar, "pid:yes", NULL)) { loggerConfig.pid = 1; - } else if (util_parse_var(envVar, "pid:no", NULL)) { + } else if (utils_parse_var(envVar, "pid:no", NULL)) { loggerConfig.pid = 0; } - if (util_parse_var(envVar, "level:debug", NULL)) { + if (utils_parse_var(envVar, "level:debug", NULL)) { loggerConfig.level = LOG_DEBUG; - } else if (util_parse_var(envVar, "level:info", NULL)) { + } else if (utils_parse_var(envVar, "level:info", NULL)) { loggerConfig.level = LOG_INFO; - } else if (util_parse_var(envVar, "level:warning", NULL)) { + } else if 
(utils_parse_var(envVar, "level:warning", NULL)) { loggerConfig.level = LOG_WARNING; - } else if (util_parse_var(envVar, "level:error", NULL)) { + } else if (utils_parse_var(envVar, "level:error", NULL)) { loggerConfig.level = LOG_ERROR; - } else if (util_parse_var(envVar, "level:fatal", NULL)) { + } else if (utils_parse_var(envVar, "level:fatal", NULL)) { loggerConfig.level = LOG_FATAL; } - if (util_parse_var(envVar, "flush:debug", NULL)) { + if (utils_parse_var(envVar, "flush:debug", NULL)) { loggerConfig.flushLevel = LOG_DEBUG; - } else if (util_parse_var(envVar, "flush:info", NULL)) { + } else if (utils_parse_var(envVar, "flush:info", NULL)) { loggerConfig.flushLevel = LOG_INFO; - } else if (util_parse_var(envVar, "flush:warning", NULL)) { + } else if (utils_parse_var(envVar, "flush:warning", NULL)) { loggerConfig.flushLevel = LOG_WARNING; - } else if (util_parse_var(envVar, "flush:error", NULL)) { + } else if (utils_parse_var(envVar, "flush:error", NULL)) { loggerConfig.flushLevel = LOG_ERROR; - } else if (util_parse_var(envVar, "flush:fatal", NULL)) { + } else if (utils_parse_var(envVar, "flush:fatal", NULL)) { loggerConfig.flushLevel = LOG_FATAL; } diff --git a/src/utils/utils_log.h b/src/utils/utils_log.h index 4e9e98690..ab40121ce 100644 --- a/src/utils/utils_log.h +++ b/src/utils/utils_log.h @@ -20,31 +20,31 @@ typedef enum { LOG_WARNING, LOG_ERROR, LOG_FATAL -} util_log_level_t; +} utils_log_level_t; -#define LOG_DEBUG(...) util_log(LOG_DEBUG, __func__, __VA_ARGS__); -#define LOG_INFO(...) util_log(LOG_INFO, __func__, __VA_ARGS__); -#define LOG_WARN(...) util_log(LOG_WARNING, __func__, __VA_ARGS__); -#define LOG_ERR(...) util_log(LOG_ERROR, __func__, __VA_ARGS__); -#define LOG_FATAL(...) util_log(LOG_FATAL, __func__, __VA_ARGS__); +#define LOG_DEBUG(...) utils_log(LOG_DEBUG, __func__, __VA_ARGS__); +#define LOG_INFO(...) utils_log(LOG_INFO, __func__, __VA_ARGS__); +#define LOG_WARN(...) utils_log(LOG_WARNING, __func__, __VA_ARGS__); +#define LOG_ERR(...) 
utils_log(LOG_ERROR, __func__, __VA_ARGS__); +#define LOG_FATAL(...) utils_log(LOG_FATAL, __func__, __VA_ARGS__); -#define LOG_PDEBUG(...) util_plog(LOG_DEBUG, __func__, __VA_ARGS__); -#define LOG_PINFO(...) util_plog(LOG_INFO, __func__, __VA_ARGS__); -#define LOG_PWARN(...) util_plog(LOG_WARNING, __func__, __VA_ARGS__); -#define LOG_PERR(...) util_plog(LOG_ERROR, __func__, __VA_ARGS__); -#define LOG_PFATAL(...) util_plog(LOG_FATAL, __func__, __VA_ARGS__); +#define LOG_PDEBUG(...) utils_plog(LOG_DEBUG, __func__, __VA_ARGS__); +#define LOG_PINFO(...) utils_plog(LOG_INFO, __func__, __VA_ARGS__); +#define LOG_PWARN(...) utils_plog(LOG_WARNING, __func__, __VA_ARGS__); +#define LOG_PERR(...) utils_plog(LOG_ERROR, __func__, __VA_ARGS__); +#define LOG_PFATAL(...) utils_plog(LOG_FATAL, __func__, __VA_ARGS__); -void util_log_init(void); +void utils_log_init(void); #ifdef _WIN32 -void util_log(util_log_level_t level, const char *func, const char *format, - ...); -void util_plog(util_log_level_t level, const char *func, const char *format, +void utils_log(utils_log_level_t level, const char *func, const char *format, ...); +void utils_plog(utils_log_level_t level, const char *func, const char *format, + ...); #else -void util_log(util_log_level_t level, const char *func, const char *format, ...) - __attribute__((format(printf, 3, 4))); -void util_plog(util_log_level_t level, const char *func, const char *format, +void utils_log(utils_log_level_t level, const char *func, const char *format, ...) __attribute__((format(printf, 3, 4))); +void utils_plog(utils_log_level_t level, const char *func, const char *format, + ...) 
__attribute__((format(printf, 3, 4))); #endif #ifdef __cplusplus diff --git a/src/provider/provider_os_memory_macosx.c b/src/utils/utils_macosx_common.c similarity index 51% rename from src/provider/provider_os_memory_macosx.c rename to src/utils/utils_macosx_common.c index 0075ece64..ad1de12fd 100644 --- a/src/provider/provider_os_memory_macosx.c +++ b/src/utils/utils_macosx_common.c @@ -1,19 +1,23 @@ /* + * * Copyright (C) 2023-2024 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -*/ + * + */ #include -#include +#include +#include -#include "provider_os_memory_internal.h" +#include "utils_common.h" #include "utils_log.h" -umf_result_t os_translate_mem_visibility_flag(umf_memory_visibility_t in_flag, - unsigned *out_flag) { +umf_result_t +utils_translate_mem_visibility_flag(umf_memory_visibility_t in_flag, + unsigned *out_flag) { switch (in_flag) { case UMF_MEM_MAP_PRIVATE: *out_flag = MAP_PRIVATE; @@ -24,38 +28,58 @@ umf_result_t os_translate_mem_visibility_flag(umf_memory_visibility_t in_flag, return UMF_RESULT_ERROR_INVALID_ARGUMENT; } +void *utils_mmap_file(void *hint_addr, size_t length, int prot, int flags, + int fd, size_t fd_offset, bool *map_sync) { + (void)hint_addr; // unused + (void)length; // unused + (void)prot; // unused + (void)flags; // unused + (void)fd; // unused + (void)fd_offset; // unused + (void)map_sync; // unused + return NULL; // not supported +} + +int utils_get_file_size(int fd, size_t *size) { + (void)fd; // unused + (void)size; // unused + return -1; // not supported on MacOSX +} + +int utils_set_file_size(int fd, size_t size) { + (void)fd; // unused + (void)size; // unused + return 0; // ignored on MacOSX +} + +int utils_fallocate(int fd, long offset, long len) { + (void)fd; // unused + (void)offset; // unused + (void)len; // unused + + return -1; +} + // create a shared memory file -int os_shm_create(const char *shm_name, size_t 
size) { +int utils_shm_create(const char *shm_name, size_t size) { (void)shm_name; // unused (void)size; // unused return 0; // ignored on MacOSX } // open a shared memory file -int os_shm_open(const char *shm_name) { +int utils_shm_open(const char *shm_name) { (void)shm_name; // unused return 0; // ignored on MacOSX } // unlink a shared memory file -int os_shm_unlink(const char *shm_name) { +int utils_shm_unlink(const char *shm_name) { (void)shm_name; // unused return 0; // ignored on MacOSX } // create an anonymous file descriptor -int os_create_anonymous_fd(void) { +int utils_create_anonymous_fd(void) { return 0; // ignored on MacOSX } - -int os_get_file_size(int fd, size_t *size) { - (void)fd; // unused - (void)size; // unused - return -1; // not supported on MacOSX -} - -int os_set_file_size(int fd, size_t size) { - (void)fd; // unused - (void)size; // unused - return 0; // ignored on MacOSX -} diff --git a/src/utils/utils_posix_common.c b/src/utils/utils_posix_common.c index 51049e613..4a60cbb1f 100644 --- a/src/utils/utils_posix_common.c +++ b/src/utils/utils_posix_common.c @@ -7,15 +7,22 @@ * */ +#include #include +#include +#include #include #include +#include +#include #include +#include #include #include "utils_common.h" #include "utils_concurrency.h" #include "utils_log.h" +#include "utils_sanitizers.h" #ifndef __NR_pidfd_open #define __NR_pidfd_open 434 /* Syscall id */ @@ -24,13 +31,19 @@ #define __NR_pidfd_getfd 438 /* Syscall id */ #endif +// maximum value of the off_t type +#define OFF_T_MAX \ + (sizeof(off_t) == sizeof(long long) \ + ? LLONG_MAX \ + : (sizeof(off_t) == sizeof(long) ? 
LONG_MAX : INT_MAX)) + static UTIL_ONCE_FLAG Page_size_is_initialized = UTIL_ONCE_FLAG_INIT; static size_t Page_size; -static void _util_get_page_size(void) { Page_size = sysconf(_SC_PAGE_SIZE); } +static void _utils_get_page_size(void) { Page_size = sysconf(_SC_PAGE_SIZE); } -size_t util_get_page_size(void) { - util_init_once(&Page_size_is_initialized, _util_get_page_size); +size_t utils_get_page_size(void) { + utils_init_once(&Page_size_is_initialized, _utils_get_page_size); return Page_size; } @@ -51,8 +64,7 @@ int utils_gettid(void) { int utils_close_fd(int fd) { return close(fd); } -#ifndef __APPLE__ -static umf_result_t errno_to_umf_result(int err) { +umf_result_t utils_errno_to_umf_result(int err) { switch (err) { case EBADF: case EINVAL: @@ -70,7 +82,6 @@ static umf_result_t errno_to_umf_result(int err) { return UMF_RESULT_ERROR_UNKNOWN; } } -#endif umf_result_t utils_duplicate_fd(int pid, int fd_in, int *fd_out) { #ifdef __APPLE__ @@ -89,14 +100,14 @@ umf_result_t utils_duplicate_fd(int pid, int fd_in, int *fd_out) { int pid_fd = syscall(__NR_pidfd_open, pid, 0); if (pid_fd == -1) { LOG_PERR("__NR_pidfd_open"); - return errno_to_umf_result(errno); + return utils_errno_to_umf_result(errno); } int fd_dup = syscall(__NR_pidfd_getfd, pid_fd, fd_in, 0); close(pid_fd); if (fd_dup == -1) { LOG_PERR("__NR_pidfd_open"); - return errno_to_umf_result(errno); + return utils_errno_to_umf_result(errno); } *fd_out = fd_dup; @@ -104,3 +115,178 @@ umf_result_t utils_duplicate_fd(int pid, int fd_in, int *fd_out) { return UMF_RESULT_SUCCESS; #endif } + +umf_result_t utils_translate_mem_protection_one_flag(unsigned in_protection, + unsigned *out_protection) { + switch (in_protection) { + case UMF_PROTECTION_NONE: + *out_protection = PROT_NONE; + return UMF_RESULT_SUCCESS; + case UMF_PROTECTION_READ: + *out_protection = PROT_READ; + return UMF_RESULT_SUCCESS; + case UMF_PROTECTION_WRITE: + *out_protection = PROT_WRITE; + return UMF_RESULT_SUCCESS; + case UMF_PROTECTION_EXEC: + 
*out_protection = PROT_EXEC; + return UMF_RESULT_SUCCESS; + } + return UMF_RESULT_ERROR_INVALID_ARGUMENT; +} + +size_t get_max_file_size(void) { return OFF_T_MAX; } + +umf_result_t utils_translate_mem_protection_flags(unsigned in_protection, + unsigned *out_protection) { + // translate protection - combination of 'umf_mem_protection_flags_t' flags + return utils_translate_flags(in_protection, UMF_PROTECTION_MAX, + utils_translate_mem_protection_one_flag, + out_protection); +} + +int utils_translate_purge_advise(umf_purge_advise_t advise) { + switch (advise) { + case UMF_PURGE_LAZY: + return MADV_FREE; + case UMF_PURGE_FORCE: + return MADV_DONTNEED; + default: + return -1; + } +} + +void *utils_mmap(void *hint_addr, size_t length, int prot, int flag, int fd, + size_t fd_offset) { + fd = (fd == 0) ? -1 : fd; + if (fd == -1) { + // MAP_ANONYMOUS - the mapping is not backed by any file + flag |= MAP_ANONYMOUS; + } + + void *ptr = mmap(hint_addr, length, prot, flag, fd, fd_offset); + if (ptr == MAP_FAILED) { + return NULL; + } + // this should be unnecessary but pairs of mmap/munmap do not reset + // asan's user-poisoning flags, leading to invalid error reports + // Bug 81619: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81619 + utils_annotate_memory_defined(ptr, length); + return ptr; +} + +int utils_munmap(void *addr, size_t length) { + // this should be unnecessary but pairs of mmap/munmap do not reset + // asan's user-poisoning flags, leading to invalid error reports + // Bug 81619: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81619 + utils_annotate_memory_defined(addr, length); + return munmap(addr, length); +} + +int utils_purge(void *addr, size_t length, int advice) { + return madvise(addr, length, utils_translate_purge_advise(advice)); +} + +void utils_strerror(int errnum, char *buf, size_t buflen) { +// 'strerror_r' implementation is XSI-compliant (returns 0 on success) +#if (_POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600) && !_GNU_SOURCE + if 
(strerror_r(errnum, buf, buflen)) { +#else // 'strerror_r' implementation is GNU-specific (returns pointer on success) + if (!strerror_r(errnum, buf, buflen)) { +#endif + LOG_PERR("Retrieving error code description failed"); + } +} + +// open a devdax +int utils_devdax_open(const char *path) { + if (path == NULL) { + LOG_ERR("empty path"); + return -1; + } + + if (strstr(path, "/dev/dax") != path) { + LOG_ERR("path of the file \"%s\" does not start with \"/dev/dax\"", + path); + return -1; + } + + int fd = open(path, O_RDWR); + if (fd == -1) { + LOG_PERR("cannot open the file: %s", path); + return -1; + } + + struct stat statbuf; + int ret = stat(path, &statbuf); + if (ret) { + LOG_PERR("stat(%s) failed", path); + close(fd); + return -1; + } + + if (!S_ISCHR(statbuf.st_mode)) { + LOG_ERR("file %s is not a character device", path); + close(fd); + return -1; + } + + return fd; +} + +// open a file +int utils_file_open(const char *path) { + if (!path) { + LOG_ERR("empty path"); + return -1; + } + + int fd = open(path, O_RDWR); + if (fd == -1) { + LOG_PERR("cannot open the file: %s", path); + } + + return fd; +} + +// open a file or create +int utils_file_open_or_create(const char *path) { + if (!path) { + LOG_ERR("empty path"); + return -1; + } + + int fd = open(path, O_RDWR | O_CREAT, 0600); + if (fd == -1) { + LOG_PERR("cannot open/create the file: %s", path); + return -1; + } + + LOG_DEBUG("opened/created the file: %s", path); + + return fd; +} + +// Expected input: +// char *str_threshold = utils_env_var_get_str("UMF_PROXY", "size.threshold="); +long utils_get_size_threshold(char *str_threshold) { + if (!str_threshold) { + return 0; + } + + // move to the beginning of the number + str_threshold += strlen("size.threshold="); + + // check if the first character is a digit + if (!isdigit(str_threshold[0])) { + LOG_ERR("incorrect size threshold, expected numerical value >=0: %s", + str_threshold); + return -1; + } + + size_t threshold = atol(str_threshold); + 
LOG_DEBUG("Size_threshold_value = (char *) %s, (size_t) %zu", str_threshold, + threshold); + + return threshold; +} diff --git a/src/utils/utils_posix_concurrency.c b/src/utils/utils_posix_concurrency.c index 5f2b2569d..fcf04ed95 100644 --- a/src/utils/utils_posix_concurrency.c +++ b/src/utils/utils_posix_concurrency.c @@ -12,28 +12,28 @@ #include "utils_concurrency.h" -size_t util_mutex_get_size(void) { return sizeof(pthread_mutex_t); } +size_t utils_mutex_get_size(void) { return sizeof(pthread_mutex_t); } -os_mutex_t *util_mutex_init(void *ptr) { +utils_mutex_t *utils_mutex_init(void *ptr) { pthread_mutex_t *mutex = (pthread_mutex_t *)ptr; int ret = pthread_mutex_init(mutex, NULL); - return ret == 0 ? ((os_mutex_t *)mutex) : NULL; + return ret == 0 ? ((utils_mutex_t *)mutex) : NULL; } -void util_mutex_destroy_not_free(os_mutex_t *m) { +void utils_mutex_destroy_not_free(utils_mutex_t *m) { pthread_mutex_t *mutex = (pthread_mutex_t *)m; int ret = pthread_mutex_destroy(mutex); (void)ret; // TODO: add logging } -int util_mutex_lock(os_mutex_t *m) { +int utils_mutex_lock(utils_mutex_t *m) { return pthread_mutex_lock((pthread_mutex_t *)m); } -int util_mutex_unlock(os_mutex_t *m) { +int utils_mutex_unlock(utils_mutex_t *m) { return pthread_mutex_unlock((pthread_mutex_t *)m); } -void util_init_once(UTIL_ONCE_FLAG *flag, void (*oneCb)(void)) { +void utils_init_once(UTIL_ONCE_FLAG *flag, void (*oneCb)(void)) { pthread_once(flag, oneCb); } diff --git a/src/utils/utils_windows_common.c b/src/utils/utils_windows_common.c index 9358891ad..b6c5b0b4e 100644 --- a/src/utils/utils_windows_common.c +++ b/src/utils/utils_windows_common.c @@ -9,25 +9,30 @@ #include +#include #include #include +#include +#include +#include #include "utils_common.h" #include "utils_concurrency.h" +#include "utils_log.h" #define BUFFER_SIZE 1024 static UTIL_ONCE_FLAG Page_size_is_initialized = UTIL_ONCE_FLAG_INIT; static size_t Page_size; -static void _util_get_page_size(void) { +static void 
_utils_get_page_size(void) { SYSTEM_INFO SystemInfo; GetSystemInfo(&SystemInfo); Page_size = SystemInfo.dwPageSize; } -size_t util_get_page_size(void) { - util_init_once(&Page_size_is_initialized, _util_get_page_size); +size_t utils_get_page_size(void) { + utils_init_once(&Page_size_is_initialized, _utils_get_page_size); return Page_size; } @@ -40,9 +45,190 @@ int utils_close_fd(int fd) { return -1; } +umf_result_t utils_errno_to_umf_result(int err) { + (void)err; // unused + return UMF_RESULT_ERROR_NOT_SUPPORTED; +} + umf_result_t utils_duplicate_fd(int pid, int fd_in, int *fd_out) { (void)pid; // unused (void)fd_in; // unused (void)fd_out; // unused return UMF_RESULT_ERROR_NOT_SUPPORTED; } + +umf_result_t utils_translate_mem_protection_flags(unsigned in_protection, + unsigned *out_protection) { + switch (in_protection) { + case UMF_PROTECTION_NONE: + *out_protection = PAGE_NOACCESS; + return UMF_RESULT_SUCCESS; + case UMF_PROTECTION_EXEC: + *out_protection = PAGE_EXECUTE; + return UMF_RESULT_SUCCESS; + case (UMF_PROTECTION_EXEC | UMF_PROTECTION_READ): + *out_protection = PAGE_EXECUTE_READ; + return UMF_RESULT_SUCCESS; + case (UMF_PROTECTION_EXEC | UMF_PROTECTION_READ | UMF_PROTECTION_WRITE): + *out_protection = PAGE_EXECUTE_READWRITE; + return UMF_RESULT_SUCCESS; + case (UMF_PROTECTION_EXEC | UMF_PROTECTION_WRITE): + *out_protection = PAGE_EXECUTE_WRITECOPY; + return UMF_RESULT_SUCCESS; + case UMF_PROTECTION_READ: + *out_protection = PAGE_READONLY; + return UMF_RESULT_SUCCESS; + case (UMF_PROTECTION_READ | UMF_PROTECTION_WRITE): + *out_protection = PAGE_READWRITE; + return UMF_RESULT_SUCCESS; + case UMF_PROTECTION_WRITE: + *out_protection = PAGE_WRITECOPY; + return UMF_RESULT_SUCCESS; + } + LOG_ERR( + "utils_translate_mem_protection_flags(): unsupported protection flag: " + "%u", + in_protection); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; +} + +int utils_translate_purge_advise(umf_purge_advise_t advise) { + (void)advise; // unused + return -1; +} + 
+umf_result_t +utils_translate_mem_visibility_flag(umf_memory_visibility_t in_flag, + unsigned *out_flag) { + switch (in_flag) { + case UMF_MEM_MAP_PRIVATE: + *out_flag = 0; // ignored on Windows + return UMF_RESULT_SUCCESS; + case UMF_MEM_MAP_SHARED: + return UMF_RESULT_ERROR_NOT_SUPPORTED; // not supported on Windows yet + } + return UMF_RESULT_ERROR_INVALID_ARGUMENT; +} + +// create a shared memory file +int utils_shm_create(const char *shm_name, size_t size) { + (void)shm_name; // unused + (void)size; // unused + return 0; // ignored on Windows +} + +// open a shared memory file +int utils_shm_open(const char *shm_name) { + (void)shm_name; // unused + return 0; // ignored on Windows +} + +// unlink a shared memory file +int utils_shm_unlink(const char *shm_name) { + (void)shm_name; // unused + return 0; // ignored on Windows +} + +int utils_create_anonymous_fd(void) { + return 0; // ignored on Windows +} + +size_t get_max_file_size(void) { return SIZE_MAX; } + +int utils_get_file_size(int fd, size_t *size) { + (void)fd; // unused + (void)size; // unused + return -1; // not supported on Windows +} + +int utils_set_file_size(int fd, size_t size) { + (void)fd; // unused + (void)size; // unused + return 0; // ignored on Windows +} + +void *utils_mmap(void *hint_addr, size_t length, int prot, int flag, int fd, + size_t fd_offset) { + (void)flag; // ignored on Windows + (void)fd; // ignored on Windows + (void)fd_offset; // ignored on Windows + return VirtualAlloc(hint_addr, length, MEM_RESERVE | MEM_COMMIT, prot); +} + +void *utils_mmap_file(void *hint_addr, size_t length, int prot, int flags, + int fd, size_t fd_offset, bool *map_sync) { + (void)hint_addr; // unused + (void)length; // unused + (void)prot; // unused + (void)flags; // unused + (void)fd; // unused + (void)fd_offset; // unused + (void)map_sync; // unused + return NULL; // not supported +} + +int utils_munmap(void *addr, size_t length) { + // If VirtualFree() succeeds, the return value is nonzero. 
+ // If VirtualFree() fails, the return value is 0 (zero). + (void)length; // unused + return (VirtualFree(addr, 0, MEM_RELEASE) == 0); +} + +int utils_purge(void *addr, size_t length, int advice) { + // If VirtualFree() succeeds, the return value is nonzero. + // If VirtualFree() fails, the return value is 0 (zero). + (void)advice; // unused + + // temporarily disable the C6250 warning as we intentionally use the + // MEM_DECOMMIT flag only +#if defined(_MSC_VER) +#pragma warning(push) +#pragma warning(disable : 6250) +#endif // _MSC_VER + + return (VirtualFree(addr, length, MEM_DECOMMIT) == 0); + +#if defined(_MSC_VER) +#pragma warning(pop) +#endif // _MSC_VER +} + +void utils_strerror(int errnum, char *buf, size_t buflen) { + strerror_s(buf, buflen, errnum); +} + +// open a devdax +int utils_devdax_open(const char *path) { + (void)path; // unused + + return -1; +} + +// open a file +int utils_file_open(const char *path) { + (void)path; // unused + + return -1; +} + +// open a file or create +int utils_file_open_or_create(const char *path) { + (void)path; // unused + + return -1; +} + +int utils_fallocate(int fd, long offset, long len) { + (void)fd; // unused + (void)offset; // unused + (void)len; // unused + + return -1; +} + +// Expected input: +// char *str_threshold = utils_env_var_get_str("UMF_PROXY", "size.threshold="); +long utils_get_size_threshold(char *str_threshold) { + (void)str_threshold; // unused + return 0; +} diff --git a/src/utils/utils_windows_concurrency.c b/src/utils/utils_windows_concurrency.c index 50bc71c66..696f4523b 100644 --- a/src/utils/utils_windows_concurrency.c +++ b/src/utils/utils_windows_concurrency.c @@ -9,21 +9,21 @@ #include "utils_concurrency.h" -size_t util_mutex_get_size(void) { return sizeof(os_mutex_t); } +size_t utils_mutex_get_size(void) { return sizeof(utils_mutex_t); } -os_mutex_t *util_mutex_init(void *ptr) { - os_mutex_t *mutex_internal = (os_mutex_t *)ptr; +utils_mutex_t *utils_mutex_init(void *ptr) { + 
utils_mutex_t *mutex_internal = (utils_mutex_t *)ptr; InitializeCriticalSection(&mutex_internal->lock); - return (os_mutex_t *)mutex_internal; + return (utils_mutex_t *)mutex_internal; } -void util_mutex_destroy_not_free(os_mutex_t *mutex) { - os_mutex_t *mutex_internal = (os_mutex_t *)mutex; +void utils_mutex_destroy_not_free(utils_mutex_t *mutex) { + utils_mutex_t *mutex_internal = (utils_mutex_t *)mutex; DeleteCriticalSection(&mutex_internal->lock); } -int util_mutex_lock(os_mutex_t *mutex) { - os_mutex_t *mutex_internal = (os_mutex_t *)mutex; +int utils_mutex_lock(utils_mutex_t *mutex) { + utils_mutex_t *mutex_internal = (utils_mutex_t *)mutex; EnterCriticalSection(&mutex_internal->lock); if (mutex_internal->lock.RecursionCount > 1) { @@ -34,8 +34,8 @@ int util_mutex_lock(os_mutex_t *mutex) { return 0; } -int util_mutex_unlock(os_mutex_t *mutex) { - os_mutex_t *mutex_internal = (os_mutex_t *)mutex; +int utils_mutex_unlock(utils_mutex_t *mutex) { + utils_mutex_t *mutex_internal = (utils_mutex_t *)mutex; LeaveCriticalSection(&mutex_internal->lock); return 0; } @@ -50,6 +50,6 @@ static BOOL CALLBACK initOnceCb(PINIT_ONCE InitOnce, PVOID Parameter, return TRUE; } -void util_init_once(UTIL_ONCE_FLAG *flag, void (*onceCb)(void)) { +void utils_init_once(UTIL_ONCE_FLAG *flag, void (*onceCb)(void)) { InitOnceExecuteOnce(flag, initOnceCb, (void *)onceCb, NULL); } diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 9af694489..c8b854ba5 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -9,7 +9,7 @@ include(FetchContent) FetchContent_Declare( googletest GIT_REPOSITORY https://github.com/google/googletest.git - GIT_TAG release-1.12.1) + GIT_TAG v1.15.2) # For Windows: Prevent overriding the parent project's compiler/linker settings set(gtest_force_shared_crt @@ -22,6 +22,7 @@ FetchContent_MakeAvailable(googletest) enable_testing() set(UMF_TEST_DIR ${CMAKE_CURRENT_SOURCE_DIR}) +set(UMF_UTILS_DIR ${UMF_CMAKE_SOURCE_DIR}/src/utils) function(build_umf_test) 
# Parameters: * NAME - a name of the test * SRCS - source files * LIBS - @@ -44,6 +45,10 @@ function(build_umf_test) set(LIB_DIRS ${LIB_DIRS} ${JEMALLOC_LIBRARY_DIRS}) endif() + if(UMF_BUILD_CUDA_PROVIDER) + set(LIB_DIRS ${LIB_DIRS} ${CUDA_LIBRARY_DIRS}) + endif() + set(TEST_LIBS umf_test_common ${ARG_LIBS} @@ -56,13 +61,26 @@ function(build_umf_test) SRCS ${ARG_SRCS} LIBS ${TEST_LIBS}) + if(UMF_POOL_JEMALLOC_ENABLED) + target_compile_definitions(${TEST_TARGET_NAME} + PRIVATE UMF_POOL_JEMALLOC_ENABLED=1) + endif() + + if(UMF_POOL_SCALABLE_ENABLED) + target_compile_definitions(${TEST_TARGET_NAME} + PRIVATE UMF_POOL_SCALABLE_ENABLED=1) + endif() + if(NOT MSVC) # Suppress 'cast discards const qualifier' warnings. Parametrized GTEST # tests retrieve arguments using 'GetParam()', which applies a 'const' # qualifier often discarded in the test scenarios. target_compile_options(${TEST_TARGET_NAME} PRIVATE -Wno-cast-qual) - endif() + if(UMF_DEVELOPER_MODE) + target_compile_options(${TEST_TARGET_NAME} PRIVATE -Werror) + endif() + endif() target_link_directories(${TEST_TARGET_NAME} PRIVATE ${LIB_DIRS}) target_include_directories( @@ -114,31 +132,70 @@ endfunction() add_subdirectory(common) -add_umf_test(NAME base SRCS base.cpp) -add_umf_test(NAME memoryPool SRCS memoryPoolAPI.cpp malloc_compliance_tests.cpp) -add_umf_test(NAME memoryProvider SRCS memoryProviderAPI.cpp) - if(UMF_BUILD_SHARED_LIBRARY) # if build as shared library, utils symbols won't be visible in tests set(UMF_UTILS_FOR_TEST umf_utils) if(LINUX OR MACOSX) set(UMF_UTILS_SOURCES - ../src/utils/utils_common.c ../src/utils/utils_posix_common.c - ../src/utils/utils_posix_concurrency.c) + ${UMF_UTILS_DIR}/utils_common.c + ${UMF_UTILS_DIR}/utils_posix_common.c + ${UMF_UTILS_DIR}/utils_posix_concurrency.c) + if(LINUX) + set(UMF_UTILS_SOURCES ${UMF_UTILS_SOURCES} + ${UMF_UTILS_DIR}/utils_linux_common.c) + set(UMF_LOGGER_LIBS rt) # librt for shm_open() + elseif(MACOSX) + set(UMF_UTILS_SOURCES ${UMF_UTILS_SOURCES} + 
${UMF_UTILS_DIR}/utils_macosx_common.c) + endif() elseif(WINDOWS) set(UMF_UTILS_SOURCES - ../src/utils/utils_common.c ../src/utils/utils_windows_common.c - ../src/utils/utils_windows_concurrency.c) + ${UMF_UTILS_DIR}/utils_common.c + ${UMF_UTILS_DIR}/utils_windows_common.c + ${UMF_UTILS_DIR}/utils_windows_concurrency.c) endif() endif() -add_umf_test(NAME logger SRCS utils/utils_log.cpp ${UMF_UTILS_SOURCES}) +if(UMF_POOL_JEMALLOC_ENABLED) + set(LIB_JEMALLOC_POOL jemalloc_pool) +endif() + +if(UMF_BUILD_SHARED_LIBRARY) + # if build as shared library, ba symbols won't be visible in tests + set(BA_SOURCES_FOR_TEST ${BA_SOURCES}) +endif() + +add_umf_test(NAME base SRCS base.cpp) +add_umf_test( + NAME memoryPool + SRCS memoryPoolAPI.cpp malloc_compliance_tests.cpp ${BA_SOURCES_FOR_TEST} + LIBS ${UMF_UTILS_FOR_TEST}) +add_umf_test( + NAME memoryProvider + SRCS memoryProviderAPI.cpp ${BA_SOURCES_FOR_TEST} + LIBS ${UMF_UTILS_FOR_TEST}) +add_umf_test( + NAME logger + SRCS utils/utils_log.cpp ${UMF_UTILS_SOURCES} + LIBS ${UMF_LOGGER_LIBS}) add_umf_test( NAME utils_common SRCS utils/utils.cpp LIBS ${UMF_UTILS_FOR_TEST}) +if(LINUX) + add_umf_test( + NAME utils_linux_common + SRCS utils/utils_linux.cpp + LIBS ${UMF_UTILS_FOR_TEST}) +endif() + +add_umf_test( + NAME provider_coarse + SRCS provider_coarse.cpp ${BA_SOURCES_FOR_TEST} + LIBS ${UMF_UTILS_FOR_TEST}) + if(UMF_BUILD_LIBUMF_POOL_DISJOINT) add_umf_test( NAME disjointPool @@ -148,10 +205,14 @@ if(UMF_BUILD_LIBUMF_POOL_DISJOINT) NAME c_api_disjoint_pool SRCS c_api/disjoint_pool.c LIBS disjoint_pool) + add_umf_test( + NAME disjointCoarseMallocPool + SRCS disjointCoarseMallocPool.cpp + LIBS disjoint_pool) endif() if(UMF_BUILD_LIBUMF_POOL_DISJOINT - AND UMF_BUILD_LIBUMF_POOL_JEMALLOC + AND UMF_POOL_JEMALLOC_ENABLED AND UMF_POOL_SCALABLE_ENABLED AND (NOT UMF_DISABLE_HWLOC)) add_umf_test( @@ -160,7 +221,7 @@ if(UMF_BUILD_LIBUMF_POOL_DISJOINT LIBS disjoint_pool jemalloc_pool ${JEMALLOC_LIBRARIES}) endif() 
-if(UMF_BUILD_LIBUMF_POOL_JEMALLOC AND (NOT UMF_DISABLE_HWLOC)) +if(UMF_POOL_JEMALLOC_ENABLED AND (NOT UMF_DISABLE_HWLOC)) add_umf_test( NAME jemalloc_pool SRCS pools/jemalloc_pool.cpp malloc_compliance_tests.cpp @@ -168,13 +229,15 @@ if(UMF_BUILD_LIBUMF_POOL_JEMALLOC AND (NOT UMF_DISABLE_HWLOC)) endif() if(UMF_POOL_SCALABLE_ENABLED AND (NOT UMF_DISABLE_HWLOC)) - add_umf_test(NAME scalable_pool SRCS pools/scalable_pool.cpp - malloc_compliance_tests.cpp) + add_umf_test( + NAME scalable_pool + SRCS pools/scalable_pool.cpp malloc_compliance_tests.cpp + ${BA_SOURCES_FOR_TEST} + LIBS ${UMF_UTILS_FOR_TEST}) endif() if(LINUX AND (NOT UMF_DISABLE_HWLOC)) # OS-specific functions are implemented - # only for - # Linux now + # only for Linux now if(PkgConfig_FOUND) pkg_check_modules(LIBNUMA numa) endif() @@ -184,8 +247,14 @@ if(LINUX AND (NOT UMF_DISABLE_HWLOC)) # OS-specific functions are implemented add_umf_test( NAME provider_os_memory - SRCS provider_os_memory.cpp - LIBS ${UMF_UTILS_FOR_TEST}) + SRCS provider_os_memory.cpp ${BA_SOURCES_FOR_TEST} + LIBS ${UMF_UTILS_FOR_TEST} ${LIB_JEMALLOC_POOL}) + if(UMF_BUILD_LIBUMF_POOL_DISJOINT) + target_compile_definitions(umf_test-provider_os_memory + PRIVATE UMF_POOL_DISJOINT_ENABLED=1) + target_link_libraries(umf_test-provider_os_memory PRIVATE disjoint_pool) + endif() + add_umf_test( NAME provider_os_memory_multiple_numa_nodes SRCS provider_os_memory_multiple_numa_nodes.cpp @@ -218,9 +287,76 @@ if(LINUX AND (NOT UMF_DISABLE_HWLOC)) # OS-specific functions are implemented NAME mempolicy SRCS memspaces/mempolicy.cpp LIBS ${LIBNUMA_LIBRARIES}) + add_umf_test( + NAME memspace + SRCS memspaces/memspace.cpp + LIBS ${LIBNUMA_LIBRARIES}) + add_umf_test( + NAME memtarget + SRCS memspaces/memtarget.cpp + LIBS ${LIBNUMA_LIBRARIES} ${LIBHWLOC_LIBRARIES}) + add_umf_test( + NAME provider_devdax_memory + SRCS provider_devdax_memory.cpp + LIBS ${UMF_UTILS_FOR_TEST}) + add_umf_test( + NAME provider_devdax_memory_ipc + SRCS 
provider_devdax_memory_ipc.cpp ${BA_SOURCES_FOR_TEST} + LIBS ${UMF_UTILS_FOR_TEST} ${LIB_JEMALLOC_POOL}) + add_umf_test( + NAME provider_file_memory + SRCS provider_file_memory.cpp + LIBS ${UMF_UTILS_FOR_TEST}) + add_umf_test( + NAME provider_file_memory_ipc + SRCS provider_file_memory_ipc.cpp ${BA_SOURCES_FOR_TEST} + LIBS ${UMF_UTILS_FOR_TEST} ${LIB_JEMALLOC_POOL}) + + # This test requires Linux-only file memory provider + if(UMF_POOL_JEMALLOC_ENABLED) + add_umf_test( + NAME jemalloc_coarse_file + SRCS pools/jemalloc_coarse_file.cpp malloc_compliance_tests.cpp + LIBS jemalloc_pool) + add_umf_test( + NAME jemalloc_coarse_devdax + SRCS pools/jemalloc_coarse_devdax.cpp malloc_compliance_tests.cpp + LIBS jemalloc_pool) + endif() + + # This test requires Linux-only file memory provider + if(UMF_POOL_SCALABLE_ENABLED) + add_umf_test( + NAME scalable_coarse_file + SRCS pools/scalable_coarse_file.cpp malloc_compliance_tests.cpp + ${BA_SOURCES_FOR_TEST} + LIBS ${UMF_UTILS_FOR_TEST}) + add_umf_test( + NAME scalable_coarse_devdax + SRCS pools/scalable_coarse_devdax.cpp malloc_compliance_tests.cpp + ${BA_SOURCES_FOR_TEST} + LIBS ${UMF_UTILS_FOR_TEST}) + endif() + if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND UMF_BUILD_FUZZTESTS) add_subdirectory(fuzz) endif() +else() + add_umf_test( + NAME provider_file_memory_not_impl + SRCS provider_file_memory_not_impl.cpp + LIBS ${UMF_UTILS_FOR_TEST}) + add_umf_test( + NAME provider_devdax_memory_not_impl + SRCS provider_devdax_memory_not_impl.cpp + LIBS ${UMF_UTILS_FOR_TEST}) +endif() + +if(UMF_DISABLE_HWLOC) + add_umf_test( + NAME provider_os_memory_not_impl + SRCS provider_os_memory_not_impl.cpp + LIBS ${UMF_UTILS_FOR_TEST}) endif() if(UMF_BUILD_GPU_TESTS AND UMF_BUILD_LEVEL_ZERO_PROVIDER) @@ -229,14 +365,16 @@ if(UMF_BUILD_GPU_TESTS AND UMF_BUILD_LEVEL_ZERO_PROVIDER) # dlopen) add_umf_test( NAME provider_level_zero - SRCS providers/provider_level_zero.cpp providers/level_zero_helpers.cpp + SRCS providers/provider_level_zero.cpp + 
${UMF_UTILS_DIR}/utils_level_zero.cpp ${BA_SOURCES_FOR_TEST} LIBS ${UMF_UTILS_FOR_TEST} ze_loader) target_include_directories(umf_test-provider_level_zero PRIVATE ${LEVEL_ZERO_INCLUDE_DIRS}) add_umf_test( NAME provider_level_zero_dlopen - SRCS providers/provider_level_zero.cpp providers/level_zero_helpers.cpp + SRCS providers/provider_level_zero.cpp + ${UMF_UTILS_DIR}/utils_level_zero.cpp ${BA_SOURCES_FOR_TEST} LIBS ${UMF_UTILS_FOR_TEST}) target_compile_definitions(umf_test-provider_level_zero_dlopen PUBLIC USE_DLOPEN=1) @@ -244,9 +382,50 @@ if(UMF_BUILD_GPU_TESTS AND UMF_BUILD_LEVEL_ZERO_PROVIDER) PRIVATE ${LEVEL_ZERO_INCLUDE_DIRS}) endif() -if(UMF_BUILD_SHARED_LIBRARY) - # if build as shared library, ba symbols won't be visible in tests - set(BA_SOURCES_FOR_TEST ${BA_SOURCES}) +if(NOT UMF_BUILD_LEVEL_ZERO_PROVIDER) + add_umf_test( + NAME provider_level_zero_not_impl + SRCS providers/provider_level_zero_not_impl.cpp + LIBS ${UMF_UTILS_FOR_TEST}) +endif() + +if(UMF_BUILD_GPU_TESTS AND UMF_BUILD_CUDA_PROVIDER) + if(UMF_CUDA_ENABLED) + # we have two test binaries here that use the same sources, but differ + # in the way they are linked to the CUDA (statically or at runtime using + # dlopen) + add_umf_test( + NAME provider_cuda + SRCS providers/provider_cuda.cpp providers/cuda_helpers.cpp + ${BA_SOURCES_FOR_TEST} + LIBS ${UMF_UTILS_FOR_TEST} cuda) + target_include_directories(umf_test-provider_cuda + PRIVATE ${CUDA_INCLUDE_DIRS}) + target_link_directories(umf_test-provider_cuda PRIVATE + ${CUDA_LIBRARY_DIRS}) + + add_umf_test( + NAME provider_cuda_dlopen + SRCS providers/provider_cuda.cpp providers/cuda_helpers.cpp + ${BA_SOURCES_FOR_TEST} + LIBS ${UMF_UTILS_FOR_TEST}) + target_compile_definitions(umf_test-provider_cuda_dlopen + PUBLIC USE_DLOPEN=1) + target_include_directories(umf_test-provider_cuda_dlopen + PRIVATE ${CUDA_INCLUDE_DIRS}) + else() + message( + STATUS + "CUDA provdier tests requires CUDA libraries to be installed and added to the default library search 
path - skipping" + ) + endif() +endif() + +if(NOT UMF_BUILD_CUDA_PROVIDER) + add_umf_test( + NAME provider_cuda_not_impl + SRCS providers/provider_cuda_not_impl.cpp + LIBS ${UMF_UTILS_FOR_TEST}) endif() add_umf_test( @@ -265,15 +444,22 @@ add_umf_test( LIBS ${UMF_UTILS_FOR_TEST}) # tests for the proxy library -if(UMF_PROXY_LIB_ENABLED - AND UMF_BUILD_SHARED_LIBRARY - AND NOT UMF_DISABLE_HWLOC - AND NOT UMF_LINK_HWLOC_STATICALLY) +if(UMF_PROXY_LIB_ENABLED AND UMF_BUILD_SHARED_LIBRARY) add_umf_test( NAME proxy_lib_basic SRCS ${BA_SOURCES_FOR_TEST} test_proxy_lib.cpp LIBS ${UMF_UTILS_FOR_TEST} umf_proxy) + # TODO enable this test on Windows + if(LINUX) + add_umf_test( + NAME test_proxy_lib_size_threshold + SRCS ${BA_SOURCES_FOR_TEST} test_proxy_lib_size_threshold.cpp + LIBS ${UMF_UTILS_FOR_TEST} umf_proxy) + set_property(TEST umf-test_proxy_lib_size_threshold + PROPERTY ENVIRONMENT UMF_PROXY="size.threshold=64") + endif() + # the memoryPool test run with the proxy library add_umf_test( NAME proxy_lib_memoryPool @@ -284,7 +470,12 @@ if(UMF_PROXY_LIB_ENABLED PUBLIC UMF_PROXY_LIB_ENABLED=1) endif() -add_umf_test(NAME ipc SRCS ipcAPI.cpp) +add_umf_test( + NAME ipc + SRCS ipcAPI.cpp ${BA_SOURCES_FOR_TEST} + LIBS ${UMF_UTILS_FOR_TEST}) + +add_umf_test(NAME ipc_negative SRCS ipc_negative.cpp) function(add_umf_ipc_test) # Parameters: * TEST - a name of the test * SRC_DIR - source files directory @@ -313,13 +504,14 @@ function(add_umf_ipc_test) WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) set_tests_properties(${TEST_NAME} PROPERTIES LABELS "umf") + set_tests_properties(${TEST_NAME} PROPERTIES TIMEOUT 60) if(NOT UMF_TESTS_FAIL_ON_SKIP) set_tests_properties(${TEST_NAME} PROPERTIES SKIP_RETURN_CODE 125) endif() endfunction() if(LINUX) - if(NOT UMF_DISABLE_HWLOC) + if(NOT UMF_DISABLE_HWLOC AND UMF_POOL_SCALABLE_ENABLED) build_umf_test( NAME ipc_os_prov_consumer @@ -336,8 +528,58 @@ if(LINUX) common/ipc_os_prov_common.c) add_umf_ipc_test(TEST ipc_os_prov_anon_fd) 
add_umf_ipc_test(TEST ipc_os_prov_shm) + + if(UMF_PROXY_LIB_ENABLED AND UMF_BUILD_SHARED_LIBRARY) + build_umf_test( + NAME + ipc_os_prov_proxy + SRCS + ipc_os_prov_proxy.c + common/ipc_common.c + LIBS + ${UMF_UTILS_FOR_TEST}) + add_umf_ipc_test(TEST ipc_os_prov_proxy) + endif() + + build_umf_test( + NAME + ipc_devdax_prov_consumer + SRCS + ipc_devdax_prov_consumer.c + common/ipc_common.c + common/ipc_os_prov_common.c) + build_umf_test( + NAME + ipc_devdax_prov_producer + SRCS + ipc_devdax_prov_producer.c + common/ipc_common.c + common/ipc_os_prov_common.c) + add_umf_ipc_test(TEST ipc_devdax_prov) + + build_umf_test( + NAME + ipc_file_prov_consumer + SRCS + ipc_file_prov_consumer.c + common/ipc_common.c + common/ipc_os_prov_common.c) + build_umf_test( + NAME + ipc_file_prov_producer + SRCS + ipc_file_prov_producer.c + common/ipc_common.c + common/ipc_os_prov_common.c) + add_umf_ipc_test(TEST ipc_file_prov) + add_umf_ipc_test(TEST ipc_file_prov_fsdax) endif() - if(UMF_BUILD_GPU_TESTS AND UMF_BUILD_LEVEL_ZERO_PROVIDER) + + # TODO add IPC tests for CUDA + + if(UMF_BUILD_GPU_TESTS + AND UMF_BUILD_LEVEL_ZERO_PROVIDER + AND UMF_BUILD_LIBUMF_POOL_DISJOINT) build_umf_test( NAME ipc_level_zero_prov_consumer @@ -345,9 +587,10 @@ if(LINUX) providers/ipc_level_zero_prov_consumer.c common/ipc_common.c providers/ipc_level_zero_prov_common.c - providers/level_zero_helpers.cpp + ${UMF_UTILS_DIR}/utils_level_zero.cpp LIBS ze_loader + disjoint_pool ${UMF_UTILS_FOR_TEST}) build_umf_test( NAME @@ -356,9 +599,10 @@ if(LINUX) providers/ipc_level_zero_prov_producer.c common/ipc_common.c providers/ipc_level_zero_prov_common.c - providers/level_zero_helpers.cpp + ${UMF_UTILS_DIR}/utils_level_zero.cpp LIBS ze_loader + disjoint_pool ${UMF_UTILS_FOR_TEST}) target_include_directories(umf_test-ipc_level_zero_prov_producer PRIVATE ${LEVEL_ZERO_INCLUDE_DIRS}) @@ -366,6 +610,40 @@ if(LINUX) PRIVATE ${LEVEL_ZERO_INCLUDE_DIRS}) add_umf_ipc_test(TEST ipc_level_zero_prov SRC_DIR providers) endif() + + 
if(UMF_BUILD_GPU_TESTS + AND UMF_BUILD_CUDA_PROVIDER + AND UMF_BUILD_LIBUMF_POOL_DISJOINT) + build_umf_test( + NAME + ipc_cuda_prov_consumer + SRCS + providers/ipc_cuda_prov_consumer.c + common/ipc_common.c + providers/ipc_cuda_prov_common.c + providers/cuda_helpers.cpp + LIBS + cuda + disjoint_pool + ${UMF_UTILS_FOR_TEST}) + build_umf_test( + NAME + ipc_cuda_prov_producer + SRCS + providers/ipc_cuda_prov_producer.c + common/ipc_common.c + providers/ipc_cuda_prov_common.c + providers/cuda_helpers.cpp + LIBS + cuda + disjoint_pool + ${UMF_UTILS_FOR_TEST}) + target_include_directories(umf_test-ipc_cuda_prov_producer + PRIVATE ${CUDA_INCLUDE_DIRS}) + target_include_directories(umf_test-ipc_cuda_prov_consumer + PRIVATE ${CUDA_INCLUDE_DIRS}) + add_umf_ipc_test(TEST ipc_cuda_prov SRC_DIR providers) + endif() else() message(STATUS "IPC tests are supported on Linux only - skipping") endif() @@ -393,29 +671,55 @@ if(LINUX (UMF_USE_ASAN OR UMF_USE_UBSAN OR UMF_USE_TSAN - OR UMF_USE_MSAN)) + OR UMF_USE_MSAN + OR UMF_USE_COVERAGE)) + set(EXAMPLES "") + if(UMF_POOL_SCALABLE_ENABLED) - set(EXAMPLES ${EXAMPLES} basic) + set(EXAMPLES ${EXAMPLES} basic custom_file_provider) + else() + message( + STATUS + "The basic and custom_file_provider examples require TBB to be installed and added to the default library search path - skipping" + ) + endif() + + if(LIBNUMA_LIBRARIES) + set(EXAMPLES ${EXAMPLES} memspace_hmat memspace_numa) else() message( STATUS - "The basic example requires TBB to be installed and added to the default library search path - skipping" + "The memspace_hmat and memspace_numa examples require libnuma to be installed and added to the default library search path - skipping" ) endif() if(UMF_BUILD_GPU_EXAMPLES AND UMF_BUILD_LIBUMF_POOL_DISJOINT AND UMF_BUILD_LEVEL_ZERO_PROVIDER) - set(EXAMPLES ${EXAMPLES} gpu_shared_memory) + set(EXAMPLES ${EXAMPLES} level_zero_shared_memory) else() message( STATUS - "GPU shared memory example requires UMF_BUILD_GPU_EXAMPLES, " + "GPU 
level zero shared memory example requires UMF_BUILD_GPU_EXAMPLES, " "UMF_BUILD_LEVEL_ZERO_PROVIDER and UMF_BUILD_LIBUMF_POOL_DISJOINT " "to be turned ON - skipping") endif() + if(UMF_BUILD_GPU_EXAMPLES + AND UMF_BUILD_LIBUMF_POOL_DISJOINT + AND UMF_BUILD_CUDA_PROVIDER + AND UMF_CUDA_ENABLED) + set(EXAMPLES ${EXAMPLES} cuda_shared_memory) + else() + message( + STATUS + "GPU CUDA shared memory example requires UMF_BUILD_GPU_EXAMPLES, " + "UMF_BUILD_CUDA_PROVIDER, UMF_BUILD_LIBUMF_POOL_DISJOINT " + "to be turned ON and installed CUDA libraries - skipping") + endif() + + # TODO add IPC examples for CUDA if(UMF_BUILD_GPU_EXAMPLES AND UMF_BUILD_LIBUMF_POOL_DISJOINT AND UMF_BUILD_LEVEL_ZERO_PROVIDER) @@ -427,7 +731,7 @@ if(LINUX ) endif() - if(LINUX AND UMF_POOL_SCALABLE_ENABLED) + if(UMF_POOL_SCALABLE_ENABLED) set(EXAMPLES ${EXAMPLES} ipc_ipcapi) else() message( @@ -436,13 +740,27 @@ if(LINUX ) endif() - if(NOT UMF_DISABLE_HWLOC) + if(UMF_POOL_JEMALLOC_ENABLED) + set(EXAMPLES ${EXAMPLES} dram_and_fsdax) + else() + message( + STATUS + "The dram_and_fsdax example is supported on Linux only and requires UMF_BUILD_LIBUMF_POOL_JEMALLOC to be turned ON - skipping" + ) + endif() + + if(EXAMPLES AND NOT UMF_DISABLE_HWLOC) + set(STANDALONE_CMAKE_OPTIONS + "-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}" + ) add_test( - NAME umf_standalone_examples + NAME umf-standalone_examples COMMAND ${UMF_CMAKE_SOURCE_DIR}/test/test_examples.sh ${UMF_CMAKE_SOURCE_DIR} ${CMAKE_BINARY_DIR} - ${CMAKE_INSTALL_PREFIX} ${EXAMPLES} + ${CMAKE_CURRENT_BINARY_DIR}/umf_standalone_examples/install-dir + "${CMAKE_INSTALL_PREFIX}" "${STANDALONE_CMAKE_OPTIONS}" + ${EXAMPLES} WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) endif() endif() diff --git a/test/c_api/disjoint_pool.c b/test/c_api/disjoint_pool.c index f63b28355..4d4634def 100644 --- a/test/c_api/disjoint_pool.c +++ b/test/c_api/disjoint_pool.c @@ -7,17 +7,22 @@ #include "pool_disjoint.h" #include "provider_null.h" 
#include "test_helpers.h" +#include "test_ut_asserts.h" void test_disjoint_pool_default_params(void) { umf_memory_provider_handle_t provider = nullProviderCreate(); umf_result_t retp; umf_memory_pool_handle_t pool = NULL; - umf_disjoint_pool_params_t params = umfDisjointPoolParamsDefault(); - retp = umfPoolCreate(umfDisjointPoolOps(), provider, ¶ms, 0, &pool); + umf_disjoint_pool_params_handle_t params = NULL; + + retp = umfDisjointPoolParamsCreate(¶ms); + UT_ASSERTeq(retp, UMF_RESULT_SUCCESS); + retp = umfPoolCreate(umfDisjointPoolOps(), provider, params, 0, &pool); UT_ASSERTeq(retp, UMF_RESULT_SUCCESS); umfPoolDestroy(pool); + umfDisjointPoolParamsDestroy(params); umfMemoryProviderDestroy(provider); } @@ -25,19 +30,25 @@ void test_disjoint_pool_shared_limits(void) { umf_memory_provider_handle_t provider = nullProviderCreate(); umf_result_t retp; umf_memory_pool_handle_t pool = NULL; - umf_disjoint_pool_params_t params = umfDisjointPoolParamsDefault(); + umf_disjoint_pool_params_handle_t params = NULL; + + retp = umfDisjointPoolParamsCreate(¶ms); + UT_ASSERTeq(retp, UMF_RESULT_SUCCESS); - umf_disjoint_pool_shared_limits_t *limits = + umf_disjoint_pool_shared_limits_handle_t limits = umfDisjointPoolSharedLimitsCreate(1024); - params.SharedLimits = limits; + UT_ASSERTne(limits, NULL); - retp = umfPoolCreate(umfDisjointPoolOps(), provider, ¶ms, 0, &pool); + retp = umfDisjointPoolParamsSetSharedLimits(params, limits); + UT_ASSERTeq(retp, UMF_RESULT_SUCCESS); + retp = umfPoolCreate(umfDisjointPoolOps(), provider, ¶ms, 0, &pool); UT_ASSERTeq(retp, UMF_RESULT_SUCCESS); umfPoolDestroy(pool); umfMemoryProviderDestroy(provider); umfDisjointPoolSharedLimitsDestroy(limits); + umfDisjointPoolParamsDestroy(params); } int main(void) { diff --git a/test/c_api/multi_pool.c b/test/c_api/multi_pool.c index 9d4ee5d4c..bbd838312 100644 --- a/test/c_api/multi_pool.c +++ b/test/c_api/multi_pool.c @@ -12,14 +12,21 @@ #include #include "test_helpers.h" +#include "test_ut_asserts.h" 
umf_memory_pool_handle_t createDisjointPool(umf_memory_provider_handle_t provider) { umf_memory_pool_handle_t pool = NULL; - umf_disjoint_pool_params_t params = umfDisjointPoolParamsDefault(); - umf_result_t ret = - umfPoolCreate(umfDisjointPoolOps(), provider, ¶ms, 0, &pool); + umf_disjoint_pool_params_handle_t params = NULL; + + umf_result_t ret = umfDisjointPoolParamsCreate(¶ms); UT_ASSERTeq(ret, UMF_RESULT_SUCCESS); + + ret = umfPoolCreate(umfDisjointPoolOps(), provider, params, 0, &pool); + UT_ASSERTeq(ret, UMF_RESULT_SUCCESS); + + umfDisjointPoolParamsDestroy(params); + return pool; } @@ -53,11 +60,15 @@ createScalablePool(umf_memory_provider_handle_t provider) { #define ALLOC_SIZE 64 int main(void) { - umf_os_memory_provider_params_t params = umfOsMemoryProviderParamsDefault(); + umf_os_memory_provider_params_handle_t params = NULL; + umf_result_t ret = umfOsMemoryProviderParamsCreate(¶ms); + UT_ASSERTeq(ret, UMF_RESULT_SUCCESS); umf_memory_provider_handle_t hProvider; - umf_result_t ret = - umfMemoryProviderCreate(umfOsMemoryProviderOps(), ¶ms, &hProvider); + ret = umfMemoryProviderCreate(umfOsMemoryProviderOps(), params, &hProvider); + UT_ASSERTeq(ret, UMF_RESULT_SUCCESS); + + ret = umfOsMemoryProviderParamsDestroy(params); UT_ASSERTeq(ret, UMF_RESULT_SUCCESS); umf_memory_pool_handle_t pools[4]; diff --git a/test/c_api/test_ut_asserts.h b/test/c_api/test_ut_asserts.h new file mode 100644 index 000000000..834d39bda --- /dev/null +++ b/test/c_api/test_ut_asserts.h @@ -0,0 +1,75 @@ +/* + * + * Copyright (C) 2023-2024 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + */ + +/* + The project uses GTEST framework for testing, which is not supported in C + These asserts should NOT be used in other purposes than for testing C API + */ + +#ifndef UMF_TEST_UT_ASSERTS_H +#define UMF_TEST_UT_ASSERTS_H 1 + +#include +#include +#include + +static inline void UT_FATAL(const char *format, ...) { + va_list args_list; + va_start(args_list, format); + vfprintf(stderr, format, args_list); + va_end(args_list); + + fprintf(stderr, "\n"); + + abort(); +} + +static inline void UT_OUT(const char *format, ...) { + va_list args_list; + va_start(args_list, format); + vfprintf(stdout, format, args_list); + va_end(args_list); + + fprintf(stdout, "\n"); +} + +// Assert a condition is true at runtime +#define UT_ASSERT(cnd) \ + ((void)((cnd) || (UT_FATAL("%s:%d %s - assertion failure: %s", __FILE__, \ + __LINE__, __func__, #cnd), \ + 0))) + +// Assertion with extra info printed if assertion fails at runtime +#define UT_ASSERTinfo(cnd, info) \ + ((void)((cnd) || \ + (UT_FATAL("%s:%d %s - assertion failure: %s (%s = %s)", __FILE__, \ + __LINE__, __func__, #cnd, #info, info), \ + 0))) + +// Assert two integer values are equal at runtime +#define UT_ASSERTeq(lhs, rhs) \ + ((void)(((lhs) == (rhs)) || \ + (UT_FATAL("%s:%d %s - assertion failure: %s (0x%llx) == %s " \ + "(0x%llx)", \ + __FILE__, __LINE__, __func__, #lhs, \ + (unsigned long long)(lhs), #rhs, \ + (unsigned long long)(rhs)), \ + 0))) + +// Assert two integer values are not equal at runtime +#define UT_ASSERTne(lhs, rhs) \ + ((void)(((lhs) != (rhs)) || \ + (UT_FATAL("%s:%d %s - assertion failure: %s (0x%llx) != %s " \ + "(0x%llx)", \ + __FILE__, __LINE__, __func__, #lhs, \ + (unsigned long long)(lhs), #rhs, \ + (unsigned long long)(rhs)), \ + 0))) + +#endif /* UMF_TEST_UT_ASSERTS_H */ diff --git a/test/common/CMakeLists.txt b/test/common/CMakeLists.txt index 4f88fd7d8..6cffe5cfe 100644 --- a/test/common/CMakeLists.txt +++ 
b/test/common/CMakeLists.txt @@ -9,6 +9,10 @@ set(COMMON_SOURCES provider_null.c provider_trace.c) +if(LINUX) + set(COMMON_SOURCES ${COMMON_SOURCES} test_helpers_linux.c) +endif(LINUX) + add_umf_library( NAME umf_test_common TYPE STATIC diff --git a/test/common/base.hpp b/test/common/base.hpp index 8f2d5f6be..804eff48c 100644 --- a/test/common/base.hpp +++ b/test/common/base.hpp @@ -14,6 +14,8 @@ namespace umf_test { +#define IS_SKIPPED_OR_FAILED() (HasFatalFailure() || IsSkipped()) + #define NOEXCEPT_COND(cond, val, expected_val) \ try { \ cond(val, expected_val); \ @@ -48,6 +50,9 @@ std::function withGeneratedArgs(Ret (*f)(Args...)) { }; } +const size_t KB = 1024; +const size_t MB = 1024 * KB; + } // namespace umf_test #endif /* UMF_TEST_BASE_HPP */ diff --git a/test/common/ipc_common.c b/test/common/ipc_common.c index 7e08e79f0..140927079 100644 --- a/test/common/ipc_common.c +++ b/test/common/ipc_common.c @@ -15,8 +15,7 @@ #include "ipc_common.h" #define INET_ADDR "127.0.0.1" -#define MSG_SIZE 1024 -#define RECV_BUFF_SIZE 1024 +#define MSG_SIZE 1024 * 8 // consumer's response message #define CONSUMER_MSG \ @@ -34,6 +33,10 @@ Generally communication between the producer and the consumer looks like: - Producer creates a socket - Producer connects to the consumer - Consumer connects at IP 127.0.0.1 and a port to the producer +- Producer sends the IPC handle size to the consumer +- Consumer receives the IPC handle size from the producer +- Consumer sends the confirmation (IPC handle size) to the producer +- Producer receives the confirmation (IPC handle size) from the consumer - Producer sends the IPC handle to the consumer - Consumer receives the IPC handle from the producer - Consumer opens the IPC handle received from the producer @@ -107,11 +110,10 @@ int consumer_connect(int port) { return ret; } -int run_consumer(int port, umf_memory_provider_ops_t *provider_ops, - void *provider_params, memcopy_callback_t memcopy_callback, - void *memcopy_ctx) { +int 
run_consumer(int port, umf_memory_pool_ops_t *pool_ops, void *pool_params, + umf_memory_provider_ops_t *provider_ops, void *provider_params, + memcopy_callback_t memcopy_callback, void *memcopy_ctx) { char consumer_message[MSG_SIZE]; - char recv_buffer[RECV_BUFF_SIZE]; int producer_socket = -1; int ret = -1; umf_memory_provider_handle_t provider = NULL; @@ -129,28 +131,58 @@ int run_consumer(int port, umf_memory_provider_ops_t *provider_ops, return -1; } - // get the size of the IPC handle - size_t IPC_handle_size; - umf_result = umfMemoryProviderGetIPCHandleSize(provider, &IPC_handle_size); + umf_memory_pool_handle_t pool; + umf_result = umfPoolCreate(pool_ops, provider, pool_params, 0, &pool); if (umf_result != UMF_RESULT_SUCCESS) { - fprintf(stderr, - "[consumer] ERROR: getting size of the IPC handle failed\n"); + fprintf(stderr, "[consumer] ERROR: creating memory pool failed\n"); goto err_umfMemoryProviderDestroy; } + umf_ipc_handler_handle_t ipc_handler; + umf_result = umfPoolGetIPCHandler(pool, &ipc_handler); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf(stderr, "[consumer] ERROR: get IPC handler failed\n"); + goto err_umfMemoryPoolDestroy; + } + producer_socket = consumer_connect(port); if (producer_socket < 0) { - goto err_umfMemoryProviderDestroy; + goto err_umfMemoryPoolDestroy; } - // zero the receive buffer - memset(recv_buffer, 0, RECV_BUFF_SIZE); + // allocate the zeroed receive buffer + char *recv_buffer = calloc(1, MSG_SIZE); + if (!recv_buffer) { + fprintf(stderr, "[consumer] ERROR: out of memory\n"); + goto err_close_producer_socket; + } - // receive a producer's message - ssize_t recv_len = recv(producer_socket, recv_buffer, RECV_BUFF_SIZE, 0); + // get the size of the IPC handle from the producer + size_t IPC_handle_size; + ssize_t recv_len = recv(producer_socket, recv_buffer, MSG_SIZE, 0); if (recv_len < 0) { fprintf(stderr, "[consumer] ERROR: recv() failed\n"); - goto err_close_producer_socket; + goto err_free_recv_buffer; + } + 
IPC_handle_size = *(size_t *)recv_buffer; + fprintf(stderr, "[consumer] Got the size of the IPC handle: %zu\n", + IPC_handle_size); + + // send confirmation to the producer (IPC handle size) + recv_len = + send(producer_socket, &IPC_handle_size, sizeof(IPC_handle_size), 0); + if (recv_len < 0) { + fprintf(stderr, "[consumer] ERROR: sending confirmation failed\n"); + goto err_free_recv_buffer; + } + fprintf(stderr, + "[consumer] Send the confirmation (IPC handle size) to producer\n"); + + // receive IPC handle from the producer + recv_len = recv(producer_socket, recv_buffer, MSG_SIZE, 0); + if (recv_len < 0) { + fprintf(stderr, "[consumer] ERROR: recv() failed\n"); + goto err_free_recv_buffer; } size_t len = (size_t)recv_len; @@ -159,7 +191,7 @@ int run_consumer(int port, umf_memory_provider_ops_t *provider_ops, "[consumer] ERROR: recv() received a wrong number of bytes " "(%zi != %zu expected)\n", len, IPC_handle_size); - goto err_close_producer_socket; + goto err_free_recv_buffer; } void *IPC_handle = recv_buffer; @@ -170,7 +202,7 @@ int run_consumer(int port, umf_memory_provider_ops_t *provider_ops, len); void *SHM_ptr; - umf_result = umfMemoryProviderOpenIPCHandle(provider, IPC_handle, &SHM_ptr); + umf_result = umfOpenIPCHandle(ipc_handler, IPC_handle, &SHM_ptr); if (umf_result == UMF_RESULT_ERROR_NOT_SUPPORTED) { fprintf(stderr, "[consumer] SKIP: opening the IPC handle is not supported\n"); @@ -183,11 +215,11 @@ int run_consumer(int port, umf_memory_provider_ops_t *provider_ops, send(producer_socket, consumer_message, strlen(consumer_message) + 1, 0); - goto err_close_producer_socket; + goto err_free_recv_buffer; } if (umf_result != UMF_RESULT_SUCCESS) { fprintf(stderr, "[consumer] ERROR: opening the IPC handle failed\n"); - goto err_close_producer_socket; + goto err_free_recv_buffer; } fprintf(stderr, @@ -227,8 +259,7 @@ int run_consumer(int port, umf_memory_provider_ops_t *provider_ops, err_closeIPCHandle: // we do not know the exact size of the remote shared 
memory - umf_result = umfMemoryProviderCloseIPCHandle(provider, SHM_ptr, - sizeof(unsigned long long)); + umf_result = umfCloseIPCHandle(SHM_ptr); if (umf_result != UMF_RESULT_SUCCESS) { fprintf(stderr, "[consumer] ERROR: closing the IPC handle failed\n"); } @@ -236,9 +267,15 @@ int run_consumer(int port, umf_memory_provider_ops_t *provider_ops, fprintf(stderr, "[consumer] Closed the IPC handle received from the producer\n"); +err_free_recv_buffer: + free(recv_buffer); + err_close_producer_socket: close(producer_socket); +err_umfMemoryPoolDestroy: + umfPoolDestroy(pool); + err_umfMemoryProviderDestroy: umfMemoryProviderDestroy(provider); @@ -290,9 +327,9 @@ int producer_connect(int port) { return -1; } -int run_producer(int port, umf_memory_provider_ops_t *provider_ops, - void *provider_params, memcopy_callback_t memcopy_callback, - void *memcopy_ctx) { +int run_producer(int port, umf_memory_pool_ops_t *pool_ops, void *pool_params, + umf_memory_provider_ops_t *provider_ops, void *provider_params, + memcopy_callback_t memcopy_callback, void *memcopy_ctx) { int ret = -1; umf_memory_provider_handle_t provider = NULL; umf_result_t umf_result = UMF_RESULT_ERROR_UNKNOWN; @@ -308,12 +345,19 @@ int run_producer(int port, umf_memory_provider_ops_t *provider_ops, return -1; } + umf_memory_pool_handle_t pool; + umf_result = umfPoolCreate(pool_ops, provider, pool_params, 0, &pool); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf(stderr, "[producer] ERROR: creating memory pool failed\n"); + goto err_umfMemoryProviderDestroy; + } + size_t page_size; umf_result = umfMemoryProviderGetMinPageSize(provider, NULL, &page_size); if (umf_result != UMF_RESULT_SUCCESS) { fprintf(stderr, "[producer] ERROR: getting the minimum page size failed\n"); - goto err_umfMemoryProviderDestroy; + goto err_umfMemoryPoolDestroy; } // Make 3 allocations of size: 1 page, 2 pages and 3 pages @@ -322,45 +366,36 @@ int run_producer(int port, umf_memory_provider_ops_t *provider_ops, size_t ptr2_size = 2 
* page_size; size_t size_IPC_shared_memory = 3 * page_size; - umf_result = umfMemoryProviderAlloc(provider, ptr1_size, 0, &ptr1); - if (umf_result != UMF_RESULT_SUCCESS) { + ptr1 = umfPoolMalloc(pool, ptr1_size); + if (ptr1 == NULL) { fprintf(stderr, "[producer] ERROR: allocating 1 page failed\n"); - goto err_umfMemoryProviderDestroy; + goto err_umfMemoryPoolDestroy; } - umf_result = umfMemoryProviderAlloc(provider, ptr2_size, 0, &ptr2); - if (umf_result != UMF_RESULT_SUCCESS) { + ptr2 = umfPoolMalloc(pool, ptr2_size); + if (ptr2 == NULL) { fprintf(stderr, "[producer] ERROR: allocating 2 pages failed\n"); goto err_free_ptr1; } - umf_result = umfMemoryProviderAlloc(provider, size_IPC_shared_memory, 0, - &IPC_shared_memory); - if (umf_result != UMF_RESULT_SUCCESS) { + IPC_shared_memory = umfPoolMalloc(pool, size_IPC_shared_memory); + if (IPC_shared_memory == NULL) { fprintf(stderr, "[producer] ERROR: allocating 3 pages failed\n"); goto err_free_ptr2; } // get size of the IPC handle size_t IPC_handle_size; - umf_result = umfMemoryProviderGetIPCHandleSize(provider, &IPC_handle_size); - if (umf_result != UMF_RESULT_SUCCESS) { - fprintf(stderr, - "[producer] ERROR: getting size of the IPC handle failed\n"); - goto err_free_IPC_shared_memory; - } + umf_ipc_handle_t IPC_handle = NULL; - // allocate data for IPC provider - void *IPC_handle = malloc(IPC_handle_size); - if (IPC_handle == NULL) { - fprintf(stderr, - "[producer] ERROR: allocating memory for IPC handle failed\n"); + // get the IPC handle + umf_result = + umfGetIPCHandle(IPC_shared_memory, &IPC_handle, &IPC_handle_size); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf(stderr, "[producer] ERROR: getting the IPC handle failed\n"); goto err_free_IPC_shared_memory; } - // zero the IPC handle and the shared memory - memset(IPC_handle, 0, IPC_handle_size); - // save a random number (&provider) in the shared memory unsigned long long SHM_number_1 = (unsigned long long)&provider; memcopy_callback(IPC_shared_memory, 
&SHM_number_1, sizeof(SHM_number_1), @@ -369,16 +404,6 @@ int run_producer(int port, umf_memory_provider_ops_t *provider_ops, fprintf(stderr, "[producer] My shared memory contains a number: %llu\n", SHM_number_1); - // get the IPC handle from the OS memory provider - umf_result = umfMemoryProviderGetIPCHandle( - provider, IPC_shared_memory, size_IPC_shared_memory, IPC_handle); - if (umf_result != UMF_RESULT_SUCCESS) { - fprintf(stderr, - "[producer] ERROR: getting the IPC handle from the OS memory " - "provider failed\n"); - goto err_free_IPC_handle; - } - fprintf(stderr, "[producer] Got the IPC handle\n"); producer_socket = producer_connect(port); @@ -386,6 +411,44 @@ int run_producer(int port, umf_memory_provider_ops_t *provider_ops, goto err_PutIPCHandle; } + // send the IPC_handle_size to the consumer + ssize_t len = + send(producer_socket, &IPC_handle_size, sizeof(IPC_handle_size), 0); + if (len < 0) { + fprintf(stderr, "[producer] ERROR: unable to send the message\n"); + goto err_close_producer_socket; + } + + fprintf(stderr, + "[producer] Sent the size of the IPC handle (%zu) to the consumer " + "(sent %zu bytes)\n", + IPC_handle_size, len); + + // zero the consumer_message buffer + memset(consumer_message, 0, sizeof(consumer_message)); + + // receive the consumer's confirmation - IPC handle size + len = recv(producer_socket, consumer_message, sizeof(consumer_message), 0); + if (len < 0) { + fprintf(stderr, "[producer] ERROR: error while receiving the " + "confirmation from the consumer\n"); + goto err_close_producer_socket; + } + + size_t conf_IPC_handle_size = *(size_t *)consumer_message; + if (conf_IPC_handle_size == IPC_handle_size) { + fprintf(stderr, + "[producer] Received the correct confirmation (%zu) from the " + "consumer (%zu bytes)\n", + conf_IPC_handle_size, len); + } else { + fprintf(stderr, + "[producer] Received an INCORRECT confirmation (%zu) from the " + "consumer (%zu bytes)\n", + conf_IPC_handle_size, len); + goto 
err_close_producer_socket; + } + // send the IPC_handle of IPC_handle_size to the consumer if (send(producer_socket, IPC_handle, IPC_handle_size, 0) < 0) { fprintf(stderr, "[producer] ERROR: unable to send the message\n"); @@ -443,22 +506,25 @@ int run_producer(int port, umf_memory_provider_ops_t *provider_ops, close(producer_socket); err_PutIPCHandle: - umf_result = umfMemoryProviderPutIPCHandle(provider, IPC_handle); + umf_result = umfPutIPCHandle(IPC_handle); if (umf_result != UMF_RESULT_SUCCESS) { fprintf(stderr, "[producer] ERROR: putting the IPC handle failed\n"); } fprintf(stderr, "[producer] Put the IPC handle\n"); -err_free_IPC_handle: - free(IPC_handle); err_free_IPC_shared_memory: - (void)umfMemoryProviderFree(provider, IPC_shared_memory, - size_IPC_shared_memory); + (void)umfFree(IPC_shared_memory); + err_free_ptr2: - (void)umfMemoryProviderFree(provider, ptr2, ptr2_size); + (void)umfFree(ptr2); + err_free_ptr1: - (void)umfMemoryProviderFree(provider, ptr1, ptr1_size); + (void)umfFree(ptr1); + +err_umfMemoryPoolDestroy: + umfPoolDestroy(pool); + err_umfMemoryProviderDestroy: umfMemoryProviderDestroy(provider); diff --git a/test/common/ipc_common.h b/test/common/ipc_common.h index a73b01435..89303899b 100644 --- a/test/common/ipc_common.h +++ b/test/common/ipc_common.h @@ -8,7 +8,10 @@ #ifndef UMF_TEST_IPC_COMMON_H #define UMF_TEST_IPC_COMMON_H +#include +#include #include +#include // pointer to the function that returns void and accept two int values typedef void (*memcopy_callback_t)(void *dst, const void *src, size_t size, @@ -17,11 +20,12 @@ typedef void (*memcopy_callback_t)(void *dst, const void *src, size_t size, int producer_connect(int port); int consumer_connect(int port); -int run_producer(int port, umf_memory_provider_ops_t *provider_ops, - void *provider_params, memcopy_callback_t memcopy_callback, - void *memcopy_ctx); -int run_consumer(int port, umf_memory_provider_ops_t *provider_ops, - void *provider_params, memcopy_callback_t 
memcopy_callback, - void *memcopy_ctx); +int run_producer(int port, umf_memory_pool_ops_t *pool_ops, void *pool_params, + umf_memory_provider_ops_t *provider_ops, void *provider_params, + memcopy_callback_t memcopy_callback, void *memcopy_ctx); + +int run_consumer(int port, umf_memory_pool_ops_t *pool_ops, void *pool_params, + umf_memory_provider_ops_t *provider_ops, void *provider_params, + memcopy_callback_t memcopy_callback, void *memcopy_ctx); #endif // UMF_TEST_IPC_COMMON_H diff --git a/test/common/numa_helpers.h b/test/common/numa_helpers.h deleted file mode 100644 index aa9888fea..000000000 --- a/test/common/numa_helpers.h +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright (C) 2024 Intel Corporation -// Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -#ifndef UMF_TEST_NUMA_HELPERS_H -#define UMF_TEST_NUMA_HELPERS_H 1 - -#include -#include -#include -#include - -#include "test_helpers.h" - -#ifdef __cplusplus -extern "C" { -#endif - -// returns the node where page starting at 'ptr' resides -int getNumaNodeByPtr(void *ptr) { - int nodeId; - int retm = - get_mempolicy(&nodeId, nullptr, 0, ptr, MPOL_F_ADDR | MPOL_F_NODE); - UT_ASSERTeq(retm, 0); - UT_ASSERT(nodeId >= 0); - - return nodeId; -} - -#ifdef __cplusplus -} -#endif - -#endif /* UMF_TEST_NUMA_HELPERS_H */ diff --git a/test/common/numa_helpers.hpp b/test/common/numa_helpers.hpp new file mode 100644 index 000000000..adcce9102 --- /dev/null +++ b/test/common/numa_helpers.hpp @@ -0,0 +1,205 @@ +// Copyright (C) 2024 Intel Corporation +// Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#ifndef UMF_TEST_NUMA_HELPERS_HPP +#define UMF_TEST_NUMA_HELPERS_HPP 1 + +#include +#include +#include +#include +#include + +#include "test_helpers.h" + +// returns the node where page starting at 'ptr' resides +static inline void getNumaNodeByPtr(void *ptr, int *node) { + int ret = get_mempolicy(node, nullptr, 0, ptr, MPOL_F_ADDR | MPOL_F_NODE); + + ASSERT_EQ(ret, 0) << "get_mempolicy failed"; + ASSERT_GE(*node, 0) + << "get_mempolicy returned nodeId < 0 - should never happen"; +} + +// returns the mode in which page starting at 'ptr' is bound +static inline void getBindModeByPtr(void *ptr, int *mode) { + int ret = get_mempolicy(mode, nullptr, 0, ptr, MPOL_F_ADDR); + + ASSERT_EQ(ret, 0) << "get_mempolicy failed"; +} + +// returns the mask in which page starting at 'ptr' is bound +static inline void getBindMaskByPtr(void *ptr, struct bitmask *mask) { + int ret = get_mempolicy(nullptr, mask->maskp, mask->size, ptr, MPOL_F_ADDR); + + ASSERT_EQ(ret, 0) << "get_mempolicy failed"; +} + +// Internal do not use directly +#define _CUSTOMIZABLE_ASSERT(expr, fatal, eq) \ + do { \ + if (fatal) { \ + if (eq) { \ + ASSERT_TRUE(expr); \ + } else { \ + ASSERT_FALSE(expr); \ + } \ + } else { \ + if (eq) { \ + EXPECT_TRUE(expr); \ + } else { \ + EXPECT_FALSE(expr); \ + } \ + } \ + } while (0) + +// Internal do not use directly +static inline void _assertNode(void *ptr, int expected_node, bool fatal, + bool eq) { + int node; + + getNumaNodeByPtr(ptr, &node); + if (testing::Test::HasFatalFailure()) { + return; + } + + _CUSTOMIZABLE_ASSERT(node == expected_node, fatal, eq); +} + +// Internal do not use directly +static inline void _assertNode(void *ptr, void *ptr2, bool fatal, bool eq) { + int node, node2; + + getNumaNodeByPtr(ptr, &node); + getNumaNodeByPtr(ptr2, &node2); + if (testing::Test::HasFatalFailure()) { + return; + } + + _CUSTOMIZABLE_ASSERT(node == node2, fatal, eq); +} + +// Internal do not use directly 
+static inline void _assertBindMode(void *ptr, int expected_mode, bool fatal, + bool eq) { + int mode; + + getBindModeByPtr(ptr, &mode); + if (testing::Test::HasFatalFailure()) { + return; + } + + _CUSTOMIZABLE_ASSERT(mode == expected_mode, fatal, eq); +} + +static inline void _assertBindMode(void *ptr, void *ptr2, bool fatal, bool eq) { + int mode, mode2; + + getBindModeByPtr(ptr, &mode); + getBindModeByPtr(ptr2, &mode2); + if (testing::Test::HasFatalFailure()) { + return; + } + + _CUSTOMIZABLE_ASSERT(mode == mode2, fatal, eq); +} + +// Internal do not use directly +static inline void _assertBindMask(void *ptr, struct bitmask *expected_mask, + bool fatal, bool eq) { + struct bitmask *mask = numa_allocate_nodemask(); + ASSERT_NE(mask, nullptr) << "numa_allocate_nodemask failed"; + + getBindMaskByPtr(ptr, mask); + if (testing::Test::HasFatalFailure()) { + return; + } + + _CUSTOMIZABLE_ASSERT(numa_bitmask_equal(mask, expected_mask), fatal, eq); + + numa_free_nodemask(mask); +} + +// Internal do not use directly +static inline void _assertBindMask(void *ptr, void *ptr2, bool fatal, bool eq) { + struct bitmask *mask = numa_allocate_nodemask(); + ASSERT_NE(mask, nullptr) << "numa_allocate_nodemask failed"; + + getBindMaskByPtr(ptr, mask); + + struct bitmask *mask2 = numa_allocate_nodemask(); + ASSERT_NE(mask2, nullptr) << "numa_allocate_nodemask failed"; + + getBindMaskByPtr(ptr2, mask2); + + if (testing::Test::HasFatalFailure()) { + return; + } + + _CUSTOMIZABLE_ASSERT(numa_bitmask_equal(mask, mask2), fatal, eq); + + numa_free_nodemask(mask); + numa_free_nodemask(mask2); +} + +// Asserts that a memory page starting at 'ptr' is on the expected NUMA node, +// The target can be either a specific node or another pointer, in which case we compare nodes of both ptr. 
+#define ASSERT_NODE_EQ(ptr, target) \ + ASSERT_NO_FATAL_FAILURE(_assertNode(ptr, target, true, true)) + +// Asserts that a memory page starting at 'ptr' is not on the expected NUMA node, +// The target can be either a specific node or another pointer, in which case we compare nodes of both ptr. +#define ASSERT_NODE_NE(ptr, target) \ + ASSERT_NO_FATAL_FAILURE(_assertNode(ptr, target, true, false)) + +// Expects that a memory page starting at 'ptr' is on the expected NUMA node, +// Target can be either a node id or another pointer, in which case we compare nodes of both ptr. +#define EXPECT_NODE_EQ(ptr, target) \ + ASSERT_NO_FATAL_FAILURE(_assertNode(ptr, target, false, true)) + +// Expects that a memory page starting at 'ptr' is not on the expected NUMA node, +// Target can be either a node id or another pointer, in which case we compare nodes of both ptr. +#define EXPECT_NODE_NE(ptr, target) \ + ASSERT_NO_FATAL_FAILURE(_assertNode(ptr, target, false, false)) + +// Asserts that a memory page starting at 'ptr' is bound in the expected memory binding mode. +// The target can be either a specific mode or another pointer, in which case we compare the modes of both ptr. +#define ASSERT_BIND_MODE_EQ(ptr, target) \ + ASSERT_NO_FATAL_FAILURE(_assertBindMode(ptr, target, true, true)) +// Asserts that a memory page starting at 'ptr' is not bound in the expected memory binding mode. +// The target can be either a specific mode or another pointer, in which case we compare the modes of both ptr. +#define ASSERT_BIND_MODE_NE(ptr, target) \ + ASSERT_NO_FATAL_FAILURE(_assertBindMode(ptr, target, true, false)) + +// Expects that a memory page starting at 'ptr' is bound in the expected memory binding mode. +// The target can be either a specific mode or another pointer, in which case we compare the modes of both ptr. 
+#define EXPECT_BIND_MODE_EQ(ptr, target) \ + ASSERT_NO_FATAL_FAILURE(_assertBindMode(ptr, target, false, true)) + +// Expects that a memory page starting at 'ptr' is not bound in the expected memory binding mode. +// The target can be either a specific mode or another pointer, in which case we compare the modes of both ptr. +#define EXPECT_BIND_MODE_NE(ptr, target) \ + ASSERT_NO_FATAL_FAILURE(_assertBindMode(ptr, target, false, false)) + +// Asserts that the memory binding mask for the page starting at 'ptr' matches the expected mask. +// The target can be either a bitmask or another pointer, in which case we compare the masks of both ptr. +#define ASSERT_BIND_MASK_EQ(ptr, target) \ + ASSERT_NO_FATAL_FAILURE(_assertBindMask(ptr, target, true, true)) + +// Asserts that the memory binding mask for the page starting at 'ptr' does not match the expected mask. +// The target can be either a bitmask or another pointer, in which case we compare the masks of both ptr. +#define ASSERT_BIND_MASK_NE(ptr, target) \ + ASSERT_NO_FATAL_FAILURE(_assertBindMask(ptr, target, true, false)) + +// Expects that the memory binding mask for the page starting at 'ptr' matches the expected mask. +// The target can be either a bitmask or another pointer, in which case we compare the masks of both ptr. +#define EXPECT_BIND_MASK_EQ(ptr, target) \ + ASSERT_NO_FATAL_FAILURE(_assertBindMask(ptr, target, false, true)) + +// Expects that the memory binding mask for the page starting at 'ptr' does not match the expected mask. +// The target can be either a bitmask or another pointer, in which case we compare the masks of both ptr. 
+#define EXPECT_BIND_MASK_NE(ptr, target) \ + ASSERT_NO_FATAL_FAILURE(_assertBindMask(ptr, target, false, false)) + +#endif /* UMF_TEST_NUMA_HELPERS_HPP */ diff --git a/test/common/provider.hpp b/test/common/provider.hpp index cb4835eb5..148f34dc8 100644 --- a/test/common/provider.hpp +++ b/test/common/provider.hpp @@ -14,6 +14,7 @@ #include #include "base.hpp" +#include "base_alloc_global.h" #include "cpp_helpers.hpp" #include "test_helpers.h" @@ -98,7 +99,7 @@ typedef struct provider_base_t { umf_memory_provider_ops_t BASE_PROVIDER_OPS = umf::providerMakeCOps(); -struct provider_malloc : public provider_base_t { +struct provider_ba_global : public provider_base_t { umf_result_t alloc(size_t size, size_t align, void **ptr) noexcept { if (!align) { align = 8; @@ -108,33 +109,28 @@ struct provider_malloc : public provider_base_t { // requirement of 'size' being multiple of 'align' even though the // documentation says that it has to. AddressSanitizer returns an // error because of this issue. - size_t aligned_size = ALIGN_UP(size, align); + size_t aligned_size = ALIGN_UP_SAFE(size, align); + if (aligned_size == 0) { + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } -#ifdef _WIN32 - *ptr = _aligned_malloc(aligned_size, align); -#else - *ptr = ::aligned_alloc(align, aligned_size); -#endif + *ptr = umf_ba_global_aligned_alloc(aligned_size, align); return (*ptr) ? 
UMF_RESULT_SUCCESS : UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; } umf_result_t free(void *ptr, size_t) noexcept { -#ifdef _WIN32 - _aligned_free(ptr); -#else - ::free(ptr); -#endif + umf_ba_global_free(ptr); return UMF_RESULT_SUCCESS; } - const char *get_name() noexcept { return "malloc"; } + const char *get_name() noexcept { return "umf_ba_global"; } }; -umf_memory_provider_ops_t MALLOC_PROVIDER_OPS = - umf::providerMakeCOps(); +umf_memory_provider_ops_t BA_GLOBAL_PROVIDER_OPS = + umf::providerMakeCOps(); struct provider_mock_out_of_mem : public provider_base_t { - provider_malloc helper_prov; + provider_ba_global helper_prov; int allocNum = 0; umf_result_t initialize(int *inAllocNum) noexcept { allocNum = *inAllocNum; diff --git a/test/common/provider_null.c b/test/common/provider_null.c index e667bfce4..5db389e89 100644 --- a/test/common/provider_null.c +++ b/test/common/provider_null.c @@ -134,11 +134,11 @@ umf_memory_provider_ops_t UMF_NULL_PROVIDER_OPS = { .initialize = nullInitialize, .finalize = nullFinalize, .alloc = nullAlloc, - .free = nullFree, .get_last_native_error = nullGetLastError, .get_recommended_page_size = nullGetRecommendedPageSize, .get_min_page_size = nullGetPageSize, .get_name = nullName, + .ext.free = nullFree, .ext.purge_lazy = nullPurgeLazy, .ext.purge_force = nullPurgeForce, .ext.allocation_merge = nullAllocationMerge, diff --git a/test/common/provider_trace.c b/test/common/provider_trace.c index 9d063b4f5..219dde5cd 100644 --- a/test/common/provider_trace.c +++ b/test/common/provider_trace.c @@ -195,11 +195,11 @@ umf_memory_provider_ops_t UMF_TRACE_PROVIDER_OPS = { .initialize = traceInitialize, .finalize = traceFinalize, .alloc = traceAlloc, - .free = traceFree, .get_last_native_error = traceGetLastError, .get_recommended_page_size = traceGetRecommendedPageSize, .get_min_page_size = traceGetPageSize, .get_name = traceName, + .ext.free = traceFree, .ext.purge_lazy = tracePurgeLazy, .ext.purge_force = tracePurgeForce, .ext.allocation_merge = 
traceAllocationMerge, diff --git a/test/common/test_helpers.h b/test/common/test_helpers.h index e361feba4..494528b57 100644 --- a/test/common/test_helpers.h +++ b/test/common/test_helpers.h @@ -14,67 +14,14 @@ #include #include "provider_trace.h" +#include "utils_common.h" #ifdef __cplusplus extern "C" { #endif -static inline void UT_FATAL(const char *format, ...) { - va_list args_list; - va_start(args_list, format); - vfprintf(stderr, format, args_list); - va_end(args_list); - - fprintf(stderr, "\n"); - - abort(); -} - -static inline void UT_OUT(const char *format, ...) { - va_list args_list; - va_start(args_list, format); - vfprintf(stdout, format, args_list); - va_end(args_list); - - fprintf(stdout, "\n"); -} - -// Assert a condition is true at runtime -#define UT_ASSERT(cnd) \ - ((void)((cnd) || (UT_FATAL("%s:%d %s - assertion failure: %s", __FILE__, \ - __LINE__, __func__, #cnd), \ - 0))) - -// Assertion with extra info printed if assertion fails at runtime -#define UT_ASSERTinfo(cnd, info) \ - ((void)((cnd) || \ - (UT_FATAL("%s:%d %s - assertion failure: %s (%s = %s)", __FILE__, \ - __LINE__, __func__, #cnd, #info, info), \ - 0))) - -// Assert two integer values are equal at runtime -#define UT_ASSERTeq(lhs, rhs) \ - ((void)(((lhs) == (rhs)) || \ - (UT_FATAL("%s:%d %s - assertion failure: %s (0x%llx) == %s " \ - "(0x%llx)", \ - __FILE__, __LINE__, __func__, #lhs, \ - (unsigned long long)(lhs), #rhs, \ - (unsigned long long)(rhs)), \ - 0))) - -// Assert two integer values are not equal at runtime -#define UT_ASSERTne(lhs, rhs) \ - ((void)(((lhs) != (rhs)) || \ - (UT_FATAL("%s:%d %s - assertion failure: %s (0x%llx) != %s " \ - "(0x%llx)", \ - __FILE__, __LINE__, __func__, #lhs, \ - (unsigned long long)(lhs), #rhs, \ - (unsigned long long)(rhs)), \ - 0))) - -#ifndef ALIGN_UP -#define ALIGN_UP(value, align) (((value) + (align)-1) & ~((align)-1)) -#endif +// Needed for CI +#define TEST_SKIP_ERROR_CODE 125 int bufferIsFilledWithChar(void *ptr, size_t size, char 
c); diff --git a/test/common/test_helpers_linux.c b/test/common/test_helpers_linux.c new file mode 100644 index 000000000..431880bf7 --- /dev/null +++ b/test/common/test_helpers_linux.c @@ -0,0 +1,69 @@ +// Copyright (C) 2024 Intel Corporation +// Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// This file contains tests for UMF pool API + +#include +#include +#include +#include +#include +#include + +#include "test_helpers_linux.h" + +// Check if the file given by the 'path' argument was mapped with the MAP_SYNC flag: +// 1) Open and read the /proc/self/smaps file. +// 2) Look for the section of the 'path' file. +// 3) Check if the VmFlags of the 'path' file contains the "sf" flag +// marking that the file was mapped with the MAP_SYNC flag. +bool is_mapped_with_MAP_SYNC(char *path, char *buf, size_t size_buf) { + memset(buf, 0, size_buf); + + int fd = open("/proc/self/smaps", O_RDONLY); + if (fd == -1) { + return false; + } + + // number of bytes read from the file + ssize_t nbytes = 1; + // string starting from the path of the smaps + char *smaps = NULL; + + // Read the "/proc/self/smaps" file + // until the path of the smaps is found + // or EOF is reached. + while (nbytes > 0 && smaps == NULL) { + memset(buf, 0, nbytes); // erase previous data + nbytes = read(fd, buf, size_buf); + // look for the path of the smaps + smaps = strstr(buf, path); + } + + (void)close(fd); + + // String starting from the "sf" flag + // marking that memory was mapped with the MAP_SYNC flag. + char *sf_flag = NULL; + + if (smaps) { + // look for the "VmFlags:" string + char *VmFlags = strstr(smaps, "VmFlags:"); + if (VmFlags) { + // look for the EOL + char *eol = strstr(VmFlags, "\n"); + if (eol) { + // End the VmFlags string at EOL. + *eol = 0; + // Now the VmFlags string contains only one line with all VmFlags. 
+ + // Look for the "sf" flag in VmFlags + // marking that memory was mapped + // with the MAP_SYNC flag. + sf_flag = strstr(VmFlags, "sf"); + } + } + } + + return (sf_flag != NULL); +} diff --git a/test/common/test_helpers_linux.h b/test/common/test_helpers_linux.h new file mode 100644 index 000000000..7755408b7 --- /dev/null +++ b/test/common/test_helpers_linux.h @@ -0,0 +1,21 @@ +// Copyright (C) 2024 Intel Corporation +// Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// This file contains helpers for tests for UMF pool API + +#ifndef UMF_TEST_HELPERS_LINUX_H +#define UMF_TEST_HELPERS_LINUX_H 1 + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +bool is_mapped_with_MAP_SYNC(char *path, char *buf, size_t size_buf); + +#ifdef __cplusplus +} +#endif + +#endif /* UMF_TEST_HELPERS_LINUX_H */ diff --git a/test/disjointCoarseMallocPool.cpp b/test/disjointCoarseMallocPool.cpp new file mode 100644 index 000000000..32e1d24f3 --- /dev/null +++ b/test/disjointCoarseMallocPool.cpp @@ -0,0 +1,580 @@ +/* + * Copyright (C) 2023-2024 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +*/ + +#include + +#include "provider.hpp" + +#include +#include + +using umf_test::KB; +using umf_test::MB; +using umf_test::test; + +#define GetStats umfCoarseMemoryProviderGetStats + +umf_memory_provider_ops_t UMF_MALLOC_MEMORY_PROVIDER_OPS = + umf::providerMakeCOps(); + +struct CoarseWithMemoryStrategyTest + : umf_test::test, + ::testing::WithParamInterface { + void SetUp() override { + test::SetUp(); + allocation_strategy = this->GetParam(); + } + + coarse_memory_provider_strategy_t allocation_strategy; +}; + +INSTANTIATE_TEST_SUITE_P( + CoarseWithMemoryStrategyTest, CoarseWithMemoryStrategyTest, + ::testing::Values(UMF_COARSE_MEMORY_STRATEGY_FASTEST, + UMF_COARSE_MEMORY_STRATEGY_FASTEST_BUT_ONE, + UMF_COARSE_MEMORY_STRATEGY_CHECK_ALL_SIZE)); + +TEST_P(CoarseWithMemoryStrategyTest, disjointCoarseMallocPool_basic) { + umf_memory_provider_handle_t malloc_memory_provider; + umf_result_t umf_result; + + umf_result = umfMemoryProviderCreate(&UMF_MALLOC_MEMORY_PROVIDER_OPS, NULL, + &malloc_memory_provider); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(malloc_memory_provider, nullptr); + + const size_t init_buffer_size = 20 * MB; + + coarse_memory_provider_params_t coarse_memory_provider_params; + // make sure there are no undefined members - prevent a UB + memset(&coarse_memory_provider_params, 0, + sizeof(coarse_memory_provider_params)); + coarse_memory_provider_params.allocation_strategy = allocation_strategy; + coarse_memory_provider_params.upstream_memory_provider = + malloc_memory_provider; + coarse_memory_provider_params.destroy_upstream_memory_provider = true; + coarse_memory_provider_params.immediate_init_from_upstream = true; + coarse_memory_provider_params.init_buffer = nullptr; + coarse_memory_provider_params.init_buffer_size = init_buffer_size; + + umf_memory_provider_handle_t coarse_memory_provider; + umf_result = umfMemoryProviderCreate(umfCoarseMemoryProviderOps(), + 
&coarse_memory_provider_params, + &coarse_memory_provider); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(coarse_memory_provider, nullptr); + + umf_disjoint_pool_params_handle_t disjoint_pool_params = NULL; + umf_result = umfDisjointPoolParamsCreate(&disjoint_pool_params); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(disjoint_pool_params, nullptr); + umf_result = + umfDisjointPoolParamsSetSlabMinSize(disjoint_pool_params, 4096); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + umf_result = + umfDisjointPoolParamsSetMaxPoolableSize(disjoint_pool_params, 4096); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + umf_result = umfDisjointPoolParamsSetCapacity(disjoint_pool_params, 4); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + umf_result = + umfDisjointPoolParamsSetMinBucketSize(disjoint_pool_params, 64); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + umf_result = umfDisjointPoolParamsSetTrace(disjoint_pool_params, 1); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + umf_memory_pool_handle_t pool; + umf_result = umfPoolCreate(umfDisjointPoolOps(), coarse_memory_provider, + disjoint_pool_params, + UMF_POOL_CREATE_FLAG_OWN_PROVIDER, &pool); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(pool, nullptr); + + umf_result = umfDisjointPoolParamsDestroy(disjoint_pool_params); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + // test + + umf_memory_provider_handle_t prov = NULL; + umf_result = umfPoolGetMemoryProvider(pool, &prov); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(prov, nullptr); + + // alloc 2x 2MB + void *p1 = umfPoolMalloc(pool, 2 * MB); + ASSERT_NE(p1, nullptr); + ASSERT_EQ(GetStats(prov).used_size, 2 * MB); + ASSERT_EQ(GetStats(prov).alloc_size, init_buffer_size); + ASSERT_EQ(GetStats(prov).num_all_blocks, 2); + + void *p2 = umfPoolMalloc(pool, 2 * MB); + ASSERT_NE(p2, nullptr); + ASSERT_EQ(GetStats(prov).used_size, 4 * MB); + ASSERT_EQ(GetStats(prov).alloc_size, init_buffer_size); + 
ASSERT_EQ(GetStats(prov).num_all_blocks, 3); + ASSERT_NE(p1, p2); + + // swap pointers to get p1 < p2 + if (p1 > p2) { + std::swap(p1, p2); + } + + // free + alloc first block + // the block should be reused + // currently there is no purging, so the alloc size shouldn't change + // there should be no block merging between used and not-used blocks + umf_result = umfPoolFree(pool, p1); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_EQ(GetStats(prov).used_size, 2 * MB); + ASSERT_EQ(GetStats(prov).alloc_size, init_buffer_size); + ASSERT_EQ(GetStats(prov).num_all_blocks, 3); + + p1 = umfPoolMalloc(pool, 2 * MB); + ASSERT_EQ(GetStats(prov).used_size, 4 * MB); + ASSERT_EQ(GetStats(prov).alloc_size, init_buffer_size); + ASSERT_EQ(GetStats(prov).num_all_blocks, 3); + + // free all allocs + // overall alloc size shouldn't change + // block p2 should merge with the prev free block p1 + // and the remaining init block + umf_result = umfPoolFree(pool, p1); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_EQ(GetStats(prov).num_all_blocks, 3); + umf_result = umfPoolFree(pool, p2); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_EQ(GetStats(prov).used_size, 0 * MB); + ASSERT_EQ(GetStats(prov).alloc_size, init_buffer_size); + ASSERT_EQ(GetStats(prov).num_all_blocks, 1); + + // test allocations with alignment + // TODO: what about holes? 
+ p1 = umfPoolAlignedMalloc(pool, 1 * MB - 4, 128); + ASSERT_NE(p1, nullptr); + ASSERT_EQ((uintptr_t)p1 & 127, 0); + p2 = umfPoolAlignedMalloc(pool, 1 * MB - 4, 128); + ASSERT_NE(p2, nullptr); + ASSERT_EQ((uintptr_t)p1 & 127, 0); + umf_result = umfPoolFree(pool, p1); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + umf_result = umfPoolFree(pool, p2); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + // alloc whole buffer + // after this, there should be one single block + p1 = umfPoolMalloc(pool, init_buffer_size); + ASSERT_EQ(GetStats(prov).used_size, init_buffer_size); + ASSERT_EQ(GetStats(prov).alloc_size, init_buffer_size); + ASSERT_EQ(GetStats(prov).num_all_blocks, 1); + + // free all memory + // alloc 2 MB block - the init block should be split + umf_result = umfPoolFree(pool, p1); + p1 = umfPoolMalloc(pool, 2 * MB); + ASSERT_EQ(GetStats(prov).used_size, 2 * MB); + ASSERT_EQ(GetStats(prov).alloc_size, init_buffer_size); + ASSERT_EQ(GetStats(prov).num_all_blocks, 2); + + // alloc additional 2 MB + // the non-used block should be used + p2 = umfPoolMalloc(pool, 2 * MB); + ASSERT_EQ(GetStats(prov).used_size, 4 * MB); + ASSERT_EQ(GetStats(prov).alloc_size, init_buffer_size); + ASSERT_EQ(GetStats(prov).num_all_blocks, 3); + ASSERT_NE(p1, p2); + + // make sure that p1 < p2 + if (p1 > p2) { + std::swap(p1, p2); + } + + // free blocks in order: p2, p1 + // block p1 should merge with the next block p2 + // swap pointers to get p1 < p2 + umfPoolFree(pool, p2); + umfPoolFree(pool, p1); + ASSERT_EQ(GetStats(prov).used_size, 0 * MB); + ASSERT_EQ(GetStats(prov).alloc_size, init_buffer_size); + ASSERT_EQ(GetStats(prov).num_all_blocks, 1); + + // alloc 10x 2 MB - this should occupy all allocated memory + constexpr int allocs_size = 10; + void *allocs[allocs_size] = {0}; + for (int i = 0; i < allocs_size; i++) { + ASSERT_EQ(GetStats(prov).used_size, i * 2 * MB); + allocs[i] = umfPoolMalloc(pool, 2 * MB); + ASSERT_NE(allocs[i], nullptr); + } + ASSERT_EQ(GetStats(prov).used_size, 
20 * MB); + ASSERT_EQ(GetStats(prov).alloc_size, init_buffer_size); + // there should be no block with the free memory + ASSERT_EQ(GetStats(prov).num_all_blocks, allocs_size); + + // free all memory + for (int i = 0; i < allocs_size; i++) { + umf_result = umfPoolFree(pool, allocs[i]); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + } + + ASSERT_EQ(GetStats(prov).num_all_blocks, 1); + ASSERT_EQ(GetStats(prov).used_size, 0 * MB); + ASSERT_EQ(GetStats(prov).alloc_size, init_buffer_size); + + umfPoolDestroy(pool); + // Both coarse_memory_provider and malloc_memory_provider + // have already been destroyed by umfPoolDestroy(), because: + // UMF_POOL_CREATE_FLAG_OWN_PROVIDER was set in umfPoolCreate() and + // coarse_memory_provider_params.destroy_upstream_memory_provider = true; +} + +TEST_P(CoarseWithMemoryStrategyTest, disjointCoarseMallocPool_simple1) { + umf_memory_provider_handle_t malloc_memory_provider; + umf_result_t umf_result; + + umf_result = umfMemoryProviderCreate(&UMF_MALLOC_MEMORY_PROVIDER_OPS, NULL, + &malloc_memory_provider); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(malloc_memory_provider, nullptr); + + const size_t init_buffer_size = 20 * MB; + + coarse_memory_provider_params_t coarse_memory_provider_params; + // make sure there are no undefined members - prevent a UB + memset(&coarse_memory_provider_params, 0, + sizeof(coarse_memory_provider_params)); + coarse_memory_provider_params.allocation_strategy = allocation_strategy; + coarse_memory_provider_params.upstream_memory_provider = + malloc_memory_provider; + coarse_memory_provider_params.immediate_init_from_upstream = true; + coarse_memory_provider_params.init_buffer = NULL; + coarse_memory_provider_params.init_buffer_size = init_buffer_size; + + umf_memory_provider_handle_t coarse_memory_provider; + umf_result = umfMemoryProviderCreate(umfCoarseMemoryProviderOps(), + &coarse_memory_provider_params, + &coarse_memory_provider); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + 
ASSERT_NE(coarse_memory_provider, nullptr); + + umf_disjoint_pool_params_handle_t disjoint_pool_params = NULL; + umf_result = umfDisjointPoolParamsCreate(&disjoint_pool_params); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(disjoint_pool_params, nullptr); + umf_result = + umfDisjointPoolParamsSetSlabMinSize(disjoint_pool_params, 4096); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + umf_result = + umfDisjointPoolParamsSetMaxPoolableSize(disjoint_pool_params, 4096); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + umf_result = umfDisjointPoolParamsSetCapacity(disjoint_pool_params, 4); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + umf_result = + umfDisjointPoolParamsSetMinBucketSize(disjoint_pool_params, 64); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + umf_result = umfDisjointPoolParamsSetTrace(disjoint_pool_params, 1); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + umf_memory_pool_handle_t pool; + umf_result = + umfPoolCreate(umfDisjointPoolOps(), coarse_memory_provider, + disjoint_pool_params, UMF_POOL_CREATE_FLAG_NONE, &pool); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(pool, nullptr); + + umf_result = umfDisjointPoolParamsDestroy(disjoint_pool_params); + + umf_memory_provider_handle_t prov = NULL; + umfPoolGetMemoryProvider(pool, &prov); + ASSERT_NE(prov, nullptr); + + // test 1 + + size_t s1 = 74659 * KB; + size_t s2 = 8206 * KB; + + size_t max_alloc_size = 0; + + const int nreps = 2; + const int nptrs = 6; + + // s1 + for (int j = 0; j < nreps; j++) { + void *t[nptrs] = {0}; + for (int i = 0; i < nptrs; i++) { + t[i] = umfPoolMalloc(pool, s1); + ASSERT_NE(t[i], nullptr); + } + + if (max_alloc_size == 0) { + max_alloc_size = GetStats(prov).alloc_size; + } + + for (int i = 0; i < nptrs; i++) { + umf_result = umfPoolFree(pool, t[i]); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + } + } + + // s2 + for (int j = 0; j < nreps; j++) { + void *t[nptrs] = {0}; + for (int i = 0; i < nptrs; i++) { + t[i] = umfPoolMalloc(pool, s2); + 
ASSERT_NE(t[i], nullptr); + } + + // all s2 should fit into single block leaved after freeing s1 + ASSERT_LE(GetStats(prov).alloc_size, max_alloc_size); + + for (int i = 0; i < nptrs; i++) { + umf_result = umfPoolFree(pool, t[i]); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + } + } + + umfPoolDestroy(pool); + umfMemoryProviderDestroy(coarse_memory_provider); + umfMemoryProviderDestroy(malloc_memory_provider); +} + +TEST_P(CoarseWithMemoryStrategyTest, disjointCoarseMallocPool_simple2) { + umf_memory_provider_handle_t malloc_memory_provider; + umf_result_t umf_result; + + umf_result = umfMemoryProviderCreate(&UMF_MALLOC_MEMORY_PROVIDER_OPS, NULL, + &malloc_memory_provider); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(malloc_memory_provider, nullptr); + + const size_t init_buffer_size = 20 * MB; + + coarse_memory_provider_params_t coarse_memory_provider_params; + // make sure there are no undefined members - prevent a UB + memset(&coarse_memory_provider_params, 0, + sizeof(coarse_memory_provider_params)); + coarse_memory_provider_params.allocation_strategy = allocation_strategy; + coarse_memory_provider_params.upstream_memory_provider = + malloc_memory_provider; + coarse_memory_provider_params.immediate_init_from_upstream = true; + coarse_memory_provider_params.init_buffer = NULL; + coarse_memory_provider_params.init_buffer_size = init_buffer_size; + + umf_memory_provider_handle_t coarse_memory_provider; + umf_result = umfMemoryProviderCreate(umfCoarseMemoryProviderOps(), + &coarse_memory_provider_params, + &coarse_memory_provider); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(coarse_memory_provider, nullptr); + + umf_disjoint_pool_params_handle_t disjoint_pool_params = NULL; + umf_result = umfDisjointPoolParamsCreate(&disjoint_pool_params); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(disjoint_pool_params, nullptr); + umf_result = + umfDisjointPoolParamsSetSlabMinSize(disjoint_pool_params, 4096); + ASSERT_EQ(umf_result, 
UMF_RESULT_SUCCESS); + umf_result = + umfDisjointPoolParamsSetMaxPoolableSize(disjoint_pool_params, 4096); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + umf_result = umfDisjointPoolParamsSetCapacity(disjoint_pool_params, 4); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + umf_result = + umfDisjointPoolParamsSetMinBucketSize(disjoint_pool_params, 64); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + umf_result = umfDisjointPoolParamsSetTrace(disjoint_pool_params, 1); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + umf_memory_pool_handle_t pool; + umf_result = + umfPoolCreate(umfDisjointPoolOps(), coarse_memory_provider, + disjoint_pool_params, UMF_POOL_CREATE_FLAG_NONE, &pool); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(pool, nullptr); + + umf_result = umfDisjointPoolParamsDestroy(disjoint_pool_params); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + // test + double sizes[] = {2, 4, 0.5, 1, 8, 0.25}; + size_t alignment[] = {0, 4, 0, 16, 32, 128}; + for (int i = 0; i < 6; i++) { + size_t s = (size_t)(sizes[i] * MB); + void *t[8] = {0}; + for (int j = 0; j < 8; j++) { + t[j] = umfPoolAlignedMalloc(pool, s, alignment[i]); + ASSERT_NE(t[j], nullptr); + } + + for (int j = 0; j < 8; j++) { + umf_result = umfPoolFree(pool, t[j]); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + } + } + + umfPoolDestroy(pool); + umfMemoryProviderDestroy(coarse_memory_provider); + umfMemoryProviderDestroy(malloc_memory_provider); +} + +struct alloc_ptr_size { + void *ptr; + size_t size; + + bool operator<(const alloc_ptr_size &other) const { + if (ptr == other.ptr) { + return size < other.size; + } + return ptr < other.ptr; + } +}; + +TEST_P(CoarseWithMemoryStrategyTest, disjointCoarseMMapPool_random) { + umf_result_t umf_result; + + const size_t init_buffer_size = 200 * MB; + + // preallocate some memory and initialize the vector with zeros + std::vector buffer(init_buffer_size, 0); + void *buf = (void *)buffer.data(); + ASSERT_NE(buf, nullptr); + + const unsigned char 
alloc_check_val = 11; + + coarse_memory_provider_params_t coarse_memory_provider_params; + // make sure there are no undefined members - prevent a UB + memset(&coarse_memory_provider_params, 0, + sizeof(coarse_memory_provider_params)); + coarse_memory_provider_params.allocation_strategy = allocation_strategy; + coarse_memory_provider_params.upstream_memory_provider = NULL; + coarse_memory_provider_params.immediate_init_from_upstream = false; + coarse_memory_provider_params.init_buffer = buf; + coarse_memory_provider_params.init_buffer_size = init_buffer_size; + + umf_memory_provider_handle_t coarse_memory_provider; + umf_result = umfMemoryProviderCreate(umfCoarseMemoryProviderOps(), + &coarse_memory_provider_params, + &coarse_memory_provider); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(coarse_memory_provider, nullptr); + + umf_disjoint_pool_params_handle_t disjoint_pool_params = NULL; + umf_result = umfDisjointPoolParamsCreate(&disjoint_pool_params); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(disjoint_pool_params, nullptr); + umf_result = + umfDisjointPoolParamsSetSlabMinSize(disjoint_pool_params, 1024); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + umf_result = + umfDisjointPoolParamsSetMaxPoolableSize(disjoint_pool_params, 1024); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + umf_result = umfDisjointPoolParamsSetCapacity(disjoint_pool_params, 2); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + umf_result = + umfDisjointPoolParamsSetMinBucketSize(disjoint_pool_params, 16); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + umf_result = umfDisjointPoolParamsSetTrace(disjoint_pool_params, 1); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + umf_memory_pool_handle_t pool; + umf_result = + umfPoolCreate(umfDisjointPoolOps(), coarse_memory_provider, + disjoint_pool_params, UMF_POOL_CREATE_FLAG_NONE, &pool); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(pool, nullptr); + + umf_result = 
umfDisjointPoolParamsDestroy(disjoint_pool_params); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + // set constant seed so each test run will have the same scenario + uint32_t seed = 1234; + std::mt19937 mt(seed); + + // different sizes to alloc + std::vector sizes = {15, 49, 588, 1025, + 2 * KB, 5 * KB, 160 * KB, 511 * KB, + 1000 * KB, MB, 3 * MB, 7 * MB}; + std::uniform_int_distribution sizes_dist(0, (int)(sizes.size() - 1)); + + // each alloc would be done few times + std::vector counts = {1, 3, 4, 8, 9, 11}; + std::uniform_int_distribution counts_dist(0, (int)(counts.size() - 1)); + + // action to take will be random + // alloc = <0, .5), free = <.5, 1) + std::uniform_real_distribution actions_dist(0, 1); + + std::set allocs; + const int nreps = 100; + + for (size_t i = 0; i < nreps; i++) { + size_t count = counts[counts_dist(mt)]; + float action = actions_dist(mt); + + if (action < 0.5) { + size_t size = sizes[sizes_dist(mt)]; + std::cout << "size: " << size << " count: " << count + << " action: alloc" << std::endl; + + // alloc + for (size_t j = 0; j < count; j++) { + void *ptr = umfPoolMalloc(pool, size); + ASSERT_NE(ptr, nullptr); + + if (ptr == nullptr) { + break; + } + + // check if first and last bytes are empty and fill them with control data + ASSERT_EQ(((unsigned char *)ptr)[0], 0); + ASSERT_EQ(((unsigned char *)ptr)[size - 1], 0); + ((unsigned char *)ptr)[0] = alloc_check_val; + ((unsigned char *)ptr)[size - 1] = alloc_check_val; + + allocs.insert({ptr, size}); + } + } else { + std::cout << "count: " << count << " action: free" << std::endl; + + // free random allocs + for (size_t j = 0; j < count; j++) { + if (allocs.size() == 0) { + continue; + } + + std::uniform_int_distribution free_dist( + 0, (int)(allocs.size() - 1)); + size_t free_id = free_dist(mt); + auto it = allocs.begin(); + std::advance(it, free_id); + auto [ptr, size] = (*it); + ASSERT_NE(ptr, nullptr); + + // check if control bytes are set and clean them + + ASSERT_EQ(((unsigned char 
*)ptr)[0], alloc_check_val); + ASSERT_EQ(((unsigned char *)ptr)[size - 1], alloc_check_val); + ((unsigned char *)ptr)[0] = 0; + ((unsigned char *)ptr)[size - 1] = 0; + + umf_result_t ret = umfPoolFree(pool, ptr); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + + allocs.erase(it); + } + } + } + + std::cout << "cleanup" << std::endl; + + while (allocs.size()) { + umf_result_t ret = umfPoolFree(pool, (*allocs.begin()).ptr); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + allocs.erase(allocs.begin()); + } + + umfPoolDestroy(pool); + umfMemoryProviderDestroy(coarse_memory_provider); +} diff --git a/test/fuzz/umfFuzz.cpp b/test/fuzz/umfFuzz.cpp index ac52c96b5..360184c73 100644 --- a/test/fuzz/umfFuzz.cpp +++ b/test/fuzz/umfFuzz.cpp @@ -13,10 +13,15 @@ constexpr int MAX_PROVIDER_ALLOC_SIZE = 100 * 1024; // 100 kB int umf_memory_provider_create(TestState &test_state) { umf_memory_provider_ops_t *provider_ops = umfOsMemoryProviderOps(); - umf_os_memory_provider_params_t params = umfOsMemoryProviderParamsDefault(); - umf_result_t res = - umfMemoryProviderCreate(provider_ops, ¶ms, &test_state.provider); + umf_os_memory_provider_params_handle_t params = NULL; + + umf_result_t res = umfOsMemoryProviderParamsCreate(¶ms); + if (res != UMF_RESULT_SUCCESS) { + return -1; + } + res = umfMemoryProviderCreate(provider_ops, params, &test_state.provider); + umfOsMemoryProviderParamsDestroy(params); if (res != UMF_RESULT_SUCCESS) { return -1; } diff --git a/test/ipcAPI.cpp b/test/ipcAPI.cpp index 524c26d4b..4df32a1c9 100644 --- a/test/ipcAPI.cpp +++ b/test/ipcAPI.cpp @@ -24,9 +24,9 @@ struct provider_mock_ipc : public umf_test::provider_base_t { size_t size; }; - umf_test::provider_malloc helper_prov; - allocations_mutex_type alloc_mutex; - allocations_map_type allocations; + umf_test::provider_ba_global helper_prov; + static allocations_mutex_type alloc_mutex; + static allocations_map_type allocations; umf_result_t alloc(size_t size, size_t align, void **ptr) noexcept { auto ret = 
helper_prov.alloc(size, align, ptr); @@ -105,6 +105,9 @@ struct provider_mock_ipc : public umf_test::provider_base_t { } }; +provider_mock_ipc::allocations_mutex_type provider_mock_ipc::alloc_mutex; +provider_mock_ipc::allocations_map_type provider_mock_ipc::allocations; + static umf_memory_provider_ops_t IPC_MOCK_PROVIDER_OPS = umf::providerMakeCOps(); @@ -113,4 +116,4 @@ HostMemoryAccessor hostMemoryAccessor; INSTANTIATE_TEST_SUITE_P(umfIpcTestSuite, umfIpcTest, ::testing::Values(ipcTestParams{ umfProxyPoolOps(), nullptr, &IPC_MOCK_PROVIDER_OPS, - nullptr, &hostMemoryAccessor})); + nullptr, &hostMemoryAccessor, false})); diff --git a/test/ipcFixtures.hpp b/test/ipcFixtures.hpp index 1eb7865e3..8dca83f10 100644 --- a/test/ipcFixtures.hpp +++ b/test/ipcFixtures.hpp @@ -46,16 +46,26 @@ class HostMemoryAccessor : public MemoryAccessor { } }; +// ipcTestParams: +// pool_ops, pool_params, provider_ops, provider_params, memoryAccessor, free_not_supp +// free_not_supp (bool) - provider does not support the free() op using ipcTestParams = std::tuple; + void *, MemoryAccessor *, bool>; struct umfIpcTest : umf_test::test, ::testing::WithParamInterface { - umfIpcTest() : pool(nullptr, nullptr) {} + umfIpcTest() {} void SetUp() override { test::SetUp(); - this->pool = makePool(); + auto [pool_ops, pool_params, provider_ops, provider_params, accessor, + free_not_supp] = this->GetParam(); + poolOps = pool_ops; + poolParams = pool_params; + providerOps = provider_ops; + providerParams = provider_params; + memAccessor = accessor; + freeNotSupported = free_not_supp; } void TearDown() override { test::TearDown(); } @@ -63,18 +73,18 @@ struct umfIpcTest : umf_test::test, umf::pool_unique_handle_t makePool() { // TODO: The function is similar to poolCreateExt function // from memoryPool.hpp - umf_memory_provider_handle_t hProvider; - umf_memory_pool_handle_t hPool; - auto [pool_ops, pool_params, provider_ops, provider_params, accessor] = - this->GetParam(); + 
umf_memory_provider_handle_t hProvider = NULL; + umf_memory_pool_handle_t hPool = NULL; auto ret = - umfMemoryProviderCreate(provider_ops, provider_params, &hProvider); + umfMemoryProviderCreate(providerOps, providerParams, &hProvider); EXPECT_EQ(ret, UMF_RESULT_SUCCESS); auto trace = [](void *trace_context, const char *name) { stats_type *stat = static_cast(trace_context); - if (std::strcmp(name, "get_ipc_handle") == 0) { + if (std::strcmp(name, "alloc") == 0) { + ++stat->allocCount; + } else if (std::strcmp(name, "get_ipc_handle") == 0) { ++stat->getCount; } else if (std::strcmp(name, "put_ipc_handle") == 0) { ++stat->putCount; @@ -88,40 +98,99 @@ struct umfIpcTest : umf_test::test, umf_memory_provider_handle_t hTraceProvider = traceProviderCreate(hProvider, true, (void *)&stat, trace); - ret = umfPoolCreate(pool_ops, hTraceProvider, pool_params, + ret = umfPoolCreate(poolOps, hTraceProvider, poolParams, UMF_POOL_CREATE_FLAG_OWN_PROVIDER, &hPool); EXPECT_EQ(ret, UMF_RESULT_SUCCESS); - memAccessor = accessor; - return umf::pool_unique_handle_t(hPool, &umfPoolDestroy); } struct stats_type { + std::atomic allocCount; std::atomic getCount; std::atomic putCount; std::atomic openCount; std::atomic closeCount; - stats_type() : getCount(0), putCount(0), openCount(0), closeCount(0) {} + stats_type() + : allocCount(0), getCount(0), putCount(0), openCount(0), + closeCount(0) {} }; - umf::pool_unique_handle_t pool; static constexpr int NTHREADS = 10; stats_type stat; MemoryAccessor *memAccessor = nullptr; + umf_memory_pool_ops_t *poolOps = nullptr; + void *poolParams = nullptr; + umf_memory_provider_ops_t *providerOps = nullptr; + void *providerParams = nullptr; + bool freeNotSupported = false; }; +static inline umf_result_t +get_umf_result_of_free(bool freeNotSupported, umf_result_t expected_result) { + if (freeNotSupported) { + return UMF_RESULT_ERROR_NOT_SUPPORTED; + } + + return expected_result; +} + TEST_P(umfIpcTest, GetIPCHandleSize) { size_t size = 0; + 
umf::pool_unique_handle_t pool = makePool(); + umf_result_t ret = umfPoolGetIPCHandleSize(pool.get(), &size); EXPECT_EQ(ret, UMF_RESULT_SUCCESS); EXPECT_GT(size, 0); } +TEST_P(umfIpcTest, GetIPCHandleSizeInvalidArgs) { + size_t size = 0; + umf_result_t ret = umfPoolGetIPCHandleSize(nullptr, &size); + EXPECT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + umf::pool_unique_handle_t pool = makePool(); + ret = umfPoolGetIPCHandleSize(pool.get(), nullptr); + EXPECT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); +} + +TEST_P(umfIpcTest, GetIPCHandleInvalidArgs) { + constexpr size_t SIZE = 100; + umf_ipc_handle_t ipcHandle = nullptr; + size_t handleSize = 0; + umf_result_t ret = umfGetIPCHandle(nullptr, &ipcHandle, &handleSize); + EXPECT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + void *ptr = (void *)0xBAD; + ret = umfGetIPCHandle(ptr, &ipcHandle, &handleSize); + EXPECT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + umf::pool_unique_handle_t pool = makePool(); + ptr = umfPoolMalloc(pool.get(), SIZE); + EXPECT_NE(ptr, nullptr); + + ret = umfGetIPCHandle(ptr, nullptr, &handleSize); + EXPECT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + ret = umfGetIPCHandle(ptr, &ipcHandle, nullptr); + EXPECT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + ret = umfFree(ptr); + EXPECT_EQ(ret, + get_umf_result_of_free(freeNotSupported, UMF_RESULT_SUCCESS)); +} + +TEST_P(umfIpcTest, CloseIPCHandleInvalidPtr) { + int local_var; + auto ret = umfCloseIPCHandle(&local_var); + EXPECT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); +} + TEST_P(umfIpcTest, BasicFlow) { constexpr size_t SIZE = 100; std::vector expected_data(SIZE); + umf::pool_unique_handle_t pool = makePool(); int *ptr = (int *)umfPoolMalloc(pool.get(), SIZE * sizeof(int)); EXPECT_NE(ptr, nullptr); @@ -138,12 +207,17 @@ TEST_P(umfIpcTest, BasicFlow) { ASSERT_EQ(ret, UMF_RESULT_SUCCESS); ASSERT_EQ(handleFullSize, handleHalfSize); + umf_ipc_handler_handle_t ipcHandler = nullptr; + ret = umfPoolGetIPCHandler(pool.get(), &ipcHandler); + 
ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + ASSERT_NE(ipcHandler, nullptr); + void *fullArray = nullptr; - ret = umfOpenIPCHandle(pool.get(), ipcHandleFull, &fullArray); + ret = umfOpenIPCHandle(ipcHandler, ipcHandleFull, &fullArray); ASSERT_EQ(ret, UMF_RESULT_SUCCESS); void *halfArray = nullptr; - ret = umfOpenIPCHandle(pool.get(), ipcHandleHalf, &halfArray); + ret = umfOpenIPCHandle(ipcHandler, ipcHandleHalf, &halfArray); ASSERT_EQ(ret, UMF_RESULT_SUCCESS); std::vector actual_data(SIZE); @@ -170,12 +244,202 @@ TEST_P(umfIpcTest, BasicFlow) { EXPECT_EQ(ret, UMF_RESULT_SUCCESS); ret = umfPoolFree(pool.get(), ptr); + EXPECT_EQ(ret, + get_umf_result_of_free(freeNotSupported, UMF_RESULT_SUCCESS)); + + pool.reset(nullptr); + EXPECT_EQ(stat.getCount, 1); + EXPECT_EQ(stat.putCount, stat.getCount); + EXPECT_EQ(stat.openCount, 1); + EXPECT_EQ(stat.closeCount, stat.openCount); +} + +TEST_P(umfIpcTest, GetPoolByOpenedHandle) { + constexpr size_t SIZE = 100; + constexpr size_t NUM_ALLOCS = 100; + constexpr size_t NUM_POOLS = 4; + void *ptrs[NUM_ALLOCS]; + void *openedPtrs[NUM_POOLS][NUM_ALLOCS]; + std::vector pools_to_open; + umf::pool_unique_handle_t pool = makePool(); + + for (size_t i = 0; i < NUM_POOLS; ++i) { + pools_to_open.push_back(makePool()); + } + + for (size_t i = 0; i < NUM_ALLOCS; ++i) { + void *ptr = umfPoolMalloc(pool.get(), SIZE); + ASSERT_NE(ptr, nullptr); + ptrs[i] = ptr; + } + + for (size_t i = 0; i < NUM_ALLOCS; ++i) { + umf_ipc_handle_t ipcHandle = nullptr; + size_t handleSize = 0; + umf_result_t ret = umfGetIPCHandle(ptrs[i], &ipcHandle, &handleSize); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + + for (size_t pool_id = 0; pool_id < NUM_POOLS; pool_id++) { + void *ptr = nullptr; + umf_ipc_handler_handle_t ipcHandler = nullptr; + ret = + umfPoolGetIPCHandler(pools_to_open[pool_id].get(), &ipcHandler); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + ASSERT_NE(ipcHandler, nullptr); + + ret = umfOpenIPCHandle(ipcHandler, ipcHandle, &ptr); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); 
+ openedPtrs[pool_id][i] = ptr; + } + + ret = umfPutIPCHandle(ipcHandle); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + } + + for (size_t pool_id = 0; pool_id < NUM_POOLS; pool_id++) { + for (size_t i = 0; i < NUM_ALLOCS; ++i) { + umf_memory_pool_handle_t openedPool = + umfPoolByPtr(openedPtrs[pool_id][i]); + EXPECT_EQ(openedPool, pools_to_open[pool_id].get()); + } + } + + for (size_t pool_id = 0; pool_id < NUM_POOLS; pool_id++) { + for (size_t i = 0; i < NUM_ALLOCS; ++i) { + umf_result_t ret = umfCloseIPCHandle(openedPtrs[pool_id][i]); + EXPECT_EQ(ret, UMF_RESULT_SUCCESS); + } + } + + for (size_t i = 0; i < NUM_ALLOCS; ++i) { + umf_result_t ret = umfFree(ptrs[i]); + EXPECT_EQ(ret, + get_umf_result_of_free(freeNotSupported, UMF_RESULT_SUCCESS)); + } +} + +TEST_P(umfIpcTest, AllocFreeAllocTest) { + constexpr size_t SIZE = 64 * 1024; + umf::pool_unique_handle_t pool = makePool(); + umf_ipc_handler_handle_t ipcHandler = nullptr; + + umf_result_t ret = umfPoolGetIPCHandler(pool.get(), &ipcHandler); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + ASSERT_NE(ipcHandler, nullptr); + + void *ptr = umfPoolMalloc(pool.get(), SIZE); + EXPECT_NE(ptr, nullptr); + + umf_ipc_handle_t ipcHandle = nullptr; + size_t handleSize = 0; + ret = umfGetIPCHandle(ptr, &ipcHandle, &handleSize); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + + void *opened_ptr = nullptr; + ret = umfOpenIPCHandle(ipcHandler, ipcHandle, &opened_ptr); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + + ret = umfCloseIPCHandle(opened_ptr); + EXPECT_EQ(ret, UMF_RESULT_SUCCESS); + + ret = umfPutIPCHandle(ipcHandle); + EXPECT_EQ(ret, UMF_RESULT_SUCCESS); + + ret = umfPoolFree(pool.get(), ptr); + EXPECT_EQ(ret, + get_umf_result_of_free(freeNotSupported, UMF_RESULT_SUCCESS)); + + ptr = umfPoolMalloc(pool.get(), SIZE); + ASSERT_NE(ptr, nullptr); + + // test if the allocated memory is usable - fill it with the 0xAB pattern. 
+ const uint32_t pattern = 0xAB; + memAccessor->fill(ptr, SIZE, &pattern, sizeof(pattern)); + + ret = umfGetIPCHandle(ptr, &ipcHandle, &handleSize); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + + ret = umfOpenIPCHandle(ipcHandler, ipcHandle, &opened_ptr); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + + ret = umfCloseIPCHandle(opened_ptr); + EXPECT_EQ(ret, UMF_RESULT_SUCCESS); + + ret = umfPutIPCHandle(ipcHandle); + EXPECT_EQ(ret, UMF_RESULT_SUCCESS); + + ret = umfPoolFree(pool.get(), ptr); + EXPECT_EQ(ret, + get_umf_result_of_free(freeNotSupported, UMF_RESULT_SUCCESS)); + + pool.reset(nullptr); + EXPECT_EQ(stat.getCount, stat.putCount); + EXPECT_EQ(stat.openCount, stat.getCount); + EXPECT_EQ(stat.openCount, stat.closeCount); +} + +TEST_P(umfIpcTest, openInTwoIpcHandlers) { + constexpr size_t SIZE = 100; + std::vector expected_data(SIZE); + umf::pool_unique_handle_t pool1 = makePool(); + umf::pool_unique_handle_t pool2 = makePool(); + umf_ipc_handler_handle_t ipcHandler1 = nullptr; + umf_ipc_handler_handle_t ipcHandler2 = nullptr; + + umf_result_t ret = umfPoolGetIPCHandler(pool1.get(), &ipcHandler1); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + ASSERT_NE(ipcHandler1, nullptr); + + ret = umfPoolGetIPCHandler(pool2.get(), &ipcHandler2); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + ASSERT_NE(ipcHandler2, nullptr); + + void *ptr = umfPoolMalloc(pool1.get(), sizeof(expected_data[0]) * SIZE); + EXPECT_NE(ptr, nullptr); + + std::iota(expected_data.begin(), expected_data.end(), 0); + memAccessor->copy(ptr, expected_data.data(), SIZE * sizeof(int)); + + umf_ipc_handle_t ipcHandle = nullptr; + size_t handleSize = 0; + ret = umfGetIPCHandle(ptr, &ipcHandle, &handleSize); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + + void *openedPtr1 = nullptr; + ret = umfOpenIPCHandle(ipcHandler1, ipcHandle, &openedPtr1); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + + void *openedPtr2 = nullptr; + ret = umfOpenIPCHandle(ipcHandler2, ipcHandle, &openedPtr2); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + + ret = 
umfPutIPCHandle(ipcHandle); EXPECT_EQ(ret, UMF_RESULT_SUCCESS); + std::vector actual_data(SIZE); + memAccessor->copy(actual_data.data(), openedPtr1, SIZE * sizeof(int)); + ASSERT_TRUE(std::equal(expected_data.begin(), expected_data.end(), + actual_data.begin())); + + ret = umfCloseIPCHandle(openedPtr1); + EXPECT_EQ(ret, UMF_RESULT_SUCCESS); + + memAccessor->copy(actual_data.data(), openedPtr2, SIZE * sizeof(int)); + ASSERT_TRUE(std::equal(expected_data.begin(), expected_data.end(), + actual_data.begin())); + + ret = umfCloseIPCHandle(openedPtr2); + EXPECT_EQ(ret, UMF_RESULT_SUCCESS); + + ret = umfPoolFree(pool1.get(), ptr); + EXPECT_EQ(ret, + get_umf_result_of_free(freeNotSupported, UMF_RESULT_SUCCESS)); + + pool1.reset(nullptr); + pool2.reset(nullptr); EXPECT_EQ(stat.getCount, 1); EXPECT_EQ(stat.putCount, stat.getCount); - // TODO: enale check below once cache for open IPC handles is implemented - // EXPECT_EQ(stat.openCount, 1); + EXPECT_EQ(stat.openCount, 2); EXPECT_EQ(stat.closeCount, stat.openCount); } @@ -183,6 +447,8 @@ TEST_P(umfIpcTest, ConcurrentGetPutHandles) { std::vector ptrs; constexpr size_t ALLOC_SIZE = 100; constexpr size_t NUM_POINTERS = 100; + umf::pool_unique_handle_t pool = makePool(); + for (size_t i = 0; i < NUM_POINTERS; ++i) { void *ptr = umfPoolMalloc(pool.get(), ALLOC_SIZE); EXPECT_NE(ptr, nullptr); @@ -218,18 +484,21 @@ TEST_P(umfIpcTest, ConcurrentGetPutHandles) { for (void *ptr : ptrs) { umf_result_t ret = umfPoolFree(pool.get(), ptr); - EXPECT_EQ(ret, UMF_RESULT_SUCCESS); + EXPECT_EQ(ret, + get_umf_result_of_free(freeNotSupported, UMF_RESULT_SUCCESS)); } - EXPECT_GE(stat.getCount, NUM_POINTERS); - EXPECT_LE(stat.getCount, NUM_POINTERS * NTHREADS); + pool.reset(nullptr); EXPECT_EQ(stat.putCount, stat.getCount); } TEST_P(umfIpcTest, ConcurrentOpenCloseHandles) { + umf_result_t ret; std::vector ptrs; constexpr size_t ALLOC_SIZE = 100; constexpr size_t NUM_POINTERS = 100; + umf::pool_unique_handle_t pool = makePool(); + for (size_t i = 0; 
i < NUM_POINTERS; ++i) { void *ptr = umfPoolMalloc(pool.get(), ALLOC_SIZE); EXPECT_NE(ptr, nullptr); @@ -240,21 +509,25 @@ TEST_P(umfIpcTest, ConcurrentOpenCloseHandles) { for (size_t i = 0; i < NUM_POINTERS; ++i) { umf_ipc_handle_t ipcHandle; size_t handleSize; - umf_result_t ret = umfGetIPCHandle(ptrs[i], &ipcHandle, &handleSize); + ret = umfGetIPCHandle(ptrs[i], &ipcHandle, &handleSize); ASSERT_EQ(ret, UMF_RESULT_SUCCESS); ipcHandles[i] = ipcHandle; } std::array, NTHREADS> openedIpcHandles; + umf_ipc_handler_handle_t ipcHandler = nullptr; + ret = umfPoolGetIPCHandler(pool.get(), &ipcHandler); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + ASSERT_NE(ipcHandler, nullptr); umf_test::syncthreads_barrier syncthreads(NTHREADS); - auto openHandlesFn = [this, &ipcHandles, &openedIpcHandles, - &syncthreads](size_t tid) { + auto openHandlesFn = [&ipcHandles, &openedIpcHandles, &syncthreads, + ipcHandler](size_t tid) { syncthreads(); for (auto ipcHandle : ipcHandles) { void *ptr; - umf_result_t ret = umfOpenIPCHandle(pool.get(), ipcHandle, &ptr); + umf_result_t ret = umfOpenIPCHandle(ipcHandler, ipcHandle, &ptr); ASSERT_EQ(ret, UMF_RESULT_SUCCESS); openedIpcHandles[tid].push_back(ptr); } @@ -273,15 +546,20 @@ TEST_P(umfIpcTest, ConcurrentOpenCloseHandles) { umf_test::parallel_exec(NTHREADS, closeHandlesFn); for (auto ipcHandle : ipcHandles) { - umf_result_t ret = umfPutIPCHandle(ipcHandle); + ret = umfPutIPCHandle(ipcHandle); EXPECT_EQ(ret, UMF_RESULT_SUCCESS); } for (void *ptr : ptrs) { - umf_result_t ret = umfPoolFree(pool.get(), ptr); - EXPECT_EQ(ret, UMF_RESULT_SUCCESS); + ret = umfPoolFree(pool.get(), ptr); + EXPECT_EQ(ret, + get_umf_result_of_free(freeNotSupported, UMF_RESULT_SUCCESS)); } + pool.reset(nullptr); + EXPECT_EQ(stat.getCount, stat.allocCount); + EXPECT_EQ(stat.putCount, stat.getCount); + EXPECT_EQ(stat.openCount, stat.allocCount); EXPECT_EQ(stat.openCount, stat.closeCount); } diff --git a/test/ipc_devdax_prov.sh b/test/ipc_devdax_prov.sh new file mode 100755 
index 000000000..7c5ba3675 --- /dev/null +++ b/test/ipc_devdax_prov.sh @@ -0,0 +1,34 @@ +# +# Copyright (C) 2024 Intel Corporation +# +# Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# + +#!/bin/bash + +set -e + +if [ "$UMF_TESTS_DEVDAX_PATH" = "" ]; then + echo "Test skipped, UMF_TESTS_DEVDAX_PATH is not set" + exit 0 +fi + +if [ "$UMF_TESTS_DEVDAX_SIZE" = "" ]; then + echo "Test skipped, UMF_TESTS_DEVDAX_SIZE is not set" + exit 0 +fi + +# port should be a number from the range <1024, 65535> +PORT=$(( 1024 + ( $$ % ( 65535 - 1024 )))) + +UMF_LOG_VAL="level:debug;flush:debug;output:stderr;pid:yes" + +echo "Starting ipc_devdax_prov CONSUMER on port $PORT ..." +UMF_LOG=$UMF_LOG_VAL ./umf_test-ipc_devdax_prov_consumer $PORT & + +echo "Waiting 1 sec ..." +sleep 1 + +echo "Starting ipc_devdax_prov PRODUCER on port $PORT ..." +UMF_LOG=$UMF_LOG_VAL ./umf_test-ipc_devdax_prov_producer $PORT diff --git a/test/ipc_devdax_prov_consumer.c b/test/ipc_devdax_prov_consumer.c new file mode 100644 index 000000000..286b6de78 --- /dev/null +++ b/test/ipc_devdax_prov_consumer.c @@ -0,0 +1,55 @@ +/* + * Copyright (C) 2024 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + */ + +#include +#include + +#include + +#include "ipc_common.h" +#include "ipc_os_prov_common.h" + +int main(int argc, char *argv[]) { + if (argc < 2) { + fprintf(stderr, "usage: %s \n", argv[0]); + return -1; + } + + int ret = 0; + int port = atoi(argv[1]); + + char *path = getenv("UMF_TESTS_DEVDAX_PATH"); + if (path == NULL || path[0] == 0) { + fprintf(stderr, "Test skipped, UMF_TESTS_DEVDAX_PATH is not set\n"); + return 0; + } + + char *size = getenv("UMF_TESTS_DEVDAX_SIZE"); + if (size == NULL || size[0] == 0) { + fprintf(stderr, "Test skipped, UMF_TESTS_DEVDAX_SIZE is not set\n"); + return 0; + } + + umf_devdax_memory_provider_params_handle_t devdax_params = NULL; + umf_result_t umf_result = + umfDevDaxMemoryProviderParamsCreate(&devdax_params, path, atol(size)); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf(stderr, "[consumer] ERROR: creating DevDax Memory Provider " + "params failed\n"); + return -1; + } + + void *pool_params = NULL; + + ret = run_consumer(port, umfScalablePoolOps(), pool_params, + umfDevDaxMemoryProviderOps(), devdax_params, memcopy, + NULL); + + umfDevDaxMemoryProviderParamsDestroy(devdax_params); + + return ret; +} diff --git a/test/ipc_devdax_prov_producer.c b/test/ipc_devdax_prov_producer.c new file mode 100644 index 000000000..479c1f945 --- /dev/null +++ b/test/ipc_devdax_prov_producer.c @@ -0,0 +1,55 @@ +/* + * Copyright (C) 2024 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + */ + +#include +#include + +#include + +#include "ipc_common.h" +#include "ipc_os_prov_common.h" + +int main(int argc, char *argv[]) { + if (argc < 2) { + fprintf(stderr, "usage: %s \n", argv[0]); + return -1; + } + + int ret = 0; + int port = atoi(argv[1]); + + char *path = getenv("UMF_TESTS_DEVDAX_PATH"); + if (path == NULL || path[0] == 0) { + fprintf(stderr, "Test skipped, UMF_TESTS_DEVDAX_PATH is not set\n"); + return 0; + } + + char *size = getenv("UMF_TESTS_DEVDAX_SIZE"); + if (size == NULL || size[0] == 0) { + fprintf(stderr, "Test skipped, UMF_TESTS_DEVDAX_SIZE is not set\n"); + return 0; + } + + umf_devdax_memory_provider_params_handle_t devdax_params = NULL; + umf_result_t umf_result = + umfDevDaxMemoryProviderParamsCreate(&devdax_params, path, atol(size)); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf(stderr, "[producer] ERROR: creating DevDax Memory Provider " + "params failed\n"); + return -1; + } + + void *pool_params = NULL; + + ret = run_producer(port, umfScalablePoolOps(), pool_params, + umfDevDaxMemoryProviderOps(), devdax_params, memcopy, + NULL); + + umfDevDaxMemoryProviderParamsDestroy(devdax_params); + + return ret; +} diff --git a/test/ipc_file_prov.sh b/test/ipc_file_prov.sh new file mode 100755 index 000000000..b3e3091a8 --- /dev/null +++ b/test/ipc_file_prov.sh @@ -0,0 +1,32 @@ +# +# Copyright (C) 2024 Intel Corporation +# +# Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# + +#!/bin/bash + +set -e + +FILE_NAME="/tmp/umf_file_provider_$$" + +# port should be a number from the range <1024, 65535> +PORT=$(( 1024 + ( $$ % ( 65535 - 1024 )))) + +UMF_LOG_VAL="level:debug;flush:debug;output:stderr;pid:yes" + +# make sure the temp file does not exist +rm -f ${FILE_NAME} + +echo "Starting ipc_file_prov CONSUMER on port $PORT ..." 
+UMF_LOG=$UMF_LOG_VAL ./umf_test-ipc_file_prov_consumer $PORT ${FILE_NAME}_consumer & + +echo "Waiting 1 sec ..." +sleep 1 + +echo "Starting ipc_file_prov PRODUCER on port $PORT ..." +UMF_LOG=$UMF_LOG_VAL ./umf_test-ipc_file_prov_producer $PORT ${FILE_NAME}_producer + +# remove the SHM file +rm -f ${FILE_NAME} diff --git a/test/ipc_file_prov_consumer.c b/test/ipc_file_prov_consumer.c new file mode 100644 index 000000000..0c50991a9 --- /dev/null +++ b/test/ipc_file_prov_consumer.c @@ -0,0 +1,57 @@ +/* + * Copyright (C) 2024 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + */ + +#include +#include +#include +#include +#include + +#include + +#include "ipc_common.h" +#include "ipc_os_prov_common.h" + +int main(int argc, char *argv[]) { + if (argc < 3) { + fprintf(stderr, "usage: %s \n", argv[0]); + return -1; + } + + int ret = 0; + int port = atoi(argv[1]); + char *file_name = argv[2]; + + umf_file_memory_provider_params_handle_t file_params = NULL; + umf_result_t umf_result = + umfFileMemoryProviderParamsCreate(&file_params, file_name); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf( + stderr, + "[consumer] ERROR: creating File Memory Provider params failed\n"); + return -1; + } + + umf_result = umfFileMemoryProviderParamsSetVisibility(file_params, + UMF_MEM_MAP_SHARED); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf(stderr, "[consumer] ERROR: setting File Memory Provider " + "visibility failed\n"); + ret = -1; + goto destroy_provider_params; + } + + void *pool_params = NULL; + + ret = run_consumer(port, umfScalablePoolOps(), pool_params, + umfFileMemoryProviderOps(), file_params, memcopy, NULL); + +destroy_provider_params: + umfFileMemoryProviderParamsDestroy(file_params); + + return ret; +} diff --git a/test/ipc_file_prov_fsdax.sh b/test/ipc_file_prov_fsdax.sh new file mode 100755 index 000000000..4e908869b --- /dev/null +++ 
b/test/ipc_file_prov_fsdax.sh @@ -0,0 +1,43 @@ +# +# Copyright (C) 2024 Intel Corporation +# +# Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# + +#!/bin/bash + +set -e + +if [ "$UMF_TESTS_FSDAX_PATH" = "" ]; then + echo "$0: Test skipped, UMF_TESTS_FSDAX_PATH is not set"; + exit 0 +fi + +if [ "$UMF_TESTS_FSDAX_PATH_2" = "" ]; then + echo "$0: Test skipped, UMF_TESTS_FSDAX_PATH_2 is not set"; + exit 0 +fi + +FILE_NAME="$UMF_TESTS_FSDAX_PATH" +FILE_NAME_2="$UMF_TESTS_FSDAX_PATH_2" + +# port should be a number from the range <1024, 65535> +PORT=$(( 1024 + ( $$ % ( 65535 - 1024 )))) + +UMF_LOG_VAL="level:debug;flush:debug;output:stderr;pid:yes" + +# make sure the temp file does not exist +rm -f ${FILE_NAME} + +echo "Starting ipc_file_prov_fsdax CONSUMER on port $PORT ..." +UMF_LOG=$UMF_LOG_VAL ./umf_test-ipc_file_prov_consumer $PORT $FILE_NAME & + +echo "Waiting 1 sec ..." +sleep 1 + +echo "Starting ipc_file_prov_fsdax PRODUCER on port $PORT ..." +UMF_LOG=$UMF_LOG_VAL ./umf_test-ipc_file_prov_producer $PORT $FILE_NAME_2 + +# remove the SHM file +rm -f ${FILE_NAME} diff --git a/test/ipc_file_prov_producer.c b/test/ipc_file_prov_producer.c new file mode 100644 index 000000000..c620437e0 --- /dev/null +++ b/test/ipc_file_prov_producer.c @@ -0,0 +1,57 @@ +/* + * Copyright (C) 2024 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + */ + +#include +#include +#include +#include +#include + +#include + +#include "ipc_common.h" +#include "ipc_os_prov_common.h" + +int main(int argc, char *argv[]) { + if (argc < 3) { + fprintf(stderr, "usage: %s \n", argv[0]); + return -1; + } + + int ret = 0; + int port = atoi(argv[1]); + char *file_name = argv[2]; + + umf_file_memory_provider_params_handle_t file_params = NULL; + umf_result_t umf_result = + umfFileMemoryProviderParamsCreate(&file_params, file_name); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf( + stderr, + "[producer] ERROR: creating File Memory Provider params failed\n"); + return -1; + } + + umf_result = umfFileMemoryProviderParamsSetVisibility(file_params, + UMF_MEM_MAP_SHARED); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf(stderr, "[producer] ERROR: setting File Memory Provider " + "visibility failed\n"); + ret = -1; + goto destroy_provider_params; + } + + void *pool_params = NULL; + + ret = run_producer(port, umfScalablePoolOps(), pool_params, + umfFileMemoryProviderOps(), file_params, memcopy, NULL); + +destroy_provider_params: + umfFileMemoryProviderParamsDestroy(file_params); + + return ret; +} diff --git a/test/ipc_negative.cpp b/test/ipc_negative.cpp new file mode 100644 index 000000000..5c4cccf22 --- /dev/null +++ b/test/ipc_negative.cpp @@ -0,0 +1,57 @@ +// Copyright (C) 2024 Intel Corporation +// Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include "base.hpp" +#include "pool_null.h" +#include "provider_null.h" + +#include +#include +#include + +#include + +struct IpcNotSupported : umf_test::test { + protected: + void SetUp() override { + umf_memory_provider_ops_t provider_ops = UMF_NULL_PROVIDER_OPS; + provider_ops.ipc.get_ipc_handle_size = nullptr; + provider_ops.ipc.get_ipc_handle = nullptr; + provider_ops.ipc.open_ipc_handle = nullptr; + provider_ops.ipc.put_ipc_handle = nullptr; + provider_ops.ipc.close_ipc_handle = nullptr; + + umf_result_t ret; + ret = umfMemoryProviderCreate(&provider_ops, nullptr, &provider); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + + ret = umfPoolCreate(&UMF_NULL_POOL_OPS, provider, nullptr, + UMF_POOL_CREATE_FLAG_OWN_PROVIDER, &pool); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + } + + void TearDown() override { umfPoolDestroy(pool); } + + umf_memory_provider_handle_t provider; + umf_memory_pool_handle_t pool; +}; + +TEST_F(IpcNotSupported, GetIPCHandleSizeNotSupported) { + size_t size; + auto ret = umfPoolGetIPCHandleSize(pool, &size); + EXPECT_EQ(ret, UMF_RESULT_ERROR_NOT_SUPPORTED); +} + +TEST_F(IpcNotSupported, OpenIPCHandleNotSupported) { + // This data doesn't matter, as the ipc call is no-op + std::array ipc_data = {}; + void *ptr; + umf_ipc_handler_handle_t ipc_handler; + auto ret = umfPoolGetIPCHandler(pool, &ipc_handler); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + + ret = umfOpenIPCHandle(ipc_handler, + reinterpret_cast(&ipc_data), &ptr); + EXPECT_EQ(ret, UMF_RESULT_ERROR_NOT_SUPPORTED); +} diff --git a/test/ipc_os_prov_consumer.c b/test/ipc_os_prov_consumer.c index 7df1e7049..f3f8d0090 100644 --- a/test/ipc_os_prov_consumer.c +++ b/test/ipc_os_prov_consumer.c @@ -19,16 +19,44 @@ int main(int argc, char *argv[]) { return -1; } + int ret = 0; int port = atoi(argv[1]); - umf_os_memory_provider_params_t os_params; + umf_os_memory_provider_params_handle_t os_params = NULL; + + umf_result_t umf_result = 
umfOsMemoryProviderParamsCreate(&os_params); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf( + stderr, + "[consumer] ERROR: creating OS memory provider params failed\n"); + return -1; + } + + umf_result = + umfOsMemoryProviderParamsSetVisibility(os_params, UMF_MEM_MAP_SHARED); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf(stderr, "[consumer] ERROR: setting visibility mode failed\n"); + ret = -1; + goto destroy_provider_params; + } - os_params = umfOsMemoryProviderParamsDefault(); - os_params.visibility = UMF_MEM_MAP_SHARED; if (argc >= 3) { - os_params.shm_name = argv[2]; + umf_result = umfOsMemoryProviderParamsSetShmName(os_params, argv[2]); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf(stderr, + "[consumer] ERROR: setting shared memory name failed\n"); + ret = -1; + goto destroy_provider_params; + } } - return run_consumer(port, umfOsMemoryProviderOps(), &os_params, memcopy, - NULL); + void *pool_params = NULL; + + ret = run_consumer(port, umfScalablePoolOps(), pool_params, + umfOsMemoryProviderOps(), os_params, memcopy, NULL); + +destroy_provider_params: + umfOsMemoryProviderParamsDestroy(os_params); + + return ret; } diff --git a/test/ipc_os_prov_producer.c b/test/ipc_os_prov_producer.c index a9a2ab56c..890f1eb3e 100644 --- a/test/ipc_os_prov_producer.c +++ b/test/ipc_os_prov_producer.c @@ -19,16 +19,44 @@ int main(int argc, char *argv[]) { return -1; } + int ret = 0; int port = atoi(argv[1]); - umf_os_memory_provider_params_t os_params; + umf_os_memory_provider_params_handle_t os_params = NULL; + + umf_result_t umf_result = umfOsMemoryProviderParamsCreate(&os_params); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf( + stderr, + "[producer] ERROR: creating OS memory provider params failed\n"); + return -1; + } + + umf_result = + umfOsMemoryProviderParamsSetVisibility(os_params, UMF_MEM_MAP_SHARED); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf(stderr, "[producer] ERROR: setting visibility mode failed\n"); + ret = -1; + goto 
destroy_provider_params; + } - os_params = umfOsMemoryProviderParamsDefault(); - os_params.visibility = UMF_MEM_MAP_SHARED; if (argc >= 3) { - os_params.shm_name = argv[2]; + umf_result = umfOsMemoryProviderParamsSetShmName(os_params, argv[2]); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf(stderr, + "[producer] ERROR: setting shared memory name failed\n"); + ret = -1; + goto destroy_provider_params; + } } - return run_producer(port, umfOsMemoryProviderOps(), &os_params, memcopy, - NULL); + void *pool_params = NULL; + + ret = run_producer(port, umfScalablePoolOps(), pool_params, + umfOsMemoryProviderOps(), os_params, memcopy, NULL); + +destroy_provider_params: + umfOsMemoryProviderParamsDestroy(os_params); + + return ret; } diff --git a/test/ipc_os_prov_proxy.c b/test/ipc_os_prov_proxy.c new file mode 100644 index 000000000..0a4b64442 --- /dev/null +++ b/test/ipc_os_prov_proxy.c @@ -0,0 +1,256 @@ +/* + * Copyright (C) 2024 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "ipc_common.h" +#include "utils_load_library.h" + +umf_result_t (*pfnGetIPCHandle)(const void *ptr, umf_ipc_handle_t *umfIPCHandle, + size_t *size); +umf_result_t (*pfnPutIPCHandle)(umf_ipc_handle_t umfIPCHandle); + +// This is a test for a scenario where a user process is started using the +// LD_PRELOAD with the UMF Proxy Lib and this process uses UMF by loading +// libumf.so at runtime. +// In this test, we expect that all allocations made by the process will be +// handled by UMF in the Proxy Lib and added to the UMF tracker so that they +// can be used later in the UMF IPC API. 
+int main(int argc, char *argv[]) { + int ret = 0; + umf_result_t umf_result = UMF_RESULT_ERROR_UNKNOWN; + int producer_socket = -1; + const size_t MSG_SIZE = 2048; + char consumer_message[MSG_SIZE]; + + if (argc < 2) { + fprintf(stderr, "usage: %s [shm_name]\n", argv[0]); + return -1; + } + + int port = atoi(argv[1]); + + int fd = open("/proc/self/maps", O_RDONLY); + if (fd == -1) { + return -1; + } + + // read the "/proc/self/maps" file until the "libumf_proxy.so" of the maps + // is found or EOF is reached. + const size_t SIZE_BUF = 8192; + char buf[SIZE_BUF]; + ssize_t nbytes = 1; + char *found = NULL; + while (nbytes > 0 && found == NULL) { + memset(buf, 0, SIZE_BUF); // erase previous data + nbytes = read(fd, buf, SIZE_BUF); + if (nbytes <= 0) { + break; + } + found = strstr(buf, "libumf_proxy.so"); + } + (void)close(fd); + + if (found == NULL) { + fprintf( + stderr, + "test binary not run under LD_PRELOAD with \"libumf_proxy.so\"\n"); + return -1; + } + + // open the UMF library and get umfGetIPCHandle() function + const char *umf_lib_name = "libumf.so"; + void *umf_lib_handle = utils_open_library(umf_lib_name, 0); + if (umf_lib_handle == NULL) { + fprintf(stderr, "utils_open_library: UMF library not found (%s)\n", + umf_lib_name); + return -1; + } + + *(void **)&pfnGetIPCHandle = + utils_get_symbol_addr(umf_lib_handle, "umfGetIPCHandle", umf_lib_name); + if (pfnGetIPCHandle == NULL) { + ret = -1; + goto err_close_lib; + } + + *(void **)&pfnPutIPCHandle = + utils_get_symbol_addr(umf_lib_handle, "umfPutIPCHandle", umf_lib_name); + if (pfnPutIPCHandle == NULL) { + ret = -1; + goto err_close_lib; + } + + // create simple allocation - it should be added to the UMF tracker if the + // process was launched under UMF Proxy Lib + size_t size = 2137; + void *ptr = malloc(size); + if (ptr == NULL) { + fprintf(stderr, "malloc() failed!\n"); + ret = -1; + goto err_close_lib; + } + + fprintf(stderr, "Allocated memory - %zu\n", size); + size_t val = 144; + size_t 
expected_val = val / 2; + *(size_t *)ptr = val; + + // get IPC handle of the allocation + umf_ipc_handle_t ipc_handle = NULL; + size_t ipc_handle_size = 0; + umf_result_t res = pfnGetIPCHandle(ptr, &ipc_handle, &ipc_handle_size); + if (res != UMF_RESULT_SUCCESS) { + fprintf(stderr, "pfnGetIPCHandle() failed!\n"); + ret = -1; + goto err_free_mem; + } + + // check if we got valid data + if (ipc_handle == NULL || ipc_handle_size == 0) { + fprintf(stderr, "pfnGetIPCHandle() couldn't find the handle data!\n"); + ret = -1; + goto err_free_mem; + } + + fprintf(stderr, "Got IPCHandle for memory - %p | size - %zu\n", + (void *)ipc_handle, ipc_handle_size); + + producer_socket = producer_connect(port); + if (producer_socket < 0) { + goto err_PutIPCHandle; + } + + // send the ipc_handle_size to the consumer + ssize_t len = + send(producer_socket, &ipc_handle_size, sizeof(ipc_handle_size), 0); + if (len < 0) { + fprintf(stderr, "[producer] ERROR: unable to send the ipc_handle_size " + "to the consumer\n"); + goto err_close_producer_socket; + } + + fprintf(stderr, + "[producer] Sent the size of the IPC handle (%zu) to the consumer " + "(sent %zu bytes)\n", + ipc_handle_size, len); + + // zero the consumer_message buffer + memset(consumer_message, 0, sizeof(consumer_message)); + + // receive the consumer's confirmation - IPC handle size + len = recv(producer_socket, consumer_message, sizeof(consumer_message), 0); + if (len < 0) { + fprintf(stderr, "[producer] ERROR: error while receiving the " + "confirmation from the consumer\n"); + goto err_close_producer_socket; + } + + size_t conf_IPC_handle_size = *(size_t *)consumer_message; + if (conf_IPC_handle_size == ipc_handle_size) { + fprintf(stderr, + "[producer] Received the correct confirmation (%zu) from the " + "consumer (%zu bytes)\n", + conf_IPC_handle_size, len); + } else { + fprintf(stderr, + "[producer] Received an INCORRECT confirmation (%zu) from the " + "consumer (%zu bytes)\n", + conf_IPC_handle_size, len); + goto 
err_close_producer_socket; + } + + // send the ipc_handle of ipc_handle_size to the consumer + if (send(producer_socket, ipc_handle, ipc_handle_size, 0) < 0) { + fprintf(stderr, "[producer] ERROR: unable to send the ipc_handle to " + "the consumer\n"); + goto err_close_producer_socket; + } + + fprintf(stderr, + "[producer] Sent the IPC handle to the consumer (%zu bytes)\n", + ipc_handle_size); + + // zero the consumer_message buffer + memset(consumer_message, 0, sizeof(consumer_message)); + + // receive the consumer's response + if (recv(producer_socket, consumer_message, sizeof(consumer_message) - 1, + 0) < 0) { + fprintf( + stderr, + "[producer] ERROR: error while receiving the consumer's message\n"); + goto err_close_producer_socket; + } + + fprintf(stderr, "[producer] Received the consumer's response: \"%s\"\n", + consumer_message); + + if (strncmp(consumer_message, "SKIP", 5 /* length of "SKIP" + 1 */) == 0) { + fprintf(stderr, "[producer] SKIP: received the 'SKIP' response from " + "consumer, skipping ...\n"); + ret = 1; + goto err_close_producer_socket; + } + + // read a new value - the expected correct value val / 2 + volatile unsigned long long new_val = *(unsigned long long *)ptr; + if (new_val == expected_val) { + ret = 0; // got the correct value - success! 
+ fprintf( + stderr, + "[producer] The consumer wrote the correct value (the old one / 2) " + "to my shared memory: %llu\n", + new_val); + } else { + fprintf( + stderr, + "[producer] ERROR: The consumer did NOT write the correct value " + "(the old one / 2 = %zu) to my shared memory: %llu\n", + expected_val, new_val); + } + +err_close_producer_socket: + close(producer_socket); + +err_PutIPCHandle: + umf_result = pfnPutIPCHandle(ipc_handle); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf(stderr, "[producer] ERROR: putting the IPC handle failed\n"); + } + + fprintf(stderr, "[producer] Put the IPC handle\n"); + + if (ret == 0) { + fprintf(stderr, "[producer] Shutting down (status OK) ...\n"); + } else if (ret == 1) { + fprintf(stderr, "[producer] Shutting down (status SKIP) ...\n"); + ret = 0; + } else { + fprintf(stderr, "[producer] Shutting down (status ERROR) ...\n"); + } + + return ret; + +err_free_mem: + free(ptr); + +err_close_lib: + utils_close_library(umf_lib_handle); + + return ret; +} diff --git a/test/ipc_os_prov_proxy.sh b/test/ipc_os_prov_proxy.sh new file mode 100755 index 000000000..86b95a235 --- /dev/null +++ b/test/ipc_os_prov_proxy.sh @@ -0,0 +1,26 @@ +# +# Copyright (C) 2024 Intel Corporation +# +# Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# + +#!/bin/bash + +set -e + +UMF_LOG_VAL="level:debug;flush:debug;output:stderr;pid:yes" +UMF_PROXY_VAL="page.disposition=shared-shm" +LD_PRELOAD_VAL="../lib/libumf_proxy.so" + +# port should be a number from the range <1024, 65535> +PORT=$(( 1024 + ( $$ % ( 65535 - 1024 )))) + +echo "Starting CONSUMER on port $PORT ..." +UMF_LOG=$UMF_LOG_VAL ./umf_test-ipc_os_prov_consumer $PORT & + +echo "Waiting 1 sec ..." +sleep 1 + +echo "Starting ipc_os_prov_proxy PRODUCER on port $PORT ..." 
+LD_PRELOAD=$LD_PRELOAD_VAL UMF_LOG=$UMF_LOG_VAL UMF_PROXY=$UMF_PROXY_VAL ./umf_test-ipc_os_prov_proxy $PORT diff --git a/test/ipc_os_prov_shm.sh b/test/ipc_os_prov_shm.sh index 088d77169..efa2de35a 100755 --- a/test/ipc_os_prov_shm.sh +++ b/test/ipc_os_prov_shm.sh @@ -20,7 +20,7 @@ UMF_LOG_VAL="level:debug;flush:debug;output:stderr;pid:yes" rm -f /dev/shm/${SHM_NAME} echo "Starting ipc_os_prov_shm CONSUMER on port $PORT ..." -UMF_LOG=$UMF_LOG_VAL ./umf_test-ipc_os_prov_consumer $PORT $SHM_NAME & +UMF_LOG=$UMF_LOG_VAL ./umf_test-ipc_os_prov_consumer $PORT & echo "Waiting 1 sec ..." sleep 1 diff --git a/test/malloc_compliance_tests.cpp b/test/malloc_compliance_tests.cpp index c3c67ae21..06e3b5dd7 100644 --- a/test/malloc_compliance_tests.cpp +++ b/test/malloc_compliance_tests.cpp @@ -85,12 +85,11 @@ void calloc_compliance_test(umf_memory_pool_handle_t hPool) { // Checking that the memory returned by calloc is zero filled for (int i = 0; i < ITERATIONS; i++) { alloc_size = rand_alloc_size(MAX_ALLOC_SIZE); - alloc_ptr[i] = umfPoolCalloc(hPool, i + 1, alloc_size); + alloc_ptr[i] = umfPoolCalloc(hPool, 2, alloc_size); ASSERT_NE(alloc_ptr[i], nullptr) << "calloc returned NULL, couldn't allocate much memory"; - ASSERT_NE(bufferIsFilledWithChar(alloc_ptr[i], alloc_size * (i + 1), 0), - 0) + ASSERT_NE(bufferIsFilledWithChar(alloc_ptr[i], 2 * alloc_size, 0), 0) << "Memory returned by calloc was not zeroed"; } free_memory(hPool, alloc_ptr); diff --git a/test/memoryPoolAPI.cpp b/test/memoryPoolAPI.cpp index 0fb2a4422..1c6d83f2a 100644 --- a/test/memoryPoolAPI.cpp +++ b/test/memoryPoolAPI.cpp @@ -139,7 +139,8 @@ TEST_P(umfPoolWithCreateFlagsTest, memoryPoolWithCustomProvider) { } TEST_F(test, retrieveMemoryProvider) { - umf_memory_provider_handle_t provider = (umf_memory_provider_handle_t)0x1; + auto nullProvider = umf_test::wrapProviderUnique(nullProviderCreate()); + umf_memory_provider_handle_t provider = nullProvider.get(); auto pool = 
wrapPoolUnique(createPoolChecked(umfProxyPoolOps(), provider, nullptr)); @@ -155,7 +156,7 @@ TEST_F(test, BasicPoolByPtrTest) { umf_memory_provider_handle_t provider; umf_result_t ret = - umfMemoryProviderCreate(&MALLOC_PROVIDER_OPS, NULL, &provider); + umfMemoryProviderCreate(&BA_GLOBAL_PROVIDER_OPS, NULL, &provider); ASSERT_EQ(ret, UMF_RESULT_SUCCESS); auto pool = wrapPoolUnique(createPoolChecked(umfProxyPoolOps(), provider, nullptr, @@ -180,14 +181,16 @@ TEST_F(test, BasicPoolByPtrTest) { INSTANTIATE_TEST_SUITE_P( mallocPoolTest, umfPoolTest, ::testing::Values(poolCreateExtParams{&MALLOC_POOL_OPS, nullptr, - &UMF_NULL_PROVIDER_OPS, nullptr}, + &UMF_NULL_PROVIDER_OPS, nullptr, + nullptr}, poolCreateExtParams{umfProxyPoolOps(), nullptr, - &MALLOC_PROVIDER_OPS, nullptr})); + &BA_GLOBAL_PROVIDER_OPS, nullptr, + nullptr})); INSTANTIATE_TEST_SUITE_P(mallocMultiPoolTest, umfMultiPoolTest, ::testing::Values(poolCreateExtParams{ - umfProxyPoolOps(), nullptr, &MALLOC_PROVIDER_OPS, - nullptr})); + umfProxyPoolOps(), nullptr, + &BA_GLOBAL_PROVIDER_OPS, nullptr, nullptr})); INSTANTIATE_TEST_SUITE_P(umfPoolWithCreateFlagsTest, umfPoolWithCreateFlagsTest, ::testing::Values(0, @@ -258,7 +261,8 @@ TEST_P(poolInitializeTest, errorPropagation) { } TEST_F(test, retrieveMemoryProvidersError) { - umf_memory_provider_handle_t provider = (umf_memory_provider_handle_t)0x1; + auto nullProvider = umf_test::wrapProviderUnique(nullProviderCreate()); + umf_memory_provider_handle_t provider = nullProvider.get(); auto pool = wrapPoolUnique(createPoolChecked(umfProxyPoolOps(), provider, nullptr)); diff --git a/test/memoryProviderAPI.cpp b/test/memoryProviderAPI.cpp index 144aa4d55..866ae6dae 100644 --- a/test/memoryProviderAPI.cpp +++ b/test/memoryProviderAPI.cpp @@ -43,14 +43,15 @@ TEST_F(test, memoryProviderTrace) { ASSERT_EQ(calls["get_last_native_error"], 1); ASSERT_EQ(calls.size(), ++call_count); + size_t page_size; ret = umfMemoryProviderGetRecommendedPageSize(tracingProvider.get(), 0, - 
nullptr); + &page_size); ASSERT_EQ(ret, UMF_RESULT_SUCCESS); ASSERT_EQ(calls["get_recommended_page_size"], 1); ASSERT_EQ(calls.size(), ++call_count); ret = umfMemoryProviderGetMinPageSize(tracingProvider.get(), nullptr, - nullptr); + &page_size); ASSERT_EQ(ret, UMF_RESULT_SUCCESS); ASSERT_EQ(calls["get_min_page_size"], 1); ASSERT_EQ(calls.size(), ++call_count); @@ -60,12 +61,14 @@ TEST_F(test, memoryProviderTrace) { ASSERT_EQ(calls.size(), ++call_count); ASSERT_EQ(std::string(pName), std::string("null")); - ret = umfMemoryProviderPurgeLazy(tracingProvider.get(), nullptr, 0); + ret = umfMemoryProviderPurgeLazy(tracingProvider.get(), &page_size, + sizeof(page_size)); ASSERT_EQ(ret, UMF_RESULT_SUCCESS); ASSERT_EQ(calls["purge_lazy"], 1); ASSERT_EQ(calls.size(), ++call_count); - ret = umfMemoryProviderPurgeForce(tracingProvider.get(), nullptr, 0); + ret = umfMemoryProviderPurgeForce(tracingProvider.get(), &page_size, + sizeof(page_size)); ASSERT_EQ(ret, UMF_RESULT_SUCCESS); ASSERT_EQ(calls["purge_force"], 1); ASSERT_EQ(calls.size(), ++call_count); @@ -86,6 +89,19 @@ TEST_F(test, memoryProviderTrace) { ASSERT_EQ(calls.size(), ++call_count); } +TEST_F(test, memoryProviderOpsNullFreeField) { + umf_memory_provider_ops_t provider_ops = UMF_NULL_PROVIDER_OPS; + provider_ops.ext.free = nullptr; + umf_memory_provider_handle_t hProvider; + auto ret = umfMemoryProviderCreate(&provider_ops, nullptr, &hProvider); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + + ret = umfMemoryProviderFree(hProvider, nullptr, 0); + ASSERT_EQ(ret, UMF_RESULT_ERROR_NOT_SUPPORTED); + + umfMemoryProviderDestroy(hProvider); +} + TEST_F(test, memoryProviderOpsNullPurgeLazyField) { umf_memory_provider_ops_t provider_ops = UMF_NULL_PROVIDER_OPS; provider_ops.ext.purge_lazy = nullptr; @@ -94,7 +110,7 @@ TEST_F(test, memoryProviderOpsNullPurgeLazyField) { ASSERT_EQ(ret, UMF_RESULT_SUCCESS); ret = umfMemoryProviderPurgeLazy(hProvider, nullptr, 0); - ASSERT_EQ(ret, UMF_RESULT_ERROR_NOT_SUPPORTED); + ASSERT_EQ(ret, 
UMF_RESULT_ERROR_INVALID_ARGUMENT); umfMemoryProviderDestroy(hProvider); } @@ -107,7 +123,7 @@ TEST_F(test, memoryProviderOpsNullPurgeForceField) { ASSERT_EQ(ret, UMF_RESULT_SUCCESS); ret = umfMemoryProviderPurgeForce(hProvider, nullptr, 0); - ASSERT_EQ(ret, UMF_RESULT_ERROR_NOT_SUPPORTED); + ASSERT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); umfMemoryProviderDestroy(hProvider); } @@ -133,6 +149,39 @@ TEST_F(test, memoryProviderOpsNullAllocationSplitAllocationMergeFields) { umfMemoryProviderDestroy(hProvider); } +TEST_F(test, memoryProviderOpsNullAllIPCFields) { + umf_memory_provider_ops_t provider_ops = UMF_NULL_PROVIDER_OPS; + provider_ops.ipc.get_ipc_handle_size = nullptr; + provider_ops.ipc.get_ipc_handle = nullptr; + provider_ops.ipc.put_ipc_handle = nullptr; + provider_ops.ipc.open_ipc_handle = nullptr; + provider_ops.ipc.close_ipc_handle = nullptr; + + umf_memory_provider_handle_t hProvider; + auto ret = umfMemoryProviderCreate(&provider_ops, nullptr, &hProvider); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + + size_t size; + ret = umfMemoryProviderGetIPCHandleSize(hProvider, &size); + ASSERT_EQ(ret, UMF_RESULT_ERROR_NOT_SUPPORTED); + + void *ptr = nullptr; + void *providerIpcData = nullptr; + ret = umfMemoryProviderGetIPCHandle(hProvider, ptr, size, providerIpcData); + ASSERT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + ret = umfMemoryProviderPutIPCHandle(hProvider, providerIpcData); + ASSERT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + ret = umfMemoryProviderOpenIPCHandle(hProvider, providerIpcData, &ptr); + ASSERT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + ret = umfMemoryProviderCloseIPCHandle(hProvider, ptr, size); + ASSERT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + umfMemoryProviderDestroy(hProvider); +} + ////////////////// Negative test cases ///////////////// TEST_F(test, memoryProviderCreateNullOps) { @@ -155,14 +204,6 @@ TEST_F(test, memoryProviderOpsNullAllocField) { ASSERT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); } -TEST_F(test, 
memoryProviderOpsNullFreeField) { - umf_memory_provider_ops_t provider_ops = UMF_NULL_PROVIDER_OPS; - provider_ops.free = nullptr; - umf_memory_provider_handle_t hProvider; - auto ret = umfMemoryProviderCreate(&provider_ops, nullptr, &hProvider); - ASSERT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); -} - TEST_F(test, memoryProviderOpsNullGetLastNativeErrorField) { umf_memory_provider_ops_t provider_ops = UMF_NULL_PROVIDER_OPS; provider_ops.get_last_native_error = nullptr; @@ -251,6 +292,37 @@ TEST_F(test, memoryProviderOpsNullCloseIpcHandle) { ASSERT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); } +TEST_F(test, memoryProviderOpsNullAllocationSplitAllocationMergeNegative) { + umf_memory_provider_ops_t provider_ops = UMF_NULL_PROVIDER_OPS; + umf_memory_provider_handle_t hProvider; + + auto ret = umfMemoryProviderCreate(&provider_ops, nullptr, &hProvider); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + + ret = umfMemoryProviderAllocationSplit(hProvider, nullptr, 2 * 4096, 4096); + ASSERT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + ret = + umfMemoryProviderAllocationMerge(hProvider, nullptr, nullptr, 2 * 4096); + ASSERT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + void *lowPtr = (void *)0xBAD; + void *highPtr = (void *)((uintptr_t)lowPtr + 4096); + size_t totalSize = 0; + ret = + umfMemoryProviderAllocationMerge(hProvider, lowPtr, highPtr, totalSize); + ASSERT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + totalSize = 4096; + lowPtr = (void *)0xBAD; + highPtr = (void *)((uintptr_t)lowPtr + 2 * totalSize); + ret = + umfMemoryProviderAllocationMerge(hProvider, lowPtr, highPtr, totalSize); + ASSERT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + umfMemoryProviderDestroy(hProvider); +} + struct providerInitializeTest : umf_test::test, ::testing::WithParamInterface {}; diff --git a/test/memspaces/memspace.cpp b/test/memspaces/memspace.cpp new file mode 100644 index 000000000..412c5beb7 --- /dev/null +++ b/test/memspaces/memspace.cpp @@ -0,0 +1,43 @@ +// Copyright (C) 2024 
Intel Corporation +// Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include "memspace_helpers.hpp" + +using umf_test::test; + +TEST_F(test, memspaceNewInvalid) { + auto ret = umfMemspaceNew(NULL); + ASSERT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); +} + +class emptyMemspace : public testing::Test { + public: + umf_memspace_handle_t memspace; + + void SetUp() override { + auto ret = umfMemspaceNew(&memspace); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + ASSERT_NE(memspace, nullptr); + } + void TearDown() override { umfMemspaceDestroy(memspace); } +}; + +TEST_F(emptyMemspace, basic) { + size_t len = umfMemspaceMemtargetNum(memspace); + ASSERT_EQ(len, 0); +} + +TEST_F(emptyMemspace, create_pool) { + umf_memory_pool_handle_t pool = nullptr; + auto ret = umfPoolCreateFromMemspace(memspace, NULL, &pool); + ASSERT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); + ASSERT_EQ(pool, nullptr); +} + +TEST_F(emptyMemspace, create_provider) { + umf_memory_provider_handle_t provider = nullptr; + auto ret = umfMemoryProviderCreateFromMemspace(memspace, NULL, &provider); + ASSERT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); + ASSERT_EQ(provider, nullptr); +} diff --git a/test/memspaces/memspace_fixtures.hpp b/test/memspaces/memspace_fixtures.hpp index fac50b031..da174c4f1 100644 --- a/test/memspaces/memspace_fixtures.hpp +++ b/test/memspaces/memspace_fixtures.hpp @@ -5,6 +5,7 @@ #ifndef UMF_TEST_MEMSPACE_FIXTURES_HPP #define UMF_TEST_MEMSPACE_FIXTURES_HPP +#include #include #include #include @@ -30,7 +31,7 @@ struct numaNodesTest : ::umf_test::test { ::umf_test::test::SetUp(); if (numa_available() == -1 || numa_all_nodes_ptr == nullptr) { - GTEST_FAIL() << "Failed to initialize libnuma"; + GTEST_SKIP() << "No available NUMA support; skipped"; } int maxNode = numa_max_node(); @@ -50,7 +51,7 @@ struct numaNodesTest : ::umf_test::test { unsigned long maxNodeId = 0; }; -using isQuerySupportedFunc = bool 
(*)(size_t); +using isQuerySupportedFunc = void (*)(size_t); using memspaceGetFunc = umf_const_memspace_handle_t (*)(); using memspaceGetParams = std::tuple; @@ -59,10 +60,15 @@ struct memspaceGetTest : ::numaNodesTest, void SetUp() override { ::numaNodesTest::SetUp(); + if (numa_available() == -1 || numa_all_nodes_ptr == nullptr) { + GTEST_SKIP() << "No available NUMA support; skipped"; + } + auto [isQuerySupported, memspaceGet] = this->GetParam(); + isQuerySupported(nodeIds.front()); - if (!isQuerySupported(nodeIds.front())) { - GTEST_SKIP(); + if (IS_SKIPPED_OR_FAILED()) { + return; } hMemspace = memspaceGet(); @@ -76,8 +82,20 @@ struct memspaceProviderTest : ::memspaceGetTest { void SetUp() override { ::memspaceGetTest::SetUp(); - if (::memspaceGetTest::IsSkipped()) { - GTEST_SKIP(); + if (numa_available() == -1 || numa_all_nodes_ptr == nullptr) { + GTEST_SKIP() << "No available NUMA support; skipped"; + } + + auto [isQuerySupported, memspaceGet] = ::memspaceGetTest::GetParam(); + (void)memspaceGet; + + isQuerySupported(nodeIds.front()); + + // The test has been marked as skipped in isQuerySupported, + // repeating GTEST_SKIP in fixture would only duplicate + // the output message + if (IS_SKIPPED_OR_FAILED()) { + return; } umf_result_t ret = @@ -97,6 +115,18 @@ struct memspaceProviderTest : ::memspaceGetTest { umf_memory_provider_handle_t hProvider = nullptr; }; +struct numaNodesCapacityTest : numaNodesTest { + void SetUp() override { + numaNodesTest::SetUp(); + + for (auto nodeId : nodeIds) { + capacities.push_back(numa_node_size64(nodeId, nullptr)); + } + } + + std::vector capacities; +}; + GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(memspaceGetTest); GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(memspaceProviderTest); @@ -104,8 +134,8 @@ TEST_P(memspaceGetTest, providerFromMemspace) { umf_memory_provider_handle_t hProvider = nullptr; umf_result_t ret = umfMemoryProviderCreateFromMemspace(hMemspace, nullptr, &hProvider); - UT_ASSERTeq(ret, 
UMF_RESULT_SUCCESS); - UT_ASSERTne(hProvider, nullptr); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + ASSERT_NE(hProvider, nullptr); umfMemoryProviderDestroy(hProvider); } @@ -116,15 +146,15 @@ TEST_P(memspaceProviderTest, allocFree) { size_t alignment = 0; umf_result_t ret = umfMemoryProviderAlloc(hProvider, size, alignment, &ptr); - UT_ASSERTeq(ret, UMF_RESULT_SUCCESS); - UT_ASSERTne(ptr, nullptr); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + ASSERT_NE(ptr, nullptr); // Access the allocation, so that all the pages associated with it are // allocated on some NUMA node. memset(ptr, 0xFF, size); ret = umfMemoryProviderFree(hProvider, ptr, size); - UT_ASSERTeq(ret, UMF_RESULT_SUCCESS); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); } static std::vector getAllCpus() { @@ -144,20 +174,20 @@ TEST_P(memspaceProviderTest, allocLocalMt) { auto pinAllocValidate = [&](umf_memory_provider_handle_t hProvider, int cpu) { hwloc_topology_t topology = NULL; - UT_ASSERTeq(hwloc_topology_init(&topology), 0); - UT_ASSERTeq(hwloc_topology_load(topology), 0); + ASSERT_EQ(hwloc_topology_init(&topology), 0); + ASSERT_EQ(hwloc_topology_load(topology), 0); // Pin current thread to the provided CPU. hwloc_cpuset_t pinCpuset = hwloc_bitmap_alloc(); - UT_ASSERTeq(hwloc_bitmap_set(pinCpuset, cpu), 0); - UT_ASSERTeq( - hwloc_set_cpubind(topology, pinCpuset, HWLOC_CPUBIND_THREAD), 0); + ASSERT_EQ(hwloc_bitmap_set(pinCpuset, cpu), 0); + ASSERT_EQ(hwloc_set_cpubind(topology, pinCpuset, HWLOC_CPUBIND_THREAD), + 0); // Confirm that the thread is pinned to the provided CPU. 
hwloc_cpuset_t curCpuset = hwloc_bitmap_alloc(); - UT_ASSERTeq( - hwloc_get_cpubind(topology, curCpuset, HWLOC_CPUBIND_THREAD), 0); - UT_ASSERT(hwloc_bitmap_isequal(curCpuset, pinCpuset)); + ASSERT_EQ(hwloc_get_cpubind(topology, curCpuset, HWLOC_CPUBIND_THREAD), + 0); + ASSERT_TRUE(hwloc_bitmap_isequal(curCpuset, pinCpuset)); hwloc_bitmap_free(curCpuset); hwloc_bitmap_free(pinCpuset); @@ -168,8 +198,8 @@ TEST_P(memspaceProviderTest, allocLocalMt) { umf_result_t ret = umfMemoryProviderAlloc(hProvider, size, alignment, &ptr); - UT_ASSERTeq(ret, UMF_RESULT_SUCCESS); - UT_ASSERTne(ptr, nullptr); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + ASSERT_NE(ptr, nullptr); // Access the allocation, so that all the pages associated with it are // allocated on some NUMA node. @@ -179,7 +209,8 @@ TEST_P(memspaceProviderTest, allocLocalMt) { int mode = -1; std::vector boundNodeIds; size_t allocNodeId = SIZE_MAX; - getAllocationPolicy(ptr, maxNodeId, mode, boundNodeIds, allocNodeId); + ASSERT_NO_FATAL_FAILURE(getAllocationPolicy(ptr, maxNodeId, mode, + boundNodeIds, allocNodeId)); // Get the CPUs associated with the specified NUMA node. hwloc_obj_t allocNodeObj = @@ -190,20 +221,20 @@ TEST_P(memspaceProviderTest, allocLocalMt) { hwloc_location loc; loc.location.object = allocNodeObj, loc.type = hwloc_location_type_alias::HWLOC_LOCATION_TYPE_OBJECT; - UT_ASSERTeq(hwloc_get_local_numanode_objs(topology, &loc, &nNodes, - localNodes.data(), 0), - 0); - UT_ASSERT(nNodes <= MAX_NODES); + ASSERT_EQ(hwloc_get_local_numanode_objs(topology, &loc, &nNodes, + localNodes.data(), 0), + 0); + ASSERT_LE(nNodes, MAX_NODES); // Confirm that the allocation from this thread was made to a local // NUMA node. 
- UT_ASSERT(std::any_of(localNodes.begin(), localNodes.end(), - [&allocNodeObj](hwloc_obj_t node) { - return node == allocNodeObj; - })); + ASSERT_TRUE(std::any_of(localNodes.begin(), localNodes.end(), + [&allocNodeObj](hwloc_obj_t node) { + return node == allocNodeObj; + })); ret = umfMemoryProviderFree(hProvider, ptr, size); - UT_ASSERTeq(ret, UMF_RESULT_SUCCESS); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); hwloc_topology_destroy(topology); }; diff --git a/test/memspaces/memspace_helpers.hpp b/test/memspaces/memspace_helpers.hpp index 1adee2607..5385e5344 100644 --- a/test/memspaces/memspace_helpers.hpp +++ b/test/memspaces/memspace_helpers.hpp @@ -36,10 +36,10 @@ void getAllocationPolicy(void *ptr, unsigned long maxNodeId, int &mode, // Get policy and the nodes associated with this policy. int ret = get_mempolicy(&memMode, memNodeMasks.data(), nrUlongs * bitsPerUlong, ptr, MPOL_F_ADDR); - UT_ASSERTeq(ret, 0); + ASSERT_EQ(ret, 0); mode = memMode; - UT_ASSERTeq(boundNodeIds.size(), 0); + ASSERT_EQ(boundNodeIds.size(), 0); for (size_t i = 0; i <= maxNodeId; i++) { const size_t memNodeMaskIdx = ((i + bitsPerUlong) / bitsPerUlong) - 1; const auto &memNodeMask = memNodeMasks.at(memNodeMaskIdx); @@ -52,7 +52,7 @@ void getAllocationPolicy(void *ptr, unsigned long maxNodeId, int &mode, // Get the node that allocated the memory at 'ptr'. 
int nodeId = -1; ret = get_mempolicy(&nodeId, nullptr, 0, ptr, MPOL_F_ADDR | MPOL_F_NODE); - UT_ASSERTeq(ret, 0); + ASSERT_EQ(ret, 0); allocNodeId = static_cast(nodeId); } diff --git a/test/memspaces/memspace_highest_bandwidth.cpp b/test/memspaces/memspace_highest_bandwidth.cpp index a5bffb41d..5c30696a8 100644 --- a/test/memspaces/memspace_highest_bandwidth.cpp +++ b/test/memspaces/memspace_highest_bandwidth.cpp @@ -9,16 +9,17 @@ #include "memspace_internal.h" #include "test_helpers.h" -static bool canQueryBandwidth(size_t nodeId) { +static void canQueryBandwidth(size_t nodeId) { hwloc_topology_t topology = nullptr; int ret = hwloc_topology_init(&topology); - UT_ASSERTeq(ret, 0); + ASSERT_EQ(ret, 0); + ret = hwloc_topology_load(topology); - UT_ASSERTeq(ret, 0); + ASSERT_EQ(ret, 0); hwloc_obj_t numaNode = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, nodeId); - UT_ASSERTne(numaNode, nullptr); + ASSERT_NE(numaNode, nullptr); // Setup initiator structure. struct hwloc_location initiator; @@ -30,7 +31,12 @@ static bool canQueryBandwidth(size_t nodeId) { numaNode, &initiator, 0, &value); hwloc_topology_destroy(topology); - return (ret == 0); + + if (ret != 0) { + GTEST_SKIP() + << "Error: hwloc_memattr_get_value return value is equal to " << ret + << ", should be " << 0; + } } INSTANTIATE_TEST_SUITE_P(memspaceLowestLatencyTest, memspaceGetTest, diff --git a/test/memspaces/memspace_highest_capacity.cpp b/test/memspaces/memspace_highest_capacity.cpp index 3f3e99c76..8452e74a7 100644 --- a/test/memspaces/memspace_highest_capacity.cpp +++ b/test/memspaces/memspace_highest_capacity.cpp @@ -2,11 +2,11 @@ // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#include "memory_target_numa.h" #include "memspace_fixtures.hpp" #include "memspace_helpers.hpp" #include "memspace_internal.h" -#include "numa_helpers.h" +#include "memtarget_numa.h" +#include "numa_helpers.hpp" #include "test_helpers.h" #include @@ -21,7 +21,7 @@ struct memspaceHighestCapacityProviderTest : ::numaNodesTest { ::numaNodesTest::SetUp(); umf_const_memspace_handle_t hMemspace = umfMemspaceHighestCapacityGet(); - UT_ASSERTne(hMemspace, nullptr); + ASSERT_NE(hMemspace, nullptr); umf_result_t ret = umfMemoryProviderCreateFromMemspace(hMemspace, nullptr, &hProvider); @@ -60,7 +60,9 @@ TEST_F(memspaceHighestCapacityProviderTest, highestCapacityVerify) { memset(ptr, 0, alloc_size); ASSERT_EQ(ret, UMF_RESULT_SUCCESS); - auto nodeId = getNumaNodeByPtr(ptr); + int nodeId = -1; + ASSERT_NO_FATAL_FAILURE(getNumaNodeByPtr(ptr, &nodeId)); + ASSERT_TRUE(std::any_of(maxCapacityNodes.begin(), maxCapacityNodes.end(), [nodeId](int node) { return nodeId == node; })); diff --git a/test/memspaces/memspace_host_all.cpp b/test/memspaces/memspace_host_all.cpp index 3b0825eb5..3462b87dc 100644 --- a/test/memspaces/memspace_host_all.cpp +++ b/test/memspaces/memspace_host_all.cpp @@ -9,11 +9,11 @@ #include -#include "memory_target_numa.h" #include "memspace_fixtures.hpp" #include "memspace_helpers.hpp" #include "memspace_internal.h" -#include "numa_helpers.h" +#include "memtarget_numa.h" +#include "numa_helpers.hpp" #include "test_helpers.h" #include "utils_sanitizers.h" @@ -51,17 +51,18 @@ struct memspaceHostAllProviderTest : ::memspaceHostAllTest { TEST_F(numaNodesTest, memspaceGet) { umf_const_memspace_handle_t hMemspace = umfMemspaceHostAllGet(); - UT_ASSERTne(hMemspace, nullptr); + ASSERT_NE(hMemspace, nullptr); // Confirm that the HOST ALL memspace is composed of all available NUMA nodes. 
- UT_ASSERTeq(hMemspace->size, nodeIds.size()); + ASSERT_EQ(hMemspace->size, nodeIds.size()); for (size_t i = 0; i < hMemspace->size; i++) { // NUMA memory target internally casts the config directly into priv. // TODO: Use the memory target API when it becomes available. - struct umf_numa_memory_target_config_t *numaTargetCfg = - (struct umf_numa_memory_target_config_t *)hMemspace->nodes[i]->priv; - UT_ASSERT(std::find(nodeIds.begin(), nodeIds.end(), - numaTargetCfg->physical_id) != nodeIds.end()); + struct umf_numa_memtarget_config_t *numaTargetCfg = + (struct umf_numa_memtarget_config_t *)hMemspace->nodes[i]->priv; + ASSERT_NE(std::find(nodeIds.begin(), nodeIds.end(), + numaTargetCfg->physical_id), + nodeIds.end()); } } @@ -69,8 +70,8 @@ TEST_F(memspaceHostAllTest, providerFromHostAllMemspace) { umf_memory_provider_handle_t hProvider = nullptr; umf_result_t ret = umfMemoryProviderCreateFromMemspace(hMemspace, nullptr, &hProvider); - UT_ASSERTeq(ret, UMF_RESULT_SUCCESS); - UT_ASSERTne(hProvider, nullptr); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + ASSERT_NE(hProvider, nullptr); umfMemoryProviderDestroy(hProvider); } @@ -81,13 +82,13 @@ TEST_F(memspaceHostAllProviderTest, allocFree) { size_t alignment = 0; umf_result_t ret = umfMemoryProviderAlloc(hProvider, size, alignment, &ptr); - UT_ASSERTeq(ret, UMF_RESULT_SUCCESS); - UT_ASSERTne(ptr, nullptr); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + ASSERT_NE(ptr, nullptr); memset(ptr, 0xFF, size); ret = umfMemoryProviderFree(hProvider, ptr, size); - UT_ASSERTeq(ret, UMF_RESULT_SUCCESS); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); } TEST_F(memspaceHostAllProviderTest, hostAllDefaults) { @@ -96,7 +97,7 @@ TEST_F(memspaceHostAllProviderTest, hostAllDefaults) { // default kernel path (no mbind). 
umf_const_memspace_handle_t hMemspace = umfMemspaceHostAllGet(); - UT_ASSERTne(hMemspace, nullptr); + ASSERT_NE(hMemspace, nullptr); umf_memory_provider_handle_t hProvider = nullptr; umf_result_t ret = umfMemoryProviderCreateFromMemspace( @@ -110,45 +111,82 @@ TEST_F(memspaceHostAllProviderTest, hostAllDefaults) { size_t alignment = 0; ret = umfMemoryProviderAlloc(hProvider, size, alignment, &ptr1); - UT_ASSERTeq(ret, UMF_RESULT_SUCCESS); - UT_ASSERTne(ptr1, nullptr); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + ASSERT_NE(ptr1, nullptr); memset(ptr1, 0xFF, size); // Create single allocation using mmap void *ptr2 = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - UT_ASSERTne(ptr2, nullptr); + ASSERT_NE(ptr2, nullptr); memset(ptr2, 0xFF, size); - // Compare UMF and kernel default allocation policy - struct bitmask *nodemask1 = numa_allocate_nodemask(); - struct bitmask *nodemask2 = numa_allocate_nodemask(); - int memMode1 = -1, memMode2 = -1; - - int ret2 = get_mempolicy(&memMode1, nodemask1->maskp, nodemask1->size, ptr1, - MPOL_F_ADDR); - UT_ASSERTeq(ret2, 0); - ret2 = get_mempolicy(&memMode2, nodemask2->maskp, nodemask2->size, ptr2, - MPOL_F_ADDR); - UT_ASSERTeq(ret2, 0); - UT_ASSERTeq(memMode1, memMode2); - UT_ASSERTeq(nodemask1->size, nodemask2->size); - UT_ASSERTeq(numa_bitmask_equal(nodemask1, nodemask2), 1); - - int nodeId1 = -1, nodeId2 = -1; - ret2 = get_mempolicy(&nodeId1, nullptr, 0, ptr1, MPOL_F_ADDR | MPOL_F_NODE); - UT_ASSERTeq(ret2, 0); - ret2 = get_mempolicy(&nodeId2, nullptr, 0, ptr2, MPOL_F_ADDR | MPOL_F_NODE); - UT_ASSERTeq(ret2, 0); - UT_ASSERTeq(nodeId1, nodeId2); - - numa_free_nodemask(nodemask2); - numa_free_nodemask(nodemask1); - - ret2 = munmap(ptr2, size); - UT_ASSERTeq(ret2, 0); + EXPECT_NODE_EQ(ptr1, ptr2); + EXPECT_BIND_MODE_EQ(ptr1, ptr2); + EXPECT_BIND_MASK_EQ(ptr1, ptr2); + + auto ret2 = munmap(ptr2, size); + ASSERT_EQ(ret2, 0); ret = umfMemoryProviderFree(hProvider, ptr1, size); - UT_ASSERTeq(ret, 
UMF_RESULT_SUCCESS); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); umfMemoryProviderDestroy(hProvider); } + +TEST_F(memspaceHostAllProviderTest, HostAllVsCopy) { + umf_memspace_handle_t hMemspaceCopy = nullptr; + auto ret = umfMemspaceNew(&hMemspaceCopy); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + ASSERT_NE(hMemspaceCopy, nullptr); + + for (size_t i = 0; i < umfMemspaceMemtargetNum(hMemspace); ++i) { + auto target = umfMemspaceMemtargetGet(hMemspace, i); + ASSERT_NE(target, nullptr); + + ret = umfMemspaceMemtargetAdd(hMemspaceCopy, target); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + } + + ASSERT_EQ(umfMemspaceMemtargetNum(hMemspace), + umfMemspaceMemtargetNum(hMemspaceCopy)); + + umf_memory_provider_handle_t hProvider1, hProvider2; + ret = umfMemoryProviderCreateFromMemspace(hMemspace, nullptr, &hProvider1); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + ASSERT_NE(hProvider1, nullptr); + + ret = umfMemoryProviderCreateFromMemspace(hMemspaceCopy, nullptr, + &hProvider2); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + ASSERT_NE(hProvider2, nullptr); + + void *ptr1, *ptr2; + ret = umfMemoryProviderAlloc(hProvider1, SIZE_4K, 0, &ptr1); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + ASSERT_NE(ptr1, nullptr); + + ret = umfMemoryProviderAlloc(hProvider2, SIZE_4K, 0, &ptr2); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + + memset(ptr1, 0xFF, SIZE_4K); + memset(ptr2, 0xFF, SIZE_4K); + + ASSERT_NODE_EQ(ptr1, ptr2); + // HostAll memspace bind memory in the unique way (MPOL_DEFAULT), + // but this works only for this specific memspaces, but not for it's copies. 
+ ASSERT_BIND_MASK_NE(ptr1, ptr2); + ASSERT_BIND_MODE_NE(ptr1, ptr2); + + ASSERT_BIND_MODE_EQ(ptr1, MPOL_DEFAULT); + ASSERT_BIND_MODE_EQ(ptr2, MPOL_BIND); + + ret = umfMemoryProviderFree(hProvider1, ptr1, SIZE_4K); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + + ret = umfMemoryProviderFree(hProvider2, ptr2, SIZE_4K); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + + umfMemoryProviderDestroy(hProvider1); + umfMemoryProviderDestroy(hProvider2); + umfMemspaceDestroy(hMemspaceCopy); +} diff --git a/test/memspaces/memspace_lowest_latency.cpp b/test/memspaces/memspace_lowest_latency.cpp index cf921612c..fc35f465a 100644 --- a/test/memspaces/memspace_lowest_latency.cpp +++ b/test/memspaces/memspace_lowest_latency.cpp @@ -9,16 +9,17 @@ #include "memspace_internal.h" #include "test_helpers.h" -static bool canQueryLatency(size_t nodeId) { +static void canQueryLatency(size_t nodeId) { hwloc_topology_t topology = nullptr; int ret = hwloc_topology_init(&topology); - UT_ASSERTeq(ret, 0); + ASSERT_EQ(ret, 0); + ret = hwloc_topology_load(topology); - UT_ASSERTeq(ret, 0); + ASSERT_EQ(ret, 0); hwloc_obj_t numaNode = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, nodeId); - UT_ASSERTne(numaNode, nullptr); + ASSERT_NE(numaNode, nullptr); // Setup initiator structure. struct hwloc_location initiator; @@ -30,7 +31,12 @@ static bool canQueryLatency(size_t nodeId) { &initiator, 0, &value); hwloc_topology_destroy(topology); - return (ret == 0); + + if (ret != 0) { + GTEST_SKIP() + << "Error: hwloc_memattr_get_value return value is equal to " << ret + << ", should be " << 0; + } } INSTANTIATE_TEST_SUITE_P(memspaceLowestLatencyTest, memspaceGetTest, diff --git a/test/memspaces/memspace_numa.cpp b/test/memspaces/memspace_numa.cpp index b50eceac9..068df6886 100644 --- a/test/memspaces/memspace_numa.cpp +++ b/test/memspaces/memspace_numa.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2023 Intel Corporation +// Copyright (C) 2023-2024 Intel Corporation // Under the Apache License v2.0 with LLVM Exceptions. 
See LICENSE.TXT. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -6,13 +6,19 @@ #include "memspace_fixtures.hpp" #include "memspace_helpers.hpp" #include "memspace_internal.h" +#include "numa_helpers.hpp" +#include #include struct memspaceNumaTest : ::numaNodesTest { void SetUp() override { ::numaNodesTest::SetUp(); + if (numa_available() == -1) { + GTEST_SKIP() << "NUMA not supported on this system; test skipped"; + } + umf_result_t ret = umfMemspaceCreateFromNumaArray( nodeIds.data(), nodeIds.size(), &hMemspace); ASSERT_EQ(ret, UMF_RESULT_SUCCESS); @@ -33,6 +39,10 @@ struct memspaceNumaProviderTest : ::memspaceNumaTest { void SetUp() override { ::memspaceNumaTest::SetUp(); + if (numa_available() == -1) { + GTEST_SKIP() << "NUMA not supported on this system; test skipped"; + } + umf_result_t ret = umfMemoryProviderCreateFromMemspace(hMemspace, nullptr, &hProvider); ASSERT_EQ(ret, UMF_RESULT_SUCCESS); @@ -56,6 +66,10 @@ TEST_F(numaNodesTest, createDestroy) { nodeIds.data(), nodeIds.size(), &hMemspace); ASSERT_EQ(ret, UMF_RESULT_SUCCESS); ASSERT_NE(hMemspace, nullptr); + EXPECT_EQ(umfMemspaceMemtargetNum(hMemspace), nodeIds.size()); + for (size_t i = 0; i < umfMemspaceMemtargetNum(hMemspace); ++i) { + EXPECT_NE(umfMemspaceMemtargetGet(hMemspace, i), nullptr); + } umfMemspaceDestroy(hMemspace); } @@ -91,6 +105,122 @@ TEST_F(memspaceNumaTest, providerFromNumaMemspace) { umfMemoryProviderDestroy(hProvider); } +TEST_F(memspaceNumaTest, memtargetsInvalid) { + EXPECT_EQ(umfMemspaceMemtargetNum(nullptr), 0); + EXPECT_EQ(umfMemspaceMemtargetGet(nullptr, 0), nullptr); + + ASSERT_EQ(umfMemspaceMemtargetNum(hMemspace), nodeIds.size()); + EXPECT_EQ(umfMemspaceMemtargetGet(hMemspace, nodeIds.size()), nullptr); +} + +TEST_F(memspaceNumaTest, memspaceCopyTarget) { + umf_memspace_handle_t hMemspaceCopy = nullptr; + auto ret = umfMemspaceNew(&hMemspaceCopy); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + ASSERT_NE(hMemspaceCopy, nullptr); + + for (size_t i = 0; i < 
umfMemspaceMemtargetNum(hMemspace); ++i) { + auto target = umfMemspaceMemtargetGet(hMemspace, i); + ASSERT_NE(target, nullptr); + + ret = umfMemspaceMemtargetAdd(hMemspaceCopy, target); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + } + + ASSERT_EQ(umfMemspaceMemtargetNum(hMemspace), + umfMemspaceMemtargetNum(hMemspaceCopy)); + + umf_memory_provider_handle_t hProvider1, hProvider2; + ret = umfMemoryProviderCreateFromMemspace(hMemspace, nullptr, &hProvider1); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + ASSERT_NE(hProvider1, nullptr); + + ret = umfMemoryProviderCreateFromMemspace(hMemspaceCopy, nullptr, + &hProvider2); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + ASSERT_NE(hProvider2, nullptr); + + void *ptr1, *ptr2; + ret = umfMemoryProviderAlloc(hProvider1, SIZE_4K, 0, &ptr1); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + ASSERT_NE(ptr1, nullptr); + + ret = umfMemoryProviderAlloc(hProvider2, SIZE_4K, 0, &ptr2); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + + ASSERT_BIND_MASK_EQ(ptr1, ptr2); + ASSERT_BIND_MODE_EQ(ptr1, ptr2); + + ret = umfMemoryProviderFree(hProvider1, ptr1, SIZE_4K); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + + ret = umfMemoryProviderFree(hProvider2, ptr2, SIZE_4K); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + + umfMemoryProviderDestroy(hProvider1); + umfMemoryProviderDestroy(hProvider2); + umfMemspaceDestroy(hMemspaceCopy); +} + +TEST_F(memspaceNumaTest, memspaceDeleteTarget) { + if (numa_max_node() < 2) { + GTEST_SKIP() << "Not enough NUMA nodes to run test"; + } + + umf_memspace_handle_t hMemspaceCopy = nullptr; + auto ret = umfMemspaceNew(&hMemspaceCopy); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + ASSERT_NE(hMemspaceCopy, nullptr); + + auto target = umfMemspaceMemtargetGet(hMemspace, 0); + ASSERT_NE(target, nullptr); + + ret = umfMemspaceMemtargetAdd(hMemspaceCopy, target); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + + while (umfMemspaceMemtargetNum(hMemspace) > 1) { + auto target = umfMemspaceMemtargetGet(hMemspace, 1); + ASSERT_NE(target, nullptr); + + ret = 
umfMemspaceMemtargetRemove(hMemspace, target); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + } + + ASSERT_EQ(umfMemspaceMemtargetNum(hMemspace), + umfMemspaceMemtargetNum(hMemspaceCopy)); + + umf_memory_provider_handle_t hProvider1, hProvider2; + ret = umfMemoryProviderCreateFromMemspace(hMemspace, nullptr, &hProvider1); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + ASSERT_NE(hProvider1, nullptr); + + ret = umfMemoryProviderCreateFromMemspace(hMemspaceCopy, nullptr, + &hProvider2); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + ASSERT_NE(hProvider2, nullptr); + + void *ptr1, *ptr2; + ret = umfMemoryProviderAlloc(hProvider1, SIZE_4K, 0, &ptr1); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + ASSERT_NE(ptr1, nullptr); + + ret = umfMemoryProviderAlloc(hProvider2, SIZE_4K, 0, &ptr2); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + + ASSERT_BIND_MASK_EQ(ptr1, ptr2); + ASSERT_BIND_MODE_EQ(ptr1, ptr2); + + ret = umfMemoryProviderFree(hProvider1, ptr1, SIZE_4K); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + + ret = umfMemoryProviderFree(hProvider2, ptr2, SIZE_4K); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + + umfMemoryProviderDestroy(hProvider1); + umfMemoryProviderDestroy(hProvider2); + umfMemspaceDestroy(hMemspaceCopy); +} + TEST_F(memspaceNumaProviderTest, allocFree) { void *ptr = nullptr; size_t size = SIZE_4K; @@ -105,3 +235,152 @@ TEST_F(memspaceNumaProviderTest, allocFree) { ret = umfMemoryProviderFree(hProvider, ptr, size); ASSERT_EQ(ret, UMF_RESULT_SUCCESS); } + +TEST_F(numaNodesCapacityTest, CapacityFilter) { + if (capacities.size() <= 1) { + GTEST_SKIP() << "Not enough numa nodes - skipping the test"; + } + + umf_memspace_handle_t hMemspace; + auto ret = umfMemspaceClone(umfMemspaceHostAllGet(), &hMemspace); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + ASSERT_NE(hMemspace, nullptr); + + std::sort(capacities.begin(), capacities.end()); + + size_t filter_size = capacities[capacities.size() / 2]; + ret = umfMemspaceFilterByCapacity(hMemspace, filter_size); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + + 
ASSERT_EQ(umfMemspaceMemtargetNum(hMemspace), (capacities.size() + 1) / 2); + for (size_t i = 0; i < umfMemspaceMemtargetNum(hMemspace); i++) { + auto hTarget = umfMemspaceMemtargetGet(hMemspace, i); + ASSERT_NE(hTarget, nullptr); + size_t capacity; + auto ret = umfMemtargetGetCapacity(hTarget, &capacity); + EXPECT_EQ(ret, UMF_RESULT_SUCCESS); + auto it = std::find(capacities.begin(), capacities.end(), capacity); + EXPECT_NE(it, capacities.end()); + EXPECT_GE(capacity, filter_size); + if (it != capacities.end()) { + capacities.erase(it); + } + } + umfMemspaceDestroy(hMemspace); +} + +TEST_F(numaNodesTest, idfilter) { + if (nodeIds.size() <= 1) { + GTEST_SKIP() << "Not enough numa nodes - skipping the test"; + } + + umf_memspace_handle_t hMemspace; + auto ret = umfMemspaceClone(umfMemspaceHostAllGet(), &hMemspace); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + ASSERT_NE(hMemspace, nullptr); + + std::vector ids = {nodeIds[0], nodeIds[1]}; + ret = umfMemspaceFilterById(hMemspace, ids.data(), 2); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + + ASSERT_EQ(umfMemspaceMemtargetNum(hMemspace), 2); + + for (size_t i = 0; i < umfMemspaceMemtargetNum(hMemspace); i++) { + auto hTarget = umfMemspaceMemtargetGet(hMemspace, i); + ASSERT_NE(hTarget, nullptr); + unsigned id; + auto ret = umfMemtargetGetId(hTarget, &id); + EXPECT_EQ(ret, UMF_RESULT_SUCCESS); + auto it = std::find(ids.begin(), ids.end(), id); + EXPECT_NE(it, ids.end()); + if (it != ids.end()) { + ids.erase(it); + } + } + umfMemspaceDestroy(hMemspace); +} + +int customfilter(umf_const_memspace_handle_t memspace, + umf_const_memtarget_handle_t memtarget, void *args) { + static unsigned customFilterCounter = 0; + + EXPECT_NE(args, nullptr); + EXPECT_NE(memspace, nullptr); + EXPECT_NE(memtarget, nullptr); + + auto ids = (std::vector *)args; + if (customFilterCounter++ % 2) { + ids->push_back(memtarget); + return 1; + } else { + return 0; + } +} + +TEST_F(numaNodesTest, customfilter) { + if (nodeIds.size() <= 1) { + GTEST_SKIP() << 
"Not enough numa nodes - skipping the test"; + } + + umf_memspace_handle_t hMemspace; + auto ret = umfMemspaceClone(umfMemspaceHostAllGet(), &hMemspace); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + ASSERT_NE(hMemspace, nullptr); + + std::vector vec; + ret = umfMemspaceUserFilter(hMemspace, &customfilter, &vec); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + + ASSERT_EQ(umfMemspaceMemtargetNum(hMemspace), nodeIds.size() / 2); + + for (size_t i = 0; i < umfMemspaceMemtargetNum(hMemspace); i++) { + auto hTarget = umfMemspaceMemtargetGet(hMemspace, i); + ASSERT_NE(hTarget, nullptr); + + EXPECT_EQ(ret, UMF_RESULT_SUCCESS); + auto it = std::find(vec.begin(), vec.end(), hTarget); + EXPECT_NE(it, vec.end()); + if (it != vec.end()) { + vec.erase(it); + } + } + ASSERT_EQ(vec.size(), 0); + umfMemspaceDestroy(hMemspace); +} + +int invalidFilter(umf_const_memspace_handle_t memspace, + umf_const_memtarget_handle_t memtarget, void *args) { + EXPECT_EQ(args, nullptr); + EXPECT_NE(memspace, nullptr); + EXPECT_NE(memtarget, nullptr); + + return -1; +} + +TEST_F(numaNodesTest, invalidFilters) { + umf_memspace_handle_t hMemspace; + auto ret = umfMemspaceClone(umfMemspaceHostAllGet(), &hMemspace); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + ASSERT_NE(hMemspace, nullptr); + + ret = umfMemspaceFilterByCapacity(nullptr, 0); + ASSERT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + ret = umfMemspaceFilterByCapacity(hMemspace, -1); + ASSERT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + ret = umfMemspaceFilterById(hMemspace, nullptr, 0); + ASSERT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); + unsigned id = 0; + ret = umfMemspaceFilterById(nullptr, &id, 1); + ASSERT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + ret = umfMemspaceUserFilter(hMemspace, nullptr, nullptr); + ASSERT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); + ret = umfMemspaceUserFilter(nullptr, invalidFilter, nullptr); + ASSERT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + ret = umfMemspaceUserFilter(hMemspace, invalidFilter, nullptr); + 
ASSERT_EQ(ret, UMF_RESULT_ERROR_USER_SPECIFIC); + umfMemspaceDestroy(hMemspace); +} diff --git a/test/memspaces/memtarget.cpp b/test/memspaces/memtarget.cpp new file mode 100644 index 000000000..325fa9d1d --- /dev/null +++ b/test/memspaces/memtarget.cpp @@ -0,0 +1,105 @@ +// Copyright (C) 2024 Intel Corporation +// Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include "memspace_fixtures.hpp" +#include "memspace_helpers.hpp" + +#include +#include +#include + +using umf_test::test; + +TEST_F(test, memTargetNuma) { + auto memspace = umfMemspaceHostAllGet(); + ASSERT_NE(memspace, nullptr); + umf_memtarget_type_t type; + for (size_t i = 0; i < umfMemspaceMemtargetNum(memspace); i++) { + auto hTarget = umfMemspaceMemtargetGet(memspace, i); + ASSERT_NE(hTarget, nullptr); + auto ret = umfMemtargetGetType(hTarget, &type); + EXPECT_EQ(ret, UMF_RESULT_SUCCESS); + EXPECT_EQ(type, UMF_MEMTARGET_TYPE_NUMA); + } +} + +TEST_F(numaNodesCapacityTest, getCapacity) { + auto memspace = umfMemspaceHostAllGet(); + ASSERT_NE(memspace, nullptr); + + for (size_t i = 0; i < umfMemspaceMemtargetNum(memspace); i++) { + auto hTarget = umfMemspaceMemtargetGet(memspace, i); + ASSERT_NE(hTarget, nullptr); + size_t capacity; + auto ret = umfMemtargetGetCapacity(hTarget, &capacity); + EXPECT_EQ(ret, UMF_RESULT_SUCCESS); + auto it = std::find(capacities.begin(), capacities.end(), capacity); + EXPECT_NE(it, capacities.end()); + if (it != capacities.end()) { + capacities.erase(it); + } + } + ASSERT_EQ(capacities.size(), 0); +} + +TEST_F(numaNodesTest, getId) { + auto memspace = umfMemspaceHostAllGet(); + ASSERT_NE(memspace, nullptr); + + for (size_t i = 0; i < umfMemspaceMemtargetNum(memspace); i++) { + auto hTarget = umfMemspaceMemtargetGet(memspace, i); + ASSERT_NE(hTarget, nullptr); + unsigned id; + auto ret = umfMemtargetGetId(hTarget, &id); + EXPECT_EQ(ret, UMF_RESULT_SUCCESS); + auto it = 
std::find(nodeIds.begin(), nodeIds.end(), id); + EXPECT_NE(it, nodeIds.end()); + if (it != nodeIds.end()) { + nodeIds.erase(it); + } + } + ASSERT_EQ(nodeIds.size(), 0); +} + +TEST_F(numaNodesTest, getCapacityInvalid) { + auto memspace = umfMemspaceHostAllGet(); + ASSERT_NE(memspace, nullptr); + size_t capacity; + auto ret = umfMemtargetGetCapacity(NULL, &capacity); + EXPECT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); + ret = umfMemtargetGetCapacity(NULL, NULL); + EXPECT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); + auto hTarget = umfMemspaceMemtargetGet(memspace, 0); + ASSERT_NE(hTarget, nullptr); + ret = umfMemtargetGetCapacity(hTarget, NULL); + EXPECT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); +} + +TEST_F(test, memTargetInvalid) { + auto memspace = umfMemspaceHostAllGet(); + ASSERT_NE(memspace, nullptr); + umf_memtarget_type_t type; + auto ret = umfMemtargetGetType(NULL, &type); + EXPECT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); + ret = umfMemtargetGetType(NULL, NULL); + EXPECT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); + auto hTarget = umfMemspaceMemtargetGet(memspace, 0); + ASSERT_NE(hTarget, nullptr); + ret = umfMemtargetGetType(hTarget, NULL); + EXPECT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); +} + +TEST_F(numaNodesTest, getIdInvalid) { + auto memspace = umfMemspaceHostAllGet(); + ASSERT_NE(memspace, nullptr); + unsigned id; + auto ret = umfMemtargetGetId(NULL, &id); + EXPECT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); + ret = umfMemtargetGetId(NULL, NULL); + EXPECT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); + auto hTarget = umfMemspaceMemtargetGet(memspace, 0); + ASSERT_NE(hTarget, nullptr); + ret = umfMemtargetGetId(hTarget, NULL); + EXPECT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); +} diff --git a/test/poolFixtures.hpp b/test/poolFixtures.hpp index 1285c57bf..e5ec85012 100644 --- a/test/poolFixtures.hpp +++ b/test/poolFixtures.hpp @@ -2,11 +2,14 @@ // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#ifndef UMF_TEST_MEMORY_POOL_OPS_HPP -#define UMF_TEST_MEMORY_POOL_OPS_HPP +#ifndef UMF_TEST_POOL_FIXTURES_HPP +#define UMF_TEST_POOL_FIXTURES_HPP 1 #include "pool.hpp" #include "provider.hpp" +#include "umf/providers/provider_coarse.h" +#include "umf/providers/provider_devdax_memory.h" +#include "utils/utils_sanitizers.h" #include #include @@ -17,21 +20,46 @@ #include "../malloc_compliance_tests.hpp" -using poolCreateExtParams = std::tuple; +using poolCreateExtParams = + std::tuple; umf::pool_unique_handle_t poolCreateExtUnique(poolCreateExtParams params) { - umf_memory_pool_handle_t hPool; - auto [pool_ops, pool_params, provider_ops, provider_params] = params; + auto [pool_ops, pool_params, provider_ops, provider_params, coarse_params] = + params; + umf_memory_provider_handle_t upstream_provider = nullptr; umf_memory_provider_handle_t provider = nullptr; - auto ret = - umfMemoryProviderCreate(provider_ops, provider_params, &provider); + umf_memory_pool_handle_t hPool = nullptr; + umf_result_t ret; + + ret = umfMemoryProviderCreate(provider_ops, provider_params, + &upstream_provider); EXPECT_EQ(ret, UMF_RESULT_SUCCESS); + EXPECT_NE(upstream_provider, nullptr); + + provider = upstream_provider; + + if (coarse_params) { + coarse_memory_provider_params_t *coarse_memory_provider_params = + (coarse_memory_provider_params_t *)coarse_params; + coarse_memory_provider_params->upstream_memory_provider = + upstream_provider; + coarse_memory_provider_params->destroy_upstream_memory_provider = true; + + umf_memory_provider_handle_t coarse_provider = nullptr; + ret = umfMemoryProviderCreate(umfCoarseMemoryProviderOps(), + coarse_params, &coarse_provider); + EXPECT_EQ(ret, UMF_RESULT_SUCCESS); + EXPECT_NE(coarse_provider, nullptr); + + provider = coarse_provider; + } ret = umfPoolCreate(pool_ops, provider, pool_params, UMF_POOL_CREATE_FLAG_OWN_PROVIDER, &hPool); EXPECT_EQ(ret, UMF_RESULT_SUCCESS); + EXPECT_NE(hPool, 
nullptr); return umf::pool_unique_handle_t(hPool, &umfPoolDestroy); } @@ -40,6 +68,7 @@ struct umfPoolTest : umf_test::test, ::testing::WithParamInterface { void SetUp() override { test::SetUp(); + pool = poolCreateExtUnique(this->GetParam()); } @@ -407,4 +436,32 @@ TEST_P(umfPoolTest, realloc_compliance) { TEST_P(umfPoolTest, free_compliance) { free_compliance_test(pool.get()); } -#endif /* UMF_TEST_MEMORY_POOL_OPS_HPP */ +TEST_P(umfPoolTest, allocMaxSize) { + auto *ptr = umfPoolMalloc(pool.get(), SIZE_MAX); + ASSERT_EQ(ptr, nullptr); +} + +TEST_P(umfPoolTest, mallocUsableSize) { +#ifdef __SANITIZE_ADDRESS__ + // Sanitizer replaces malloc_usable_size implementation with its own + GTEST_SKIP() + << "This test is invalid with AddressSanitizer instrumentation"; +#else + + for (size_t allocSize : {32, 48, 1024, 8192}) { + char *ptr = static_cast(umfPoolMalloc(pool.get(), allocSize)); + ASSERT_NE(ptr, nullptr); + size_t result = umfPoolMallocUsableSize(pool.get(), ptr); + ASSERT_TRUE(result == 0 || result >= allocSize); + + // Make sure we can write to this memory + for (size_t i = 0; i < result; i++) { + ptr[i] = 123; + } + + umfPoolFree(pool.get(), ptr); + } +#endif +} + +#endif /* UMF_TEST_POOL_FIXTURES_HPP */ diff --git a/test/pools/disjoint_pool.cpp b/test/pools/disjoint_pool.cpp index d7612f4d5..319997c82 100644 --- a/test/pools/disjoint_pool.cpp +++ b/test/pools/disjoint_pool.cpp @@ -2,6 +2,8 @@ // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +#include + #include "pool.hpp" #include "poolFixtures.hpp" #include "pool_disjoint.h" @@ -9,13 +11,43 @@ #include "provider_null.h" #include "provider_trace.h" -umf_disjoint_pool_params_t poolConfig() { - umf_disjoint_pool_params_t config{}; - config.SlabMinSize = 4096; - config.MaxPoolableSize = 4096; - config.Capacity = 4; - config.MinBucketSize = 64; - return config; +using disjoint_params_unique_handle_t = + std::unique_ptr; + +static constexpr size_t DEFAULT_DISJOINT_SLAB_MIN_SIZE = 4096; +static constexpr size_t DEFAULT_DISJOINT_MAX_POOLABLE_SIZE = 4096; +static constexpr size_t DEFAULT_DISJOINT_CAPACITY = 4; +static constexpr size_t DEFAULT_DISJOINT_MIN_BUCKET_SIZE = 64; + +disjoint_params_unique_handle_t poolConfig() { + umf_disjoint_pool_params_handle_t config = nullptr; + umf_result_t res = umfDisjointPoolParamsCreate(&config); + if (res != UMF_RESULT_SUCCESS) { + throw std::runtime_error("Failed to create pool params"); + } + res = umfDisjointPoolParamsSetSlabMinSize(config, + DEFAULT_DISJOINT_SLAB_MIN_SIZE); + if (res != UMF_RESULT_SUCCESS) { + throw std::runtime_error("Failed to set slab min size"); + } + res = umfDisjointPoolParamsSetMaxPoolableSize( + config, DEFAULT_DISJOINT_MAX_POOLABLE_SIZE); + if (res != UMF_RESULT_SUCCESS) { + throw std::runtime_error("Failed to set max poolable size"); + } + res = umfDisjointPoolParamsSetCapacity(config, DEFAULT_DISJOINT_CAPACITY); + if (res != UMF_RESULT_SUCCESS) { + throw std::runtime_error("Failed to set capacity"); + } + res = umfDisjointPoolParamsSetMinBucketSize( + config, DEFAULT_DISJOINT_MIN_BUCKET_SIZE); + if (res != UMF_RESULT_SUCCESS) { + throw std::runtime_error("Failed to set min bucket size"); + } + + return disjoint_params_unique_handle_t(config, + &umfDisjointPoolParamsDestroy); } using umf_test::test; @@ -47,12 +79,14 @@ TEST_F(test, freeErrorPropagation) { provider_handle = providerUnique.get(); // force all allocations to go to 
memory provider - umf_disjoint_pool_params_t params = poolConfig(); - params.MaxPoolableSize = 0; + disjoint_params_unique_handle_t params = poolConfig(); + umf_result_t retp = + umfDisjointPoolParamsSetMaxPoolableSize(params.get(), 0); + EXPECT_EQ(retp, UMF_RESULT_SUCCESS); umf_memory_pool_handle_t pool = NULL; - umf_result_t retp = - umfPoolCreate(umfDisjointPoolOps(), provider_handle, ¶ms, 0, &pool); + retp = umfPoolCreate(umfDisjointPoolOps(), provider_handle, params.get(), 0, + &pool); EXPECT_EQ(retp, UMF_RESULT_SUCCESS); auto poolHandle = umf_test::wrapPoolUnique(pool); @@ -92,8 +126,10 @@ TEST_F(test, sharedLimits) { static constexpr size_t SlabMinSize = 1024; static constexpr size_t MaxSize = 4 * SlabMinSize; - auto config = poolConfig(); - config.SlabMinSize = SlabMinSize; + disjoint_params_unique_handle_t config = poolConfig(); + umf_result_t ret = + umfDisjointPoolParamsSetSlabMinSize(config.get(), SlabMinSize); + EXPECT_EQ(ret, UMF_RESULT_SUCCESS); auto limits = std::unique_ptr(defaultPoolConfig.Capacity) / 2))); +INSTANTIATE_TEST_SUITE_P(disjointPoolTests, umfMemTest, + ::testing::Values(std::make_tuple( + poolCreateExtParams{ + umfDisjointPoolOps(), + (void *)defaultPoolConfig.get(), + &MOCK_OUT_OF_MEM_PROVIDER_OPS, + (void *)&DEFAULT_DISJOINT_CAPACITY, nullptr}, + static_cast(DEFAULT_DISJOINT_CAPACITY) / 2))); INSTANTIATE_TEST_SUITE_P(disjointMultiPoolTests, umfMultiPoolTest, ::testing::Values(poolCreateExtParams{ - umfDisjointPoolOps(), (void *)&defaultPoolConfig, - &MALLOC_PROVIDER_OPS, nullptr})); + umfDisjointPoolOps(), + (void *)defaultPoolConfig.get(), + &BA_GLOBAL_PROVIDER_OPS, nullptr, nullptr})); diff --git a/test/pools/jemalloc_coarse_devdax.cpp b/test/pools/jemalloc_coarse_devdax.cpp new file mode 100644 index 000000000..350e053ab --- /dev/null +++ b/test/pools/jemalloc_coarse_devdax.cpp @@ -0,0 +1,45 @@ +// Copyright (C) 2024 Intel Corporation +// Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include "umf/pools/pool_jemalloc.h" +#include "umf/providers/provider_devdax_memory.h" + +#include "pool_coarse.hpp" + +using devdax_params_unique_handle_t = + std::unique_ptr; + +devdax_params_unique_handle_t create_devdax_params() { + char *path = getenv("UMF_TESTS_DEVDAX_PATH"); + char *size = getenv("UMF_TESTS_DEVDAX_SIZE"); + if (path == nullptr || path[0] == 0 || size == nullptr || size[0] == 0) { + return devdax_params_unique_handle_t( + nullptr, &umfDevDaxMemoryProviderParamsDestroy); + } + + umf_devdax_memory_provider_params_handle_t params = NULL; + umf_result_t res = + umfDevDaxMemoryProviderParamsCreate(¶ms, path, atol(size)); + if (res != UMF_RESULT_SUCCESS) { + throw std::runtime_error( + "Failed to create DevDax Memory Provider params"); + } + + return devdax_params_unique_handle_t(params, + &umfDevDaxMemoryProviderParamsDestroy); +} + +auto coarseParams = umfCoarseMemoryProviderParamsDefault(); +auto devdaxParams = create_devdax_params(); + +static std::vector poolParamsList = + devdaxParams.get() + ? std::vector{poolCreateExtParams{ + umfJemallocPoolOps(), nullptr, umfDevDaxMemoryProviderOps(), + devdaxParams.get(), &coarseParams}} + : std::vector{}; + +INSTANTIATE_TEST_SUITE_P(jemallocCoarseDevDaxTest, umfPoolTest, + ::testing::ValuesIn(poolParamsList)); diff --git a/test/pools/jemalloc_coarse_file.cpp b/test/pools/jemalloc_coarse_file.cpp new file mode 100644 index 000000000..74ad36d56 --- /dev/null +++ b/test/pools/jemalloc_coarse_file.cpp @@ -0,0 +1,33 @@ +// Copyright (C) 2024 Intel Corporation +// Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include "umf/pools/pool_jemalloc.h" +#include "umf/providers/provider_file_memory.h" + +#include "pool_coarse.hpp" + +using file_params_unique_handle_t = + std::unique_ptr; + +file_params_unique_handle_t get_file_params_default(char *path) { + umf_file_memory_provider_params_handle_t file_params = NULL; + umf_result_t res = umfFileMemoryProviderParamsCreate(&file_params, path); + if (res != UMF_RESULT_SUCCESS) { + throw std::runtime_error( + "Failed to create File Memory Provider params"); + } + + return file_params_unique_handle_t(file_params, + &umfFileMemoryProviderParamsDestroy); +} + +auto coarseParams = umfCoarseMemoryProviderParamsDefault(); +file_params_unique_handle_t fileParams = get_file_params_default(FILE_PATH); + +INSTANTIATE_TEST_SUITE_P(jemallocCoarseFileTest, umfPoolTest, + ::testing::Values(poolCreateExtParams{ + umfJemallocPoolOps(), nullptr, + umfFileMemoryProviderOps(), fileParams.get(), + &coarseParams})); diff --git a/test/pools/jemalloc_pool.cpp b/test/pools/jemalloc_pool.cpp index 331c96f04..4dddbcd32 100644 --- a/test/pools/jemalloc_pool.cpp +++ b/test/pools/jemalloc_pool.cpp @@ -11,11 +11,26 @@ using umf_test::test; using namespace umf_test; -auto defaultParams = umfOsMemoryProviderParamsDefault(); +using os_params_unique_handle_t = + std::unique_ptr; + +os_params_unique_handle_t createOsMemoryProviderParams() { + umf_os_memory_provider_params_handle_t params = nullptr; + umf_result_t res = umfOsMemoryProviderParamsCreate(¶ms); + if (res != UMF_RESULT_SUCCESS) { + throw std::runtime_error("Failed to create os memory provider params"); + } + + return os_params_unique_handle_t(params, &umfOsMemoryProviderParamsDestroy); +} +auto defaultParams = createOsMemoryProviderParams(); + INSTANTIATE_TEST_SUITE_P(jemallocPoolTest, umfPoolTest, ::testing::Values(poolCreateExtParams{ umfJemallocPoolOps(), nullptr, - umfOsMemoryProviderOps(), &defaultParams})); + umfOsMemoryProviderOps(), 
defaultParams.get(), + nullptr})); // this test makes sure that jemalloc does not use // memory provider to allocate metadata (and hence @@ -27,11 +42,18 @@ TEST_F(test, metadataNotAllocatedUsingProvider) { // set coarse grain allocations to PROT_NONE so that we can be sure // jemalloc does not touch any of the allocated memory - auto params = umfOsMemoryProviderParamsDefault(); - params.protection = UMF_PROTECTION_NONE; + umf_os_memory_provider_params_handle_t params = nullptr; + umf_result_t res = umfOsMemoryProviderParamsCreate(¶ms); + ASSERT_EQ(res, UMF_RESULT_SUCCESS); + res = umfOsMemoryProviderParamsSetProtection(params, UMF_PROTECTION_NONE); + ASSERT_EQ(res, UMF_RESULT_SUCCESS); - auto pool = poolCreateExtUnique( - {umfJemallocPoolOps(), nullptr, umfOsMemoryProviderOps(), ¶ms}); + auto pool = + poolCreateExtUnique({umfJemallocPoolOps(), nullptr, + umfOsMemoryProviderOps(), params, nullptr}); + + res = umfOsMemoryProviderParamsDestroy(params); + ASSERT_EQ(res, UMF_RESULT_SUCCESS); std::vector> allocs; for (size_t i = 0; i < numAllocs; i++) { @@ -40,3 +62,122 @@ TEST_F(test, metadataNotAllocatedUsingProvider) { [pool = pool.get()](void *ptr) { umfPoolFree(pool, ptr); }); } } + +using jemallocPoolParams = bool; +struct umfJemallocPoolParamsTest + : umf_test::test, + ::testing::WithParamInterface { + + struct validation_params_t { + bool keep_all_memory; + }; + + struct provider_validator : public umf_test::provider_ba_global { + using base_provider = umf_test::provider_ba_global; + + umf_result_t initialize(validation_params_t *params) { + EXPECT_NE(params, nullptr); + expected_params = params; + return UMF_RESULT_SUCCESS; + } + umf_result_t free(void *ptr, size_t size) { + EXPECT_EQ(expected_params->keep_all_memory, false); + return base_provider::free(ptr, size); + } + + validation_params_t *expected_params; + }; + + static constexpr umf_memory_provider_ops_t VALIDATOR_PROVIDER_OPS = + umf::providerMakeCOps(); + + umfJemallocPoolParamsTest() : 
expected_params{false}, params(nullptr) {} + void SetUp() override { + test::SetUp(); + expected_params.keep_all_memory = this->GetParam(); + umf_result_t ret = umfJemallocPoolParamsCreate(¶ms); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + ret = umfJemallocPoolParamsSetKeepAllMemory( + params, expected_params.keep_all_memory); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + } + + void TearDown() override { + umfJemallocPoolParamsDestroy(params); + test::TearDown(); + } + + umf::pool_unique_handle_t makePool() { + umf_memory_provider_handle_t hProvider = nullptr; + umf_memory_pool_handle_t hPool = nullptr; + + auto ret = umfMemoryProviderCreate(&VALIDATOR_PROVIDER_OPS, + &expected_params, &hProvider); + EXPECT_EQ(ret, UMF_RESULT_SUCCESS); + + ret = umfPoolCreate(umfJemallocPoolOps(), hProvider, params, + UMF_POOL_CREATE_FLAG_OWN_PROVIDER, &hPool); + EXPECT_EQ(ret, UMF_RESULT_SUCCESS); + + return umf::pool_unique_handle_t(hPool, &umfPoolDestroy); + } + + void allocFreeFlow() { + static const size_t ALLOC_SIZE = 128; + static const size_t NUM_ALLOCATIONS = 100; + std::vector ptrs; + + auto pool = makePool(); + ASSERT_NE(pool, nullptr); + + for (size_t i = 0; i < NUM_ALLOCATIONS; ++i) { + auto *ptr = umfPoolMalloc(pool.get(), ALLOC_SIZE); + ASSERT_NE(ptr, nullptr); + ptrs.push_back(ptr); + } + + for (size_t i = 0; i < NUM_ALLOCATIONS; ++i) { + auto ret = umfPoolFree(pool.get(), ptrs[i]); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + } + + // Now pool can call free during pool destruction + expected_params.keep_all_memory = false; + } + + validation_params_t expected_params; + umf_jemalloc_pool_params_handle_t params; +}; + +TEST_P(umfJemallocPoolParamsTest, allocFree) { allocFreeFlow(); } + +TEST_P(umfJemallocPoolParamsTest, updateParams) { + expected_params.keep_all_memory = !expected_params.keep_all_memory; + umf_result_t ret = umfJemallocPoolParamsSetKeepAllMemory( + params, expected_params.keep_all_memory); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + + allocFreeFlow(); +} + 
+TEST_P(umfJemallocPoolParamsTest, invalidParams) { + umf_result_t ret = umfJemallocPoolParamsCreate(nullptr); + ASSERT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + ret = umfJemallocPoolParamsSetKeepAllMemory(nullptr, true); + ASSERT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + ret = umfJemallocPoolParamsSetKeepAllMemory(nullptr, false); + ASSERT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + ret = umfJemallocPoolParamsDestroy(nullptr); + ASSERT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); +} + +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(umfJemallocPoolParamsTest); + +/* TODO: enable this test after the issue #903 is fixed. +(https://github.com/oneapi-src/unified-memory-framework/issues/903) +INSTANTIATE_TEST_SUITE_P(jemallocPoolTest, umfJemallocPoolParamsTest, + testing::Values(false, true)); +*/ diff --git a/test/pools/pool_base_alloc.cpp b/test/pools/pool_base_alloc.cpp index ec07a7c2f..7c9a3701a 100644 --- a/test/pools/pool_base_alloc.cpp +++ b/test/pools/pool_base_alloc.cpp @@ -48,4 +48,4 @@ umf_memory_pool_ops_t BA_POOL_OPS = umf::poolMakeCOps(); INSTANTIATE_TEST_SUITE_P(baPool, umfPoolTest, ::testing::Values(poolCreateExtParams{ &BA_POOL_OPS, nullptr, - &umf_test::BASE_PROVIDER_OPS, nullptr})); + &umf_test::BASE_PROVIDER_OPS, nullptr, nullptr})); diff --git a/test/pools/pool_coarse.hpp b/test/pools/pool_coarse.hpp new file mode 100644 index 000000000..7baa612f1 --- /dev/null +++ b/test/pools/pool_coarse.hpp @@ -0,0 +1,18 @@ +// Copyright (C) 2024 Intel Corporation +// Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#ifndef UMF_TEST_POOL_COARSE_HPP +#define UMF_TEST_POOL_COARSE_HPP 1 + +#include "umf/providers/provider_coarse.h" + +#include "pool.hpp" +#include "poolFixtures.hpp" + +using umf_test::test; +using namespace umf_test; + +#define FILE_PATH ((char *)"tmp_file_provider") + +#endif /* UMF_TEST_POOL_COARSE_HPP */ diff --git a/test/pools/scalable_coarse_devdax.cpp b/test/pools/scalable_coarse_devdax.cpp new file mode 100644 index 000000000..1bf77c61c --- /dev/null +++ b/test/pools/scalable_coarse_devdax.cpp @@ -0,0 +1,45 @@ +// Copyright (C) 2024 Intel Corporation +// Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include "umf/pools/pool_scalable.h" +#include "umf/providers/provider_devdax_memory.h" + +#include "pool_coarse.hpp" + +using devdax_params_unique_handle_t = + std::unique_ptr; + +devdax_params_unique_handle_t create_devdax_params() { + char *path = getenv("UMF_TESTS_DEVDAX_PATH"); + char *size = getenv("UMF_TESTS_DEVDAX_SIZE"); + if (path == nullptr || path[0] == 0 || size == nullptr || size[0] == 0) { + return devdax_params_unique_handle_t( + nullptr, &umfDevDaxMemoryProviderParamsDestroy); + } + + umf_devdax_memory_provider_params_handle_t params = NULL; + umf_result_t res = + umfDevDaxMemoryProviderParamsCreate(¶ms, path, atol(size)); + if (res != UMF_RESULT_SUCCESS) { + throw std::runtime_error( + "Failed to create DevDax Memory Provider params"); + } + + return devdax_params_unique_handle_t(params, + &umfDevDaxMemoryProviderParamsDestroy); +} + +auto coarseParams = umfCoarseMemoryProviderParamsDefault(); +auto devdaxParams = create_devdax_params(); + +static std::vector poolParamsList = + devdaxParams.get() + ? 
std::vector{poolCreateExtParams{ + umfScalablePoolOps(), nullptr, umfDevDaxMemoryProviderOps(), + devdaxParams.get(), &coarseParams}} + : std::vector{}; + +INSTANTIATE_TEST_SUITE_P(scalableCoarseDevDaxTest, umfPoolTest, + ::testing::ValuesIn(poolParamsList)); diff --git a/test/pools/scalable_coarse_file.cpp b/test/pools/scalable_coarse_file.cpp new file mode 100644 index 000000000..b45c112be --- /dev/null +++ b/test/pools/scalable_coarse_file.cpp @@ -0,0 +1,33 @@ +// Copyright (C) 2024 Intel Corporation +// Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include "umf/pools/pool_scalable.h" +#include "umf/providers/provider_file_memory.h" + +#include "pool_coarse.hpp" + +using file_params_unique_handle_t = + std::unique_ptr; + +file_params_unique_handle_t get_file_params_default(char *path) { + umf_file_memory_provider_params_handle_t file_params = NULL; + umf_result_t res = umfFileMemoryProviderParamsCreate(&file_params, path); + if (res != UMF_RESULT_SUCCESS) { + throw std::runtime_error( + "Failed to create File Memory Provider params"); + } + + return file_params_unique_handle_t(file_params, + &umfFileMemoryProviderParamsDestroy); +} + +auto coarseParams = umfCoarseMemoryProviderParamsDefault(); +file_params_unique_handle_t fileParams = get_file_params_default(FILE_PATH); + +INSTANTIATE_TEST_SUITE_P(scalableCoarseFileTest, umfPoolTest, + ::testing::Values(poolCreateExtParams{ + umfScalablePoolOps(), nullptr, + umfFileMemoryProviderOps(), fileParams.get(), + &coarseParams})); diff --git a/test/pools/scalable_pool.cpp b/test/pools/scalable_pool.cpp index bdd0682f5..3edacd965 100644 --- a/test/pools/scalable_pool.cpp +++ b/test/pools/scalable_pool.cpp @@ -7,9 +7,162 @@ #include "pool.hpp" #include "poolFixtures.hpp" +#include "provider.hpp" + +using os_params_unique_handle_t = + std::unique_ptr; + +os_params_unique_handle_t createOsMemoryProviderParams() { + 
umf_os_memory_provider_params_handle_t params = nullptr; + umf_result_t res = umfOsMemoryProviderParamsCreate(¶ms); + if (res != UMF_RESULT_SUCCESS) { + throw std::runtime_error("Failed to create os memory provider params"); + } + + return os_params_unique_handle_t(params, &umfOsMemoryProviderParamsDestroy); +} +auto defaultParams = createOsMemoryProviderParams(); -auto defaultParams = umfOsMemoryProviderParamsDefault(); INSTANTIATE_TEST_SUITE_P(scalablePoolTest, umfPoolTest, ::testing::Values(poolCreateExtParams{ umfScalablePoolOps(), nullptr, - umfOsMemoryProviderOps(), &defaultParams})); + umfOsMemoryProviderOps(), defaultParams.get(), + nullptr})); + +using scalablePoolParams = std::tuple; +struct umfScalablePoolParamsTest + : umf_test::test, + ::testing::WithParamInterface { + + struct validation_params_t { + size_t granularity; + bool keep_all_memory; + }; + + struct provider_validator : public umf_test::provider_ba_global { + using base_provider = umf_test::provider_ba_global; + + umf_result_t initialize(validation_params_t *params) { + EXPECT_NE(params, nullptr); + expected_params = params; + return UMF_RESULT_SUCCESS; + } + umf_result_t alloc(size_t size, size_t align, void **ptr) { + EXPECT_EQ(size, expected_params->granularity); + return base_provider::alloc(size, align, ptr); + } + umf_result_t free(void *ptr, size_t size) { + EXPECT_EQ(expected_params->keep_all_memory, false); + return base_provider::free(ptr, size); + } + + validation_params_t *expected_params; + }; + + static constexpr umf_memory_provider_ops_t VALIDATOR_PROVIDER_OPS = + umf::providerMakeCOps(); + + umfScalablePoolParamsTest() : expected_params{0, false}, params(nullptr) {} + void SetUp() override { + test::SetUp(); + auto [granularity, keep_all_memory] = this->GetParam(); + expected_params.granularity = granularity; + expected_params.keep_all_memory = keep_all_memory; + umf_result_t ret = umfScalablePoolParamsCreate(¶ms); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + ret = 
umfScalablePoolParamsSetGranularity(params, granularity); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + ret = umfScalablePoolParamsSetKeepAllMemory(params, keep_all_memory); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + } + + void TearDown() override { + umfScalablePoolParamsDestroy(params); + test::TearDown(); + } + + umf::pool_unique_handle_t makePool() { + umf_memory_provider_handle_t hProvider = nullptr; + umf_memory_pool_handle_t hPool = nullptr; + + auto ret = umfMemoryProviderCreate(&VALIDATOR_PROVIDER_OPS, + &expected_params, &hProvider); + EXPECT_EQ(ret, UMF_RESULT_SUCCESS); + + ret = umfPoolCreate(umfScalablePoolOps(), hProvider, params, + UMF_POOL_CREATE_FLAG_OWN_PROVIDER, &hPool); + EXPECT_EQ(ret, UMF_RESULT_SUCCESS); + + return umf::pool_unique_handle_t(hPool, &umfPoolDestroy); + } + + void allocFreeFlow() { + static const size_t ALLOC_SIZE = 128; + static const size_t NUM_ALLOCATIONS = + expected_params.granularity / ALLOC_SIZE * 20; + std::vector ptrs; + + auto pool = makePool(); + ASSERT_NE(pool, nullptr); + + for (size_t i = 0; i < NUM_ALLOCATIONS; ++i) { + auto *ptr = umfPoolMalloc(pool.get(), ALLOC_SIZE); + ASSERT_NE(ptr, nullptr); + ptrs.push_back(ptr); + } + + for (size_t i = 0; i < NUM_ALLOCATIONS; ++i) { + auto ret = umfPoolFree(pool.get(), ptrs[i]); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + } + + // Now pool can call free during pool destruction + expected_params.keep_all_memory = false; + } + + validation_params_t expected_params; + umf_scalable_pool_params_handle_t params; +}; + +TEST_P(umfScalablePoolParamsTest, allocFree) { allocFreeFlow(); } + +TEST_P(umfScalablePoolParamsTest, updateParams) { + expected_params.granularity *= 2; + umf_result_t ret = umfScalablePoolParamsSetGranularity( + params, expected_params.granularity); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + + expected_params.keep_all_memory = !expected_params.keep_all_memory; + ret = umfScalablePoolParamsSetKeepAllMemory( + params, expected_params.keep_all_memory); + ASSERT_EQ(ret, 
UMF_RESULT_SUCCESS); + + allocFreeFlow(); +} + +TEST_P(umfScalablePoolParamsTest, invalidParams) { + umf_result_t ret = umfScalablePoolParamsCreate(nullptr); + ASSERT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + ret = umfScalablePoolParamsSetGranularity(nullptr, 2 * 1024 * 1024); + ASSERT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + ret = umfScalablePoolParamsSetGranularity(params, 0); + ASSERT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + ret = umfScalablePoolParamsSetKeepAllMemory(nullptr, true); + ASSERT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + ret = umfScalablePoolParamsSetKeepAllMemory(nullptr, false); + ASSERT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + ret = umfScalablePoolParamsDestroy(nullptr); + ASSERT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); +} + +INSTANTIATE_TEST_SUITE_P( + scalablePoolTest, umfScalablePoolParamsTest, + testing::Combine(testing::Values(2 * 1024 * 1024, 3 * 1024 * 1024, + 4 * 1024 * 1024, 5 * 1024 * 1024), + testing::Values(false, true))); diff --git a/test/provider_coarse.cpp b/test/provider_coarse.cpp new file mode 100644 index 000000000..c2de4c06a --- /dev/null +++ b/test/provider_coarse.cpp @@ -0,0 +1,668 @@ +/* + * Copyright (C) 2023-2024 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +*/ + +#include + +#include "provider.hpp" + +#include + +using umf_test::KB; +using umf_test::MB; +using umf_test::test; + +#define GetStats umfCoarseMemoryProviderGetStats + +#define UPSTREAM_NAME "umf_ba_global" +#define BASE_NAME "coarse" +#define COARSE_NAME BASE_NAME " (" UPSTREAM_NAME ")" + +umf_memory_provider_ops_t UMF_MALLOC_MEMORY_PROVIDER_OPS = + umf::providerMakeCOps(); + +struct CoarseWithMemoryStrategyTest + : umf_test::test, + ::testing::WithParamInterface { + void SetUp() override { + test::SetUp(); + allocation_strategy = this->GetParam(); + } + + coarse_memory_provider_strategy_t allocation_strategy; +}; + +INSTANTIATE_TEST_SUITE_P( + CoarseWithMemoryStrategyTest, CoarseWithMemoryStrategyTest, + ::testing::Values(UMF_COARSE_MEMORY_STRATEGY_FASTEST, + UMF_COARSE_MEMORY_STRATEGY_FASTEST_BUT_ONE, + UMF_COARSE_MEMORY_STRATEGY_CHECK_ALL_SIZE)); + +TEST_F(test, coarseProvider_name_upstream) { + umf_memory_provider_handle_t malloc_memory_provider; + umf_result_t umf_result; + + umf_result = umfMemoryProviderCreate(&UMF_MALLOC_MEMORY_PROVIDER_OPS, NULL, + &malloc_memory_provider); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(malloc_memory_provider, nullptr); + + const size_t init_buffer_size = 20 * MB; + + coarse_memory_provider_params_t coarse_memory_provider_params; + // make sure there are no undefined members - prevent a UB + memset(&coarse_memory_provider_params, 0, + sizeof(coarse_memory_provider_params)); + coarse_memory_provider_params.upstream_memory_provider = + malloc_memory_provider; + coarse_memory_provider_params.destroy_upstream_memory_provider = true; + coarse_memory_provider_params.immediate_init_from_upstream = true; + coarse_memory_provider_params.init_buffer = nullptr; + coarse_memory_provider_params.init_buffer_size = init_buffer_size; + + umf_memory_provider_handle_t coarse_memory_provider; + umf_result = umfMemoryProviderCreate(umfCoarseMemoryProviderOps(), + 
&coarse_memory_provider_params, + &coarse_memory_provider); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(coarse_memory_provider, nullptr); + + size_t minPageSize = 0; + umf_result = umfMemoryProviderGetMinPageSize(coarse_memory_provider, + nullptr, &minPageSize); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_UNKNOWN); + ASSERT_EQ(minPageSize, 0); + + size_t pageSize = 0; + umf_result = umfMemoryProviderGetRecommendedPageSize( + coarse_memory_provider, minPageSize, &pageSize); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_UNKNOWN); + ASSERT_EQ(pageSize, minPageSize); + + ASSERT_EQ( + strcmp(umfMemoryProviderGetName(coarse_memory_provider), COARSE_NAME), + 0); + + umfMemoryProviderDestroy(coarse_memory_provider); + // malloc_memory_provider has already been destroyed + // by umfMemoryProviderDestroy(coarse_memory_provider), because: + // coarse_memory_provider_params.destroy_upstream_memory_provider = true; +} + +TEST_F(test, coarseProvider_name_no_upstream) { + umf_result_t umf_result; + + const size_t init_buffer_size = 20 * MB; + + // preallocate some memory and initialize the vector with zeros + std::vector buffer(init_buffer_size, 0); + void *buf = (void *)buffer.data(); + ASSERT_NE(buf, nullptr); + + coarse_memory_provider_params_t coarse_memory_provider_params; + // make sure there are no undefined members - prevent a UB + memset(&coarse_memory_provider_params, 0, + sizeof(coarse_memory_provider_params)); + coarse_memory_provider_params.upstream_memory_provider = nullptr; + coarse_memory_provider_params.immediate_init_from_upstream = false; + coarse_memory_provider_params.init_buffer = buf; + coarse_memory_provider_params.init_buffer_size = init_buffer_size; + + umf_memory_provider_handle_t coarse_memory_provider; + umf_result = umfMemoryProviderCreate(umfCoarseMemoryProviderOps(), + &coarse_memory_provider_params, + &coarse_memory_provider); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(coarse_memory_provider, nullptr); + + size_t minPageSize = 
0; + umf_result = umfMemoryProviderGetMinPageSize(coarse_memory_provider, + nullptr, &minPageSize); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_GT(minPageSize, 0); + + size_t pageSize = 0; + umf_result = umfMemoryProviderGetRecommendedPageSize( + coarse_memory_provider, minPageSize, &pageSize); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_GE(pageSize, minPageSize); + + ASSERT_EQ( + strcmp(umfMemoryProviderGetName(coarse_memory_provider), BASE_NAME), 0); + + umfMemoryProviderDestroy(coarse_memory_provider); +} + +// negative tests + +TEST_P(CoarseWithMemoryStrategyTest, coarseProvider_null_stats) { + ASSERT_EQ(GetStats(nullptr).alloc_size, 0); + ASSERT_EQ(GetStats(nullptr).used_size, 0); + ASSERT_EQ(GetStats(nullptr).num_upstream_blocks, 0); + ASSERT_EQ(GetStats(nullptr).num_all_blocks, 0); + ASSERT_EQ(GetStats(nullptr).num_free_blocks, 0); +} + +// wrong NULL parameters +TEST_P(CoarseWithMemoryStrategyTest, coarseProvider_NULL_params) { + umf_result_t umf_result; + + umf_memory_provider_handle_t coarse_memory_provider = nullptr; + umf_result = umfMemoryProviderCreate(umfCoarseMemoryProviderOps(), nullptr, + &coarse_memory_provider); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + ASSERT_EQ(coarse_memory_provider, nullptr); +} + +// wrong parameters: given no upstream_memory_provider +// nor init_buffer while exactly one of them must be set +TEST_P(CoarseWithMemoryStrategyTest, coarseProvider_wrong_params_0) { + umf_result_t umf_result; + + coarse_memory_provider_params_t coarse_memory_provider_params; + // make sure there are no undefined members - prevent a UB + memset(&coarse_memory_provider_params, 0, + sizeof(coarse_memory_provider_params)); + coarse_memory_provider_params.allocation_strategy = allocation_strategy; + coarse_memory_provider_params.upstream_memory_provider = nullptr; + coarse_memory_provider_params.immediate_init_from_upstream = false; + coarse_memory_provider_params.init_buffer = nullptr; + 
coarse_memory_provider_params.init_buffer_size = 0; + + umf_memory_provider_handle_t coarse_memory_provider = nullptr; + umf_result = umfMemoryProviderCreate(umfCoarseMemoryProviderOps(), + &coarse_memory_provider_params, + &coarse_memory_provider); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + ASSERT_EQ(coarse_memory_provider, nullptr); +} + +// wrong parameters: given both an upstream_memory_provider +// and an init_buffer while only one of them is allowed +TEST_P(CoarseWithMemoryStrategyTest, coarseProvider_wrong_params_1) { + umf_memory_provider_handle_t malloc_memory_provider; + umf_result_t umf_result; + + umf_result = umfMemoryProviderCreate(&UMF_MALLOC_MEMORY_PROVIDER_OPS, NULL, + &malloc_memory_provider); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(malloc_memory_provider, nullptr); + + const size_t init_buffer_size = 20 * MB; + + // preallocate some memory and initialize the vector with zeros + std::vector buffer(init_buffer_size, 0); + void *buf = (void *)buffer.data(); + ASSERT_NE(buf, nullptr); + + coarse_memory_provider_params_t coarse_memory_provider_params; + // make sure there are no undefined members - prevent a UB + memset(&coarse_memory_provider_params, 0, + sizeof(coarse_memory_provider_params)); + coarse_memory_provider_params.allocation_strategy = allocation_strategy; + coarse_memory_provider_params.upstream_memory_provider = + malloc_memory_provider; + coarse_memory_provider_params.immediate_init_from_upstream = true; + coarse_memory_provider_params.init_buffer = buf; + coarse_memory_provider_params.init_buffer_size = init_buffer_size; + + umf_memory_provider_handle_t coarse_memory_provider = nullptr; + umf_result = umfMemoryProviderCreate(umfCoarseMemoryProviderOps(), + &coarse_memory_provider_params, + &coarse_memory_provider); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + ASSERT_EQ(coarse_memory_provider, nullptr); + + umfMemoryProviderDestroy(malloc_memory_provider); +} + +// wrong parameters: 
init_buffer_size must not equal 0 when immediate_init_from_upstream is true +TEST_P(CoarseWithMemoryStrategyTest, coarseProvider_wrong_params_2) { + umf_memory_provider_handle_t malloc_memory_provider; + umf_result_t umf_result; + + umf_result = umfMemoryProviderCreate(&UMF_MALLOC_MEMORY_PROVIDER_OPS, NULL, + &malloc_memory_provider); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(malloc_memory_provider, nullptr); + + coarse_memory_provider_params_t coarse_memory_provider_params; + // make sure there are no undefined members - prevent a UB + memset(&coarse_memory_provider_params, 0, + sizeof(coarse_memory_provider_params)); + coarse_memory_provider_params.allocation_strategy = allocation_strategy; + coarse_memory_provider_params.upstream_memory_provider = + malloc_memory_provider; + coarse_memory_provider_params.immediate_init_from_upstream = true; + coarse_memory_provider_params.init_buffer = nullptr; + coarse_memory_provider_params.init_buffer_size = 0; + + umf_memory_provider_handle_t coarse_memory_provider = nullptr; + umf_result = umfMemoryProviderCreate(umfCoarseMemoryProviderOps(), + &coarse_memory_provider_params, + &coarse_memory_provider); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + ASSERT_EQ(coarse_memory_provider, nullptr); + + umfMemoryProviderDestroy(malloc_memory_provider); +} + +// wrong parameters: init_buffer_size must not equal 0 when init_buffer is not NULL +TEST_P(CoarseWithMemoryStrategyTest, coarseProvider_wrong_params_3) { + umf_result_t umf_result; + + const size_t init_buffer_size = 20 * MB; + + // preallocate some memory and initialize the vector with zeros + std::vector buffer(init_buffer_size, 0); + void *buf = (void *)buffer.data(); + ASSERT_NE(buf, nullptr); + + coarse_memory_provider_params_t coarse_memory_provider_params; + // make sure there are no undefined members - prevent a UB + memset(&coarse_memory_provider_params, 0, + sizeof(coarse_memory_provider_params)); + 
coarse_memory_provider_params.allocation_strategy = allocation_strategy; + coarse_memory_provider_params.upstream_memory_provider = nullptr; + coarse_memory_provider_params.immediate_init_from_upstream = false; + coarse_memory_provider_params.init_buffer = buf; + coarse_memory_provider_params.init_buffer_size = 0; + + umf_memory_provider_handle_t coarse_memory_provider = nullptr; + umf_result = umfMemoryProviderCreate(umfCoarseMemoryProviderOps(), + &coarse_memory_provider_params, + &coarse_memory_provider); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + ASSERT_EQ(coarse_memory_provider, nullptr); +} + +// wrong parameters: init_buffer_size must equal 0 when init_buffer is NULL and immediate_init_from_upstream is false +TEST_P(CoarseWithMemoryStrategyTest, coarseProvider_wrong_params_4) { + umf_memory_provider_handle_t malloc_memory_provider; + umf_result_t umf_result; + + umf_result = umfMemoryProviderCreate(&UMF_MALLOC_MEMORY_PROVIDER_OPS, NULL, + &malloc_memory_provider); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(malloc_memory_provider, nullptr); + + coarse_memory_provider_params_t coarse_memory_provider_params; + // make sure there are no undefined members - prevent a UB + memset(&coarse_memory_provider_params, 0, + sizeof(coarse_memory_provider_params)); + coarse_memory_provider_params.allocation_strategy = allocation_strategy; + coarse_memory_provider_params.upstream_memory_provider = + malloc_memory_provider; + coarse_memory_provider_params.immediate_init_from_upstream = false; + coarse_memory_provider_params.init_buffer = NULL; + coarse_memory_provider_params.init_buffer_size = 20 * MB; + + umf_memory_provider_handle_t coarse_memory_provider = nullptr; + umf_result = umfMemoryProviderCreate(umfCoarseMemoryProviderOps(), + &coarse_memory_provider_params, + &coarse_memory_provider); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + ASSERT_EQ(coarse_memory_provider, nullptr); + + 
umfMemoryProviderDestroy(malloc_memory_provider); +} + +// wrong parameters: destroy_upstream_memory_provider is true, but an upstream provider is not provided +TEST_P(CoarseWithMemoryStrategyTest, coarseProvider_wrong_params_5) { + umf_result_t umf_result; + + const size_t init_buffer_size = 20 * MB; + + // preallocate some memory and initialize the vector with zeros + std::vector buffer(init_buffer_size, 0); + void *buf = (void *)buffer.data(); + ASSERT_NE(buf, nullptr); + + coarse_memory_provider_params_t coarse_memory_provider_params; + // make sure there are no undefined members - prevent a UB + memset(&coarse_memory_provider_params, 0, + sizeof(coarse_memory_provider_params)); + coarse_memory_provider_params.allocation_strategy = allocation_strategy; + coarse_memory_provider_params.upstream_memory_provider = nullptr; + coarse_memory_provider_params.destroy_upstream_memory_provider = true; + coarse_memory_provider_params.immediate_init_from_upstream = false; + coarse_memory_provider_params.init_buffer = buf; + coarse_memory_provider_params.init_buffer_size = init_buffer_size; + + umf_memory_provider_handle_t coarse_memory_provider = nullptr; + umf_result = umfMemoryProviderCreate(umfCoarseMemoryProviderOps(), + &coarse_memory_provider_params, + &coarse_memory_provider); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + ASSERT_EQ(coarse_memory_provider, nullptr); +} + +TEST_P(CoarseWithMemoryStrategyTest, coarseProvider_split_merge) { + umf_memory_provider_handle_t malloc_memory_provider; + umf_result_t umf_result; + + umf_result = umfMemoryProviderCreate(&UMF_MALLOC_MEMORY_PROVIDER_OPS, NULL, + &malloc_memory_provider); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(malloc_memory_provider, nullptr); + + const size_t init_buffer_size = 20 * MB; + + coarse_memory_provider_params_t coarse_memory_provider_params; + // make sure there are no undefined members - prevent a UB + memset(&coarse_memory_provider_params, 0, + 
sizeof(coarse_memory_provider_params)); + coarse_memory_provider_params.upstream_memory_provider = + malloc_memory_provider; + coarse_memory_provider_params.immediate_init_from_upstream = true; + coarse_memory_provider_params.init_buffer = NULL; + coarse_memory_provider_params.init_buffer_size = init_buffer_size; + + umf_memory_provider_handle_t coarse_memory_provider; + umf_result = umfMemoryProviderCreate(umfCoarseMemoryProviderOps(), + &coarse_memory_provider_params, + &coarse_memory_provider); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(coarse_memory_provider, nullptr); + + umf_memory_provider_handle_t cp = coarse_memory_provider; + char *ptr = nullptr; + + ASSERT_EQ(GetStats(cp).used_size, 0 * MB); + ASSERT_EQ(GetStats(cp).alloc_size, init_buffer_size); + ASSERT_EQ(GetStats(cp).num_all_blocks, 1); + + /* test umfMemoryProviderAllocationSplit */ + umf_result = umfMemoryProviderAlloc(cp, 2 * MB, 0, (void **)&ptr); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(ptr, nullptr); + ASSERT_EQ(GetStats(cp).used_size, 2 * MB); + ASSERT_EQ(GetStats(cp).alloc_size, init_buffer_size); + ASSERT_EQ(GetStats(cp).num_all_blocks, 2); + + umf_result = umfMemoryProviderAllocationSplit(cp, ptr, 2 * MB, 1 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_EQ(GetStats(cp).used_size, 2 * MB); + ASSERT_EQ(GetStats(cp).alloc_size, init_buffer_size); + ASSERT_EQ(GetStats(cp).num_all_blocks, 3); + + umf_result = umfMemoryProviderFree(cp, (ptr + 1 * MB), 1 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_EQ(GetStats(cp).used_size, 1 * MB); + ASSERT_EQ(GetStats(cp).alloc_size, init_buffer_size); + ASSERT_EQ(GetStats(cp).num_all_blocks, 2); + + umf_result = umfMemoryProviderFree(cp, ptr, 1 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_EQ(GetStats(cp).used_size, 0); + ASSERT_EQ(GetStats(cp).alloc_size, init_buffer_size); + ASSERT_EQ(GetStats(cp).num_all_blocks, 1); + + /* test umfMemoryProviderAllocationMerge */ + umf_result = 
umfMemoryProviderAlloc(cp, 2 * MB, 0, (void **)&ptr); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(ptr, nullptr); + ASSERT_EQ(GetStats(cp).used_size, 2 * MB); + ASSERT_EQ(GetStats(cp).alloc_size, init_buffer_size); + ASSERT_EQ(GetStats(cp).num_all_blocks, 2); + + umf_result = umfMemoryProviderAllocationSplit(cp, ptr, 2 * MB, 1 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_EQ(GetStats(cp).used_size, 2 * MB); + ASSERT_EQ(GetStats(cp).alloc_size, init_buffer_size); + ASSERT_EQ(GetStats(cp).num_all_blocks, 3); + + umf_result = + umfMemoryProviderAllocationMerge(cp, ptr, (ptr + 1 * MB), 2 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_EQ(GetStats(cp).used_size, 2 * MB); + ASSERT_EQ(GetStats(cp).alloc_size, init_buffer_size); + ASSERT_EQ(GetStats(cp).num_all_blocks, 2); + + umf_result = umfMemoryProviderFree(cp, ptr, 2 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_EQ(GetStats(cp).used_size, 0); + ASSERT_EQ(GetStats(cp).alloc_size, init_buffer_size); + ASSERT_EQ(GetStats(cp).num_all_blocks, 1); + + umfMemoryProviderDestroy(coarse_memory_provider); + umfMemoryProviderDestroy(malloc_memory_provider); +} + +TEST_P(CoarseWithMemoryStrategyTest, coarseProvider_split_merge_negative) { + umf_memory_provider_handle_t malloc_memory_provider; + umf_result_t umf_result; + + umf_result = umfMemoryProviderCreate(&UMF_MALLOC_MEMORY_PROVIDER_OPS, NULL, + &malloc_memory_provider); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(malloc_memory_provider, nullptr); + + const size_t init_buffer_size = 20 * MB; + + coarse_memory_provider_params_t coarse_memory_provider_params; + // make sure there are no undefined members - prevent a UB + memset(&coarse_memory_provider_params, 0, + sizeof(coarse_memory_provider_params)); + coarse_memory_provider_params.upstream_memory_provider = + malloc_memory_provider; + coarse_memory_provider_params.immediate_init_from_upstream = true; + coarse_memory_provider_params.init_buffer = NULL; + 
coarse_memory_provider_params.init_buffer_size = init_buffer_size; + + umf_memory_provider_handle_t coarse_memory_provider; + umf_result = umfMemoryProviderCreate(umfCoarseMemoryProviderOps(), + &coarse_memory_provider_params, + &coarse_memory_provider); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(coarse_memory_provider, nullptr); + + umf_memory_provider_handle_t cp = coarse_memory_provider; + char *ptr = nullptr; + + ASSERT_EQ(GetStats(cp).used_size, 0 * MB); + ASSERT_EQ(GetStats(cp).alloc_size, init_buffer_size); + ASSERT_EQ(GetStats(cp).num_all_blocks, 1); + + /* test umfMemoryProviderAllocationSplit */ + umf_result = umfMemoryProviderAlloc(cp, 6 * MB, 0, (void **)&ptr); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(ptr, nullptr); + ASSERT_EQ(GetStats(cp).used_size, 6 * MB); + ASSERT_EQ(GetStats(cp).alloc_size, init_buffer_size); + ASSERT_EQ(GetStats(cp).num_all_blocks, 2); + + // firstSize >= totalSize + umf_result = umfMemoryProviderAllocationSplit(cp, ptr, 6 * MB, 6 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + // firstSize == 0 + umf_result = umfMemoryProviderAllocationSplit(cp, ptr, 6 * MB, 0); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + // wrong totalSize + umf_result = umfMemoryProviderAllocationSplit(cp, ptr, 5 * MB, 1 * KB); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + /* test umfMemoryProviderAllocationMerge */ + // split (6 * MB) block into (1 * MB) + (5 * MB) + umf_result = umfMemoryProviderAllocationSplit(cp, ptr, 6 * MB, 1 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_EQ(GetStats(cp).used_size, 6 * MB); + ASSERT_EQ(GetStats(cp).alloc_size, init_buffer_size); + ASSERT_EQ(GetStats(cp).num_all_blocks, 3); + + // split (5 * MB) block into (2 * MB) + (3 * MB) + umf_result = + umfMemoryProviderAllocationSplit(cp, (ptr + 1 * MB), 5 * MB, 2 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_EQ(GetStats(cp).used_size, 6 * MB); + 
ASSERT_EQ(GetStats(cp).alloc_size, init_buffer_size); + ASSERT_EQ(GetStats(cp).num_all_blocks, 4); + + // now we have 3 blocks: (1 * MB) + (2 * MB) + (3 * MB) + + // highPtr <= lowPtr + umf_result = + umfMemoryProviderAllocationMerge(cp, (ptr + 1 * MB), ptr, 2 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + // highPtr - lowPtr >= totalSize + umf_result = + umfMemoryProviderAllocationMerge(cp, ptr, (ptr + 1 * MB), 1 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + // low_block->size + high_block->size != totalSize + umf_result = + umfMemoryProviderAllocationMerge(cp, ptr, (ptr + 1 * MB), 5 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + // not adjacent blocks + umf_result = + umfMemoryProviderAllocationMerge(cp, ptr, (ptr + 3 * MB), 4 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + umf_result = umfMemoryProviderFree(cp, ptr, 1 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_EQ(GetStats(cp).used_size, 5 * MB); + ASSERT_EQ(GetStats(cp).alloc_size, init_buffer_size); + ASSERT_EQ(GetStats(cp).num_all_blocks, 4); + + umf_result = umfMemoryProviderFree(cp, (ptr + 1 * MB), 2 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_EQ(GetStats(cp).used_size, 3 * MB); + ASSERT_EQ(GetStats(cp).alloc_size, init_buffer_size); + ASSERT_EQ(GetStats(cp).num_all_blocks, 3); + + umf_result = umfMemoryProviderFree(cp, (ptr + 3 * MB), 3 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_EQ(GetStats(cp).used_size, 0); + ASSERT_EQ(GetStats(cp).alloc_size, init_buffer_size); + ASSERT_EQ(GetStats(cp).num_all_blocks, 1); + + umfMemoryProviderDestroy(coarse_memory_provider); + umfMemoryProviderDestroy(malloc_memory_provider); +} + +TEST_P(CoarseWithMemoryStrategyTest, coarseProvider_purge_no_upstream) { + umf_result_t umf_result; + + const size_t init_buffer_size = 20 * MB; + + // preallocate some memory and initialize the vector with zeros + std::vector 
buffer(init_buffer_size, 0); + void *buf = (void *)buffer.data(); + ASSERT_NE(buf, nullptr); + + coarse_memory_provider_params_t coarse_memory_provider_params; + // make sure there are no undefined members - prevent a UB + memset(&coarse_memory_provider_params, 0, + sizeof(coarse_memory_provider_params)); + coarse_memory_provider_params.allocation_strategy = allocation_strategy; + coarse_memory_provider_params.upstream_memory_provider = nullptr; + coarse_memory_provider_params.immediate_init_from_upstream = false; + coarse_memory_provider_params.init_buffer = buf; + coarse_memory_provider_params.init_buffer_size = init_buffer_size; + + umf_memory_provider_handle_t coarse_memory_provider = nullptr; + umf_result = umfMemoryProviderCreate(umfCoarseMemoryProviderOps(), + &coarse_memory_provider_params, + &coarse_memory_provider); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(coarse_memory_provider, nullptr); + + // umfMemoryProviderPurgeLazy + // provider == NULL + umf_result = umfMemoryProviderPurgeLazy(nullptr, (void *)0x01, 1); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + // ptr == NULL + umf_result = umfMemoryProviderPurgeLazy(coarse_memory_provider, nullptr, 1); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + // no upstream_memory_provider + umf_result = + umfMemoryProviderPurgeLazy(coarse_memory_provider, (void *)0x01, 1); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); + + // umfMemoryProviderPurgeForce + // provider == NULL + umf_result = umfMemoryProviderPurgeForce(nullptr, (void *)0x01, 1); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + // ptr == NULL + umf_result = + umfMemoryProviderPurgeForce(coarse_memory_provider, nullptr, 1); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + // no upstream_memory_provider + umf_result = + umfMemoryProviderPurgeForce(coarse_memory_provider, (void *)0x01, 1); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); + + 
umfMemoryProviderDestroy(coarse_memory_provider); +} + +TEST_P(CoarseWithMemoryStrategyTest, coarseProvider_purge_with_upstream) { + umf_memory_provider_handle_t malloc_memory_provider; + umf_result_t umf_result; + + umf_result = umfMemoryProviderCreate(&UMF_MALLOC_MEMORY_PROVIDER_OPS, NULL, + &malloc_memory_provider); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(malloc_memory_provider, nullptr); + + const size_t init_buffer_size = 20 * MB; + + coarse_memory_provider_params_t coarse_memory_provider_params; + // make sure there are no undefined members - prevent a UB + memset(&coarse_memory_provider_params, 0, + sizeof(coarse_memory_provider_params)); + coarse_memory_provider_params.upstream_memory_provider = + malloc_memory_provider; + coarse_memory_provider_params.immediate_init_from_upstream = true; + coarse_memory_provider_params.init_buffer = NULL; + coarse_memory_provider_params.init_buffer_size = init_buffer_size; + + umf_memory_provider_handle_t coarse_memory_provider; + umf_result = umfMemoryProviderCreate(umfCoarseMemoryProviderOps(), + &coarse_memory_provider_params, + &coarse_memory_provider); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(coarse_memory_provider, nullptr); + + // umfMemoryProviderPurgeLazy + // provider == NULL + umf_result = umfMemoryProviderPurgeLazy(nullptr, (void *)0x01, 1); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + // ptr == NULL + umf_result = umfMemoryProviderPurgeLazy(coarse_memory_provider, nullptr, 1); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + // malloc_memory_provider returns UMF_RESULT_ERROR_UNKNOWN + umf_result = + umfMemoryProviderPurgeLazy(coarse_memory_provider, (void *)0x01, 1); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_UNKNOWN); + + // umfMemoryProviderPurgeForce + // provider == NULL + umf_result = umfMemoryProviderPurgeForce(nullptr, (void *)0x01, 1); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + // ptr == NULL + umf_result = + 
umfMemoryProviderPurgeForce(coarse_memory_provider, nullptr, 1); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + // malloc_memory_provider returns UMF_RESULT_ERROR_UNKNOWN + umf_result = + umfMemoryProviderPurgeForce(coarse_memory_provider, (void *)0x01, 1); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_UNKNOWN); + + umfMemoryProviderDestroy(coarse_memory_provider); + umfMemoryProviderDestroy(malloc_memory_provider); +} diff --git a/test/provider_devdax_memory.cpp b/test/provider_devdax_memory.cpp new file mode 100644 index 000000000..0fd0705da --- /dev/null +++ b/test/provider_devdax_memory.cpp @@ -0,0 +1,485 @@ +// Copyright (C) 2024 Intel Corporation +// Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#ifndef _WIN32 +#include "test_helpers_linux.h" +#include +#include +#include +#endif + +#include "base.hpp" + +#include "cpp_helpers.hpp" +#include "test_helpers.h" + +#include +#include + +using umf_test::test; + +#define INVALID_PTR ((void *)0x01) + +typedef enum purge_t { + PURGE_NONE = 0, + PURGE_LAZY = 1, + PURGE_FORCE = 2, +} purge_t; + +static const char *Native_error_str[] = { + "success", // UMF_DEVDAX_RESULT_SUCCESS + "memory allocation failed", // UMF_DEVDAX_RESULT_ERROR_ALLOC_FAILED + "allocated address is not aligned", // UMF_DEVDAX_RESULT_ERROR_ADDRESS_NOT_ALIGNED + "memory deallocation failed", // UMF_DEVDAX_RESULT_ERROR_FREE_FAILED + "force purging failed", // UMF_DEVDAX_RESULT_ERROR_PURGE_FORCE_FAILED +}; + +// test helpers + +static int compare_native_error_str(const char *message, int error) { + const char *error_str = Native_error_str[error - UMF_DEVDAX_RESULT_SUCCESS]; + size_t len = strlen(error_str); + return strncmp(message, error_str, len); +} + +using providerCreateExtParams = std::tuple; + +static void providerCreateExt(providerCreateExtParams params, + umf::provider_unique_handle_t *handle) { + umf_memory_provider_handle_t hProvider = nullptr; 
+ auto [provider_ops, provider_params] = params; + + auto ret = + umfMemoryProviderCreate(provider_ops, provider_params, &hProvider); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + ASSERT_NE(hProvider, nullptr); + + *handle = + umf::provider_unique_handle_t(hProvider, &umfMemoryProviderDestroy); +} + +struct umfProviderTest + : umf_test::test, + ::testing::WithParamInterface { + void SetUp() override { + test::SetUp(); + providerCreateExt(this->GetParam(), &provider); + umf_result_t umf_result = umfMemoryProviderGetMinPageSize( + provider.get(), nullptr, &page_size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + page_plus_64 = page_size + 64; + } + + void TearDown() override { test::TearDown(); } + + umf::provider_unique_handle_t provider; + size_t page_size; + size_t page_plus_64; +}; + +static void test_alloc_free_success(umf_memory_provider_handle_t provider, + size_t size, size_t alignment, + purge_t purge) { + void *ptr = nullptr; + + umf_result_t umf_result = + umfMemoryProviderAlloc(provider, size, alignment, &ptr); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(ptr, nullptr); + + memset(ptr, 0xFF, size); + + if (purge == PURGE_LAZY) { + umf_result = umfMemoryProviderPurgeLazy(provider, ptr, size); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); + } else if (purge == PURGE_FORCE) { + umf_result = umfMemoryProviderPurgeForce(provider, ptr, size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + } + + umf_result = umfMemoryProviderFree(provider, ptr, size); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); +} + +static void verify_last_native_error(umf_memory_provider_handle_t provider, + int32_t err) { + const char *message; + int32_t error; + umfMemoryProviderGetLastNativeError(provider, &message, &error); + ASSERT_EQ(error, err); + ASSERT_EQ(compare_native_error_str(message, error), 0); +} + +static void test_alloc_failure(umf_memory_provider_handle_t provider, + size_t size, size_t alignment, + umf_result_t result, int32_t err) { + void 
*ptr = nullptr; + umf_result_t umf_result = + umfMemoryProviderAlloc(provider, size, alignment, &ptr); + ASSERT_EQ(umf_result, result); + ASSERT_EQ(ptr, nullptr); + + if (umf_result == UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC) { + verify_last_native_error(provider, err); + } +} + +// TESTS + +// Test checking if devdax was mapped with the MAP_SYNC flag: +TEST_F(test, test_if_mapped_with_MAP_SYNC) { + umf_memory_provider_handle_t hProvider = nullptr; + umf_result_t umf_result; + + char *path = getenv("UMF_TESTS_DEVDAX_PATH"); + if (path == nullptr || path[0] == 0) { + GTEST_SKIP() << "Test skipped, UMF_TESTS_DEVDAX_PATH is not set"; + } + + char *size_str = getenv("UMF_TESTS_DEVDAX_SIZE"); + if (size_str == nullptr || size_str[0] == 0) { + GTEST_SKIP() << "Test skipped, UMF_TESTS_DEVDAX_SIZE is not set"; + } + + size_t size = atol(size_str); + umf_devdax_memory_provider_params_handle_t params = NULL; + umf_result = umfDevDaxMemoryProviderParamsCreate(¶ms, path, size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(params, nullptr); + + umf_result = umfMemoryProviderCreate(umfDevDaxMemoryProviderOps(), params, + &hProvider); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(hProvider, nullptr); + + char *buf; + umf_result = umfMemoryProviderAlloc(hProvider, size, 0, (void **)&buf); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(buf, nullptr); + + bool flag_found = is_mapped_with_MAP_SYNC(path, buf, size); + + umf_result = umfMemoryProviderFree(hProvider, buf, size); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); + + umfMemoryProviderDestroy(hProvider); + + // fail test if the "sf" flag was not found + ASSERT_EQ(flag_found, true); +} + +// positive tests using test_alloc_free_success + +using devdax_params_unique_handle_t = + std::unique_ptr; + +devdax_params_unique_handle_t create_devdax_params() { + char *path = getenv("UMF_TESTS_DEVDAX_PATH"); + char *size = getenv("UMF_TESTS_DEVDAX_SIZE"); + if (path == nullptr || path[0] == 0 
|| size == nullptr || size[0] == 0) { + return devdax_params_unique_handle_t( + nullptr, &umfDevDaxMemoryProviderParamsDestroy); + } + + umf_devdax_memory_provider_params_handle_t params = NULL; + umf_result_t res = + umfDevDaxMemoryProviderParamsCreate(¶ms, path, atol(size)); + if (res != UMF_RESULT_SUCCESS) { + throw std::runtime_error( + "Failed to create DevDax Memory Provider params"); + } + + return devdax_params_unique_handle_t(params, + &umfDevDaxMemoryProviderParamsDestroy); +} + +auto defaultDevDaxParams = create_devdax_params(); + +static std::vector devdaxProviderTestParamsList = + defaultDevDaxParams.get() + ? std::vector{providerCreateExtParams{ + umfDevDaxMemoryProviderOps(), defaultDevDaxParams.get()}} + : std::vector{}; + +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(umfProviderTest); + +INSTANTIATE_TEST_SUITE_P(devdaxProviderTest, umfProviderTest, + ::testing::ValuesIn(devdaxProviderTestParamsList)); + +TEST_P(umfProviderTest, create_destroy) {} + +TEST_P(umfProviderTest, alloc_page_align_0) { + test_alloc_free_success(provider.get(), page_size, 0, PURGE_NONE); +} + +TEST_P(umfProviderTest, alloc_2page_align_page_size) { + test_alloc_free_success(provider.get(), 2 * page_size, page_size, + PURGE_NONE); +} + +TEST_P(umfProviderTest, alloc_page64_align_page_div_2) { + test_alloc_free_success(provider.get(), page_plus_64, page_size / 2, + PURGE_NONE); +} + +TEST_P(umfProviderTest, purge_lazy) { + test_alloc_free_success(provider.get(), page_size, 0, PURGE_LAZY); +} + +TEST_P(umfProviderTest, purge_force) { + test_alloc_free_success(provider.get(), page_size, 0, PURGE_FORCE); +} + +// negative tests using test_alloc_failure + +TEST_P(umfProviderTest, alloc_page64_align_page_minus_1_WRONG_ALIGNMENT_1) { + test_alloc_failure(provider.get(), page_plus_64, page_size - 1, + UMF_RESULT_ERROR_INVALID_ARGUMENT, 0); +} + +TEST_P(umfProviderTest, alloc_page64_align_one_half_pages_WRONG_ALIGNMENT_2) { + test_alloc_failure(provider.get(), page_plus_64, + 
page_size + (page_size / 2), + UMF_RESULT_ERROR_INVALID_ARGUMENT, 0); +} + +TEST_P(umfProviderTest, alloc_page64_WRONG_ALIGNMENT_3_pages) { + test_alloc_failure(provider.get(), page_plus_64, 3 * page_size, + UMF_RESULT_ERROR_INVALID_ARGUMENT, 0); +} + +TEST_P(umfProviderTest, alloc_3_pages_WRONG_ALIGNMENT_3_pages) { + test_alloc_failure(provider.get(), 3 * page_size, 3 * page_size, + UMF_RESULT_ERROR_INVALID_ARGUMENT, 0); +} + +TEST_P(umfProviderTest, alloc_WRONG_SIZE) { + size_t size = (size_t)(-1) & ~(page_size - 1); + test_alloc_failure(provider.get(), size, 0, + UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC, + UMF_DEVDAX_RESULT_ERROR_ALLOC_FAILED); +} + +// other positive tests + +TEST_P(umfProviderTest, get_min_page_size) { + size_t min_page_size; + umf_result_t umf_result = umfMemoryProviderGetMinPageSize( + provider.get(), nullptr, &min_page_size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_LE(min_page_size, page_size); +} + +TEST_P(umfProviderTest, get_recommended_page_size) { + size_t min_page_size; + umf_result_t umf_result = umfMemoryProviderGetMinPageSize( + provider.get(), nullptr, &min_page_size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_LE(min_page_size, page_size); + + size_t recommended_page_size; + umf_result = umfMemoryProviderGetRecommendedPageSize( + provider.get(), 0, &recommended_page_size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_GE(recommended_page_size, min_page_size); +} + +TEST_P(umfProviderTest, get_name) { + const char *name = umfMemoryProviderGetName(provider.get()); + ASSERT_STREQ(name, "DEVDAX"); +} + +TEST_P(umfProviderTest, free_size_0_ptr_not_null) { + umf_result_t umf_result = + umfMemoryProviderFree(provider.get(), INVALID_PTR, 0); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); +} + +TEST_P(umfProviderTest, free_NULL) { + umf_result_t umf_result = umfMemoryProviderFree(provider.get(), nullptr, 0); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); +} + +// other negative tests + 
+TEST_P(umfProviderTest, free_INVALID_POINTER_SIZE_GT_0) { + umf_result_t umf_result = + umfMemoryProviderFree(provider.get(), INVALID_PTR, page_plus_64); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); +} + +TEST_P(umfProviderTest, purge_lazy_INVALID_POINTER) { + umf_result_t umf_result = + umfMemoryProviderPurgeLazy(provider.get(), INVALID_PTR, 1); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); +} + +TEST_P(umfProviderTest, purge_force_INVALID_POINTER) { + umf_result_t umf_result = + umfMemoryProviderPurgeForce(provider.get(), INVALID_PTR, 1); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC); + + verify_last_native_error(provider.get(), + UMF_DEVDAX_RESULT_ERROR_PURGE_FORCE_FAILED); +} + +// params tests + +TEST_F(test, params_protection_flag) { + umf_devdax_memory_provider_params_handle_t params = nullptr; + umf_result_t ret = + umfDevDaxMemoryProviderParamsCreate(¶ms, "/dev/dax0.0", 4096); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + ASSERT_NE(params, nullptr); + + //test all valid combinations + for (unsigned protection = UMF_PROTECTION_NONE; + protection < (UMF_PROTECTION_MAX - 1) << 1; ++protection) { + ret = umfDevDaxMemoryProviderParamsSetProtection(params, protection); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + } + + umfDevDaxMemoryProviderParamsDestroy(params); +} + +// negative params tests + +TEST_F(test, params_invalid_protection_flag) { + umf_devdax_memory_provider_params_handle_t params = nullptr; + umf_result_t ret = + umfDevDaxMemoryProviderParamsCreate(¶ms, "/dev/dax0.0", 4096); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + ASSERT_NE(params, nullptr); + + ret = umfDevDaxMemoryProviderParamsSetProtection(params, 0); + ASSERT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + for (unsigned protection = UMF_PROTECTION_NONE; + protection < (UMF_PROTECTION_MAX - 1) << 1; ++protection) { + unsigned invalid_protection = protection | (UMF_PROTECTION_MAX << 1); + ret = umfDevDaxMemoryProviderParamsSetProtection(params, + 
invalid_protection); + ASSERT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); + } + + umfDevDaxMemoryProviderParamsDestroy(params); +} + +TEST_F(test, params_null_handle) { + auto ret = + umfDevDaxMemoryProviderParamsCreate(nullptr, "/dev/dax0.0", 4096); + ASSERT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + ret = umfDevDaxMemoryProviderParamsDestroy(nullptr); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + + ret = + umfDevDaxMemoryProviderParamsSetDeviceDax(nullptr, "/dev/dax0.0", 4096); + ASSERT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + ret = umfDevDaxMemoryProviderParamsSetProtection(nullptr, 1); + ASSERT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); +} + +TEST_F(test, create_empty_path) { + const char *path = ""; + umf_devdax_memory_provider_params_handle_t wrong_params = NULL; + auto ret = umfDevDaxMemoryProviderParamsCreate(&wrong_params, path, 4096); + ASSERT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); + ASSERT_EQ(wrong_params, nullptr); +} + +TEST_F(test, create_null_path) { + const char *path = nullptr; + umf_devdax_memory_provider_params_handle_t wrong_params = NULL; + auto ret = umfDevDaxMemoryProviderParamsCreate(&wrong_params, path, 4096); + ASSERT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); + ASSERT_EQ(wrong_params, nullptr); +} + +TEST_F(test, set_empty_path) { + const char *path = "tmp"; + const char *empty_path = ""; + umf_devdax_memory_provider_params_handle_t params = NULL; + auto ret = umfDevDaxMemoryProviderParamsCreate(¶ms, path, 4096); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + ASSERT_NE(params, nullptr); + + ret = umfDevDaxMemoryProviderParamsSetDeviceDax(params, empty_path, 4096); + ASSERT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + ret = umfDevDaxMemoryProviderParamsDestroy(params); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); +} + +TEST_F(test, set_null_path) { + const char *path = "tmp"; + const char *null_path = nullptr; + umf_devdax_memory_provider_params_handle_t params = NULL; + auto ret = umfDevDaxMemoryProviderParamsCreate(¶ms, path, 4096); + 
ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + ASSERT_NE(params, nullptr); + + ret = umfDevDaxMemoryProviderParamsSetDeviceDax(params, null_path, 4096); + ASSERT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + ret = umfDevDaxMemoryProviderParamsDestroy(params); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); +} + +TEST_F(test, create_wrong_path) { + umf_memory_provider_handle_t hProvider = nullptr; + const char *path = "/tmp/dev/dax0.0"; + umf_devdax_memory_provider_params_handle_t wrong_params = nullptr; + + auto ret = umfDevDaxMemoryProviderParamsCreate(&wrong_params, path, 4096); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + ASSERT_NE(wrong_params, nullptr); + + ret = umfMemoryProviderCreate(umfDevDaxMemoryProviderOps(), wrong_params, + &hProvider); + EXPECT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); + EXPECT_EQ(hProvider, nullptr); + + ret = umfDevDaxMemoryProviderParamsDestroy(wrong_params); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); +} + +TEST_F(test, create_wrong_path_not_exist) { + umf_memory_provider_handle_t hProvider = nullptr; + const char *path = "/dev/dax1.1"; + umf_devdax_memory_provider_params_handle_t wrong_params = nullptr; + + auto ret = umfDevDaxMemoryProviderParamsCreate(&wrong_params, path, 4096); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + ASSERT_NE(wrong_params, nullptr); + + ret = umfMemoryProviderCreate(umfDevDaxMemoryProviderOps(), wrong_params, + &hProvider); + EXPECT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); + EXPECT_EQ(hProvider, nullptr); + + ret = umfDevDaxMemoryProviderParamsDestroy(wrong_params); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); +} + +TEST_F(test, create_wrong_size_0) { + umf_memory_provider_handle_t hProvider = nullptr; + const char *path = "/dev/dax0.0"; + umf_devdax_memory_provider_params_handle_t wrong_params = nullptr; + + auto ret = umfDevDaxMemoryProviderParamsCreate(&wrong_params, path, 0); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + ASSERT_NE(wrong_params, nullptr); + + ret = umfMemoryProviderCreate(umfDevDaxMemoryProviderOps(), wrong_params, + 
&hProvider); + EXPECT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); + EXPECT_EQ(hProvider, nullptr); + + ret = umfDevDaxMemoryProviderParamsDestroy(wrong_params); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); +} diff --git a/test/provider_devdax_memory_ipc.cpp b/test/provider_devdax_memory_ipc.cpp new file mode 100644 index 000000000..3941f66e9 --- /dev/null +++ b/test/provider_devdax_memory_ipc.cpp @@ -0,0 +1,73 @@ +// Copyright (C) 2024 Intel Corporation +// Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include +#include +#ifdef UMF_POOL_JEMALLOC_ENABLED +#include +#endif +#ifdef UMF_POOL_SCALABLE_ENABLED +#include +#endif + +#include "ipcFixtures.hpp" + +using umf_test::test; + +using devdax_params_unique_handle_t = + std::unique_ptr; + +devdax_params_unique_handle_t create_devdax_params() { + char *path = getenv("UMF_TESTS_DEVDAX_PATH"); + char *size = getenv("UMF_TESTS_DEVDAX_SIZE"); + if (path == nullptr || path[0] == 0 || size == nullptr || size[0] == 0) { + return devdax_params_unique_handle_t( + nullptr, &umfDevDaxMemoryProviderParamsDestroy); + } + + umf_devdax_memory_provider_params_handle_t params = NULL; + umf_result_t res = + umfDevDaxMemoryProviderParamsCreate(¶ms, path, atol(size)); + if (res != UMF_RESULT_SUCCESS) { + throw std::runtime_error( + "Failed to create DevDax Memory Provider params"); + } + + return devdax_params_unique_handle_t(params, + &umfDevDaxMemoryProviderParamsDestroy); +} + +auto defaultDevDaxParams = create_devdax_params(); + +HostMemoryAccessor hostAccessor; + +static std::vector getIpcProxyPoolTestParamsList(void) { + std::vector ipcProxyPoolTestParamsList = {}; + + if (!defaultDevDaxParams.get()) { + // return empty list to skip the test + return ipcProxyPoolTestParamsList; + } + + ipcProxyPoolTestParamsList = { + {umfProxyPoolOps(), nullptr, umfDevDaxMemoryProviderOps(), + defaultDevDaxParams.get(), &hostAccessor, true}, +#ifdef 
UMF_POOL_JEMALLOC_ENABLED + {umfJemallocPoolOps(), nullptr, umfDevDaxMemoryProviderOps(), + defaultDevDaxParams.get(), &hostAccessor, false}, +#endif +#ifdef UMF_POOL_SCALABLE_ENABLED + {umfScalablePoolOps(), nullptr, umfDevDaxMemoryProviderOps(), + defaultDevDaxParams.get(), &hostAccessor, false}, +#endif + }; + + return ipcProxyPoolTestParamsList; +} + +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(umfIpcTest); + +INSTANTIATE_TEST_SUITE_P(DevDaxProviderDifferentPoolsTest, umfIpcTest, + ::testing::ValuesIn(getIpcProxyPoolTestParamsList())); diff --git a/test/provider_devdax_memory_not_impl.cpp b/test/provider_devdax_memory_not_impl.cpp new file mode 100644 index 000000000..3b97443a0 --- /dev/null +++ b/test/provider_devdax_memory_not_impl.cpp @@ -0,0 +1,30 @@ +// Copyright (C) 2024 Intel Corporation +// Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include "base.hpp" + +#include + +using umf_test::test; + +TEST_F(test, devdax_provider_not_implemented) { + umf_devdax_memory_provider_params_handle_t params = nullptr; + umf_result_t umf_result = + umfDevDaxMemoryProviderParamsCreate(¶ms, "path", 4096); + EXPECT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); + EXPECT_EQ(params, nullptr); + + umf_result = umfDevDaxMemoryProviderParamsDestroy(nullptr); + EXPECT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); + + umf_result = + umfDevDaxMemoryProviderParamsSetDeviceDax(nullptr, "path", 4096); + EXPECT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); + + umf_result = umfDevDaxMemoryProviderParamsSetProtection(nullptr, 0); + EXPECT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); + + umf_memory_provider_ops_t *ops = umfDevDaxMemoryProviderOps(); + EXPECT_EQ(ops, nullptr); +} diff --git a/test/provider_file_memory.cpp b/test/provider_file_memory.cpp new file mode 100644 index 000000000..d3124aa11 --- /dev/null +++ b/test/provider_file_memory.cpp @@ -0,0 +1,556 @@ +// Copyright (C) 2024 
Intel Corporation +// Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include "base.hpp" + +#include "cpp_helpers.hpp" +#include "test_helpers.h" +#ifndef _WIN32 +#include "test_helpers_linux.h" +#endif + +#include +#include + +using umf_test::test; + +#define FILE_PATH ((char *)"tmp_file") +#define INVALID_PTR ((void *)0x01) + +typedef enum purge_t { + PURGE_NONE = 0, + PURGE_LAZY = 1, + PURGE_FORCE = 2, +} purge_t; + +static const char *Native_error_str[] = { + "success", // UMF_FILE_RESULT_SUCCESS + "memory allocation failed", // UMF_FILE_RESULT_ERROR_ALLOC_FAILED + "memory deallocation failed", // UMF_FILE_RESULT_ERROR_FREE_FAILED + "force purging failed", // UMF_FILE_RESULT_ERROR_PURGE_FORCE_FAILED +}; + +// test helpers + +static int compare_native_error_str(const char *message, int error) { + const char *error_str = Native_error_str[error - UMF_FILE_RESULT_SUCCESS]; + size_t len = strlen(error_str); + return strncmp(message, error_str, len); +} + +using providerCreateExtParams = std::tuple; + +static void providerCreateExt(providerCreateExtParams params, + umf::provider_unique_handle_t *handle) { + umf_memory_provider_handle_t hProvider = nullptr; + auto [provider_ops, provider_params] = params; + + auto ret = + umfMemoryProviderCreate(provider_ops, provider_params, &hProvider); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + ASSERT_NE(hProvider, nullptr); + + *handle = + umf::provider_unique_handle_t(hProvider, &umfMemoryProviderDestroy); +} + +struct FileProviderParamsDefault + : umf_test::test, + ::testing::WithParamInterface { + void SetUp() override { + test::SetUp(); + providerCreateExt(this->GetParam(), &provider); + umf_result_t umf_result = + umfMemoryProviderGetMinPageSize(provider.get(), NULL, &page_size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + page_plus_64 = page_size + 64; + } + + void TearDown() override { test::TearDown(); } + + umf::provider_unique_handle_t 
provider; + size_t page_size; + size_t page_plus_64; +}; + +struct FileProviderParamsShared : FileProviderParamsDefault {}; + +static void test_alloc_free_success(umf_memory_provider_handle_t provider, + size_t size, size_t alignment, + purge_t purge) { + void *ptr = nullptr; + + umf_result_t umf_result = + umfMemoryProviderAlloc(provider, size, alignment, &ptr); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(ptr, nullptr); + + memset(ptr, 0xFF, size); + + if (purge == PURGE_LAZY) { + umf_result = umfMemoryProviderPurgeLazy(provider, ptr, size); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); + } else if (purge == PURGE_FORCE) { + umf_result = umfMemoryProviderPurgeForce(provider, ptr, size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + } + + umf_result = umfMemoryProviderFree(provider, ptr, size); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); +} + +static void verify_last_native_error(umf_memory_provider_handle_t provider, + int32_t err) { + const char *message; + int32_t error; + umfMemoryProviderGetLastNativeError(provider, &message, &error); + ASSERT_EQ(error, err); + ASSERT_EQ(compare_native_error_str(message, error), 0); +} + +static void test_alloc_failure(umf_memory_provider_handle_t provider, + size_t size, size_t alignment, + umf_result_t result, int32_t err) { + void *ptr = nullptr; + umf_result_t umf_result = + umfMemoryProviderAlloc(provider, size, alignment, &ptr); + ASSERT_EQ(umf_result, result); + ASSERT_EQ(ptr, nullptr); + + if (umf_result == UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC) { + verify_last_native_error(provider, err); + } +} + +// TESTS + +// Test checking if FSDAX was mapped with the MAP_SYNC flag: +TEST_F(test, test_if_mapped_with_MAP_SYNC) { + umf_memory_provider_handle_t hProvider = nullptr; + umf_result_t umf_result; + + char *path = getenv("UMF_TESTS_FSDAX_PATH"); + if (path == nullptr || path[0] == 0) { + GTEST_SKIP() << "Test skipped, UMF_TESTS_FSDAX_PATH is not set"; + } + + 
umf_file_memory_provider_params_handle_t params = nullptr; + umf_result = umfFileMemoryProviderParamsCreate(¶ms, path); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(params, nullptr); + + umf_result = + umfFileMemoryProviderParamsSetVisibility(params, UMF_MEM_MAP_SHARED); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + umf_result = + umfMemoryProviderCreate(umfFileMemoryProviderOps(), params, &hProvider); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(hProvider, nullptr); + + char *buf; + size_t size = 2 * 1024 * 1024; // 2MB + umf_result = umfMemoryProviderAlloc(hProvider, size, 0, (void **)&buf); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(buf, nullptr); + + bool flag_found = is_mapped_with_MAP_SYNC(path, buf, size); + + umf_result = umfMemoryProviderFree(hProvider, buf, size); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); + + umfMemoryProviderDestroy(hProvider); + + // fail test if the "sf" flag was not found + ASSERT_EQ(flag_found, true); +} + +// positive tests using test_alloc_free_success + +using file_params_unique_handle_t = + std::unique_ptr; + +file_params_unique_handle_t get_file_params_default(char *path) { + umf_file_memory_provider_params_handle_t file_params = NULL; + umf_result_t res = umfFileMemoryProviderParamsCreate(&file_params, path); + if (res != UMF_RESULT_SUCCESS) { + throw std::runtime_error( + "Failed to create File Memory Provider params"); + } + + return file_params_unique_handle_t(file_params, + &umfFileMemoryProviderParamsDestroy); +} + +file_params_unique_handle_t file_params_default = + get_file_params_default(FILE_PATH); + +file_params_unique_handle_t get_file_params_shared(char *path) { + umf_file_memory_provider_params_handle_t file_params = NULL; + umf_result_t res = umfFileMemoryProviderParamsCreate(&file_params, path); + if (res != UMF_RESULT_SUCCESS) { + throw std::runtime_error( + "Failed to create File Memory Provider params"); + } + + res = 
umfFileMemoryProviderParamsSetVisibility(file_params, + UMF_MEM_MAP_SHARED); + if (res != UMF_RESULT_SUCCESS) { + umfFileMemoryProviderParamsDestroy(file_params); + throw std::runtime_error("Failed to set visibility to shared for File " + "Memory Provider params"); + } + + return file_params_unique_handle_t(file_params, + &umfFileMemoryProviderParamsDestroy); +} + +file_params_unique_handle_t file_params_shared = + get_file_params_shared(FILE_PATH); + +INSTANTIATE_TEST_SUITE_P(fileProviderTest, FileProviderParamsDefault, + ::testing::Values(providerCreateExtParams{ + umfFileMemoryProviderOps(), + file_params_default.get()})); + +TEST_P(FileProviderParamsDefault, create_destroy) {} + +TEST_P(FileProviderParamsDefault, two_allocations) { + umf_result_t umf_result; + void *ptr1 = nullptr; + void *ptr2 = nullptr; + size_t size = page_plus_64; + size_t alignment = page_size; + + umf_result = umfMemoryProviderAlloc(provider.get(), size, alignment, &ptr1); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(ptr1, nullptr); + + umf_result = umfMemoryProviderAlloc(provider.get(), size, alignment, &ptr2); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(ptr2, nullptr); + + ASSERT_NE(ptr1, ptr2); + if ((uintptr_t)ptr1 > (uintptr_t)ptr2) { + ASSERT_GT((uintptr_t)ptr1 - (uintptr_t)ptr2, size); + } else { + ASSERT_GT((uintptr_t)ptr2 - (uintptr_t)ptr1, size); + } + + memset(ptr1, 0x11, size); + memset(ptr2, 0x22, size); + + umf_result = umfMemoryProviderFree(provider.get(), ptr1, size); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); + + umf_result = umfMemoryProviderFree(provider.get(), ptr2, size); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); +} + +TEST_P(FileProviderParamsDefault, alloc_page64_align_0) { + test_alloc_free_success(provider.get(), page_plus_64, 0, PURGE_NONE); +} + +TEST_P(FileProviderParamsDefault, alloc_page64_align_page_div_2) { + test_alloc_free_success(provider.get(), page_plus_64, page_size / 2, + PURGE_NONE); +} + 
+TEST_P(FileProviderParamsDefault, purge_lazy) { + test_alloc_free_success(provider.get(), page_plus_64, 0, PURGE_LAZY); +} + +TEST_P(FileProviderParamsDefault, purge_force) { + test_alloc_free_success(provider.get(), page_plus_64, 0, PURGE_FORCE); +} + +// negative tests using test_alloc_failure + +TEST_P(FileProviderParamsDefault, alloc_WRONG_SIZE) { + test_alloc_failure(provider.get(), -1, 0, UMF_RESULT_ERROR_INVALID_ARGUMENT, + 0); +} + +TEST_P(FileProviderParamsDefault, alloc_page64_WRONG_ALIGNMENT_3_pages) { + test_alloc_failure(provider.get(), page_plus_64, 3 * page_size, + UMF_RESULT_ERROR_INVALID_ALIGNMENT, 0); +} + +TEST_P(FileProviderParamsDefault, alloc_3pages_WRONG_ALIGNMENT_3pages) { + test_alloc_failure(provider.get(), 3 * page_size, 3 * page_size, + UMF_RESULT_ERROR_INVALID_ALIGNMENT, 0); +} + +TEST_P(FileProviderParamsDefault, + alloc_page64_align_page_minus_1_WRONG_ALIGNMENT_1) { + test_alloc_failure(provider.get(), page_plus_64, page_size - 1, + UMF_RESULT_ERROR_INVALID_ALIGNMENT, 0); +} + +TEST_P(FileProviderParamsDefault, + alloc_page64_align_one_half_pages_WRONG_ALIGNMENT_2) { + test_alloc_failure(provider.get(), page_plus_64, + page_size + (page_size / 2), + UMF_RESULT_ERROR_INVALID_ALIGNMENT, 0); +} + +// negative IPC tests + +TEST_P(FileProviderParamsDefault, get_ipc_handle_size_wrong_visibility) { + size_t size; + umf_result_t umf_result = + umfMemoryProviderGetIPCHandleSize(provider.get(), &size); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); +} + +TEST_P(FileProviderParamsDefault, get_ipc_handle_wrong_visibility) { + char providerIpcData; + umf_result_t umf_result = umfMemoryProviderGetIPCHandle( + provider.get(), INVALID_PTR, 1, &providerIpcData); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); +} + +TEST_P(FileProviderParamsDefault, put_ipc_handle_wrong_visibility) { + char providerIpcData; + umf_result_t umf_result = + umfMemoryProviderPutIPCHandle(provider.get(), &providerIpcData); + ASSERT_EQ(umf_result, 
UMF_RESULT_ERROR_INVALID_ARGUMENT); +} + +TEST_P(FileProviderParamsDefault, open_ipc_handle_wrong_visibility) { + char providerIpcData; + void *ptr; + umf_result_t umf_result = + umfMemoryProviderOpenIPCHandle(provider.get(), &providerIpcData, &ptr); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); +} + +TEST_P(FileProviderParamsDefault, close_ipc_handle_wrong_visibility) { + umf_result_t umf_result = + umfMemoryProviderCloseIPCHandle(provider.get(), INVALID_PTR, 1); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); +} + +// other positive tests + +TEST_P(FileProviderParamsDefault, get_min_page_size) { + size_t min_page_size; + umf_result_t umf_result = umfMemoryProviderGetMinPageSize( + provider.get(), nullptr, &min_page_size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_LE(min_page_size, page_size); +} + +TEST_P(FileProviderParamsDefault, get_recommended_page_size) { + size_t min_page_size; + umf_result_t umf_result = umfMemoryProviderGetMinPageSize( + provider.get(), nullptr, &min_page_size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_LE(min_page_size, page_size); + + size_t recommended_page_size; + umf_result = umfMemoryProviderGetRecommendedPageSize( + provider.get(), 0, &recommended_page_size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_GE(recommended_page_size, min_page_size); +} + +TEST_P(FileProviderParamsDefault, get_name) { + const char *name = umfMemoryProviderGetName(provider.get()); + ASSERT_STREQ(name, "FILE"); +} + +TEST_P(FileProviderParamsDefault, free_size_0_ptr_not_null) { + umf_result_t umf_result = + umfMemoryProviderFree(provider.get(), INVALID_PTR, 0); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); +} + +TEST_P(FileProviderParamsDefault, free_NULL) { + umf_result_t umf_result = umfMemoryProviderFree(provider.get(), nullptr, 0); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); +} + +// other negative tests + +TEST_F(test, params_null_handle) { + umf_result_t umf_result = + 
umfFileMemoryProviderParamsCreate(nullptr, FILE_PATH); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + umf_result = umfFileMemoryProviderParamsDestroy(nullptr); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + umf_result = umfFileMemoryProviderParamsSetPath(nullptr, FILE_PATH); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + umf_result = + umfFileMemoryProviderParamsSetProtection(nullptr, UMF_PROTECTION_READ); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + umf_result = + umfFileMemoryProviderParamsSetVisibility(nullptr, UMF_MEM_MAP_PRIVATE); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); +} + +TEST_F(test, create_empty_path) { + const char *path = ""; + + umf_file_memory_provider_params_handle_t wrong_params = nullptr; + umf_result_t umf_result = + umfFileMemoryProviderParamsCreate(&wrong_params, path); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + ASSERT_EQ(wrong_params, nullptr); +} + +TEST_F(test, create_null_path) { + const char *path = nullptr; + + umf_file_memory_provider_params_handle_t wrong_params = nullptr; + umf_result_t umf_result = + umfFileMemoryProviderParamsCreate(&wrong_params, path); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + ASSERT_EQ(wrong_params, nullptr); +} + +TEST_F(test, set_empty_path) { + const char *empty_path = ""; + + umf_file_memory_provider_params_handle_t params = nullptr; + umf_result_t umf_result = + umfFileMemoryProviderParamsCreate(¶ms, FILE_PATH); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + umf_result = umfFileMemoryProviderParamsSetPath(params, empty_path); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + umf_result = umfFileMemoryProviderParamsDestroy(params); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); +} + +TEST_F(test, set_null_path) { + const char *null_path = nullptr; + + umf_file_memory_provider_params_handle_t params = nullptr; + umf_result_t umf_result = + umfFileMemoryProviderParamsCreate(¶ms, 
FILE_PATH); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + umf_result = umfFileMemoryProviderParamsSetPath(params, null_path); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + umf_result = umfFileMemoryProviderParamsDestroy(params); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); +} + +TEST_P(FileProviderParamsDefault, free_INVALID_POINTER_SIZE_GT_0) { + umf_result_t umf_result = + umfMemoryProviderFree(provider.get(), INVALID_PTR, page_plus_64); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); +} + +TEST_P(FileProviderParamsDefault, purge_lazy_INVALID_POINTER) { + umf_result_t umf_result = + umfMemoryProviderPurgeLazy(provider.get(), INVALID_PTR, 1); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); +} + +TEST_P(FileProviderParamsDefault, purge_force_INVALID_POINTER) { + umf_result_t umf_result = + umfMemoryProviderPurgeForce(provider.get(), INVALID_PTR, 1); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC); + + verify_last_native_error(provider.get(), + UMF_FILE_RESULT_ERROR_PURGE_FORCE_FAILED); +} + +// IPC tests + +INSTANTIATE_TEST_SUITE_P(fileProviderTest, FileProviderParamsShared, + ::testing::Values(providerCreateExtParams{ + umfFileMemoryProviderOps(), + file_params_shared.get()})); + +TEST_P(FileProviderParamsShared, IPC_base_success_test) { + umf_result_t umf_result; + void *ptr = nullptr; + size_t size = page_size; + void *ipc_handle = nullptr; + size_t ipc_handle_size; + void *new_ptr = nullptr; + + umf_result = + umfMemoryProviderGetIPCHandleSize(provider.get(), &ipc_handle_size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(ipc_handle_size, 0); + + umf_result = umfMemoryProviderAlloc(provider.get(), size, page_size, &ptr); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(ptr, nullptr); + memset(ptr, 0xFF, size); + + umf_result = + umfMemoryProviderAlloc(provider.get(), ipc_handle_size, 0, &ipc_handle); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(ipc_handle, nullptr); + 
memset(ipc_handle, 0x0, ipc_handle_size); + + umf_result = + umfMemoryProviderGetIPCHandle(provider.get(), ptr, size, ipc_handle); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + umf_result = + umfMemoryProviderOpenIPCHandle(provider.get(), ipc_handle, &new_ptr); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(new_ptr, nullptr); + + // it requires mapping with UMF_MEM_MAP_SHARED to work + int ret = memcmp(ptr, new_ptr, size); + ASSERT_EQ(ret, 0); + + umf_result = umfMemoryProviderFree(provider.get(), ptr, size); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); +} + +TEST_P(FileProviderParamsShared, IPC_file_not_exist) { + umf_result_t umf_result; + void *ptr = nullptr; + size_t size = page_size; + void *ipc_handle = nullptr; + size_t ipc_handle_size; + void *new_ptr = nullptr; + + umf_result = + umfMemoryProviderGetIPCHandleSize(provider.get(), &ipc_handle_size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(ipc_handle_size, 0); + + umf_result = umfMemoryProviderAlloc(provider.get(), size, page_size, &ptr); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(ptr, nullptr); + memset(ptr, 0xFF, size); + + umf_result = + umfMemoryProviderAlloc(provider.get(), ipc_handle_size, 0, &ipc_handle); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(ipc_handle, nullptr); + memset(ipc_handle, 0x0, ipc_handle_size); + + umf_result = + umfMemoryProviderGetIPCHandle(provider.get(), ptr, size, ipc_handle); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + int ret = unlink(FILE_PATH); + ASSERT_EQ(ret, 0); + + umf_result = + umfMemoryProviderOpenIPCHandle(provider.get(), ipc_handle, &new_ptr); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + ASSERT_EQ(new_ptr, nullptr); + + umf_result = umfMemoryProviderFree(provider.get(), ptr, size); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); +} diff --git a/test/provider_file_memory_ipc.cpp b/test/provider_file_memory_ipc.cpp new file mode 100644 index 000000000..ee7ab6c8f --- 
/dev/null +++ b/test/provider_file_memory_ipc.cpp @@ -0,0 +1,119 @@ +// Copyright (C) 2024 Intel Corporation +// Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include +#include +#ifdef UMF_POOL_JEMALLOC_ENABLED +#include +#endif +#ifdef UMF_POOL_SCALABLE_ENABLED +#include +#endif + +#include "ipcFixtures.hpp" + +using umf_test::test; + +#define FILE_PATH ((char *)"tmp_file") + +using file_params_unique_handle_t = + std::unique_ptr; + +file_params_unique_handle_t get_file_params_shared(char *path) { + umf_file_memory_provider_params_handle_t file_params = NULL; + umf_result_t res = umfFileMemoryProviderParamsCreate(&file_params, path); + if (res != UMF_RESULT_SUCCESS) { + throw std::runtime_error( + "Failed to create File Memory Provider params"); + } + + res = umfFileMemoryProviderParamsSetVisibility(file_params, + UMF_MEM_MAP_SHARED); + if (res != UMF_RESULT_SUCCESS) { + umfFileMemoryProviderParamsDestroy(file_params); + throw std::runtime_error("Failed to set visibility to shared for File " + "Memory Provider params"); + } + + return file_params_unique_handle_t(file_params, + &umfFileMemoryProviderParamsDestroy); +} + +file_params_unique_handle_t file_params_shared = + get_file_params_shared(FILE_PATH); + +file_params_unique_handle_t get_file_params_fsdax(char *path) { + umf_file_memory_provider_params_handle_t file_params = NULL; + umf_result_t res = umfFileMemoryProviderParamsCreate(&file_params, path); + if (res != UMF_RESULT_SUCCESS) { + //test will be skipped. 
+ return file_params_unique_handle_t(nullptr, + &umfFileMemoryProviderParamsDestroy); + } + + res = umfFileMemoryProviderParamsSetVisibility(file_params, + UMF_MEM_MAP_SHARED); + if (res != UMF_RESULT_SUCCESS) { + umfFileMemoryProviderParamsDestroy(file_params); + throw std::runtime_error("Failed to set visibility to shared for File " + "Memory Provider params"); + } + + return file_params_unique_handle_t(file_params, + &umfFileMemoryProviderParamsDestroy); +} + +file_params_unique_handle_t file_params_fsdax = + get_file_params_fsdax(getenv("UMF_TESTS_FSDAX_PATH")); + +HostMemoryAccessor hostAccessor; + +static std::vector ipcManyPoolsTestParamsList = { +// TODO: enable it when sizes of allocations in ipcFixtures.hpp are fixed +// {umfProxyPoolOps(), nullptr, umfFileMemoryProviderOps(), +// file_params_shared.get(), &hostAccessor, true}, +#ifdef UMF_POOL_JEMALLOC_ENABLED + {umfJemallocPoolOps(), nullptr, umfFileMemoryProviderOps(), + file_params_shared.get(), &hostAccessor, false}, +#endif +#ifdef UMF_POOL_SCALABLE_ENABLED + {umfScalablePoolOps(), nullptr, umfFileMemoryProviderOps(), + file_params_shared.get(), &hostAccessor, false}, +#endif +}; + +static std::vector getIpcFsDaxTestParamsList(void) { + std::vector ipcFsDaxTestParamsList = {}; + + char *path = getenv("UMF_TESTS_FSDAX_PATH"); + if (path == nullptr || path[0] == 0) { + // skipping the test, UMF_TESTS_FSDAX_PATH is not set + return ipcFsDaxTestParamsList; + } + + ipcFsDaxTestParamsList = { +// TODO: enable it when sizes of allocations in ipcFixtures.hpp are fixed +// {umfProxyPoolOps(), nullptr, umfFileMemoryProviderOps(), +// file_params_fsdax.get(), &hostAccessor, true}, +#ifdef UMF_POOL_JEMALLOC_ENABLED + {umfJemallocPoolOps(), nullptr, umfFileMemoryProviderOps(), + file_params_fsdax.get(), &hostAccessor, false}, +#endif +#ifdef UMF_POOL_SCALABLE_ENABLED + {umfScalablePoolOps(), nullptr, umfFileMemoryProviderOps(), + file_params_fsdax.get(), &hostAccessor, false}, +#endif + }; + + return 
ipcFsDaxTestParamsList; +} + +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(umfIpcTest); + +INSTANTIATE_TEST_SUITE_P(FileProviderDifferentPoolsTest, umfIpcTest, + ::testing::ValuesIn(ipcManyPoolsTestParamsList)); + +INSTANTIATE_TEST_SUITE_P(FileProviderDifferentPoolsFSDAXTest, umfIpcTest, + ::testing::ValuesIn(getIpcFsDaxTestParamsList())); diff --git a/test/provider_file_memory_not_impl.cpp b/test/provider_file_memory_not_impl.cpp new file mode 100644 index 000000000..c82b8163c --- /dev/null +++ b/test/provider_file_memory_not_impl.cpp @@ -0,0 +1,33 @@ +// Copyright (C) 2024 Intel Corporation +// Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include "base.hpp" + +#include + +using umf_test::test; + +TEST_F(test, file_provider_not_implemented) { + umf_file_memory_provider_params_handle_t params = nullptr; + umf_result_t umf_result = + umfFileMemoryProviderParamsCreate(¶ms, "path"); + EXPECT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); + EXPECT_EQ(params, nullptr); + + umf_result = umfFileMemoryProviderParamsDestroy(nullptr); + EXPECT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); + + umf_result = umfFileMemoryProviderParamsSetPath(nullptr, "path"); + EXPECT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); + + umf_result = umfFileMemoryProviderParamsSetProtection(nullptr, 0); + EXPECT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); + + umf_result = + umfFileMemoryProviderParamsSetVisibility(nullptr, UMF_MEM_MAP_PRIVATE); + EXPECT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); + + umf_memory_provider_ops_t *ops = umfFileMemoryProviderOps(); + EXPECT_EQ(ops, nullptr); +} \ No newline at end of file diff --git a/test/provider_os_memory.cpp b/test/provider_os_memory.cpp index 5b9de43fa..57bce46d2 100644 --- a/test/provider_os_memory.cpp +++ b/test/provider_os_memory.cpp @@ -5,17 +5,22 @@ #include "base.hpp" #include "cpp_helpers.hpp" +#include "ipcFixtures.hpp" +#include 
"test_helpers.h" #include #include +#if (defined UMF_POOL_DISJOINT_ENABLED) +#include +#endif +#ifdef UMF_POOL_JEMALLOC_ENABLED +#include +#endif using umf_test::test; #define INVALID_PTR ((void *)0x01) -#define ASSERT_IS_ALIGNED(ptr, alignment) \ - ASSERT_EQ(((uintptr_t)ptr % alignment), 0) - typedef enum purge_t { PURGE_NONE = 0, PURGE_LAZY = 1, @@ -43,17 +48,18 @@ static int compare_native_error_str(const char *message, int error) { using providerCreateExtParams = std::tuple; -umf::provider_unique_handle_t -providerCreateExt(providerCreateExtParams params) { +static void providerCreateExt(providerCreateExtParams params, + umf::provider_unique_handle_t *handle) { umf_memory_provider_handle_t hProvider = nullptr; auto [provider_ops, provider_params] = params; auto ret = umfMemoryProviderCreate(provider_ops, provider_params, &hProvider); - EXPECT_EQ(ret, UMF_RESULT_SUCCESS); - EXPECT_NE(hProvider, nullptr); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + ASSERT_NE(hProvider, nullptr); - return umf::provider_unique_handle_t(hProvider, &umfMemoryProviderDestroy); + *handle = + umf::provider_unique_handle_t(hProvider, &umfMemoryProviderDestroy); } struct umfProviderTest @@ -61,10 +67,10 @@ struct umfProviderTest ::testing::WithParamInterface { void SetUp() override { test::SetUp(); - provider = providerCreateExt(this->GetParam()); + providerCreateExt(this->GetParam(), &provider); umf_result_t umf_result = umfMemoryProviderGetMinPageSize(provider.get(), NULL, &page_size); - EXPECT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); page_plus_64 = page_size + 64; } @@ -132,16 +138,23 @@ static umf_result_t create_os_provider_with_mode(umf_numa_mode_t mode, unsigned node_list_size) { umf_result_t umf_result; umf_memory_provider_handle_t os_memory_provider = nullptr; - umf_os_memory_provider_params_t os_memory_provider_params = - umfOsMemoryProviderParamsDefault(); + umf_os_memory_provider_params_handle_t os_memory_provider_params = nullptr; - 
os_memory_provider_params.numa_mode = mode; - os_memory_provider_params.numa_list = node_list; - os_memory_provider_params.numa_list_len = node_list_size; + umf_result = umfOsMemoryProviderParamsCreate(&os_memory_provider_params); + EXPECT_EQ(umf_result, UMF_RESULT_SUCCESS); - umf_result = umfMemoryProviderCreate(umfOsMemoryProviderOps(), - &os_memory_provider_params, - &os_memory_provider); + umf_result = + umfOsMemoryProviderParamsSetNumaMode(os_memory_provider_params, mode); + EXPECT_EQ(umf_result, UMF_RESULT_SUCCESS); + + umf_result = umfOsMemoryProviderParamsSetNumaList( + os_memory_provider_params, node_list, node_list_size); + EXPECT_EQ(umf_result, UMF_RESULT_SUCCESS); + + umf_result = + umfMemoryProviderCreate(umfOsMemoryProviderOps(), + os_memory_provider_params, &os_memory_provider); + umfOsMemoryProviderParamsDestroy(os_memory_provider_params); if (umf_result == UMF_RESULT_SUCCESS) { EXPECT_NE(os_memory_provider, nullptr); umfMemoryProviderDestroy(os_memory_provider); @@ -187,29 +200,53 @@ TEST_F(test, create_ZERO_WEIGHT_PARTITION) { umf_numa_split_partition_t p = {0, 0}; umf_result_t umf_result; umf_memory_provider_handle_t os_memory_provider = nullptr; - umf_os_memory_provider_params_t os_memory_provider_params = - umfOsMemoryProviderParamsDefault(); + umf_os_memory_provider_params_handle_t os_memory_provider_params = NULL; + + umf_result = umfOsMemoryProviderParamsCreate(&os_memory_provider_params); + EXPECT_EQ(umf_result, UMF_RESULT_SUCCESS); - os_memory_provider_params.numa_mode = UMF_NUMA_MODE_SPLIT; - os_memory_provider_params.numa_list = &valid_list; - os_memory_provider_params.numa_list_len = valid_list_len; - os_memory_provider_params.partitions = &p; - os_memory_provider_params.partitions_len = 1; + umf_result = umfOsMemoryProviderParamsSetNumaMode(os_memory_provider_params, + UMF_NUMA_MODE_SPLIT); + EXPECT_EQ(umf_result, UMF_RESULT_SUCCESS); + + umf_result = umfOsMemoryProviderParamsSetNumaList( + os_memory_provider_params, &valid_list, 
valid_list_len); + EXPECT_EQ(umf_result, UMF_RESULT_SUCCESS); + + umf_result = umfOsMemoryProviderParamsSetPartitions( + os_memory_provider_params, &p, 1); + EXPECT_EQ(umf_result, UMF_RESULT_SUCCESS); umf_result = umfMemoryProviderCreate(umfOsMemoryProviderOps(), &os_memory_provider_params, &os_memory_provider); + umfOsMemoryProviderParamsDestroy(os_memory_provider_params); + EXPECT_EQ(os_memory_provider, nullptr); ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); } // positive tests using test_alloc_free_success -auto defaultParams = umfOsMemoryProviderParamsDefault(); +using os_params_unique_handle_t = + std::unique_ptr; + +os_params_unique_handle_t createOsMemoryProviderParams() { + umf_os_memory_provider_params_handle_t params = nullptr; + umf_result_t res = umfOsMemoryProviderParamsCreate(¶ms); + if (res != UMF_RESULT_SUCCESS) { + throw std::runtime_error("Failed to create os memory provider params"); + } + + return os_params_unique_handle_t(params, &umfOsMemoryProviderParamsDestroy); +} +auto defaultParams = createOsMemoryProviderParams(); + INSTANTIATE_TEST_SUITE_P(osProviderTest, umfProviderTest, ::testing::Values(providerCreateExtParams{ - umfOsMemoryProviderOps(), &defaultParams})); + umfOsMemoryProviderOps(), defaultParams.get()})); TEST_P(umfProviderTest, create_destroy) {} @@ -259,6 +296,12 @@ TEST_P(umfProviderTest, alloc_WRONG_SIZE) { UMF_OS_RESULT_ERROR_ALLOC_FAILED); } +TEST_P(umfProviderTest, alloc_MAX_SIZE) { + test_alloc_failure(provider.get(), SIZE_MAX, 0, + UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC, + UMF_OS_RESULT_ERROR_ALLOC_FAILED); +} + // other positive tests TEST_P(umfProviderTest, get_min_page_size) { @@ -326,3 +369,109 @@ TEST_P(umfProviderTest, purge_force_INVALID_POINTER) { verify_last_native_error(provider.get(), UMF_OS_RESULT_ERROR_PURGE_FORCE_FAILED); } + +TEST_P(umfProviderTest, get_ipc_handle_size_wrong_visibility) { + size_t size; + umf_result_t umf_result = + umfMemoryProviderGetIPCHandleSize(provider.get(), &size); + 
ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); +} + +TEST_P(umfProviderTest, get_ipc_handle_wrong_visibility) { + char providerIpcData; + umf_result_t umf_result = umfMemoryProviderGetIPCHandle( + provider.get(), INVALID_PTR, 1, &providerIpcData); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); +} + +TEST_P(umfProviderTest, put_ipc_handle_wrong_visibility) { + char providerIpcData; + umf_result_t umf_result = + umfMemoryProviderPutIPCHandle(provider.get(), &providerIpcData); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); +} + +TEST_P(umfProviderTest, open_ipc_handle_wrong_visibility) { + char providerIpcData; + void *ptr; + umf_result_t umf_result = + umfMemoryProviderOpenIPCHandle(provider.get(), &providerIpcData, &ptr); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); +} + +TEST_P(umfProviderTest, close_ipc_handle_wrong_visibility) { + umf_result_t umf_result = + umfMemoryProviderCloseIPCHandle(provider.get(), INVALID_PTR, 1); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); +} + +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(umfIpcTest); + +using os_params_unique_handle_t = + std::unique_ptr; + +os_params_unique_handle_t osMemoryProviderParamsShared() { + umf_os_memory_provider_params_handle_t params = nullptr; + umf_result_t res = umfOsMemoryProviderParamsCreate(¶ms); + if (res != UMF_RESULT_SUCCESS) { + throw std::runtime_error("Failed to create os memory provider params"); + } + res = umfOsMemoryProviderParamsSetVisibility(params, UMF_MEM_MAP_SHARED); + if (res != UMF_RESULT_SUCCESS) { + throw std::runtime_error("Failed to set protection"); + } + + return os_params_unique_handle_t(params, &umfOsMemoryProviderParamsDestroy); +} +auto os_params = osMemoryProviderParamsShared(); + +HostMemoryAccessor hostAccessor; + +#if (defined UMF_POOL_DISJOINT_ENABLED) +using disjoint_params_unique_handle_t = + std::unique_ptr; + +disjoint_params_unique_handle_t disjointPoolParams() { + 
umf_disjoint_pool_params_handle_t params = nullptr; + umf_result_t res = umfDisjointPoolParamsCreate(¶ms); + if (res != UMF_RESULT_SUCCESS) { + throw std::runtime_error("Failed to create pool params"); + } + res = umfDisjointPoolParamsSetSlabMinSize(params, 4096); + if (res != UMF_RESULT_SUCCESS) { + throw std::runtime_error("Failed to set slab min size"); + } + res = umfDisjointPoolParamsSetMaxPoolableSize(params, 4096); + if (res != UMF_RESULT_SUCCESS) { + throw std::runtime_error("Failed to set max poolable size"); + } + res = umfDisjointPoolParamsSetCapacity(params, 4); + if (res != UMF_RESULT_SUCCESS) { + throw std::runtime_error("Failed to set capacity"); + } + res = umfDisjointPoolParamsSetMinBucketSize(params, 64); + if (res != UMF_RESULT_SUCCESS) { + throw std::runtime_error("Failed to set min bucket size"); + } + + return disjoint_params_unique_handle_t(params, + &umfDisjointPoolParamsDestroy); +} +disjoint_params_unique_handle_t disjointParams = disjointPoolParams(); +#endif + +static std::vector ipcTestParamsList = { +#if (defined UMF_POOL_DISJOINT_ENABLED) + {umfDisjointPoolOps(), disjointParams.get(), umfOsMemoryProviderOps(), + os_params.get(), &hostAccessor, false}, +#endif +#ifdef UMF_POOL_JEMALLOC_ENABLED + {umfJemallocPoolOps(), nullptr, umfOsMemoryProviderOps(), os_params.get(), + &hostAccessor, false}, +#endif +}; + +INSTANTIATE_TEST_SUITE_P(osProviderTest, umfIpcTest, + ::testing::ValuesIn(ipcTestParamsList)); diff --git a/test/provider_os_memory_config.cpp b/test/provider_os_memory_config.cpp index 78008f898..ed3456618 100644 --- a/test/provider_os_memory_config.cpp +++ b/test/provider_os_memory_config.cpp @@ -22,13 +22,16 @@ struct providerConfigTest : testing::Test { const size_t size = 128; void *ptr = nullptr; std::string dest = "destination"; - umf_os_memory_provider_params_t params = umfOsMemoryProviderParamsDefault(); + umf_os_memory_provider_params_handle_t params = nullptr; void SetUp() override { int ret = numa_available(); if (ret) 
{ GTEST_SKIP() << "Test skipped, NUMA not available"; } + + auto res = umfOsMemoryProviderParamsCreate(¶ms); + ASSERT_EQ(res, UMF_RESULT_SUCCESS); } void TearDown() override { @@ -38,9 +41,11 @@ struct providerConfigTest : testing::Test { if (provider) { umfMemoryProviderDestroy(provider); } + + umfOsMemoryProviderParamsDestroy(params); } - void create_provider(umf_os_memory_provider_params_t *params) { + void create_provider(umf_os_memory_provider_params_handle_t params) { auto res = umfMemoryProviderCreate(umfOsMemoryProviderOps(), params, &provider); ASSERT_EQ(res, UMF_RESULT_SUCCESS); @@ -68,9 +73,9 @@ struct providerConfigTest : testing::Test { TEST_F(providerConfigTest, protection_flag_none) { // pages may not be accessed - PROT_NONE - params.protection = UMF_PROTECTION_NONE; + umfOsMemoryProviderParamsSetProtection(params, UMF_PROTECTION_NONE); - create_provider(¶ms); + create_provider(params); allocate_memory(); // read failure @@ -82,9 +87,9 @@ TEST_F(providerConfigTest, protection_flag_none) { TEST_F(providerConfigTest, protection_flag_read) { // pages may be read - PROT_READ - params.protection = UMF_PROTECTION_READ; + umfOsMemoryProviderParamsSetProtection(params, UMF_PROTECTION_READ); - create_provider(¶ms); + create_provider(params); allocate_memory(); // read success @@ -96,9 +101,9 @@ TEST_F(providerConfigTest, protection_flag_read) { TEST_F(providerConfigTest, protection_flag_write) { // pages may be written to - PROT_WRITE - params.protection = UMF_PROTECTION_WRITE; + umfOsMemoryProviderParamsSetProtection(params, UMF_PROTECTION_WRITE); - create_provider(¶ms); + create_provider(params); allocate_memory(); // write success @@ -107,9 +112,10 @@ TEST_F(providerConfigTest, protection_flag_write) { TEST_F(providerConfigTest, protection_flag_read_write) { // pages may be read and written to - PROT_READ | PROT_WRITE - params.protection = UMF_PROTECTION_READ | UMF_PROTECTION_WRITE; + umfOsMemoryProviderParamsSetProtection(params, UMF_PROTECTION_READ | + 
UMF_PROTECTION_WRITE); - create_provider(¶ms); + create_provider(params); allocate_memory(); // read success @@ -119,21 +125,115 @@ TEST_F(providerConfigTest, protection_flag_read_write) { write_memory("write string"); } +TEST_F(providerConfigTest, set_params_null_params_handle) { + umf_result_t res = umfOsMemoryProviderParamsCreate(nullptr); + ASSERT_EQ(res, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + res = umfOsMemoryProviderParamsDestroy(nullptr); + ASSERT_EQ(res, UMF_RESULT_SUCCESS); + + res = umfOsMemoryProviderParamsSetProtection(nullptr, UMF_PROTECTION_READ); + ASSERT_EQ(res, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + res = umfOsMemoryProviderParamsSetVisibility(nullptr, UMF_MEM_MAP_PRIVATE); + ASSERT_EQ(res, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + res = umfOsMemoryProviderParamsSetShmName(nullptr, "shm_name"); + ASSERT_EQ(res, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + res = umfOsMemoryProviderParamsSetNumaList(nullptr, nullptr, 0); + ASSERT_EQ(res, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + res = umfOsMemoryProviderParamsSetNumaMode(nullptr, UMF_NUMA_MODE_DEFAULT); + ASSERT_EQ(res, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + res = umfOsMemoryProviderParamsSetPartSize(nullptr, 0); + ASSERT_EQ(res, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + res = umfOsMemoryProviderParamsSetPartitions(nullptr, nullptr, 0); + ASSERT_EQ(res, UMF_RESULT_ERROR_INVALID_ARGUMENT); +} + +TEST_F(providerConfigTest, set_params_shm_name) { + umf_result_t res = umfOsMemoryProviderParamsSetShmName(params, nullptr); + ASSERT_EQ(res, UMF_RESULT_SUCCESS); + + res = umfOsMemoryProviderParamsSetShmName(params, "shm_name"); + ASSERT_EQ(res, UMF_RESULT_SUCCESS); + + res = umfOsMemoryProviderParamsSetShmName(params, ""); + ASSERT_EQ(res, UMF_RESULT_SUCCESS); + + res = umfOsMemoryProviderParamsSetShmName(params, nullptr); + ASSERT_EQ(res, UMF_RESULT_SUCCESS); +} + +TEST_F(providerConfigTest, set_params_numa_list) { + unsigned numa_list[1] = {0}; + + umf_result_t res = umfOsMemoryProviderParamsSetNumaList(params, 
nullptr, 0); + ASSERT_EQ(res, UMF_RESULT_SUCCESS); + + res = umfOsMemoryProviderParamsSetNumaList(params, numa_list, 1); + ASSERT_EQ(res, UMF_RESULT_SUCCESS); + + res = umfOsMemoryProviderParamsSetNumaList(params, nullptr, 1); + ASSERT_EQ(res, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + res = umfOsMemoryProviderParamsSetNumaList(params, numa_list, 0); + ASSERT_EQ(res, UMF_RESULT_SUCCESS); + + // repeat the valid set to check memory leaks under Valgrind + res = umfOsMemoryProviderParamsSetNumaList(params, numa_list, 1); + ASSERT_EQ(res, UMF_RESULT_SUCCESS); +} + +TEST_F(providerConfigTest, set_params_partitions) { + umf_numa_split_partition_t partitions[1] = {{0, 1}}; + + umf_result_t res = + umfOsMemoryProviderParamsSetPartitions(params, nullptr, 0); + ASSERT_EQ(res, UMF_RESULT_SUCCESS); + + res = umfOsMemoryProviderParamsSetPartitions(params, partitions, 1); + ASSERT_EQ(res, UMF_RESULT_SUCCESS); + + res = umfOsMemoryProviderParamsSetPartitions(params, nullptr, 1); + ASSERT_EQ(res, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + res = umfOsMemoryProviderParamsSetPartitions(params, partitions, 0); + ASSERT_EQ(res, UMF_RESULT_SUCCESS); + + // repeat the valid set to check memory leaks under Valgrind + res = umfOsMemoryProviderParamsSetPartitions(params, partitions, 1); + ASSERT_EQ(res, UMF_RESULT_SUCCESS); +} + struct providerConfigTestNumaMode : providerConfigTest, testing::WithParamInterface { struct bitmask *allowed_nodes = nullptr; - umf_os_memory_provider_params_t params = umfOsMemoryProviderParamsDefault(); + umf_numa_mode_t expected_numa_mode; void SetUp() override { providerConfigTest::SetUp(); - params.numa_mode = GetParam(); + + if (::providerConfigTest::IsSkipped()) { + GTEST_SKIP(); + } + + expected_numa_mode = GetParam(); + + auto res = + umfOsMemoryProviderParamsSetNumaMode(params, expected_numa_mode); + ASSERT_EQ(res, UMF_RESULT_SUCCESS); } void TearDown() override { if (allowed_nodes) { numa_bitmask_free(allowed_nodes); } + providerConfigTest::TearDown(); } }; @@ 
-152,24 +252,27 @@ INSTANTIATE_TEST_SUITE_P(numa_modes, providerConfigTestNumaMode, #endif TEST_P(providerConfigTestNumaMode, numa_modes) { - if (params.numa_mode != UMF_NUMA_MODE_DEFAULT && - params.numa_mode != UMF_NUMA_MODE_LOCAL) { + unsigned numa_list_len = 0; + unsigned *numa_list = nullptr; + if (expected_numa_mode != UMF_NUMA_MODE_DEFAULT && + expected_numa_mode != UMF_NUMA_MODE_LOCAL) { allowed_nodes = numa_get_mems_allowed(); // convert bitmask to array of nodes - params.numa_list_len = numa_bitmask_weight(allowed_nodes); - params.numa_list = (unsigned *)malloc(params.numa_list_len * - sizeof(*params.numa_list)); - ASSERT_NE(params.numa_list, nullptr); + numa_list_len = numa_bitmask_weight(allowed_nodes); + numa_list = (unsigned *)malloc(numa_list_len * sizeof(*numa_list)); + ASSERT_NE(numa_list, nullptr); unsigned count = 0; - for (unsigned i = 0; i < params.numa_list_len; i++) { + for (unsigned i = 0; i < numa_list_len; i++) { if (numa_bitmask_isbitset(allowed_nodes, i)) { - params.numa_list[count++] = i; + numa_list[count++] = i; } } - ASSERT_EQ(count, params.numa_list_len); + ASSERT_EQ(count, numa_list_len); + + umfOsMemoryProviderParamsSetNumaList(params, numa_list, numa_list_len); } - create_provider(¶ms); + create_provider(params); allocate_memory(); write_memory("write string"); @@ -177,25 +280,25 @@ TEST_P(providerConfigTestNumaMode, numa_modes) { long ret = get_mempolicy(&actual_mode, nullptr, 0, ptr, MPOL_F_ADDR); ASSERT_EQ(ret, 0); - if (params.numa_mode == UMF_NUMA_MODE_DEFAULT) { + if (expected_numa_mode == UMF_NUMA_MODE_DEFAULT) { ASSERT_EQ(actual_mode, MPOL_DEFAULT); - } else if (params.numa_mode == UMF_NUMA_MODE_BIND) { + } else if (expected_numa_mode == UMF_NUMA_MODE_BIND) { ASSERT_EQ(actual_mode, MPOL_BIND); - } else if (params.numa_mode == UMF_NUMA_MODE_INTERLEAVE) { + } else if (expected_numa_mode == UMF_NUMA_MODE_INTERLEAVE) { ASSERT_EQ(actual_mode, MPOL_INTERLEAVE); - } else if (params.numa_mode == UMF_NUMA_MODE_PREFERRED) { + } 
else if (expected_numa_mode == UMF_NUMA_MODE_PREFERRED) { // MPOL_PREFERRED_MANY is equivalent to MPOL_PREFERRED if a single node is set if (actual_mode != MPOL_PREFERRED_MANY) { ASSERT_EQ(actual_mode, MPOL_PREFERRED); } - } else if (params.numa_mode == UMF_NUMA_MODE_LOCAL) { + } else if (expected_numa_mode == UMF_NUMA_MODE_LOCAL) { // MPOL_PREFERRED_* is equivalent to MPOL_LOCAL if no node is set if (actual_mode == MPOL_PREFERRED || actual_mode == MPOL_PREFERRED_MANY) { - ASSERT_EQ(params.numa_list_len, 0); + ASSERT_EQ(numa_list_len, 0); } else { ASSERT_EQ(actual_mode, MPOL_LOCAL); } } - free(params.numa_list); + free(numa_list); } diff --git a/test/provider_os_memory_multiple_numa_nodes.cpp b/test/provider_os_memory_multiple_numa_nodes.cpp index 0f7f0fb2e..e493a427c 100644 --- a/test/provider_os_memory_multiple_numa_nodes.cpp +++ b/test/provider_os_memory_multiple_numa_nodes.cpp @@ -3,7 +3,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception #include "base.hpp" -#include "numa_helpers.h" +#include "numa_helpers.hpp" #include "test_helpers.h" #include @@ -14,12 +14,10 @@ #include -static umf_os_memory_provider_params_t UMF_OS_MEMORY_PROVIDER_PARAMS_TEST = - umfOsMemoryProviderParamsDefault(); - std::vector get_available_numa_nodes() { - UT_ASSERTne(numa_available(), -1); - UT_ASSERTne(numa_all_nodes_ptr, nullptr); + if (numa_available() == -1 || numa_all_nodes_ptr == nullptr) { + return std::vector(); + } std::vector available_numa_nodes; // Get all available NUMA nodes numbers. @@ -41,7 +39,14 @@ std::vector get_available_cpus() { CPU_ZERO(mask); int ret = sched_getaffinity(0, sizeof(cpu_set_t), mask); - UT_ASSERTeq(ret, 0); + + if (ret != 0) { + available_cpus.emplace_back(-1); + CPU_FREE(mask); + + return available_cpus; + } + // Get all available cpus. 
printf("All CPUs: "); for (size_t i = 0; i < CPU_SETSIZE; ++i) { @@ -57,9 +62,6 @@ std::vector get_available_cpus() { } void set_all_available_nodemask_bits(bitmask *nodemask) { - UT_ASSERTne(numa_available(), -1); - UT_ASSERTne(numa_all_nodes_ptr, nullptr); - numa_bitmask_clearall(nodemask); // Set all available NUMA nodes numbers. @@ -80,23 +82,26 @@ struct testNuma : testing::Test { ASSERT_NE(nodemask, nullptr); } - void - initOsProvider(umf_os_memory_provider_params_t os_memory_provider_params) { + void initOsProvider( + umf_os_memory_provider_params_handle_t os_memory_provider_params) { umf_result_t umf_result; umf_result = umfMemoryProviderCreate(umfOsMemoryProviderOps(), - &os_memory_provider_params, + os_memory_provider_params, &os_memory_provider); ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); ASSERT_NE(os_memory_provider, nullptr); } - struct bitmask *retrieve_nodemask(void *addr) { - struct bitmask *retrieved_nodemask = numa_allocate_nodemask(); - UT_ASSERTne(nodemask, nullptr); - int ret = get_mempolicy(nullptr, retrieved_nodemask->maskp, + void retrieve_nodemask(void *addr, bitmask **retrieved_nodemask) { + *retrieved_nodemask = numa_allocate_nodemask(); + + ASSERT_NE(nodemask, nullptr); + ASSERT_NE(*retrieved_nodemask, nullptr); + + int ret = get_mempolicy(nullptr, (*retrieved_nodemask)->maskp, nodemask->size, addr, MPOL_F_ADDR); - UT_ASSERTeq(ret, 0); - return retrieved_nodemask; + + ASSERT_EQ(ret, 0); } void TearDown() override { @@ -122,29 +127,52 @@ struct testNuma : testing::Test { }; struct testNumaOnEachNode : testNuma, testing::WithParamInterface {}; -struct testNumaOnEachCpu : testNuma, testing::WithParamInterface {}; + +/* + - In case of the lack of support for NUMA on the system + get_available_numa_nodes() returns an empty vector + - Then in INSTANTIATE_TEST_SUITE_P an empty container is passed as the 3rd arg + (param_generator) + - Therefore INSTANTIATE_TEST_SUITE_P expands to nothing, which causes the test + to fail in the test suite 
GoogleTestVerification +- GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(testNumaOnEachNode) allows the +test suite testNumaOnEachNode to be uninstantiated, suppressing +the test failure +- Additionally, the fixture testNumaOnEachNode uses SetUp from testNuma before +running every test, thus the test is eventually skipped when the lack of NUMA +support is determined by numa_available() +- (Therefore probably a vector with dummy values could be returned instead of +using the macro) +*/ +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(testNumaOnEachNode); INSTANTIATE_TEST_SUITE_P(testNumaNodesAllocations, testNumaOnEachNode, ::testing::ValuesIn(get_available_numa_nodes())); -INSTANTIATE_TEST_SUITE_P(testNumaNodesAllocationsAllCpus, testNumaOnEachCpu, - ::testing::ValuesIn(get_available_cpus())); - // Test for allocations on numa nodes. It will be executed on each of // the available numa nodes. TEST_P(testNumaOnEachNode, checkNumaNodesAllocations) { unsigned numa_node_number = GetParam(); ASSERT_GE(numa_node_number, 0); + umf_result_t umf_result; + + umf_os_memory_provider_params_handle_t os_memory_provider_params = nullptr; - umf_os_memory_provider_params_t os_memory_provider_params = - UMF_OS_MEMORY_PROVIDER_PARAMS_TEST; + umf_result = umfOsMemoryProviderParamsCreate(&os_memory_provider_params); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + umf_result = umfOsMemoryProviderParamsSetNumaList(os_memory_provider_params, + &numa_node_number, 1); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + umf_result = umfOsMemoryProviderParamsSetNumaMode(os_memory_provider_params, + UMF_NUMA_MODE_BIND); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - os_memory_provider_params.numa_list = &numa_node_number; - os_memory_provider_params.numa_list_len = 1; - os_memory_provider_params.numa_mode = UMF_NUMA_MODE_BIND; initOsProvider(os_memory_provider_params); - umf_result_t umf_result; + umfOsMemoryProviderParamsDestroy(os_memory_provider_params); + umf_result = 
umfMemoryProviderAlloc(os_memory_provider, alloc_size, 0, &ptr); ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); @@ -152,24 +180,32 @@ TEST_P(testNumaOnEachNode, checkNumaNodesAllocations) { // 'ptr' must point to an initialized value before retrieving its numa node memset(ptr, 0xFF, alloc_size); - int retrieved_numa_node_number = getNumaNodeByPtr(ptr); - EXPECT_EQ(retrieved_numa_node_number, numa_node_number); + EXPECT_NODE_EQ(ptr, numa_node_number); } // Test for allocations on numa nodes with mode preferred. It will be executed // on each of the available numa nodes. TEST_P(testNumaOnEachNode, checkModePreferred) { unsigned numa_node_number = GetParam(); - umf_os_memory_provider_params_t os_memory_provider_params = - UMF_OS_MEMORY_PROVIDER_PARAMS_TEST; + umf_result_t umf_result; + umf_os_memory_provider_params_handle_t os_memory_provider_params = nullptr; + + umf_result = umfOsMemoryProviderParamsCreate(&os_memory_provider_params); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - os_memory_provider_params.numa_list = &numa_node_number; + umf_result = umfOsMemoryProviderParamsSetNumaList(os_memory_provider_params, + &numa_node_number, 1); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); numa_bitmask_setbit(nodemask, numa_node_number); - os_memory_provider_params.numa_list_len = 1; - os_memory_provider_params.numa_mode = UMF_NUMA_MODE_PREFERRED; + + umf_result = umfOsMemoryProviderParamsSetNumaMode(os_memory_provider_params, + UMF_NUMA_MODE_PREFERRED); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + initOsProvider(os_memory_provider_params); - umf_result_t umf_result; + umfOsMemoryProviderParamsDestroy(os_memory_provider_params); + umf_result = umfMemoryProviderAlloc(os_memory_provider, alloc_size, 0, &ptr); ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); @@ -177,8 +213,7 @@ TEST_P(testNumaOnEachNode, checkModePreferred) { // 'ptr' must point to an initialized value before retrieving its numa node memset(ptr, 0xFF, alloc_size); - int retrieved_numa_node_number = 
getNumaNodeByPtr(ptr); - EXPECT_EQ(retrieved_numa_node_number, numa_node_number); + EXPECT_NODE_EQ(ptr, numa_node_number); } // Test for allocation on numa node with default mode enabled. @@ -187,14 +222,19 @@ TEST_P(testNumaOnEachNode, checkModePreferred) { TEST_P(testNumaOnEachNode, checkModeDefaultSetMempolicy) { unsigned numa_node_number = GetParam(); numa_bitmask_setbit(nodemask, numa_node_number); - umf_os_memory_provider_params_t os_memory_provider_params = - UMF_OS_MEMORY_PROVIDER_PARAMS_TEST; + umf_result_t umf_result; + umf_os_memory_provider_params_handle_t os_memory_provider_params = nullptr; + + umf_result = umfOsMemoryProviderParamsCreate(&os_memory_provider_params); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + initOsProvider(os_memory_provider_params); + umfOsMemoryProviderParamsDestroy(os_memory_provider_params); + long ret = set_mempolicy(MPOL_BIND, nodemask->maskp, nodemask->size); ASSERT_EQ(ret, 0); - umf_result_t umf_result; umf_result = umfMemoryProviderAlloc(os_memory_provider, alloc_size, 0, &ptr); ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); @@ -202,8 +242,7 @@ TEST_P(testNumaOnEachNode, checkModeDefaultSetMempolicy) { // 'ptr' must point to an initialized value before retrieving its numa node memset(ptr, 0xFF, alloc_size); - int retrieved_numa_node_number = getNumaNodeByPtr(ptr); - EXPECT_EQ(retrieved_numa_node_number, numa_node_number); + EXPECT_NODE_EQ(ptr, numa_node_number); } // Test for allocations on a single numa node with interleave mode enabled. 
@@ -213,15 +252,24 @@ TEST_P(testNumaOnEachNode, checkModeInterleaveSingleNode) { constexpr int pages_num = 1024; size_t page_size = sysconf(_SC_PAGE_SIZE); - umf_os_memory_provider_params_t os_memory_provider_params = - UMF_OS_MEMORY_PROVIDER_PARAMS_TEST; + umf_result_t umf_result; + umf_os_memory_provider_params_handle_t os_memory_provider_params = nullptr; + + umf_result = umfOsMemoryProviderParamsCreate(&os_memory_provider_params); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + umf_result = umfOsMemoryProviderParamsSetNumaList(os_memory_provider_params, + &numa_node_number, 1); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + umf_result = umfOsMemoryProviderParamsSetNumaMode(os_memory_provider_params, + UMF_NUMA_MODE_INTERLEAVE); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - os_memory_provider_params.numa_list = &numa_node_number; - os_memory_provider_params.numa_list_len = 1; - os_memory_provider_params.numa_mode = UMF_NUMA_MODE_INTERLEAVE; initOsProvider(os_memory_provider_params); - umf_result_t umf_result; + umfOsMemoryProviderParamsDestroy(os_memory_provider_params); + umf_result = umfMemoryProviderAlloc(os_memory_provider, pages_num * page_size, 0, &ptr); ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); @@ -229,10 +277,24 @@ TEST_P(testNumaOnEachNode, checkModeInterleaveSingleNode) { // 'ptr' must point to an initialized value before retrieving its numa node memset(ptr, 0xFF, pages_num * page_size); - int retrieved_numa_node_number = getNumaNodeByPtr(ptr); - EXPECT_EQ(retrieved_numa_node_number, numa_node_number); + EXPECT_NODE_EQ(ptr, numa_node_number); } +struct testNumaOnEachCpu : testNuma, testing::WithParamInterface { + void SetUp() override { + ::testNuma::SetUp(); + + int cpuNumber = this->GetParam(); + + if (cpuNumber < 0) { + GTEST_FAIL() << "get_available_cpus() error"; + } + } +}; + +INSTANTIATE_TEST_SUITE_P(testNumaNodesAllocationsAllCpus, testNumaOnEachCpu, + ::testing::ValuesIn(get_available_cpus())); + // Test for allocation on numa node with 
mode preferred and an empty nodeset. // For the empty nodeset the memory is allocated on the node of the CPU that // triggered the allocation. It will be executed on each available CPU. @@ -247,14 +309,22 @@ TEST_P(testNumaOnEachCpu, checkModePreferredEmptyNodeset) { int ret = sched_setaffinity(0, sizeof(cpu_set_t), mask); CPU_FREE(mask); - UT_ASSERTeq(ret, 0); + ASSERT_EQ(ret, 0); + + umf_result_t umf_result; + umf_os_memory_provider_params_handle_t os_memory_provider_params = nullptr; + + umf_result = umfOsMemoryProviderParamsCreate(&os_memory_provider_params); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + umf_result = umfOsMemoryProviderParamsSetNumaMode(os_memory_provider_params, + UMF_NUMA_MODE_PREFERRED); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - umf_os_memory_provider_params_t os_memory_provider_params = - UMF_OS_MEMORY_PROVIDER_PARAMS_TEST; - os_memory_provider_params.numa_mode = UMF_NUMA_MODE_PREFERRED; initOsProvider(os_memory_provider_params); - umf_result_t umf_result; + umfOsMemoryProviderParamsDestroy(os_memory_provider_params); + umf_result = umfMemoryProviderAlloc(os_memory_provider, alloc_size, 0, &ptr); ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); @@ -262,15 +332,14 @@ TEST_P(testNumaOnEachCpu, checkModePreferredEmptyNodeset) { // Verify we're on the expected CPU int cpu_check = sched_getcpu(); - UT_ASSERTeq(cpu, cpu_check); + ASSERT_EQ(cpu, cpu_check); int numa_node_number = numa_node_of_cpu(cpu); printf("Got CPU: %d, got numa node: %d\n", cpu, numa_node_number); // 'ptr' must point to an initialized value before retrieving its numa node memset(ptr, 0xFF, alloc_size); - int retrieved_numa_node_number = getNumaNodeByPtr(ptr); - EXPECT_EQ(retrieved_numa_node_number, numa_node_number); + EXPECT_NODE_EQ(ptr, numa_node_number); } // Test for allocation on numa node with local mode enabled. 
The memory is @@ -285,14 +354,22 @@ TEST_P(testNumaOnEachCpu, checkModeLocal) { int ret = sched_setaffinity(0, sizeof(cpu_set_t), mask); CPU_FREE(mask); - UT_ASSERTeq(ret, 0); + ASSERT_EQ(ret, 0); + + umf_result_t umf_result; + umf_os_memory_provider_params_handle_t os_memory_provider_params = nullptr; + + umf_result = umfOsMemoryProviderParamsCreate(&os_memory_provider_params); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + umf_result = umfOsMemoryProviderParamsSetNumaMode(os_memory_provider_params, + UMF_NUMA_MODE_LOCAL); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - umf_os_memory_provider_params_t os_memory_provider_params = - UMF_OS_MEMORY_PROVIDER_PARAMS_TEST; - os_memory_provider_params.numa_mode = UMF_NUMA_MODE_LOCAL; initOsProvider(os_memory_provider_params); - umf_result_t umf_result; + umfOsMemoryProviderParamsDestroy(os_memory_provider_params); + umf_result = umfMemoryProviderAlloc(os_memory_provider, alloc_size, 0, &ptr); ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); @@ -300,15 +377,14 @@ TEST_P(testNumaOnEachCpu, checkModeLocal) { // Verify we're on the expected CPU int cpu_check = sched_getcpu(); - UT_ASSERTeq(cpu, cpu_check); + ASSERT_EQ(cpu, cpu_check); int numa_node_number = numa_node_of_cpu(cpu); printf("Got CPU: %d, got numa node: %d\n", cpu, numa_node_number); // 'ptr' must point to an initialized value before retrieving its numa node memset(ptr, 0xFF, alloc_size); - int retrieved_numa_node_number = getNumaNodeByPtr(ptr); - EXPECT_EQ(retrieved_numa_node_number, numa_node_number); + EXPECT_NODE_EQ(ptr, numa_node_number); } // Test for allocation on numa node with default mode enabled. @@ -316,11 +392,16 @@ TEST_P(testNumaOnEachCpu, checkModeLocal) { // default policy - it allocates pages on the node of the CPU that triggered // the allocation. 
TEST_F(testNuma, checkModeDefault) { - umf_os_memory_provider_params_t os_memory_provider_params = - UMF_OS_MEMORY_PROVIDER_PARAMS_TEST; + umf_result_t umf_result; + umf_os_memory_provider_params_handle_t os_memory_provider_params = nullptr; + + umf_result = umfOsMemoryProviderParamsCreate(&os_memory_provider_params); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + initOsProvider(os_memory_provider_params); - umf_result_t umf_result; + umfOsMemoryProviderParamsDestroy(os_memory_provider_params); + umf_result = umfMemoryProviderAlloc(os_memory_provider, alloc_size, 0, &ptr); ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); @@ -332,8 +413,7 @@ TEST_F(testNuma, checkModeDefault) { // 'ptr' must point to an initialized value before retrieving its numa node memset(ptr, 0xFF, alloc_size); - int retrieved_numa_node_number = getNumaNodeByPtr(ptr); - EXPECT_EQ(retrieved_numa_node_number, numa_node_number); + EXPECT_NODE_EQ(ptr, numa_node_number); } // Test for allocations on numa nodes with interleave mode enabled. 
@@ -341,18 +421,27 @@ TEST_F(testNuma, checkModeDefault) { TEST_F(testNuma, checkModeInterleave) { constexpr int pages_num = 1024; size_t page_size = sysconf(_SC_PAGE_SIZE); - umf_os_memory_provider_params_t os_memory_provider_params = - UMF_OS_MEMORY_PROVIDER_PARAMS_TEST; + umf_result_t umf_result; + umf_os_memory_provider_params_handle_t os_memory_provider_params = nullptr; + + umf_result = umfOsMemoryProviderParamsCreate(&os_memory_provider_params); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); std::vector numa_nodes = get_available_numa_nodes(); set_all_available_nodemask_bits(nodemask); - os_memory_provider_params.numa_list = numa_nodes.data(); - os_memory_provider_params.numa_list_len = numa_nodes.size(); - os_memory_provider_params.numa_mode = UMF_NUMA_MODE_INTERLEAVE; + umf_result = umfOsMemoryProviderParamsSetNumaList( + os_memory_provider_params, numa_nodes.data(), numa_nodes.size()); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + umf_result = umfOsMemoryProviderParamsSetNumaMode(os_memory_provider_params, + UMF_NUMA_MODE_INTERLEAVE); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + initOsProvider(os_memory_provider_params); - umf_result_t umf_result; + umfOsMemoryProviderParamsDestroy(os_memory_provider_params); + umf_result = umfMemoryProviderAlloc(os_memory_provider, pages_num * page_size, 0, &ptr); ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); @@ -363,14 +452,31 @@ TEST_F(testNuma, checkModeInterleave) { // Test where each page will be allocated. // Get the first numa node for ptr; Each next page is expected to be on next nodes. 
- size_t index = getNumaNodeByPtr((char *)ptr); + int node = -1; + ASSERT_NO_FATAL_FAILURE(getNumaNodeByPtr(ptr, &node)); + ASSERT_GE(node, 0); + int index = -1; + for (size_t i = 0; i < numa_nodes.size(); i++) { + if (numa_nodes[i] == (unsigned)node) { + index = i; + break; + } + } + ASSERT_GE(index, 0); + ASSERT_LT(index, numa_nodes.size()); + for (size_t i = 1; i < (size_t)pages_num; i++) { index = (index + 1) % numa_nodes.size(); - ASSERT_EQ(numa_nodes[index], - getNumaNodeByPtr((char *)ptr + page_size * i)); + EXPECT_NODE_EQ((char *)ptr + page_size * i, numa_nodes[index]); + } + + bitmask *retrieved_nodemask = nullptr; + retrieve_nodemask(ptr, &retrieved_nodemask); + + if (IS_SKIPPED_OR_FAILED()) { + return; } - bitmask *retrieved_nodemask = retrieve_nodemask(ptr); int ret = numa_bitmask_equal(retrieved_nodemask, nodemask); numa_bitmask_free(retrieved_nodemask); @@ -385,20 +491,32 @@ TEST_F(testNuma, checkModeInterleaveCustomPartSize) { ASSERT_GT(_page_size, 0); size_t page_size = _page_size; size_t part_size = page_size * 100; - umf_os_memory_provider_params_t os_memory_provider_params = - UMF_OS_MEMORY_PROVIDER_PARAMS_TEST; + umf_result_t umf_result; + umf_os_memory_provider_params_handle_t os_memory_provider_params = nullptr; + + umf_result = umfOsMemoryProviderParamsCreate(&os_memory_provider_params); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); std::vector numa_nodes = get_available_numa_nodes(); - os_memory_provider_params.numa_list = numa_nodes.data(); - os_memory_provider_params.numa_list_len = numa_nodes.size(); - os_memory_provider_params.numa_mode = UMF_NUMA_MODE_INTERLEAVE; + umf_result = umfOsMemoryProviderParamsSetNumaList( + os_memory_provider_params, numa_nodes.data(), numa_nodes.size()); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + umf_result = umfOsMemoryProviderParamsSetNumaMode(os_memory_provider_params, + UMF_NUMA_MODE_INTERLEAVE); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + // part size do not need to be multiple of page size - 
os_memory_provider_params.part_size = part_size - 1; + umf_result = umfOsMemoryProviderParamsSetPartSize(os_memory_provider_params, + part_size - 1); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + initOsProvider(os_memory_provider_params); + umfOsMemoryProviderParamsDestroy(os_memory_provider_params); + size_t size = part_num * part_size; - umf_result_t umf_result; umf_result = umfMemoryProviderAlloc(os_memory_provider, size, 0, &ptr); ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); ASSERT_NE(ptr, nullptr); @@ -407,13 +525,22 @@ TEST_F(testNuma, checkModeInterleaveCustomPartSize) { memset(ptr, 0xFF, size); // Test where each page will be allocated. // Get the first numa node for ptr; Each next part is expected to be on next nodes. - size_t index = getNumaNodeByPtr((char *)ptr); + int node = -1; + ASSERT_NO_FATAL_FAILURE(getNumaNodeByPtr(ptr, &node)); + ASSERT_GE(node, 0); + int index = -1; + for (size_t i = 0; i < numa_nodes.size(); i++) { + if (numa_nodes[i] == (unsigned)node) { + index = i; + break; + } + } + ASSERT_GE(index, 0); + ASSERT_LT(index, numa_nodes.size()); + for (size_t i = 0; i < (size_t)part_num; i++) { for (size_t j = 0; j < part_size; j += page_size) { - EXPECT_EQ(numa_nodes[index], - getNumaNodeByPtr((char *)ptr + part_size * i + j)) - << "for ptr " << ptr << " + " << part_size << " * " << i - << " + " << j; + ASSERT_NODE_EQ((char *)ptr + part_size * i + j, numa_nodes[index]); } index = (index + 1) % numa_nodes.size(); } @@ -425,7 +552,7 @@ TEST_F(testNuma, checkModeInterleaveCustomPartSize) { ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); ASSERT_NE(ptr, nullptr); memset(ptr, 0xFF, size); - EXPECT_EQ(numa_nodes[index], getNumaNodeByPtr(ptr)); + EXPECT_NODE_EQ(ptr, numa_nodes[index]); umfMemoryProviderFree(os_memory_provider, ptr, size); } @@ -545,9 +672,12 @@ TEST_P(testNumaSplit, checkModeSplit) { ASSERT_GT(_page_size, 0); size_t page_size = _page_size; auto [required_numa_nodes, pages, in, out] = param; + umf_result_t umf_result; + + 
umf_os_memory_provider_params_handle_t os_memory_provider_params = nullptr; - umf_os_memory_provider_params_t os_memory_provider_params = - UMF_OS_MEMORY_PROVIDER_PARAMS_TEST; + umf_result = umfOsMemoryProviderParamsCreate(&os_memory_provider_params); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); std::vector numa_nodes = get_available_numa_nodes(); @@ -566,16 +696,24 @@ TEST_P(testNumaSplit, checkModeSplit) { numa_nodes.begin() + required_numa_nodes, g); } - os_memory_provider_params.numa_list = numa_nodes.data(); - os_memory_provider_params.numa_list_len = required_numa_nodes; - os_memory_provider_params.numa_mode = UMF_NUMA_MODE_SPLIT; + umf_result = umfOsMemoryProviderParamsSetNumaList( + os_memory_provider_params, numa_nodes.data(), required_numa_nodes); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + umf_result = umfOsMemoryProviderParamsSetNumaMode(os_memory_provider_params, + UMF_NUMA_MODE_SPLIT); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + umf_result = umfOsMemoryProviderParamsSetPartitions( + os_memory_provider_params, in.data(), in.size()); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - os_memory_provider_params.partitions = in.data(); - os_memory_provider_params.partitions_len = in.size(); initOsProvider(os_memory_provider_params); + umfOsMemoryProviderParamsDestroy(os_memory_provider_params); + size_t size = page_size * pages; - umf_result_t umf_result; + umf_result = umfMemoryProviderAlloc(os_memory_provider, size, 0, &ptr); ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); ASSERT_NE(ptr, nullptr); @@ -609,17 +747,26 @@ TEST_P(testNumaSplit, checkModeSplit) { // Test for allocations on all numa nodes with BIND mode. // According to mbind it should go to the closest node. 
TEST_F(testNuma, checkModeBindOnAllNodes) { - umf_os_memory_provider_params_t os_memory_provider_params = - UMF_OS_MEMORY_PROVIDER_PARAMS_TEST; + umf_result_t umf_result; + umf_os_memory_provider_params_handle_t os_memory_provider_params = nullptr; + + umf_result = umfOsMemoryProviderParamsCreate(&os_memory_provider_params); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); std::vector numa_nodes = get_available_numa_nodes(); - os_memory_provider_params.numa_list = numa_nodes.data(); - os_memory_provider_params.numa_list_len = numa_nodes.size(); - os_memory_provider_params.numa_mode = UMF_NUMA_MODE_BIND; + umf_result = umfOsMemoryProviderParamsSetNumaList( + os_memory_provider_params, numa_nodes.data(), numa_nodes.size()); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + umf_result = umfOsMemoryProviderParamsSetNumaMode(os_memory_provider_params, + UMF_NUMA_MODE_BIND); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + initOsProvider(os_memory_provider_params); - umf_result_t umf_result; + umfOsMemoryProviderParamsDestroy(os_memory_provider_params); + umf_result = umfMemoryProviderAlloc(os_memory_provider, alloc_size, 0, &ptr); ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); @@ -627,7 +774,10 @@ TEST_F(testNuma, checkModeBindOnAllNodes) { // 'ptr' must point to an initialized value before retrieving its numa node memset(ptr, 0xFF, alloc_size); - unsigned retrieved_numa_node_number = (unsigned)getNumaNodeByPtr(ptr); + + int node = -1; + ASSERT_NO_FATAL_FAILURE(getNumaNodeByPtr(ptr, &node)); + unsigned retrieved_numa_node_number = (unsigned)node; int read_cpu = sched_getcpu(); int read_numa_node = numa_node_of_cpu(read_cpu); @@ -650,19 +800,28 @@ TEST_F(testNuma, checkModeBindOnAllNodes) { // Local mode enabled when numa_list is set. // For the local mode the nodeset must be empty. 
TEST_F(testNuma, checkModeLocalIllegalArgSet) { - umf_os_memory_provider_params_t os_memory_provider_params = - UMF_OS_MEMORY_PROVIDER_PARAMS_TEST; + umf_result_t umf_result; + umf_os_memory_provider_params_handle_t os_memory_provider_params = nullptr; + + umf_result = umfOsMemoryProviderParamsCreate(&os_memory_provider_params); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); std::vector numa_nodes = get_available_numa_nodes(); - os_memory_provider_params.numa_list = numa_nodes.data(); - os_memory_provider_params.numa_list_len = numa_nodes.size(); - os_memory_provider_params.numa_mode = UMF_NUMA_MODE_LOCAL; + umf_result = umfOsMemoryProviderParamsSetNumaList( + os_memory_provider_params, numa_nodes.data(), numa_nodes.size()); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + umf_result = umfOsMemoryProviderParamsSetNumaMode(os_memory_provider_params, + UMF_NUMA_MODE_LOCAL); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - umf_result_t umf_result; umf_result = umfMemoryProviderCreate(umfOsMemoryProviderOps(), &os_memory_provider_params, &os_memory_provider); + + umfOsMemoryProviderParamsDestroy(os_memory_provider_params); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); ASSERT_EQ(os_memory_provider, nullptr); } @@ -670,18 +829,24 @@ TEST_F(testNuma, checkModeLocalIllegalArgSet) { // Default mode enabled when numa_list is set. // For the default mode the nodeset must be empty. 
TEST_F(testNuma, checkModeDefaultIllegalArgSet) { - umf_os_memory_provider_params_t os_memory_provider_params = - UMF_OS_MEMORY_PROVIDER_PARAMS_TEST; + umf_result_t umf_result; + umf_os_memory_provider_params_handle_t os_memory_provider_params = nullptr; + + umf_result = umfOsMemoryProviderParamsCreate(&os_memory_provider_params); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); std::vector numa_nodes = get_available_numa_nodes(); - os_memory_provider_params.numa_list = numa_nodes.data(); - os_memory_provider_params.numa_list_len = numa_nodes.size(); + umf_result = umfOsMemoryProviderParamsSetNumaList( + os_memory_provider_params, numa_nodes.data(), numa_nodes.size()); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - umf_result_t umf_result; umf_result = umfMemoryProviderCreate(umfOsMemoryProviderOps(), &os_memory_provider_params, &os_memory_provider); + + umfOsMemoryProviderParamsDestroy(os_memory_provider_params); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); ASSERT_EQ(os_memory_provider, nullptr); } @@ -689,14 +854,22 @@ TEST_F(testNuma, checkModeDefaultIllegalArgSet) { // Bind mode enabled when numa_list is not set. // For the bind mode the nodeset must be non-empty. 
TEST_F(testNuma, checkModeBindIllegalArgSet) { - umf_os_memory_provider_params_t os_memory_provider_params = - UMF_OS_MEMORY_PROVIDER_PARAMS_TEST; - os_memory_provider_params.numa_mode = UMF_NUMA_MODE_BIND; - umf_result_t umf_result; + umf_os_memory_provider_params_handle_t os_memory_provider_params = nullptr; + + umf_result = umfOsMemoryProviderParamsCreate(&os_memory_provider_params); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + umf_result = umfOsMemoryProviderParamsSetNumaMode(os_memory_provider_params, + UMF_NUMA_MODE_BIND); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + umf_result = umfMemoryProviderCreate(umfOsMemoryProviderOps(), &os_memory_provider_params, &os_memory_provider); + + umfOsMemoryProviderParamsDestroy(os_memory_provider_params); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); ASSERT_EQ(os_memory_provider, nullptr); } @@ -704,14 +877,53 @@ TEST_F(testNuma, checkModeBindIllegalArgSet) { // Interleave mode enabled numa_list is not set. // For the interleave mode the nodeset must be non-empty. 
TEST_F(testNuma, checkModeInterleaveIllegalArgSet) { - umf_os_memory_provider_params_t os_memory_provider_params = - UMF_OS_MEMORY_PROVIDER_PARAMS_TEST; - os_memory_provider_params.numa_mode = UMF_NUMA_MODE_INTERLEAVE; - umf_result_t umf_result; + umf_os_memory_provider_params_handle_t os_memory_provider_params = nullptr; + + umf_result = umfOsMemoryProviderParamsCreate(&os_memory_provider_params); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + umf_result = umfOsMemoryProviderParamsSetNumaMode(os_memory_provider_params, + UMF_NUMA_MODE_INTERLEAVE); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + umf_result = umfMemoryProviderCreate(umfOsMemoryProviderOps(), &os_memory_provider_params, &os_memory_provider); + + umfOsMemoryProviderParamsDestroy(os_memory_provider_params); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); ASSERT_EQ(os_memory_provider, nullptr); } + +// Interleave mode set with SIZE_MAX part size +TEST_F(testNuma, maxPartSize) { + std::vector numa_nodes = get_available_numa_nodes(); + + umf_os_memory_provider_params_handle_t os_memory_provider_params = nullptr; + + auto res = umfOsMemoryProviderParamsCreate(&os_memory_provider_params); + ASSERT_EQ(res, UMF_RESULT_SUCCESS); + + res = umfOsMemoryProviderParamsSetNumaMode(os_memory_provider_params, + UMF_NUMA_MODE_INTERLEAVE); + ASSERT_EQ(res, UMF_RESULT_SUCCESS); + + res = umfOsMemoryProviderParamsSetPartSize(os_memory_provider_params, + SIZE_MAX); + ASSERT_EQ(res, UMF_RESULT_SUCCESS); + + res = umfOsMemoryProviderParamsSetNumaList( + os_memory_provider_params, numa_nodes.data(), numa_nodes.size()); + ASSERT_EQ(res, UMF_RESULT_SUCCESS); + + res = umfMemoryProviderCreate(umfOsMemoryProviderOps(), + &os_memory_provider_params, + &os_memory_provider); + + umfOsMemoryProviderParamsDestroy(os_memory_provider_params); + + ASSERT_EQ(res, UMF_RESULT_ERROR_INVALID_ARGUMENT); + ASSERT_EQ(os_memory_provider, nullptr); +} diff --git a/test/provider_os_memory_not_impl.cpp 
b/test/provider_os_memory_not_impl.cpp new file mode 100644 index 000000000..13c123fb7 --- /dev/null +++ b/test/provider_os_memory_not_impl.cpp @@ -0,0 +1,46 @@ +// Copyright (C) 2024 Intel Corporation +// Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include "base.hpp" + +#include + +using umf_test::test; + +TEST_F(test, os_provider_not_implemented) { + umf_os_memory_provider_params_handle_t params = nullptr; + umf_result_t umf_result = umfOsMemoryProviderParamsCreate(¶ms); + EXPECT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); + EXPECT_EQ(params, nullptr); + + umf_result = umfOsMemoryProviderParamsDestroy(params); + EXPECT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); + + umf_result = umfOsMemoryProviderParamsSetProtection(params, 0); + EXPECT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); + + umf_result = + umfOsMemoryProviderParamsSetVisibility(params, UMF_MEM_MAP_PRIVATE); + EXPECT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); + + umf_result = umfOsMemoryProviderParamsSetShmName(params, "shm_name"); + EXPECT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); + + umf_result = umfOsMemoryProviderParamsSetNumaList(params, nullptr, 0); + EXPECT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); + + umf_result = + umfOsMemoryProviderParamsSetNumaMode(params, UMF_NUMA_MODE_DEFAULT); + EXPECT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); + + umf_result = umfOsMemoryProviderParamsSetPartSize(params, 4096); + EXPECT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); + + umf_numa_split_partition_t partitions[1]; + umf_result = umfOsMemoryProviderParamsSetPartitions(params, partitions, 1); + EXPECT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); + + umf_memory_provider_ops_t *ops = umfOsMemoryProviderOps(); + EXPECT_EQ(ops, nullptr); +} diff --git a/test/providers/cuda_helpers.cpp b/test/providers/cuda_helpers.cpp new file mode 100644 index 000000000..9c41d9382 --- /dev/null +++ 
b/test/providers/cuda_helpers.cpp @@ -0,0 +1,456 @@ +/* + * Copyright (C) 2024 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + */ + +#include +#include +#include + +#include "cuda_helpers.h" +#include "utils_concurrency.h" +#include "utils_load_library.h" + +struct libcu_ops { + CUresult (*cuInit)(unsigned int flags); + CUresult (*cuCtxCreate)(CUcontext *pctx, unsigned int flags, CUdevice dev); + CUresult (*cuCtxDestroy)(CUcontext ctx); + CUresult (*cuCtxGetCurrent)(CUcontext *pctx); + CUresult (*cuCtxSetCurrent)(CUcontext ctx); + CUresult (*cuDeviceGet)(CUdevice *device, int ordinal); + CUresult (*cuMemAlloc)(CUdeviceptr *dptr, size_t size); + CUresult (*cuMemFree)(CUdeviceptr dptr); + CUresult (*cuMemAllocHost)(void **pp, size_t size); + CUresult (*cuMemAllocManaged)(CUdeviceptr *dptr, size_t bytesize, + unsigned int flags); + CUresult (*cuMemFreeHost)(void *p); + CUresult (*cuMemsetD32)(CUdeviceptr dstDevice, unsigned int pattern, + size_t size); + CUresult (*cuMemcpy)(CUdeviceptr dst, CUdeviceptr src, size_t size); + CUresult (*cuPointerGetAttribute)(void *data, CUpointer_attribute attribute, + CUdeviceptr ptr); + CUresult (*cuPointerGetAttributes)(unsigned int numAttributes, + CUpointer_attribute *attributes, + void **data, CUdeviceptr ptr); + CUresult (*cuStreamSynchronize)(CUstream hStream); + CUresult (*cuCtxSynchronize)(void); +} libcu_ops; + +#if USE_DLOPEN +// Generic no-op stub function for all callbacks +template CUresult noop_stub(Args &&...) { + return CUDA_SUCCESS; // Always return CUDA_SUCCESS +} + +struct DlHandleCloser { + void operator()(void *dlHandle) { + if (dlHandle) { + libcu_ops.cuInit = [](auto... args) { return noop_stub(args...); }; + libcu_ops.cuCtxCreate = [](auto... args) { + return noop_stub(args...); + }; + libcu_ops.cuCtxDestroy = [](auto... args) { + return noop_stub(args...); + }; + libcu_ops.cuCtxGetCurrent = [](auto... 
args) { + return noop_stub(args...); + }; + libcu_ops.cuCtxSetCurrent = [](auto... args) { + return noop_stub(args...); + }; + libcu_ops.cuDeviceGet = [](auto... args) { + return noop_stub(args...); + }; + libcu_ops.cuMemAlloc = [](auto... args) { + return noop_stub(args...); + }; + libcu_ops.cuMemFree = [](auto... args) { + return noop_stub(args...); + }; + libcu_ops.cuMemAllocHost = [](auto... args) { + return noop_stub(args...); + }; + libcu_ops.cuMemAllocManaged = [](auto... args) { + return noop_stub(args...); + }; + libcu_ops.cuMemFreeHost = [](auto... args) { + return noop_stub(args...); + }; + libcu_ops.cuMemsetD32 = [](auto... args) { + return noop_stub(args...); + }; + libcu_ops.cuMemcpy = [](auto... args) { + return noop_stub(args...); + }; + libcu_ops.cuPointerGetAttribute = [](auto... args) { + return noop_stub(args...); + }; + libcu_ops.cuPointerGetAttributes = [](auto... args) { + return noop_stub(args...); + }; + libcu_ops.cuStreamSynchronize = [](auto... args) { + return noop_stub(args...); + }; + libcu_ops.cuCtxSynchronize = [](auto... args) { + return noop_stub(args...); + }; + utils_close_library(dlHandle); + } + } +}; + +std::unique_ptr cuDlHandle = nullptr; +int InitCUDAOps() { +#ifdef _WIN32 + const char *lib_name = "nvcuda.dll"; +#else + const char *lib_name = "libcuda.so"; +#endif + // CUDA symbols + // NOTE that we use UMF_UTIL_OPEN_LIBRARY_GLOBAL which add all loaded + // symbols to the global symbol table. 
+ cuDlHandle = std::unique_ptr( + utils_open_library(lib_name, UMF_UTIL_OPEN_LIBRARY_GLOBAL)); + + // NOTE: some symbols defined in the lib have _vX postfixes - this is + // important to load the proper version of functions + *(void **)&libcu_ops.cuInit = + utils_get_symbol_addr(cuDlHandle.get(), "cuInit", lib_name); + if (libcu_ops.cuInit == nullptr) { + fprintf(stderr, "cuInit symbol not found in %s\n", lib_name); + return -1; + } + *(void **)&libcu_ops.cuCtxCreate = + utils_get_symbol_addr(cuDlHandle.get(), "cuCtxCreate_v2", lib_name); + if (libcu_ops.cuCtxCreate == nullptr) { + fprintf(stderr, "cuCtxCreate_v2 symbol not found in %s\n", lib_name); + return -1; + } + *(void **)&libcu_ops.cuCtxDestroy = + utils_get_symbol_addr(cuDlHandle.get(), "cuCtxDestroy_v2", lib_name); + if (libcu_ops.cuCtxDestroy == nullptr) { + fprintf(stderr, "cuCtxDestroy symbol not found in %s\n", lib_name); + return -1; + } + *(void **)&libcu_ops.cuCtxGetCurrent = + utils_get_symbol_addr(cuDlHandle.get(), "cuCtxGetCurrent", lib_name); + if (libcu_ops.cuCtxGetCurrent == nullptr) { + fprintf(stderr, "cuCtxGetCurrent symbol not found in %s\n", lib_name); + return -1; + } + *(void **)&libcu_ops.cuCtxSetCurrent = + utils_get_symbol_addr(cuDlHandle.get(), "cuCtxSetCurrent", lib_name); + if (libcu_ops.cuCtxSetCurrent == nullptr) { + fprintf(stderr, "cuCtxSetCurrent symbol not found in %s\n", lib_name); + return -1; + } + *(void **)&libcu_ops.cuDeviceGet = + utils_get_symbol_addr(cuDlHandle.get(), "cuDeviceGet", lib_name); + if (libcu_ops.cuDeviceGet == nullptr) { + fprintf(stderr, "cuDeviceGet symbol not found in %s\n", lib_name); + return -1; + } + *(void **)&libcu_ops.cuMemAlloc = + utils_get_symbol_addr(cuDlHandle.get(), "cuMemAlloc_v2", lib_name); + if (libcu_ops.cuMemAlloc == nullptr) { + fprintf(stderr, "cuMemAlloc_v2 symbol not found in %s\n", lib_name); + return -1; + } + *(void **)&libcu_ops.cuMemFree = + utils_get_symbol_addr(cuDlHandle.get(), "cuMemFree_v2", lib_name); + if 
(libcu_ops.cuMemFree == nullptr) { + fprintf(stderr, "cuMemFree_v2 symbol not found in %s\n", lib_name); + return -1; + } + *(void **)&libcu_ops.cuMemAllocHost = + utils_get_symbol_addr(cuDlHandle.get(), "cuMemAllocHost_v2", lib_name); + if (libcu_ops.cuMemAllocHost == nullptr) { + fprintf(stderr, "cuMemAllocHost_v2 symbol not found in %s\n", lib_name); + return -1; + } + *(void **)&libcu_ops.cuMemAllocManaged = + utils_get_symbol_addr(cuDlHandle.get(), "cuMemAllocManaged", lib_name); + if (libcu_ops.cuMemAllocManaged == nullptr) { + fprintf(stderr, "cuMemAllocManaged symbol not found in %s\n", lib_name); + return -1; + } + *(void **)&libcu_ops.cuMemFreeHost = + utils_get_symbol_addr(cuDlHandle.get(), "cuMemFreeHost", lib_name); + if (libcu_ops.cuMemFreeHost == nullptr) { + fprintf(stderr, "cuMemFreeHost symbol not found in %s\n", lib_name); + return -1; + } + *(void **)&libcu_ops.cuMemsetD32 = + utils_get_symbol_addr(cuDlHandle.get(), "cuMemsetD32_v2", lib_name); + if (libcu_ops.cuMemsetD32 == nullptr) { + fprintf(stderr, "cuMemsetD32_v2 symbol not found in %s\n", lib_name); + return -1; + } + *(void **)&libcu_ops.cuMemcpy = + utils_get_symbol_addr(cuDlHandle.get(), "cuMemcpy", lib_name); + if (libcu_ops.cuMemcpy == nullptr) { + fprintf(stderr, "cuMemcpy symbol not found in %s\n", lib_name); + return -1; + } + *(void **)&libcu_ops.cuPointerGetAttribute = utils_get_symbol_addr( + cuDlHandle.get(), "cuPointerGetAttribute", lib_name); + if (libcu_ops.cuPointerGetAttribute == nullptr) { + fprintf(stderr, "cuPointerGetAttribute symbol not found in %s\n", + lib_name); + return -1; + } + *(void **)&libcu_ops.cuPointerGetAttributes = utils_get_symbol_addr( + cuDlHandle.get(), "cuPointerGetAttributes", lib_name); + if (libcu_ops.cuPointerGetAttributes == nullptr) { + fprintf(stderr, "cuPointerGetAttributes symbol not found in %s\n", + lib_name); + return -1; + } + *(void **)&libcu_ops.cuStreamSynchronize = utils_get_symbol_addr( + cuDlHandle.get(), "cuStreamSynchronize", 
lib_name); + if (libcu_ops.cuStreamSynchronize == nullptr) { + fprintf(stderr, "cuStreamSynchronize symbol not found in %s\n", + lib_name); + return -1; + } + *(void **)&libcu_ops.cuCtxSynchronize = + utils_get_symbol_addr(cuDlHandle.get(), "cuCtxSynchronize", lib_name); + if (libcu_ops.cuCtxSynchronize == nullptr) { + fprintf(stderr, "cuCtxSynchronize symbol not found in %s\n", lib_name); + return -1; + } + + return 0; +} + +#else // USE_DLOPEN +int InitCUDAOps() { + // CUDA is linked statically but we prepare ops structure to + // make test code consistent + libcu_ops.cuInit = cuInit; + libcu_ops.cuCtxCreate = cuCtxCreate; + libcu_ops.cuCtxDestroy = cuCtxDestroy; + libcu_ops.cuCtxGetCurrent = cuCtxGetCurrent; + libcu_ops.cuCtxSetCurrent = cuCtxSetCurrent; + libcu_ops.cuDeviceGet = cuDeviceGet; + libcu_ops.cuMemAlloc = cuMemAlloc; + libcu_ops.cuMemAllocHost = cuMemAllocHost; + libcu_ops.cuMemAllocManaged = cuMemAllocManaged; + libcu_ops.cuMemFree = cuMemFree; + libcu_ops.cuMemFreeHost = cuMemFreeHost; + libcu_ops.cuMemsetD32 = cuMemsetD32; + libcu_ops.cuMemcpy = cuMemcpy; + libcu_ops.cuPointerGetAttribute = cuPointerGetAttribute; + libcu_ops.cuPointerGetAttributes = cuPointerGetAttributes; + libcu_ops.cuStreamSynchronize = cuStreamSynchronize; + libcu_ops.cuCtxSynchronize = cuCtxSynchronize; + + return 0; +} +#endif // USE_DLOPEN + +static CUresult set_context(CUcontext required_ctx, CUcontext *restore_ctx) { + CUcontext current_ctx = NULL; + CUresult cu_result = libcu_ops.cuCtxGetCurrent(¤t_ctx); + if (cu_result != CUDA_SUCCESS) { + fprintf(stderr, "cuCtxGetCurrent() failed.\n"); + return cu_result; + } + + *restore_ctx = current_ctx; + if (current_ctx != required_ctx) { + cu_result = libcu_ops.cuCtxSetCurrent(required_ctx); + if (cu_result != CUDA_SUCCESS) { + fprintf(stderr, "cuCtxSetCurrent() failed.\n"); + } + } + + return cu_result; +} + +static int init_cuda_lib(void) { + CUresult result = libcu_ops.cuInit(0); + if (result != CUDA_SUCCESS) { + return -1; + 
} + return 0; +} + +int cuda_fill(CUcontext context, CUdevice device, void *ptr, size_t size, + const void *pattern, size_t pattern_size) { + (void)device; + (void)pattern_size; + + // TODO support patterns > sizeof(unsigned int) + if (pattern_size > sizeof(unsigned int)) { + fprintf(stderr, "patterns > sizeof(unsigned int) are unsupported!\n"); + return -1; + } + + // set required context + CUcontext curr_context = nullptr; + set_context(context, &curr_context); + + int ret = 0; + CUresult res = + libcu_ops.cuMemsetD32((CUdeviceptr)ptr, *(unsigned int *)pattern, + size / sizeof(unsigned int)); + if (res != CUDA_SUCCESS) { + fprintf(stderr, "cuMemsetD32(%llu, %u, %zu) failed!\n", + (CUdeviceptr)ptr, *(unsigned int *)pattern, + size / pattern_size); + return -1; + } + + res = libcu_ops.cuCtxSynchronize(); + if (res != CUDA_SUCCESS) { + fprintf(stderr, "cuCtxSynchronize() failed!\n"); + return -1; + } + + // restore context + set_context(curr_context, &curr_context); + return ret; +} + +int cuda_copy(CUcontext context, CUdevice device, void *dst_ptr, + const void *src_ptr, size_t size) { + (void)device; + + // set required context + CUcontext curr_context = nullptr; + set_context(context, &curr_context); + + int ret = 0; + CUresult res = + libcu_ops.cuMemcpy((CUdeviceptr)dst_ptr, (CUdeviceptr)src_ptr, size); + if (res != CUDA_SUCCESS) { + fprintf(stderr, "cuMemcpy() failed!\n"); + return -1; + } + + res = libcu_ops.cuCtxSynchronize(); + if (res != CUDA_SUCCESS) { + fprintf(stderr, "cuCtxSynchronize() failed!\n"); + return -1; + } + + // restore context + set_context(curr_context, &curr_context); + return ret; +} + +umf_usm_memory_type_t get_mem_type(CUcontext context, void *ptr) { + + (void)context; + + unsigned int managed; + unsigned int type; + void *attrib_vals[2] = {&managed, &type}; + CUpointer_attribute attribs[2] = {CU_POINTER_ATTRIBUTE_IS_MANAGED, + CU_POINTER_ATTRIBUTE_MEMORY_TYPE}; + + CUresult res = libcu_ops.cuPointerGetAttributes(2, attribs, 
attrib_vals, + (CUdeviceptr)ptr); + if (res != CUDA_SUCCESS) { + fprintf(stderr, "cuPointerGetAttributes() failed!\n"); + return UMF_MEMORY_TYPE_UNKNOWN; + } + + if (type == CU_MEMORYTYPE_DEVICE && managed == 0) { + return UMF_MEMORY_TYPE_DEVICE; + } else if (type == CU_MEMORYTYPE_DEVICE && managed == 1) { + return UMF_MEMORY_TYPE_SHARED; + } else if (type == CU_MEMORYTYPE_HOST) { + return UMF_MEMORY_TYPE_HOST; + } + + return UMF_MEMORY_TYPE_UNKNOWN; +} + +CUcontext get_mem_context(void *ptr) { + CUcontext context; + CUresult res = libcu_ops.cuPointerGetAttribute( + &context, CU_POINTER_ATTRIBUTE_CONTEXT, (CUdeviceptr)ptr); + if (res != CUDA_SUCCESS) { + fprintf(stderr, "cuPointerGetAttribute() failed!\n"); + return nullptr; + } + + return context; +} + +CUcontext get_current_context() { + CUcontext context; + CUresult res = libcu_ops.cuCtxGetCurrent(&context); + if (res != CUDA_SUCCESS) { + fprintf(stderr, "cuCtxGetCurrent() failed!\n"); + return nullptr; + } + + return context; +} + +UTIL_ONCE_FLAG cuda_init_flag; +int InitResult; +void init_cuda_once() { + InitResult = InitCUDAOps(); + if (InitResult != 0) { + return; + } + InitResult = init_cuda_lib(); +} + +int init_cuda() { + utils_init_once(&cuda_init_flag, init_cuda_once); + + return InitResult; +} + +int get_cuda_device(CUdevice *device) { + CUdevice cuDevice = -1; + + int ret = init_cuda(); + if (ret != 0) { + fprintf(stderr, "init_cuda() failed!\n"); + return ret; + } + + CUresult res = libcu_ops.cuDeviceGet(&cuDevice, 0); + if (res != CUDA_SUCCESS || cuDevice < 0) { + return -1; + } + + *device = cuDevice; + return 0; +} + +int create_context(CUdevice device, CUcontext *context) { + CUcontext cuContext = nullptr; + + int ret = init_cuda(); + if (ret != 0) { + fprintf(stderr, "init_cuda() failed!\n"); + return ret; + } + + CUresult res = libcu_ops.cuCtxCreate(&cuContext, 0, device); + if (res != CUDA_SUCCESS || cuContext == nullptr) { + return -1; + } + + *context = cuContext; + return 0; +} + +int 
destroy_context(CUcontext context) { + CUresult res = libcu_ops.cuCtxDestroy(context); + if (res != CUDA_SUCCESS) { + fprintf(stderr, "cuCtxDestroy() failed!\n"); + return -1; + } + + return 0; +} diff --git a/test/providers/cuda_helpers.h b/test/providers/cuda_helpers.h new file mode 100644 index 000000000..fc06c1fcf --- /dev/null +++ b/test/providers/cuda_helpers.h @@ -0,0 +1,51 @@ +/* + * Copyright (C) 2024 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + */ + +#ifndef TEST_COMMON_CUDA_HELPERS_HPP +#define TEST_COMMON_CUDA_HELPERS_HPP + +#include + +// disable warning 4201: nonstandard extension used: nameless struct/union +#if defined(_MSC_VER) +#pragma warning(push) +#pragma warning(disable : 4201) +#endif // _MSC_VER + +#include "cuda.h" + +#if defined(_MSC_VER) +#pragma warning(pop) +#endif // _MSC_VER + +#ifdef __cplusplus +extern "C" { +#endif + +int get_cuda_device(CUdevice *device); + +int create_context(CUdevice device, CUcontext *context); + +int destroy_context(CUcontext context); + +int cuda_fill(CUcontext context, CUdevice device, void *ptr, size_t size, + const void *pattern, size_t pattern_size); + +int cuda_copy(CUcontext context, CUdevice device, void *dst_ptr, + const void *src_ptr, size_t size); + +umf_usm_memory_type_t get_mem_type(CUcontext context, void *ptr); + +CUcontext get_mem_context(void *ptr); + +CUcontext get_current_context(); + +#ifdef __cplusplus +} +#endif + +#endif // TEST_COMMON_CUDA_HELPERS_HPP diff --git a/test/providers/ipc_cuda_prov.sh b/test/providers/ipc_cuda_prov.sh new file mode 100755 index 000000000..1e9b6b05d --- /dev/null +++ b/test/providers/ipc_cuda_prov.sh @@ -0,0 +1,24 @@ +# +# Copyright (C) 2024 Intel Corporation +# +# Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# + +#!/bin/bash + +set -e + +# port should be a number from the range <1024, 65535> +PORT=$(( 1024 + ( $$ % ( 65535 - 1024 )))) + +UMF_LOG_VAL="level:debug;flush:debug;output:stderr;pid:yes" + +echo "Starting ipc_cuda_prov CONSUMER on port $PORT ..." +UMF_LOG=$UMF_LOG_VAL ./umf_test-ipc_cuda_prov_consumer $PORT & + +echo "Waiting 1 sec ..." +sleep 1 + +echo "Starting ipc_cuda_prov PRODUCER on port $PORT ..." +UMF_LOG=$UMF_LOG_VAL ./umf_test-ipc_cuda_prov_producer $PORT diff --git a/test/providers/ipc_cuda_prov_common.c b/test/providers/ipc_cuda_prov_common.c new file mode 100644 index 000000000..a38e4d061 --- /dev/null +++ b/test/providers/ipc_cuda_prov_common.c @@ -0,0 +1,22 @@ +/* + * Copyright (C) 2024 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + */ + +#include + +#include + +#include "cuda_helpers.h" +#include "ipc_cuda_prov_common.h" + +void memcopy(void *dst, const void *src, size_t size, void *context) { + cuda_copy_ctx_t *cu_params = (cuda_copy_ctx_t *)context; + int ret = cuda_copy(cu_params->context, cu_params->device, dst, (void *)src, + size); + if (ret != 0) { + fprintf(stderr, "cuda_copy failed with error %d\n", ret); + } +} diff --git a/test/providers/ipc_cuda_prov_common.h b/test/providers/ipc_cuda_prov_common.h new file mode 100644 index 000000000..e50546efd --- /dev/null +++ b/test/providers/ipc_cuda_prov_common.h @@ -0,0 +1,20 @@ +/* + * Copyright (C) 2024 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + */ + +#ifndef UMF_TEST_IPC_CUDA_PROV_COMMON_H +#define UMF_TEST_IPC_CUDA_PROV_COMMON_H + +#include + +typedef struct cuda_copy_ctx_t { + CUcontext context; + CUdevice device; +} cuda_copy_ctx_t; + +void memcopy(void *dst, const void *src, size_t size, void *context); + +#endif // UMF_TEST_IPC_CUDA_PROV_COMMON_H diff --git a/test/providers/ipc_cuda_prov_consumer.c b/test/providers/ipc_cuda_prov_consumer.c new file mode 100644 index 000000000..1aeb5b15c --- /dev/null +++ b/test/providers/ipc_cuda_prov_consumer.c @@ -0,0 +1,97 @@ +/* + * Copyright (C) 2024 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + */ + +#include +#include + +#include +#include + +#include "cuda_helpers.h" +#include "ipc_common.h" +#include "ipc_cuda_prov_common.h" + +int main(int argc, char *argv[]) { + if (argc < 2) { + fprintf(stderr, "usage: %s \n", argv[0]); + return -1; + } + + int port = atoi(argv[1]); + CUdevice hDevice = -1; + CUcontext hContext = NULL; + + int ret = get_cuda_device(&hDevice); + if (ret != 0) { + fprintf(stderr, "get_cuda_device() failed!\n"); + return -1; + } + + ret = create_context(hDevice, &hContext); + if (ret != 0) { + fprintf(stderr, "create_context() failed!\n"); + return -1; + } + + umf_cuda_memory_provider_params_handle_t cu_params = NULL; + umf_result_t umf_result = umfCUDAMemoryProviderParamsCreate(&cu_params); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf(stderr, "Failed to create CUDA params!\n"); + ret = -1; + goto destroy_context; + } + + umf_result = umfCUDAMemoryProviderParamsSetContext(cu_params, hContext); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf(stderr, + "Failed to set context in CUDA Memory Provider params!\n"); + ret = -1; + goto destroy_provider_params; + } + + umf_result = umfCUDAMemoryProviderParamsSetDevice(cu_params, hDevice); + if (umf_result != 
UMF_RESULT_SUCCESS) { + fprintf(stderr, + "Failed to set device in CUDA Memory Provider params!\n"); + ret = -1; + goto destroy_provider_params; + } + + umf_result = umfCUDAMemoryProviderParamsSetMemoryType( + cu_params, UMF_MEMORY_TYPE_DEVICE); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf(stderr, "Failed to set memory type in CUDA memory " + "provider params!\n"); + ret = -1; + goto destroy_provider_params; + } + + umf_disjoint_pool_params_handle_t pool_params = NULL; + + umf_result = umfDisjointPoolParamsCreate(&pool_params); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf(stderr, "Failed to create pool params!\n"); + ret = -1; + goto destroy_provider_params; + } + + cuda_copy_ctx_t copy_ctx = {hContext, hDevice}; + + ret = + run_consumer(port, umfDisjointPoolOps(), pool_params, + umfCUDAMemoryProviderOps(), cu_params, memcopy, ©_ctx); + + umfDisjointPoolParamsDestroy(pool_params); + +destroy_provider_params: + umfCUDAMemoryProviderParamsDestroy(cu_params); + +destroy_context: + destroy_context(hContext); + + return ret; +} diff --git a/test/providers/ipc_cuda_prov_producer.c b/test/providers/ipc_cuda_prov_producer.c new file mode 100644 index 000000000..c2cd1d132 --- /dev/null +++ b/test/providers/ipc_cuda_prov_producer.c @@ -0,0 +1,97 @@ +/* + * Copyright (C) 2024 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + */ + +#include +#include + +#include +#include + +#include "cuda_helpers.h" +#include "ipc_common.h" +#include "ipc_cuda_prov_common.h" + +int main(int argc, char *argv[]) { + if (argc < 2) { + fprintf(stderr, "usage: %s \n", argv[0]); + return -1; + } + + int port = atoi(argv[1]); + CUdevice hDevice = -1; + CUcontext hContext = NULL; + + int ret = get_cuda_device(&hDevice); + if (ret != 0) { + fprintf(stderr, "get_cuda_device() failed!\n"); + return -1; + } + + ret = create_context(hDevice, &hContext); + if (ret != 0) { + fprintf(stderr, "create_context() failed!\n"); + return -1; + } + + umf_cuda_memory_provider_params_handle_t cu_params = NULL; + umf_result_t umf_result = umfCUDAMemoryProviderParamsCreate(&cu_params); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf(stderr, "Failed to create CUDA params!\n"); + ret = -1; + goto destroy_context; + } + + umf_result = umfCUDAMemoryProviderParamsSetContext(cu_params, hContext); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf(stderr, + "Failed to set context in CUDA Memory Provider params!\n"); + ret = -1; + goto destroy_provider_params; + } + + umf_result = umfCUDAMemoryProviderParamsSetDevice(cu_params, hDevice); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf(stderr, + "Failed to set device in CUDA Memory Provider params!\n"); + ret = -1; + goto destroy_provider_params; + } + + umf_result = umfCUDAMemoryProviderParamsSetMemoryType( + cu_params, UMF_MEMORY_TYPE_DEVICE); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf(stderr, "Failed to set memory type in CUDA memory " + "provider params!\n"); + ret = -1; + goto destroy_provider_params; + } + + umf_disjoint_pool_params_handle_t pool_params = NULL; + + umf_result = umfDisjointPoolParamsCreate(&pool_params); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf(stderr, "Failed to create pool params!\n"); + ret = -1; + goto destroy_provider_params; + } + + cuda_copy_ctx_t copy_ctx = {hContext, 
hDevice}; + + ret = + run_producer(port, umfDisjointPoolOps(), pool_params, + umfCUDAMemoryProviderOps(), cu_params, memcopy, ©_ctx); + + umfDisjointPoolParamsDestroy(pool_params); + +destroy_provider_params: + umfCUDAMemoryProviderParamsDestroy(cu_params); + +destroy_context: + destroy_context(hContext); + + return ret; +} diff --git a/test/providers/ipc_level_zero_prov_common.c b/test/providers/ipc_level_zero_prov_common.c index 11813653d..485cb41b5 100644 --- a/test/providers/ipc_level_zero_prov_common.c +++ b/test/providers/ipc_level_zero_prov_common.c @@ -6,18 +6,16 @@ */ #include "ipc_level_zero_prov_common.h" -#include "level_zero_helpers.h" +#include "utils_level_zero.h" #include #include void memcopy(void *dst, const void *src, size_t size, void *context) { - level_zero_memory_provider_params_t *l0_params = - (level_zero_memory_provider_params_t *)context; - int ret = - level_zero_copy(l0_params->level_zero_context_handle, - l0_params->level_zero_device_handle, dst, src, size); + level_zero_copy_ctx_t *l0_params = (level_zero_copy_ctx_t *)context; + int ret = utils_ze_level_zero_copy(l0_params->context, l0_params->device, + dst, src, size); if (ret != 0) { fprintf(stderr, "level_zero_copy failed with error %d\n", ret); } diff --git a/test/providers/ipc_level_zero_prov_common.h b/test/providers/ipc_level_zero_prov_common.h index dff51d08b..ea444133d 100644 --- a/test/providers/ipc_level_zero_prov_common.h +++ b/test/providers/ipc_level_zero_prov_common.h @@ -10,6 +10,13 @@ #include +#include "ze_api.h" + +typedef struct level_zero_copy_ctx_t { + ze_context_handle_t context; + ze_device_handle_t device; +} level_zero_copy_ctx_t; + void memcopy(void *dst, const void *src, size_t size, void *context); #endif // UMF_TEST_IPC_LEVEL_ZERO_PROV_COMMON_H diff --git a/test/providers/ipc_level_zero_prov_consumer.c b/test/providers/ipc_level_zero_prov_consumer.c index 6d59ced53..8ec0648e4 100644 --- a/test/providers/ipc_level_zero_prov_consumer.c +++ 
b/test/providers/ipc_level_zero_prov_consumer.c @@ -8,23 +8,102 @@ #include #include +#include #include #include "ipc_common.h" #include "ipc_level_zero_prov_common.h" -#include "level_zero_helpers.h" +#include "utils_level_zero.h" int main(int argc, char *argv[]) { if (argc < 2) { - fprintf(stderr, "usage: %s [shm_name]\n", argv[0]); + fprintf(stderr, "usage: %s \n", argv[0]); return -1; } int port = atoi(argv[1]); + uint32_t driver_idx = 0; + ze_driver_handle_t hDriver = NULL; + ze_device_handle_t hDevice = NULL; + ze_context_handle_t hContext = NULL; - level_zero_memory_provider_params_t l0_params = - create_level_zero_prov_params(UMF_MEMORY_TYPE_DEVICE); + int ret = utils_ze_find_driver_with_gpu(&driver_idx, &hDriver); + if (ret != 0 || hDriver == NULL) { + fprintf(stderr, "utils_ze_find_driver_with_gpu() failed!\n"); + return -1; + } + + ret = utils_ze_find_gpu_device(hDriver, &hDevice); + if (ret != 0 || hDevice == NULL) { + fprintf(stderr, "utils_ze_find_gpu_device() failed!\n"); + return -1; + } + + ret = utils_ze_create_context(hDriver, &hContext); + if (ret != 0) { + fprintf(stderr, "utils_ze_create_context() failed!\n"); + return -1; + } + + umf_level_zero_memory_provider_params_handle_t l0_params = NULL; + umf_result_t umf_result = + umfLevelZeroMemoryProviderParamsCreate(&l0_params); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf(stderr, + "Failed to create Level Zero Memory Provider params!\n"); + ret = -1; + goto destroy_context; + } + + umf_result = + umfLevelZeroMemoryProviderParamsSetContext(l0_params, hContext); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf( + stderr, + "Failed to set context in Level Zero Memory Provider params!\n"); + ret = -1; + goto destroy_provider_params; + } + + umf_result = umfLevelZeroMemoryProviderParamsSetDevice(l0_params, hDevice); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf(stderr, + "Failed to set device in Level Zero Memory Provider params!\n"); + ret = -1; + goto destroy_provider_params; + } + + 
umf_result = umfLevelZeroMemoryProviderParamsSetMemoryType( + l0_params, UMF_MEMORY_TYPE_DEVICE); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf(stderr, "Failed to set memory type in Level Zero Memory " + "Provider params!\n"); + ret = -1; + goto destroy_provider_params; + } + + umf_disjoint_pool_params_handle_t pool_params = NULL; + + umf_result = umfDisjointPoolParamsCreate(&pool_params); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf(stderr, "Failed to create pool params!\n"); + ret = -1; + goto destroy_provider_params; + } + + level_zero_copy_ctx_t copy_ctx = {hContext, hDevice}; + + ret = run_consumer(port, umfDisjointPoolOps(), pool_params, + umfLevelZeroMemoryProviderOps(), l0_params, memcopy, + ©_ctx); + + umfDisjointPoolParamsDestroy(pool_params); + +destroy_provider_params: + umfLevelZeroMemoryProviderParamsDestroy(l0_params); + +destroy_context: + utils_ze_destroy_context(hContext); - return run_consumer(port, umfLevelZeroMemoryProviderOps(), &l0_params, - memcopy, &l0_params); + return ret; } diff --git a/test/providers/ipc_level_zero_prov_producer.c b/test/providers/ipc_level_zero_prov_producer.c index d2d95d885..2a8fedc37 100644 --- a/test/providers/ipc_level_zero_prov_producer.c +++ b/test/providers/ipc_level_zero_prov_producer.c @@ -8,23 +8,102 @@ #include #include +#include #include #include "ipc_common.h" #include "ipc_level_zero_prov_common.h" -#include "level_zero_helpers.h" +#include "utils_level_zero.h" int main(int argc, char *argv[]) { if (argc < 2) { - fprintf(stderr, "usage: %s [shm_name]\n", argv[0]); + fprintf(stderr, "usage: %s \n", argv[0]); return -1; } int port = atoi(argv[1]); + uint32_t driver_idx = 0; + ze_driver_handle_t hDriver = NULL; + ze_device_handle_t hDevice = NULL; + ze_context_handle_t hContext = NULL; - level_zero_memory_provider_params_t l0_params = - create_level_zero_prov_params(UMF_MEMORY_TYPE_DEVICE); + int ret = utils_ze_find_driver_with_gpu(&driver_idx, &hDriver); + if (ret != 0 || hDriver == NULL) { + 
fprintf(stderr, "utils_ze_find_driver_with_gpu() failed!\n"); + return -1; + } + + ret = utils_ze_find_gpu_device(hDriver, &hDevice); + if (ret != 0 || hDevice == NULL) { + fprintf(stderr, "utils_ze_find_gpu_device() failed!\n"); + return -1; + } + + ret = utils_ze_create_context(hDriver, &hContext); + if (ret != 0) { + fprintf(stderr, "utils_ze_create_context() failed!\n"); + return -1; + } + + umf_level_zero_memory_provider_params_handle_t l0_params = NULL; + umf_result_t umf_result = + umfLevelZeroMemoryProviderParamsCreate(&l0_params); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf(stderr, + "Failed to create Level Zero Memory Provider params!\n"); + ret = -1; + goto destroy_context; + } + + umf_result = + umfLevelZeroMemoryProviderParamsSetContext(l0_params, hContext); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf( + stderr, + "Failed to set context in Level Zero Memory Provider params!\n"); + ret = -1; + goto destroy_provider_params; + } + + umf_result = umfLevelZeroMemoryProviderParamsSetDevice(l0_params, hDevice); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf(stderr, + "Failed to set device in Level Zero Memory Provider params!\n"); + ret = -1; + goto destroy_provider_params; + } + + umf_result = umfLevelZeroMemoryProviderParamsSetMemoryType( + l0_params, UMF_MEMORY_TYPE_DEVICE); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf(stderr, "Failed to set memory type in Level Zero Memory " + "Provider params!\n"); + ret = -1; + goto destroy_provider_params; + } + + umf_disjoint_pool_params_handle_t pool_params = NULL; + + umf_result = umfDisjointPoolParamsCreate(&pool_params); + if (umf_result != UMF_RESULT_SUCCESS) { + fprintf(stderr, "Failed to create pool params!\n"); + ret = -1; + goto destroy_provider_params; + } + + level_zero_copy_ctx_t copy_ctx = {hContext, hDevice}; + + ret = run_producer(port, umfDisjointPoolOps(), pool_params, + umfLevelZeroMemoryProviderOps(), l0_params, memcopy, + ©_ctx); + + 
umfDisjointPoolParamsDestroy(pool_params); + +destroy_provider_params: + umfLevelZeroMemoryProviderParamsDestroy(l0_params); + +destroy_context: + utils_ze_destroy_context(hContext); - return run_producer(port, umfLevelZeroMemoryProviderOps(), &l0_params, - memcopy, &l0_params); + return ret; } diff --git a/test/providers/provider_cuda.cpp b/test/providers/provider_cuda.cpp new file mode 100644 index 000000000..4f1d35911 --- /dev/null +++ b/test/providers/provider_cuda.cpp @@ -0,0 +1,357 @@ +// Copyright (C) 2024 Intel Corporation +// Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#ifdef _WIN32 +//workaround for std::numeric_limits on windows +#define NOMINMAX +#endif + +#include + +#include + +#include "cuda_helpers.h" +#include "ipcFixtures.hpp" +#include "pool.hpp" +#include "utils_load_library.h" + +using umf_test::test; +using namespace umf_test; + +class CUDATestHelper { + public: + CUDATestHelper(); + + ~CUDATestHelper() { + if (hContext_) { + destroy_context(hContext_); + } + } + + CUcontext get_test_context() const { return hContext_; } + + CUdevice get_test_device() const { return hDevice_; } + + private: + CUcontext hContext_ = nullptr; + CUdevice hDevice_ = -1; +}; + +CUDATestHelper::CUDATestHelper() { + int ret = get_cuda_device(&hDevice_); + if (ret != 0) { + fprintf(stderr, "get_cuda_device() failed!\n"); + return; + } + + ret = create_context(hDevice_, &hContext_); + if (ret != 0) { + fprintf(stderr, "create_context() failed!\n"); + return; + } +} + +using cuda_params_unique_handle_t = + std::unique_ptr; + +cuda_params_unique_handle_t +create_cuda_prov_params(CUcontext context, CUdevice device, + umf_usm_memory_type_t memory_type) { + umf_cuda_memory_provider_params_handle_t params = nullptr; + + umf_result_t res = umfCUDAMemoryProviderParamsCreate(¶ms); + if (res != UMF_RESULT_SUCCESS) { + return cuda_params_unique_handle_t(nullptr, + 
&umfCUDAMemoryProviderParamsDestroy); + } + + res = umfCUDAMemoryProviderParamsSetContext(params, context); + if (res != UMF_RESULT_SUCCESS) { + umfCUDAMemoryProviderParamsDestroy(params); + return cuda_params_unique_handle_t(nullptr, + &umfCUDAMemoryProviderParamsDestroy); + ; + } + + res = umfCUDAMemoryProviderParamsSetDevice(params, device); + if (res != UMF_RESULT_SUCCESS) { + umfCUDAMemoryProviderParamsDestroy(params); + return cuda_params_unique_handle_t(nullptr, + &umfCUDAMemoryProviderParamsDestroy); + ; + } + + res = umfCUDAMemoryProviderParamsSetMemoryType(params, memory_type); + if (res != UMF_RESULT_SUCCESS) { + umfCUDAMemoryProviderParamsDestroy(params); + return cuda_params_unique_handle_t(nullptr, + &umfCUDAMemoryProviderParamsDestroy); + ; + } + + return cuda_params_unique_handle_t(params, + &umfCUDAMemoryProviderParamsDestroy); +} + +class CUDAMemoryAccessor : public MemoryAccessor { + public: + CUDAMemoryAccessor(CUcontext hContext, CUdevice hDevice) + : hDevice_(hDevice), hContext_(hContext) {} + + void fill(void *ptr, size_t size, const void *pattern, + size_t pattern_size) { + ASSERT_NE(hContext_, nullptr); + ASSERT_GE(hDevice_, -1); + ASSERT_NE(ptr, nullptr); + + int ret = + cuda_fill(hContext_, hDevice_, ptr, size, pattern, pattern_size); + ASSERT_EQ(ret, 0); + } + + void copy(void *dst_ptr, void *src_ptr, size_t size) { + ASSERT_NE(hContext_, nullptr); + ASSERT_GE(hDevice_, -1); + ASSERT_NE(dst_ptr, nullptr); + ASSERT_NE(src_ptr, nullptr); + + int ret = cuda_copy(hContext_, hDevice_, dst_ptr, src_ptr, size); + ASSERT_EQ(ret, 0); + } + + private: + CUdevice hDevice_; + CUcontext hContext_; +}; + +using CUDAProviderTestParams = + std::tuple; + +struct umfCUDAProviderTest + : umf_test::test, + ::testing::WithParamInterface { + + void SetUp() override { + test::SetUp(); + + auto [cuda_params, cu_context, memory_type, accessor] = + this->GetParam(); + params = cuda_params; + memAccessor = accessor; + expected_context = cu_context; + 
expected_memory_type = memory_type; + } + + void TearDown() override { test::TearDown(); } + + umf_cuda_memory_provider_params_handle_t params; + MemoryAccessor *memAccessor = nullptr; + CUcontext expected_context; + umf_usm_memory_type_t expected_memory_type; +}; + +TEST_P(umfCUDAProviderTest, basic) { + const size_t size = 1024 * 8; + const uint32_t pattern = 0xAB; + CUcontext expected_current_context = get_current_context(); + + // create CUDA provider + umf_memory_provider_handle_t provider = nullptr; + umf_result_t umf_result = + umfMemoryProviderCreate(umfCUDAMemoryProviderOps(), params, &provider); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(provider, nullptr); + + size_t pageSize = 0; + umf_result = umfMemoryProviderGetMinPageSize(provider, 0, &pageSize); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_GE(pageSize, 0); + + umf_result = + umfMemoryProviderGetRecommendedPageSize(provider, 0, &pageSize); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_GE(pageSize, 0); + + void *ptr = nullptr; + umf_result = umfMemoryProviderAlloc(provider, size, 128, &ptr); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(ptr, nullptr); + + // use the allocated memory - fill it with a 0xAB pattern + memAccessor->fill(ptr, size, &pattern, sizeof(pattern)); + + CUcontext actual_mem_context = get_mem_context(ptr); + ASSERT_EQ(actual_mem_context, expected_context); + + CUcontext actual_current_context = get_current_context(); + ASSERT_EQ(actual_current_context, expected_current_context); + + umf_usm_memory_type_t memoryTypeActual = + get_mem_type(actual_current_context, ptr); + ASSERT_EQ(memoryTypeActual, expected_memory_type); + + // check if the pattern was successfully applied + uint32_t *hostMemory = (uint32_t *)calloc(1, size); + memAccessor->copy(hostMemory, ptr, size); + for (size_t i = 0; i < size / sizeof(uint32_t); i++) { + ASSERT_EQ(hostMemory[i], pattern); + } + free(hostMemory); + + umf_result = umfMemoryProviderFree(provider, ptr, 
size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + umfMemoryProviderDestroy(provider); +} + +TEST_P(umfCUDAProviderTest, getPageSize) { + umf_memory_provider_handle_t provider = nullptr; + umf_result_t umf_result = + umfMemoryProviderCreate(umfCUDAMemoryProviderOps(), params, &provider); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(provider, nullptr); + + size_t recommendedPageSize = 0; + umf_result = umfMemoryProviderGetRecommendedPageSize(provider, 0, + &recommendedPageSize); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_GE(recommendedPageSize, 0); + + size_t minPageSize = 0; + umf_result = + umfMemoryProviderGetMinPageSize(provider, nullptr, &minPageSize); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_GE(minPageSize, 0); + + ASSERT_GE(recommendedPageSize, minPageSize); + + umfMemoryProviderDestroy(provider); +} + +TEST_P(umfCUDAProviderTest, getName) { + umf_memory_provider_handle_t provider = nullptr; + umf_result_t umf_result = + umfMemoryProviderCreate(umfCUDAMemoryProviderOps(), params, &provider); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(provider, nullptr); + + const char *name = umfMemoryProviderGetName(provider); + ASSERT_STREQ(name, "CUDA"); + + umfMemoryProviderDestroy(provider); +} + +TEST_P(umfCUDAProviderTest, allocInvalidSize) { + CUcontext expected_current_context = get_current_context(); + // create CUDA provider + umf_memory_provider_handle_t provider = nullptr; + umf_result_t umf_result = + umfMemoryProviderCreate(umfCUDAMemoryProviderOps(), params, &provider); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(provider, nullptr); + + void *ptr = nullptr; + + // NOTE: some scenarios are invalid only for the DEVICE allocations + if (expected_memory_type == UMF_MEMORY_TYPE_DEVICE) { + // try to alloc SIZE_MAX + umf_result = umfMemoryProviderAlloc(provider, SIZE_MAX, 0, &ptr); + ASSERT_EQ(ptr, nullptr); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY); + + // in case of size == 0 we 
should got INVALID_ARGUMENT error + umf_result = umfMemoryProviderAlloc(provider, 0, 0, &ptr); + ASSERT_EQ(ptr, nullptr); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + } + + CUcontext actual_current_context = get_current_context(); + ASSERT_EQ(actual_current_context, expected_current_context); + + umfMemoryProviderDestroy(provider); +} + +TEST_P(umfCUDAProviderTest, providerCreateInvalidArgs) { + umf_memory_provider_handle_t provider = nullptr; + umf_result_t umf_result = + umfMemoryProviderCreate(umfCUDAMemoryProviderOps(), nullptr, &provider); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + umf_result = umfMemoryProviderCreate(nullptr, params, nullptr); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); +} + +TEST_P(umfCUDAProviderTest, getPageSizeInvalidArgs) { + umf_memory_provider_handle_t provider = nullptr; + umf_result_t umf_result = + umfMemoryProviderCreate(umfCUDAMemoryProviderOps(), params, &provider); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(provider, nullptr); + + umf_result = umfMemoryProviderGetMinPageSize(provider, nullptr, nullptr); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + umf_result = umfMemoryProviderGetRecommendedPageSize(provider, 0, nullptr); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + umfMemoryProviderDestroy(provider); +} + +TEST_P(umfCUDAProviderTest, cudaProviderNullParams) { + umf_result_t res = umfCUDAMemoryProviderParamsCreate(nullptr); + EXPECT_EQ(res, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + res = umfCUDAMemoryProviderParamsSetContext(nullptr, expected_context); + EXPECT_EQ(res, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + res = umfCUDAMemoryProviderParamsSetDevice(nullptr, 1); + EXPECT_EQ(res, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + res = + umfCUDAMemoryProviderParamsSetMemoryType(nullptr, expected_memory_type); + EXPECT_EQ(res, UMF_RESULT_ERROR_INVALID_ARGUMENT); +} + +// TODO add tests that mixes CUDA Memory Provider and Disjoint Pool + 
+CUDATestHelper cudaTestHelper; + +cuda_params_unique_handle_t cuParams_device_memory = create_cuda_prov_params( + cudaTestHelper.get_test_context(), cudaTestHelper.get_test_device(), + UMF_MEMORY_TYPE_DEVICE); +cuda_params_unique_handle_t cuParams_shared_memory = create_cuda_prov_params( + cudaTestHelper.get_test_context(), cudaTestHelper.get_test_device(), + UMF_MEMORY_TYPE_SHARED); +cuda_params_unique_handle_t cuParams_host_memory = create_cuda_prov_params( + cudaTestHelper.get_test_context(), cudaTestHelper.get_test_device(), + UMF_MEMORY_TYPE_HOST); + +CUDAMemoryAccessor cuAccessor(cudaTestHelper.get_test_context(), + cudaTestHelper.get_test_device()); +HostMemoryAccessor hostAccessor; + +INSTANTIATE_TEST_SUITE_P( + umfCUDAProviderTestSuite, umfCUDAProviderTest, + ::testing::Values( + CUDAProviderTestParams{cuParams_device_memory.get(), + cudaTestHelper.get_test_context(), + UMF_MEMORY_TYPE_DEVICE, &cuAccessor}, + CUDAProviderTestParams{cuParams_shared_memory.get(), + cudaTestHelper.get_test_context(), + UMF_MEMORY_TYPE_SHARED, &hostAccessor}, + CUDAProviderTestParams{cuParams_host_memory.get(), + cudaTestHelper.get_test_context(), + UMF_MEMORY_TYPE_HOST, &hostAccessor})); + +// TODO: add IPC API +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(umfIpcTest); +/* +INSTANTIATE_TEST_SUITE_P(umfCUDAProviderTestSuite, umfIpcTest, + ::testing::Values(ipcTestParams{ + umfProxyPoolOps(), nullptr, + umfCUDAMemoryProviderOps(), + cuParams_device_memory.get(), &cuAccessor, false})); +*/ diff --git a/test/providers/provider_cuda_not_impl.cpp b/test/providers/provider_cuda_not_impl.cpp new file mode 100644 index 000000000..30fc373ca --- /dev/null +++ b/test/providers/provider_cuda_not_impl.cpp @@ -0,0 +1,31 @@ +// Copyright (C) 2024 Intel Corporation +// Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include "base.hpp" + +#include + +using umf_test::test; + +TEST_F(test, cuda_provider_not_implemented) { + umf_cuda_memory_provider_params_handle_t hParams = nullptr; + umf_result_t result = umfCUDAMemoryProviderParamsCreate(&hParams); + ASSERT_EQ(result, UMF_RESULT_ERROR_NOT_SUPPORTED); + + result = umfCUDAMemoryProviderParamsDestroy(hParams); + ASSERT_EQ(result, UMF_RESULT_ERROR_NOT_SUPPORTED); + + result = umfCUDAMemoryProviderParamsSetContext(hParams, nullptr); + ASSERT_EQ(result, UMF_RESULT_ERROR_NOT_SUPPORTED); + + result = umfCUDAMemoryProviderParamsSetDevice(hParams, 0); + ASSERT_EQ(result, UMF_RESULT_ERROR_NOT_SUPPORTED); + + result = umfCUDAMemoryProviderParamsSetMemoryType(hParams, + UMF_MEMORY_TYPE_DEVICE); + ASSERT_EQ(result, UMF_RESULT_ERROR_NOT_SUPPORTED); + + umf_memory_provider_ops_t *ops = umfCUDAMemoryProviderOps(); + ASSERT_EQ(ops, nullptr); +} diff --git a/test/providers/provider_level_zero.cpp b/test/providers/provider_level_zero.cpp index ae5ece59c..d0584777b 100644 --- a/test/providers/provider_level_zero.cpp +++ b/test/providers/provider_level_zero.cpp @@ -12,13 +12,203 @@ #include #include "ipcFixtures.hpp" -#include "level_zero_helpers.h" #include "pool.hpp" +#include "utils_level_zero.h" #include "utils_load_library.h" using umf_test::test; using namespace umf_test; +class LevelZeroTestHelper { + public: + LevelZeroTestHelper(); + + ~LevelZeroTestHelper() { + if (hContext_) { + utils_ze_destroy_context(hContext_); + } + } + + ze_context_handle_t get_test_context() const { return hContext_; } + + ze_device_handle_t get_test_device() const { return hDevice_; } + + private: + ze_driver_handle_t hDriver_ = nullptr; + ze_context_handle_t hContext_ = nullptr; + ze_device_handle_t hDevice_ = nullptr; +}; + +LevelZeroTestHelper::LevelZeroTestHelper() { + uint32_t driver_idx = 0; + + int ret = utils_ze_find_driver_with_gpu(&driver_idx, &hDriver_); + if (ret != 0 || hDriver_ == NULL) 
{ + fprintf(stderr, "utils_ze_find_driver_with_gpu() failed!\n"); + return; + } + + ret = utils_ze_find_gpu_device(hDriver_, &hDevice_); + if (ret != 0 || hDevice_ == NULL) { + fprintf(stderr, "utils_ze_find_gpu_device() failed!\n"); + return; + } + + ret = utils_ze_create_context(hDriver_, &hContext_); + if (ret != 0) { + fprintf(stderr, "utils_ze_create_context() failed!\n"); + return; + } +} + +using level_zero_params_unique_handle_t = + std::unique_ptr; + +level_zero_params_unique_handle_t +create_level_zero_prov_params(ze_context_handle_t context, + ze_device_handle_t device, + umf_usm_memory_type_t memory_type) { + umf_level_zero_memory_provider_params_handle_t params = nullptr; + + umf_result_t res = umfLevelZeroMemoryProviderParamsCreate(¶ms); + if (res != UMF_RESULT_SUCCESS) { + return level_zero_params_unique_handle_t( + nullptr, &umfLevelZeroMemoryProviderParamsDestroy); + } + + res = umfLevelZeroMemoryProviderParamsSetContext(params, context); + if (res != UMF_RESULT_SUCCESS) { + umfLevelZeroMemoryProviderParamsDestroy(params); + return level_zero_params_unique_handle_t( + nullptr, &umfLevelZeroMemoryProviderParamsDestroy); + ; + } + + res = umfLevelZeroMemoryProviderParamsSetDevice(params, device); + if (res != UMF_RESULT_SUCCESS) { + umfLevelZeroMemoryProviderParamsDestroy(params); + return level_zero_params_unique_handle_t( + nullptr, &umfLevelZeroMemoryProviderParamsDestroy); + ; + } + + res = umfLevelZeroMemoryProviderParamsSetMemoryType(params, memory_type); + if (res != UMF_RESULT_SUCCESS) { + umfLevelZeroMemoryProviderParamsDestroy(params); + return level_zero_params_unique_handle_t( + nullptr, &umfLevelZeroMemoryProviderParamsDestroy); + ; + } + + return level_zero_params_unique_handle_t( + params, &umfLevelZeroMemoryProviderParamsDestroy); +} + +struct LevelZeroProviderInit + : public test, + public ::testing::WithParamInterface {}; + +INSTANTIATE_TEST_SUITE_P(, LevelZeroProviderInit, + ::testing::Values(UMF_MEMORY_TYPE_HOST, + 
UMF_MEMORY_TYPE_DEVICE, + UMF_MEMORY_TYPE_SHARED)); + +LevelZeroTestHelper l0TestHelper; + +TEST_P(LevelZeroProviderInit, FailNullContext) { + umf_memory_provider_ops_t *ops = umfLevelZeroMemoryProviderOps(); + ASSERT_NE(ops, nullptr); + + auto memory_type = GetParam(); + + umf_level_zero_memory_provider_params_handle_t hParams = nullptr; + umf_result_t result = umfLevelZeroMemoryProviderParamsCreate(&hParams); + ASSERT_EQ(result, UMF_RESULT_SUCCESS); + result = + umfLevelZeroMemoryProviderParamsSetMemoryType(hParams, memory_type); + ASSERT_EQ(result, UMF_RESULT_SUCCESS); + result = umfLevelZeroMemoryProviderParamsSetDevice( + hParams, l0TestHelper.get_test_device()); + ASSERT_EQ(result, UMF_RESULT_SUCCESS); + + result = umfLevelZeroMemoryProviderParamsSetContext(hParams, nullptr); + ASSERT_EQ(result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + umf_memory_provider_handle_t provider = nullptr; + result = umfMemoryProviderCreate(ops, hParams, &provider); + ASSERT_EQ(result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + umfLevelZeroMemoryProviderParamsDestroy(hParams); +} + +TEST_P(LevelZeroProviderInit, FailNullDevice) { + if (GetParam() == UMF_MEMORY_TYPE_HOST) { + GTEST_SKIP() << "Host memory does not require device handle"; + } + + umf_memory_provider_ops_t *ops = umfLevelZeroMemoryProviderOps(); + ASSERT_NE(ops, nullptr); + + auto memory_type = GetParam(); + umf_level_zero_memory_provider_params_handle_t hParams = nullptr; + umf_result_t result = umfLevelZeroMemoryProviderParamsCreate(&hParams); + ASSERT_EQ(result, UMF_RESULT_SUCCESS); + result = + umfLevelZeroMemoryProviderParamsSetMemoryType(hParams, memory_type); + ASSERT_EQ(result, UMF_RESULT_SUCCESS); + result = umfLevelZeroMemoryProviderParamsSetContext( + hParams, l0TestHelper.get_test_context()); + ASSERT_EQ(result, UMF_RESULT_SUCCESS); + + umf_memory_provider_handle_t provider = nullptr; + result = umfMemoryProviderCreate(ops, hParams, &provider); + ASSERT_EQ(result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + 
umfLevelZeroMemoryProviderParamsDestroy(hParams); +} + +TEST_F(test, FailNonNullDevice) { + umf_memory_provider_ops_t *ops = umfLevelZeroMemoryProviderOps(); + ASSERT_NE(ops, nullptr); + + auto memory_type = UMF_MEMORY_TYPE_HOST; + + umf_level_zero_memory_provider_params_handle_t hParams = nullptr; + umf_result_t result = umfLevelZeroMemoryProviderParamsCreate(&hParams); + ASSERT_EQ(result, UMF_RESULT_SUCCESS); + result = + umfLevelZeroMemoryProviderParamsSetMemoryType(hParams, memory_type); + ASSERT_EQ(result, UMF_RESULT_SUCCESS); + result = umfLevelZeroMemoryProviderParamsSetContext( + hParams, l0TestHelper.get_test_context()); + ASSERT_EQ(result, UMF_RESULT_SUCCESS); + result = umfLevelZeroMemoryProviderParamsSetDevice( + hParams, l0TestHelper.get_test_device()); + ASSERT_EQ(result, UMF_RESULT_SUCCESS); + + umf_memory_provider_handle_t provider = nullptr; + result = umfMemoryProviderCreate(ops, hParams, &provider); + ASSERT_EQ(result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + umfLevelZeroMemoryProviderParamsDestroy(hParams); +} + +TEST_F(test, FailMismatchedResidentHandlesCount) { + umf_memory_provider_ops_t *ops = umfLevelZeroMemoryProviderOps(); + ASSERT_NE(ops, nullptr); + + umf_level_zero_memory_provider_params_handle_t hParams = nullptr; + umf_result_t result = umfLevelZeroMemoryProviderParamsCreate(&hParams); + ASSERT_EQ(result, UMF_RESULT_SUCCESS); + + result = umfLevelZeroMemoryProviderParamsSetResidentDevices(hParams, + nullptr, 99); + ASSERT_EQ(result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + umfLevelZeroMemoryProviderParamsDestroy(hParams); +} + class LevelZeroMemoryAccessor : public MemoryAccessor { public: LevelZeroMemoryAccessor(ze_context_handle_t hContext, @@ -28,8 +218,8 @@ class LevelZeroMemoryAccessor : public MemoryAccessor { size_t pattern_size) { ASSERT_NE(ptr, nullptr); - int ret = level_zero_fill(hContext_, hDevice_, ptr, size, pattern, - pattern_size); + int ret = utils_ze_level_zero_fill(hContext_, hDevice_, ptr, size, + pattern, 
pattern_size); ASSERT_EQ(ret, 0); } @@ -37,7 +227,8 @@ class LevelZeroMemoryAccessor : public MemoryAccessor { ASSERT_NE(dst_ptr, nullptr); ASSERT_NE(src_ptr, nullptr); - int ret = level_zero_copy(hContext_, hDevice_, dst_ptr, src_ptr, size); + int ret = utils_ze_level_zero_copy(hContext_, hDevice_, dst_ptr, + src_ptr, size); ASSERT_EQ(ret, 0); } @@ -47,7 +238,8 @@ class LevelZeroMemoryAccessor : public MemoryAccessor { }; using LevelZeroProviderTestParams = - std::tuple; + std::tuple; struct umfLevelZeroProviderTest : umf_test::test, @@ -56,15 +248,14 @@ struct umfLevelZeroProviderTest void SetUp() override { test::SetUp(); - auto [l0_params, accessor] = this->GetParam(); + auto [l0_params, ze_context, memory_type, accessor] = this->GetParam(); params = l0_params; - hDevice = (ze_device_handle_t)params.level_zero_device_handle; - hContext = (ze_context_handle_t)params.level_zero_context_handle; + memAccessor = accessor; + hContext = ze_context; - ASSERT_NE(hDevice, nullptr); ASSERT_NE(hContext, nullptr); - switch (params.memory_type) { + switch (memory_type) { case UMF_MEMORY_TYPE_DEVICE: zeMemoryTypeExpected = ZE_MEMORY_TYPE_DEVICE; break; @@ -80,21 +271,14 @@ struct umfLevelZeroProviderTest } ASSERT_NE(zeMemoryTypeExpected, ZE_MEMORY_TYPE_UNKNOWN); - - memAccessor = accessor; } - void TearDown() override { - int ret = destroy_context(hContext); - ASSERT_EQ(ret, 0); - test::TearDown(); - } + void TearDown() override { test::TearDown(); } - level_zero_memory_provider_params_t params; - ze_device_handle_t hDevice = nullptr; + umf_level_zero_memory_provider_params_handle_t params; + MemoryAccessor *memAccessor = nullptr; ze_context_handle_t hContext = nullptr; ze_memory_type_t zeMemoryTypeExpected = ZE_MEMORY_TYPE_UNKNOWN; - MemoryAccessor *memAccessor = nullptr; }; GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(umfLevelZeroProviderTest); @@ -106,7 +290,7 @@ TEST_P(umfLevelZeroProviderTest, basic) { // create Level Zero provider umf_memory_provider_handle_t provider 
= nullptr; umf_result_t umf_result = umfMemoryProviderCreate( - umfLevelZeroMemoryProviderOps(), ¶ms, &provider); + umfLevelZeroMemoryProviderOps(), params, &provider); ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); ASSERT_NE(provider, nullptr); @@ -118,7 +302,7 @@ TEST_P(umfLevelZeroProviderTest, basic) { // use the allocated memory - fill it with a 0xAB pattern memAccessor->fill(ptr, size, &pattern, sizeof(pattern)); - ze_memory_type_t zeMemoryTypeActual = get_mem_type(hContext, ptr); + ze_memory_type_t zeMemoryTypeActual = utils_ze_get_mem_type(hContext, ptr); ASSERT_EQ(zeMemoryTypeActual, zeMemoryTypeExpected); // check if the pattern was successfully applied @@ -135,38 +319,144 @@ TEST_P(umfLevelZeroProviderTest, basic) { umfMemoryProviderDestroy(provider); } -// TODO add Level Zero Memory Provider specyfic tests -// TODO add negative test and check for Level Zero native errors -// TODO add tests that mixes Level Zero Memory Provider and Disjoint Pool +TEST_P(umfLevelZeroProviderTest, getPageSize) { + umf_memory_provider_handle_t provider = nullptr; + umf_result_t umf_result = umfMemoryProviderCreate( + umfLevelZeroMemoryProviderOps(), params, &provider); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(provider, nullptr); + + size_t recommendedPageSize = 0; + umf_result = umfMemoryProviderGetRecommendedPageSize(provider, 0, + &recommendedPageSize); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_GE(recommendedPageSize, 0); + + size_t minPageSize = 0; + umf_result = + umfMemoryProviderGetMinPageSize(provider, nullptr, &minPageSize); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_GE(minPageSize, 0); + + ASSERT_GE(recommendedPageSize, minPageSize); + + umfMemoryProviderDestroy(provider); +} + +TEST_P(umfLevelZeroProviderTest, getName) { + umf_memory_provider_handle_t provider = nullptr; + umf_result_t umf_result = umfMemoryProviderCreate( + umfLevelZeroMemoryProviderOps(), params, &provider); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + 
ASSERT_NE(provider, nullptr); + + const char *name = umfMemoryProviderGetName(provider); + ASSERT_STREQ(name, "LEVEL_ZERO"); + + umfMemoryProviderDestroy(provider); +} + +TEST_P(umfLevelZeroProviderTest, allocInvalidSize) { + umf_memory_provider_handle_t provider = nullptr; + umf_result_t umf_result = umfMemoryProviderCreate( + umfLevelZeroMemoryProviderOps(), params, &provider); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(provider, nullptr); + + void *ptr = nullptr; + umf_result = umfMemoryProviderAlloc( + provider, std::numeric_limits::max(), 0, &ptr); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC); + const char *message; + int32_t error; + umfMemoryProviderGetLastNativeError(provider, &message, &error); + ASSERT_EQ(error, ZE_RESULT_ERROR_UNSUPPORTED_SIZE); + + umfMemoryProviderDestroy(provider); +} + +TEST_P(umfLevelZeroProviderTest, providerCreateInvalidArgs) { + umf_memory_provider_handle_t provider = nullptr; + umf_result_t umf_result = umfMemoryProviderCreate( + umfLevelZeroMemoryProviderOps(), nullptr, &provider); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + umf_result = umfMemoryProviderCreate(nullptr, params, nullptr); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); +} + +TEST_P(umfLevelZeroProviderTest, getPageSizeInvalidArgs) { + umf_memory_provider_handle_t provider = nullptr; + umf_result_t umf_result = umfMemoryProviderCreate( + umfLevelZeroMemoryProviderOps(), params, &provider); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(provider, nullptr); + + umf_result = umfMemoryProviderGetMinPageSize(provider, nullptr, nullptr); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + umf_result = umfMemoryProviderGetRecommendedPageSize(provider, 0, nullptr); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + umfMemoryProviderDestroy(provider); +} -level_zero_memory_provider_params_t l0Params_device_memory = - create_level_zero_prov_params(UMF_MEMORY_TYPE_DEVICE); 
-level_zero_memory_provider_params_t l0Params_shared_memory = - create_level_zero_prov_params(UMF_MEMORY_TYPE_SHARED); -level_zero_memory_provider_params_t l0Params_host_memory = - create_level_zero_prov_params(UMF_MEMORY_TYPE_HOST); +TEST_P(umfLevelZeroProviderTest, levelZeroProviderNullParams) { + umf_result_t res = umfLevelZeroMemoryProviderParamsCreate(nullptr); + EXPECT_EQ(res, UMF_RESULT_ERROR_INVALID_ARGUMENT); -LevelZeroMemoryAccessor l0Accessor( - (ze_context_handle_t)l0Params_device_memory.level_zero_context_handle, - (ze_device_handle_t)l0Params_device_memory.level_zero_device_handle); + res = umfLevelZeroMemoryProviderParamsSetContext(nullptr, hContext); + EXPECT_EQ(res, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + res = umfLevelZeroMemoryProviderParamsSetDevice(nullptr, nullptr); + EXPECT_EQ(res, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + res = umfLevelZeroMemoryProviderParamsSetMemoryType(nullptr, + UMF_MEMORY_TYPE_DEVICE); + EXPECT_EQ(res, UMF_RESULT_ERROR_INVALID_ARGUMENT); +} + +// TODO add tests that mixes Level Zero Memory Provider and Disjoint Pool + +level_zero_params_unique_handle_t l0Params_device_memory = + create_level_zero_prov_params(l0TestHelper.get_test_context(), + l0TestHelper.get_test_device(), + UMF_MEMORY_TYPE_DEVICE); +level_zero_params_unique_handle_t l0Params_shared_memory = + create_level_zero_prov_params(l0TestHelper.get_test_context(), + l0TestHelper.get_test_device(), + UMF_MEMORY_TYPE_SHARED); +level_zero_params_unique_handle_t l0Params_host_memory = + create_level_zero_prov_params(l0TestHelper.get_test_context(), nullptr, + UMF_MEMORY_TYPE_HOST); + +LevelZeroMemoryAccessor + l0Accessor((ze_context_handle_t)l0TestHelper.get_test_context(), + (ze_device_handle_t)l0TestHelper.get_test_device()); HostMemoryAccessor hostAccessor; INSTANTIATE_TEST_SUITE_P( umfLevelZeroProviderTestSuite, umfLevelZeroProviderTest, ::testing::Values( - LevelZeroProviderTestParams{l0Params_device_memory, &l0Accessor}, - 
LevelZeroProviderTestParams{l0Params_shared_memory, &hostAccessor}, - LevelZeroProviderTestParams{l0Params_host_memory, &hostAccessor})); + LevelZeroProviderTestParams{l0Params_device_memory.get(), + l0TestHelper.get_test_context(), + UMF_MEMORY_TYPE_DEVICE, &l0Accessor}, + LevelZeroProviderTestParams{l0Params_shared_memory.get(), + l0TestHelper.get_test_context(), + UMF_MEMORY_TYPE_SHARED, &hostAccessor}, + LevelZeroProviderTestParams{l0Params_host_memory.get(), + l0TestHelper.get_test_context(), + UMF_MEMORY_TYPE_HOST, &hostAccessor})); // TODO: it looks like there is some problem with IPC implementation in Level // Zero on windows. Issue: #494 #ifdef _WIN32 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(umfIpcTest); #else -INSTANTIATE_TEST_SUITE_P(umfLevelZeroProviderTestSuite, umfIpcTest, - ::testing::Values(ipcTestParams{ - umfProxyPoolOps(), nullptr, - umfLevelZeroMemoryProviderOps(), - &l0Params_device_memory, &l0Accessor})); +INSTANTIATE_TEST_SUITE_P( + umfLevelZeroProviderTestSuite, umfIpcTest, + ::testing::Values(ipcTestParams{ + umfProxyPoolOps(), nullptr, umfLevelZeroMemoryProviderOps(), + l0Params_device_memory.get(), &l0Accessor, false})); #endif diff --git a/test/providers/provider_level_zero_not_impl.cpp b/test/providers/provider_level_zero_not_impl.cpp new file mode 100644 index 000000000..bea1acbe7 --- /dev/null +++ b/test/providers/provider_level_zero_not_impl.cpp @@ -0,0 +1,36 @@ +// Copyright (C) 2024 Intel Corporation +// Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include "base.hpp" + +#include + +using umf_test::test; + +TEST_F(test, level_zero_provider_not_implemented) { + umf_level_zero_memory_provider_params_handle_t hParams = nullptr; + umf_result_t result = umfLevelZeroMemoryProviderParamsCreate(&hParams); + ASSERT_EQ(result, UMF_RESULT_ERROR_NOT_SUPPORTED); + + result = umfLevelZeroMemoryProviderParamsDestroy(hParams); + ASSERT_EQ(result, UMF_RESULT_ERROR_NOT_SUPPORTED); + + result = umfLevelZeroMemoryProviderParamsSetContext(hParams, nullptr); + ASSERT_EQ(result, UMF_RESULT_ERROR_NOT_SUPPORTED); + + result = umfLevelZeroMemoryProviderParamsSetDevice(hParams, nullptr); + ASSERT_EQ(result, UMF_RESULT_ERROR_NOT_SUPPORTED); + + result = umfLevelZeroMemoryProviderParamsSetMemoryType( + hParams, UMF_MEMORY_TYPE_DEVICE); + ASSERT_EQ(result, UMF_RESULT_ERROR_NOT_SUPPORTED); + + ze_device_handle_t hDevices[1]; + result = umfLevelZeroMemoryProviderParamsSetResidentDevices(hParams, + hDevices, 1); + ASSERT_EQ(result, UMF_RESULT_ERROR_NOT_SUPPORTED); + + umf_memory_provider_ops_t *ops = umfLevelZeroMemoryProviderOps(); + ASSERT_EQ(ops, nullptr); +} diff --git a/test/supp/drd-umf_test-jemalloc_coarse_devdax.supp b/test/supp/drd-umf_test-jemalloc_coarse_devdax.supp new file mode 100644 index 000000000..fd071432b --- /dev/null +++ b/test/supp/drd-umf_test-jemalloc_coarse_devdax.supp @@ -0,0 +1,34 @@ +{ + False-positive ConflictingAccess in libjemalloc.so + drd:ConflictingAccess + obj:*/libjemalloc.so* + ... + fun:mallocx + ... +} + +{ + False-positive ConflictingAccess in libjemalloc.so + drd:ConflictingAccess + obj:*/libjemalloc.so* + ... + fun:op_free + ... +} + +{ + False-positive ConflictingAccess in libjemalloc.so + drd:ConflictingAccess + obj:*/libjemalloc.so* + ... + fun:__nptl_deallocate_tsd + ... +} + +{ + False-positive ConflictingAccess in critnib_insert + drd:ConflictingAccess + fun:store + fun:critnib_insert + ... 
+} diff --git a/test/supp/drd-umf_test-jemalloc_coarse_file.supp b/test/supp/drd-umf_test-jemalloc_coarse_file.supp new file mode 100644 index 000000000..fd071432b --- /dev/null +++ b/test/supp/drd-umf_test-jemalloc_coarse_file.supp @@ -0,0 +1,34 @@ +{ + False-positive ConflictingAccess in libjemalloc.so + drd:ConflictingAccess + obj:*/libjemalloc.so* + ... + fun:mallocx + ... +} + +{ + False-positive ConflictingAccess in libjemalloc.so + drd:ConflictingAccess + obj:*/libjemalloc.so* + ... + fun:op_free + ... +} + +{ + False-positive ConflictingAccess in libjemalloc.so + drd:ConflictingAccess + obj:*/libjemalloc.so* + ... + fun:__nptl_deallocate_tsd + ... +} + +{ + False-positive ConflictingAccess in critnib_insert + drd:ConflictingAccess + fun:store + fun:critnib_insert + ... +} diff --git a/test/supp/drd-umf_test-provider_devdax_memory_ipc.supp b/test/supp/drd-umf_test-provider_devdax_memory_ipc.supp new file mode 100644 index 000000000..cd44bb49a --- /dev/null +++ b/test/supp/drd-umf_test-provider_devdax_memory_ipc.supp @@ -0,0 +1,8 @@ +{ + [false-positive] Double check locking pattern in trackingOpenIpcHandle + drd:ConflictingAccess + fun:trackingOpenIpcHandle + fun:umfMemoryProviderOpenIPCHandle + fun:umfOpenIPCHandle + ... +} diff --git a/test/supp/drd-umf_test-provider_file_memory_ipc.supp b/test/supp/drd-umf_test-provider_file_memory_ipc.supp new file mode 100644 index 000000000..7fce24116 --- /dev/null +++ b/test/supp/drd-umf_test-provider_file_memory_ipc.supp @@ -0,0 +1,16 @@ +{ + Conditional variable destruction false-positive + drd:CondErr + ... + fun:pthread_cond_destroy@* + ... +} + +{ + [false-positive] Double check locking pattern in trackingOpenIpcHandle + drd:ConflictingAccess + fun:trackingOpenIpcHandle + fun:umfMemoryProviderOpenIPCHandle + fun:umfOpenIPCHandle + ... 
+} diff --git a/test/supp/drd-umf_test-provider_os_memory.supp b/test/supp/drd-umf_test-provider_os_memory.supp new file mode 100644 index 000000000..cd44bb49a --- /dev/null +++ b/test/supp/drd-umf_test-provider_os_memory.supp @@ -0,0 +1,8 @@ +{ + [false-positive] Double check locking pattern in trackingOpenIpcHandle + drd:ConflictingAccess + fun:trackingOpenIpcHandle + fun:umfMemoryProviderOpenIPCHandle + fun:umfOpenIPCHandle + ... +} diff --git a/test/supp/drd-umf_test-scalable_coarse_devdax.supp b/test/supp/drd-umf_test-scalable_coarse_devdax.supp new file mode 100644 index 000000000..65640f6c3 --- /dev/null +++ b/test/supp/drd-umf_test-scalable_coarse_devdax.supp @@ -0,0 +1,49 @@ +{ + False-positive ConflictingAccess in libtbbmalloc.so + drd:ConflictingAccess + obj:*/libtbbmalloc.so* +} + +{ + False-positive ConflictingAccess in libtbbmalloc.so + drd:ConflictingAccess + obj:*/libtbbmalloc.so* + ... + fun:tbb_malloc + ... +} + +{ + False-positive ConflictingAccess in libtbbmalloc.so + drd:ConflictingAccess + obj:*/libtbbmalloc.so* + ... + fun:tbb_aligned_malloc + ... +} + +{ + False-positive ConflictingAccess in libtbbmalloc.so + drd:ConflictingAccess + obj:*/libtbbmalloc.so* + ... + fun:tbb_free + ... +} + +{ + False-positive ConflictingAccess in libtbbmalloc.so + drd:ConflictingAccess + obj:*/libtbbmalloc.so* + ... + fun:__nptl_deallocate_tsd + ... +} + +{ + False-positive ConflictingAccess in _Z22pow2AlignedAllocHelperP17umf_memory_pool_t + drd:ConflictingAccess + fun:memset + fun:_Z22pow2AlignedAllocHelperP17umf_memory_pool_t + ... 
+} diff --git a/test/supp/drd-umf_test-scalable_coarse_file.supp b/test/supp/drd-umf_test-scalable_coarse_file.supp new file mode 100644 index 000000000..65640f6c3 --- /dev/null +++ b/test/supp/drd-umf_test-scalable_coarse_file.supp @@ -0,0 +1,49 @@ +{ + False-positive ConflictingAccess in libtbbmalloc.so + drd:ConflictingAccess + obj:*/libtbbmalloc.so* +} + +{ + False-positive ConflictingAccess in libtbbmalloc.so + drd:ConflictingAccess + obj:*/libtbbmalloc.so* + ... + fun:tbb_malloc + ... +} + +{ + False-positive ConflictingAccess in libtbbmalloc.so + drd:ConflictingAccess + obj:*/libtbbmalloc.so* + ... + fun:tbb_aligned_malloc + ... +} + +{ + False-positive ConflictingAccess in libtbbmalloc.so + drd:ConflictingAccess + obj:*/libtbbmalloc.so* + ... + fun:tbb_free + ... +} + +{ + False-positive ConflictingAccess in libtbbmalloc.so + drd:ConflictingAccess + obj:*/libtbbmalloc.so* + ... + fun:__nptl_deallocate_tsd + ... +} + +{ + False-positive ConflictingAccess in _Z22pow2AlignedAllocHelperP17umf_memory_pool_t + drd:ConflictingAccess + fun:memset + fun:_Z22pow2AlignedAllocHelperP17umf_memory_pool_t + ... +} diff --git a/test/supp/helgrind-umf_test-disjointCoarseMallocPool.supp b/test/supp/helgrind-umf_test-disjointCoarseMallocPool.supp new file mode 100644 index 000000000..2f669eb31 --- /dev/null +++ b/test/supp/helgrind-umf_test-disjointCoarseMallocPool.supp @@ -0,0 +1,24 @@ +{ + Incompatibility with helgrind's implementation (pthread_mutex_lock with a pthread_rwlock_t* argument) + Helgrind:Misc + obj:*vgpreload_helgrind-amd64-linux.so + fun:_ZL20__gthread_mutex_lockP15pthread_mutex_t + ... +} + +{ + Incompatibility with helgrind's implementation (pthread_mutex_unlock with a pthread_rwlock_t* argument) + Helgrind:Misc + obj:*vgpreload_helgrind-amd64-linux.so + fun:_ZL22__gthread_mutex_unlockP15pthread_mutex_t + ... 
+} + +{ + Incompatibility with helgrind's implementation (lock order "0xA before 0xB" violated) + Helgrind:LockOrder + obj:*vgpreload_helgrind-amd64-linux.so + fun:_ZStL23__glibcxx_rwlock_wrlockP16pthread_rwlock_t + fun:_ZNSt22__shared_mutex_pthread4lockEv + ... +} diff --git a/test/supp/helgrind-umf_test-disjointPool.supp b/test/supp/helgrind-umf_test-disjointPool.supp index 917237d7e..3ada32736 100644 --- a/test/supp/helgrind-umf_test-disjointPool.supp +++ b/test/supp/helgrind-umf_test-disjointPool.supp @@ -27,3 +27,27 @@ fun:*gthread_mutex_unlock*pthread_mutex_t ... } + +{ + Incompatibility with helgrind's implementation ("pthread_rwlock_{rd,rw}lock with a pthread_mutex_t* argument") + Helgrind:Misc + obj:*vgpreload_helgrind-amd64-linux.so + fun:*glibcxx_rwlock_wrlock*pthread_rwlock_t + ... +} + +{ + Incompatibility with helgrind's implementation ("pthread_rwlock_unlock with a pthread_mutex_t* argument") + Helgrind:Misc + obj:*vgpreload_helgrind-amd64-linux.so + fun:*glibcxx_rwlock_unlock*pthread_rwlock_t + ... +} + +{ + Incompatibility with helgrind's implementation ("pthread_rwlock_{rd,rw}lock with a pthread_mutex_t* argument") + Helgrind:Misc + obj:*vgpreload_helgrind-amd64-linux.so + fun:*glibcxx_rwlock_rdlock*pthread_rwlock_t* + ... +} diff --git a/test/supp/helgrind-umf_test-ipc.supp b/test/supp/helgrind-umf_test-ipc.supp new file mode 100644 index 000000000..e46140c19 --- /dev/null +++ b/test/supp/helgrind-umf_test-ipc.supp @@ -0,0 +1,16 @@ +{ + False-positive race in critnib_insert (lack of instrumentation) + Helgrind:Race + fun:store + fun:critnib_insert + ... +} + +{ + False-positive race in critnib_find (lack of instrumentation) + Helgrind:Race + fun:find_predecessor + fun:find_le + fun:critnib_find + ... 
+} diff --git a/test/supp/helgrind-umf_test-jemalloc_coarse_devdax.supp b/test/supp/helgrind-umf_test-jemalloc_coarse_devdax.supp new file mode 100644 index 000000000..18774f387 --- /dev/null +++ b/test/supp/helgrind-umf_test-jemalloc_coarse_devdax.supp @@ -0,0 +1,34 @@ +{ + False-positive Race in libjemalloc.so + Helgrind:Race + obj:*/libjemalloc.so* + ... + fun:mallocx + ... +} + +{ + False-positive Race in libjemalloc.so + Helgrind:Race + obj:*/libjemalloc.so* + ... + fun:op_free + ... +} + +{ + False-positive Race in libjemalloc.so + Helgrind:Race + obj:*/libjemalloc.so* + ... + fun:__nptl_deallocate_tsd + ... +} + +{ + False-positive Race in critnib_insert + Helgrind:Race + fun:store + fun:critnib_insert + ... +} diff --git a/test/supp/helgrind-umf_test-jemalloc_coarse_file.supp b/test/supp/helgrind-umf_test-jemalloc_coarse_file.supp new file mode 100644 index 000000000..18774f387 --- /dev/null +++ b/test/supp/helgrind-umf_test-jemalloc_coarse_file.supp @@ -0,0 +1,34 @@ +{ + False-positive Race in libjemalloc.so + Helgrind:Race + obj:*/libjemalloc.so* + ... + fun:mallocx + ... +} + +{ + False-positive Race in libjemalloc.so + Helgrind:Race + obj:*/libjemalloc.so* + ... + fun:op_free + ... +} + +{ + False-positive Race in libjemalloc.so + Helgrind:Race + obj:*/libjemalloc.so* + ... + fun:__nptl_deallocate_tsd + ... +} + +{ + False-positive Race in critnib_insert + Helgrind:Race + fun:store + fun:critnib_insert + ... +} diff --git a/test/supp/helgrind-umf_test-provider_devdax_memory_ipc.supp b/test/supp/helgrind-umf_test-provider_devdax_memory_ipc.supp new file mode 100644 index 000000000..4fcd2786c --- /dev/null +++ b/test/supp/helgrind-umf_test-provider_devdax_memory_ipc.supp @@ -0,0 +1,8 @@ +{ + [false-positive] Double check locking pattern in trackingOpenIpcHandle + Helgrind:Race + fun:trackingOpenIpcHandle + fun:umfMemoryProviderOpenIPCHandle + fun:umfOpenIPCHandle + ... 
+} diff --git a/test/supp/helgrind-umf_test-provider_file_memory_ipc.supp b/test/supp/helgrind-umf_test-provider_file_memory_ipc.supp new file mode 100644 index 000000000..4194f4847 --- /dev/null +++ b/test/supp/helgrind-umf_test-provider_file_memory_ipc.supp @@ -0,0 +1,25 @@ +{ + [false-positive] Double check locking pattern in trackingOpenIpcHandle + Helgrind:Race + fun:trackingOpenIpcHandle + fun:umfMemoryProviderOpenIPCHandle + fun:umfOpenIPCHandle + ... +} + +{ + False-positive race in critnib_insert (lack of instrumentation) + Helgrind:Race + fun:store + fun:critnib_insert + ... +} + +{ + False-positive race in critnib_find (lack of instrumentation) + Helgrind:Race + fun:find_predecessor + fun:find_le + fun:critnib_find + ... +} diff --git a/test/supp/helgrind-umf_test-provider_os_memory.supp b/test/supp/helgrind-umf_test-provider_os_memory.supp new file mode 100644 index 000000000..4fcd2786c --- /dev/null +++ b/test/supp/helgrind-umf_test-provider_os_memory.supp @@ -0,0 +1,8 @@ +{ + [false-positive] Double check locking pattern in trackingOpenIpcHandle + Helgrind:Race + fun:trackingOpenIpcHandle + fun:umfMemoryProviderOpenIPCHandle + fun:umfOpenIPCHandle + ... +} diff --git a/test/supp/helgrind-umf_test-scalable_coarse_devdax.supp b/test/supp/helgrind-umf_test-scalable_coarse_devdax.supp new file mode 100644 index 000000000..650edf514 --- /dev/null +++ b/test/supp/helgrind-umf_test-scalable_coarse_devdax.supp @@ -0,0 +1,49 @@ +{ + False-positive Race in libtbbmalloc.so + Helgrind:Race + obj:*/libtbbmalloc.so* +} + +{ + False-positive Race in libtbbmalloc.so + Helgrind:Race + obj:*/libtbbmalloc.so* + ... + fun:tbb_malloc + ... +} + +{ + False-positive Race in libtbbmalloc.so + Helgrind:Race + obj:*/libtbbmalloc.so* + ... + fun:tbb_aligned_malloc + ... +} + +{ + False-positive Race in libtbbmalloc.so + Helgrind:Race + obj:*/libtbbmalloc.so* + ... + fun:tbb_free + ... 
+} + +{ + False-positive Race in libtbbmalloc.so + Helgrind:Race + obj:*/libtbbmalloc.so* + ... + fun:__nptl_deallocate_tsd + ... +} + +{ + False-positive Race in _Z22pow2AlignedAllocHelperP17umf_memory_pool_t + Helgrind:Race + fun:memset + fun:_Z22pow2AlignedAllocHelperP17umf_memory_pool_t + ... +} diff --git a/test/supp/helgrind-umf_test-scalable_coarse_file.supp b/test/supp/helgrind-umf_test-scalable_coarse_file.supp new file mode 100644 index 000000000..650edf514 --- /dev/null +++ b/test/supp/helgrind-umf_test-scalable_coarse_file.supp @@ -0,0 +1,49 @@ +{ + False-positive Race in libtbbmalloc.so + Helgrind:Race + obj:*/libtbbmalloc.so* +} + +{ + False-positive Race in libtbbmalloc.so + Helgrind:Race + obj:*/libtbbmalloc.so* + ... + fun:tbb_malloc + ... +} + +{ + False-positive Race in libtbbmalloc.so + Helgrind:Race + obj:*/libtbbmalloc.so* + ... + fun:tbb_aligned_malloc + ... +} + +{ + False-positive Race in libtbbmalloc.so + Helgrind:Race + obj:*/libtbbmalloc.so* + ... + fun:tbb_free + ... +} + +{ + False-positive Race in libtbbmalloc.so + Helgrind:Race + obj:*/libtbbmalloc.so* + ... + fun:__nptl_deallocate_tsd + ... +} + +{ + False-positive Race in _Z22pow2AlignedAllocHelperP17umf_memory_pool_t + Helgrind:Race + fun:memset + fun:_Z22pow2AlignedAllocHelperP17umf_memory_pool_t + ... +} diff --git a/test/supp/memcheck-umf_test-scalable_pool.supp b/test/supp/memcheck-umf_test-scalable_pool.supp new file mode 100644 index 000000000..114dfb236 --- /dev/null +++ b/test/supp/memcheck-umf_test-scalable_pool.supp @@ -0,0 +1,18 @@ +{ + Conditional jump or move depends on uninitialised value(s) - internal issue of libtbbmalloc.so + Memcheck:Cond + fun:_ZN3rml9pool_freeEPNS_10MemoryPoolEPv + fun:tbb_free + fun:umfPoolFree + ... +} + +{ + Conditional jump or move depends on uninitialised value(s) - internal issue of libtbbmalloc.so + Memcheck:Cond + obj:*libtbbmalloc.so* + fun:_ZN3rml9pool_freeEPNS_10MemoryPoolEPv + fun:tbb_free + fun:umfPoolFree + ... 
+} diff --git a/test/test_base_alloc.cpp b/test/test_base_alloc.cpp index 497a22a69..80bc67541 100644 --- a/test/test_base_alloc.cpp +++ b/test/test_base_alloc.cpp @@ -37,7 +37,7 @@ TEST_F(test, baseAllocMultiThreadedAllocMemset) { for (int i = 0; i < ITERATIONS; i++) { for (int k = 0; k < ALLOCATION_SIZE; k++) { - UT_ASSERTeq(*(ptrs[i].get() + k), ((i + TID) & 0xFF)); + ASSERT_EQ(*(ptrs[i].get() + k), ((i + TID) & 0xFF)); } } }; diff --git a/test/test_base_alloc_linear.cpp b/test/test_base_alloc_linear.cpp index a361244fc..3f8371d8d 100644 --- a/test/test_base_alloc_linear.cpp +++ b/test/test_base_alloc_linear.cpp @@ -24,7 +24,7 @@ TEST_F(test, baseAllocLinearAllocMoreThanPoolSize) { size_t new_size = 20 * 1024 * 1024; // = 20 MB void *ptr = umf_ba_linear_alloc(pool.get(), new_size); - UT_ASSERTne(ptr, NULL); + ASSERT_NE(ptr, nullptr); memset(ptr, 0, new_size); umf_ba_linear_free(pool.get(), ptr); @@ -37,15 +37,14 @@ TEST_F(test, baseAllocLinearPoolContainsPointer) { size_t size = 16; void *ptr = umf_ba_linear_alloc(pool.get(), size); - UT_ASSERTne(ptr, NULL); + ASSERT_NE(ptr, nullptr); memset(ptr, 0, size); - // assert pool contains pointer ptr - UT_ASSERTne(umf_ba_linear_pool_contains_pointer(pool.get(), ptr), 0); + ASSERT_NE(umf_ba_linear_pool_contains_pointer(pool.get(), ptr), 0); // assert pool does NOT contain pointer 0x0123 - UT_ASSERTeq(umf_ba_linear_pool_contains_pointer(pool.get(), (void *)0x0123), - 0); + ASSERT_EQ(umf_ba_linear_pool_contains_pointer(pool.get(), (void *)0x0123), + 0); umf_ba_linear_free(pool.get(), ptr); } @@ -61,7 +60,7 @@ TEST_F(test, baseAllocLinearMultiThreadedAllocMemset) { // but not big enough to hold all allocations, // so that there were more pools allocated. // This is needed to test freeing the first pool. 
- size_t pool_size = 2 * util_get_page_size(); + size_t pool_size = 2 * utils_get_page_size(); auto pool = std::shared_ptr( umf_ba_linear_create(pool_size), umf_ba_linear_destroy); @@ -78,14 +77,14 @@ TEST_F(test, baseAllocLinearMultiThreadedAllocMemset) { (rand() / (double)RAND_MAX)); buffer[i].size = size; buffer[i].ptr = (unsigned char *)umf_ba_linear_alloc(pool, size); - UT_ASSERTne(buffer[i].ptr, NULL); + ASSERT_NE(buffer[i].ptr, nullptr); memset(buffer[i].ptr, (i + TID) & 0xFF, buffer[i].size); } for (int i = 0; i < ITERATIONS; i++) { - UT_ASSERTne(buffer[i].ptr, NULL); + ASSERT_NE(buffer[i].ptr, nullptr); for (size_t k = 0; k < buffer[i].size; k++) { - UT_ASSERTeq(*(buffer[i].ptr + k), (i + TID) & 0xFF); + ASSERT_EQ(*(buffer[i].ptr + k), (i + TID) & 0xFF); } } diff --git a/test/test_examples.sh b/test/test_examples.sh index 9331b1d06..efc86bcf9 100755 --- a/test/test_examples.sh +++ b/test/test_examples.sh @@ -5,40 +5,44 @@ set -e -WORKSPACE=$1 +SOURCE_DIR=$1 BUILD_DIR=$2 INSTALL_DIR=$3 +CMAKE_INSTALL_PREFIX=$4 +STANDALONE_CMAKE_OPTIONS=$5 echo "Running: $0 $*" function print_usage() { echo "$(basename $0) - test all examples standalone" - echo "Usage: $(basename $0) " + echo "Usage: $(basename $0) " } -if [ "$3" == "" ]; then +if [ "$6" = "" ]; then print_usage echo -e "Error: too few arguments\n" exit 1 fi -if [ "$4" == "" ]; then - print_usage - echo "No examples to run!" - exit 0 -fi - -if [ ! -f $WORKSPACE/README.md ]; then - echo -e "error: incorrect : $WORKSPACE\n" +if [ ! 
-f $SOURCE_DIR/README.md ]; then + echo -e "error: incorrect : $SOURCE_DIR\n" print_usage exit 1 fi -WORKSPACE=$(realpath $WORKSPACE) +mkdir -p ${INSTALL_DIR}/${CMAKE_INSTALL_PREFIX} + +SOURCE_DIR=$(realpath $SOURCE_DIR) BUILD_DIR=$(realpath $BUILD_DIR) INSTALL_DIR=$(realpath $INSTALL_DIR) -shift 3 +echo "SOURCE_DIR=$SOURCE_DIR" +echo "BUILD_DIR=$BUILD_DIR" +echo "CMAKE_INSTALL_PREFIX=$CMAKE_INSTALL_PREFIX" +echo "INSTALL_DIR=$INSTALL_DIR" +echo "STANDALONE_CMAKE_OPTIONS=$STANDALONE_CMAKE_OPTIONS" + +shift 5 EXAMPLES="$*" echo "Examples to run: $EXAMPLES" echo @@ -46,12 +50,13 @@ echo cd ${BUILD_DIR} echo "DIR=$(pwd)" +echo "Installing UMF into the directory: ${INSTALL_DIR}/${CMAKE_INSTALL_PREFIX}" set -x -make -j$(nproc) install +make DESTDIR=$INSTALL_DIR -j$(nproc) install set +x for ex in $EXAMPLES; do - SRC_DIR="${WORKSPACE}/examples/$ex" + SRC_DIR="${SOURCE_DIR}/examples/$ex" BLD_DIR="${BUILD_DIR}/examples-standalone/$ex" if [ ! -d $SRC_DIR ]; then @@ -67,7 +72,7 @@ for ex in $EXAMPLES; do rm -rf $BLD_DIR mkdir -p $BLD_DIR cd $BLD_DIR - CMAKE_PREFIX_PATH="$INSTALL_DIR" cmake $SRC_DIR + CMAKE_PREFIX_PATH="${INSTALL_DIR}/${CMAKE_INSTALL_PREFIX}" cmake $SRC_DIR $STANDALONE_CMAKE_OPTIONS make -j$(nproc) ctest --output-on-failure set +x diff --git a/test/test_proxy_lib.cpp b/test/test_proxy_lib.cpp index 4cdb6568b..85afc65be 100644 --- a/test/test_proxy_lib.cpp +++ b/test/test_proxy_lib.cpp @@ -15,15 +15,47 @@ #include "base.hpp" #include "test_helpers.h" +#include "utils_common.h" using umf_test::test; -TEST_F(test, proxyLibBasic) { +#define SIZE_64 64 +#define ALIGN_1024 1024 - ::free(::malloc(64)); +TEST_F(test, proxyLib_basic) { + + ::free(::malloc(SIZE_64)); // a check to verify we are running the proxy library void *ptr = (void *)0x01; + +#ifdef _WIN32 + size_t size = _msize(ptr); +#elif __APPLE__ + size_t size = ::malloc_size(ptr); +#else + size_t size = ::malloc_usable_size(ptr); +#endif + + ASSERT_EQ(size, 0xDEADBEEF); +} + +TEST_F(test, 
proxyLib_realloc_size0) { + // realloc(ptr, 0) == free (ptr) + // realloc(ptr, 0) returns NULL + ASSERT_EQ(::realloc(::malloc(SIZE_64), 0), nullptr); +} + +TEST_F(test, proxyLib_malloc_usable_size) { + + void *ptr = ::malloc(SIZE_64); + ASSERT_NE(ptr, nullptr); + if (ptr == nullptr) { + // Fix for the following CodeQL's warning on Windows: + // 'ptr' could be '0': this does not adhere to the specification for the function '_msize'. + return; + } + #ifdef _WIN32 size_t size = _msize(ptr); #elif __APPLE__ @@ -31,5 +63,24 @@ TEST_F(test, proxyLibBasic) { #else size_t size = ::malloc_usable_size(ptr); #endif - UT_ASSERTeq(size, 0xDEADBEEF); + + ASSERT_EQ((int)(size == 0 || size >= SIZE_64), 1); + + ::free(ptr); +} + +TEST_F(test, proxyLib_aligned_alloc) { +#ifdef _WIN32 + void *ptr = _aligned_malloc(SIZE_64, ALIGN_1024); +#else + void *ptr = ::aligned_alloc(ALIGN_1024, SIZE_64); +#endif + + ASSERT_EQ((int)(IS_ALIGNED((uintptr_t)ptr, ALIGN_1024)), 1); + +#ifdef _WIN32 + _aligned_free(ptr); +#else + ::free(ptr); +#endif } diff --git a/test/test_proxy_lib_size_threshold.cpp b/test/test_proxy_lib_size_threshold.cpp new file mode 100644 index 000000000..fac1c516b --- /dev/null +++ b/test/test_proxy_lib_size_threshold.cpp @@ -0,0 +1,183 @@ +/* + * Copyright (C) 2024 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +*/ + +#if defined(__APPLE__) +#include +#else +#include +#endif + +#include + +#include "base.hpp" +#include "test_helpers.h" +#include "utils_common.h" + +using umf_test::test; + +// size threshold defined by the env variable UMF_PROXY="size.threshold=64" +#define SIZE_THRESHOLD 64 +#define SIZE_EQ (SIZE_THRESHOLD) +#define SIZE_LT (SIZE_THRESHOLD - 1) + +#define ALIGN_1024 1024 + +TEST_F(test, proxyLib_basic) { + // a check to verify we are running the proxy library + void *ptr = (void *)0x01; + +#ifdef _WIN32 + size_t size = _msize(ptr); +#elif __APPLE__ + size_t size = ::malloc_size(ptr); +#else + size_t size = ::malloc_usable_size(ptr); +#endif + + ASSERT_EQ(size, 0xDEADBEEF); +} + +TEST_F(test, proxyLib_realloc_size0) { + // realloc(ptr, 0) == free(ptr) + // realloc(ptr, 0) returns NULL + ASSERT_EQ(::realloc(::malloc(SIZE_EQ), 0), nullptr); +} + +// The proxyLib_size_threshold_* tests test the size threshold of the proxy library. 
+// The size threshold is set to SIZE_THRESHOLD bytes in this test, so all allocations of: +// 1) size < SIZE_THRESHOLD go through the default system allocator +// (umfPoolByPtr(ptr_size < SIZE_THRESHOLD) == nullptr) +// 2) size >= SIZE_THRESHOLD go through the proxy library allocator +// (umfPoolByPtr(ptr_size >= SIZE_THRESHOLD) != nullptr) + +TEST_F(test, proxyLib_size_threshold_aligned_alloc) { +#ifdef _WIN32 + void *ptr_LT = _aligned_malloc(SIZE_LT, ALIGN_1024); + void *ptr_EQ = _aligned_malloc(SIZE_EQ, ALIGN_1024); +#else + void *ptr_LT = ::aligned_alloc(ALIGN_1024, SIZE_LT); + void *ptr_EQ = ::aligned_alloc(ALIGN_1024, SIZE_EQ); +#endif + + ASSERT_NE(ptr_LT, nullptr); + ASSERT_NE(ptr_EQ, nullptr); + + // verify alignment + ASSERT_EQ((int)(IS_ALIGNED((uintptr_t)ptr_LT, ALIGN_1024)), 1); + ASSERT_EQ((int)(IS_ALIGNED((uintptr_t)ptr_EQ, ALIGN_1024)), 1); + + ASSERT_EQ(umfPoolByPtr(ptr_LT), nullptr); + ASSERT_NE(umfPoolByPtr(ptr_EQ), nullptr); + +#ifdef _WIN32 + _aligned_free(ptr_LT); + _aligned_free(ptr_EQ); +#else + ::free(ptr_LT); + ::free(ptr_EQ); +#endif +} + +TEST_F(test, proxyLib_size_threshold_malloc) { + void *ptr_LT = malloc(SIZE_LT); + void *ptr_EQ = malloc(SIZE_EQ); + + ASSERT_NE(ptr_LT, nullptr); + ASSERT_NE(ptr_EQ, nullptr); + + ASSERT_EQ(umfPoolByPtr(ptr_LT), nullptr); + ASSERT_NE(umfPoolByPtr(ptr_EQ), nullptr); + + ::free(ptr_LT); + ::free(ptr_EQ); +} + +TEST_F(test, proxyLib_size_threshold_calloc) { + void *ptr_LT = calloc(SIZE_LT, 1); + void *ptr_EQ = calloc(SIZE_EQ, 1); + + ASSERT_NE(ptr_LT, nullptr); + ASSERT_NE(ptr_EQ, nullptr); + + ASSERT_EQ(umfPoolByPtr(ptr_LT), nullptr); + ASSERT_NE(umfPoolByPtr(ptr_EQ), nullptr); + + ::free(ptr_LT); + ::free(ptr_EQ); +} + +TEST_F(test, proxyLib_size_threshold_realloc_up) { + void *ptr_LT = malloc(SIZE_LT); + void *ptr_EQ = malloc(SIZE_EQ); + + ASSERT_NE(ptr_LT, nullptr); + ASSERT_NE(ptr_EQ, nullptr); + + void *ptr_LT_r = realloc(ptr_LT, 2 * SIZE_LT); + void *ptr_EQ_r = realloc(ptr_EQ, 2 * SIZE_EQ); + + 
ASSERT_NE(ptr_LT_r, nullptr); + ASSERT_NE(ptr_EQ_r, nullptr); + + ASSERT_EQ(umfPoolByPtr(ptr_LT_r), nullptr); + ASSERT_NE(umfPoolByPtr(ptr_EQ_r), nullptr); + + ::free(ptr_LT_r); + ::free(ptr_EQ_r); +} + +TEST_F(test, proxyLib_size_threshold_realloc_down) { + void *ptr_LT = malloc(SIZE_LT); + void *ptr_EQ = malloc(SIZE_EQ); + + ASSERT_NE(ptr_LT, nullptr); + ASSERT_NE(ptr_EQ, nullptr); + + void *ptr_LT_r = realloc(ptr_LT, SIZE_LT / 2); + void *ptr_EQ_r = realloc(ptr_EQ, SIZE_EQ / 2); + + ASSERT_NE(ptr_LT_r, nullptr); + ASSERT_NE(ptr_EQ_r, nullptr); + + ASSERT_EQ(umfPoolByPtr(ptr_LT_r), nullptr); + ASSERT_NE(umfPoolByPtr(ptr_EQ_r), nullptr); + + ::free(ptr_LT_r); + ::free(ptr_EQ_r); +} + +TEST_F(test, proxyLib_size_threshold_malloc_usable_size) { + + void *ptr_LT = ::malloc(SIZE_LT); + void *ptr_EQ = ::malloc(SIZE_EQ); + + ASSERT_NE(ptr_LT, nullptr); + ASSERT_NE(ptr_EQ, nullptr); + + if (ptr_LT == nullptr || ptr_EQ == nullptr) { + // Fix for the following CodeQL's warning on Windows: + // 'ptr' could be '0': this does not adhere to the specification for the function '_msize'. + return; + } + +#ifdef _WIN32 + size_t size_LT = _msize(ptr_LT); + size_t size_EQ = _msize(ptr_EQ); +#elif __APPLE__ + size_t size_LT = ::malloc_size(ptr_LT); + size_t size_EQ = ::malloc_size(ptr_EQ); +#else + size_t size_LT = ::malloc_usable_size(ptr_LT); + size_t size_EQ = ::malloc_usable_size(ptr_EQ); +#endif + + ASSERT_EQ((int)(size_LT == 0 || size_LT >= SIZE_LT), 1); + ASSERT_EQ((int)(size_EQ == 0 || size_EQ >= SIZE_EQ), 1); + + ::free(ptr_LT); + ::free(ptr_EQ); +} diff --git a/test/test_valgrind.sh b/test/test_valgrind.sh index ed30e9ad6..9f84cf0d3 100755 --- a/test/test_valgrind.sh +++ b/test/test_valgrind.sh @@ -20,7 +20,7 @@ if ! 
valgrind --version > /dev/null; then exit 1 fi -if [ "$3" == "" ]; then +if [ "$3" = "" ]; then echo -e "error: too few arguments\n" print_usage exit 1 @@ -68,7 +68,7 @@ echo echo "Working directory: $(pwd)" echo "Running: \"valgrind $OPTION\" for the following tests:" -FAIL=0 +ANY_TEST_FAILED=0 rm -f umf_test-*.log umf_test-*.err for test in $(ls -1 umf_test-*); do @@ -84,13 +84,32 @@ for test in $(ls -1 umf_test-*); do # skip tests incompatible with valgrind FILTER="" case $test in + umf_test-disjointPool) + if [ "$TOOL" = "helgrind" ]; then + # skip because of the assert in helgrind: + # Helgrind: hg_main.c:308 (lockN_acquire_reader): Assertion 'lk->kind == LK_rdwr' failed. + echo "- SKIPPED (helgrind only)" + continue; + fi + ;; umf_test-ipc_os_prov_*) echo "- SKIPPED" - continue; # skip it - this is a 2 processes test run using the ipc_os_prov_anon_fd.sh script + continue; # skip testing helper binaries used by the ipc_os_prov_* tests + ;; + umf_test-ipc_devdax_prov_*) + echo "- SKIPPED" + continue; # skip testing helper binaries used by the ipc_devdax_prov_* tests + ;; + umf_test-ipc_file_prov_*) + echo "- SKIPPED" + continue; # skip testing helper binaries used by the ipc_file_prov_* tests ;; umf_test-memspace_host_all) FILTER='--gtest_filter="-*allocsSpreadAcrossAllNumaNodes"' ;; + umf_test-provider_os_memory) + FILTER='--gtest_filter="-osProviderTest/umfIpcTest*"' + ;; umf_test-provider_os_memory_config) FILTER='--gtest_filter="-*protection_flag_none:*protection_flag_read:*providerConfigTestNumaMode*"' ;; @@ -106,12 +125,18 @@ for test in $(ls -1 umf_test-*); do umf_test-memspace_lowest_latency) FILTER='--gtest_filter="-*allocLocalMt*"' ;; + umf_test-memoryPool) + FILTER='--gtest_filter="-*allocMaxSize*"' + ;; esac [ "$FILTER" != "" ] && echo -n "($FILTER) " + LAST_TEST_FAILED=0 + if ! 
HWLOC_CPUID_PATH=./cpuid valgrind $OPTION $OPT_SUP --gen-suppressions=all ./$test $FILTER >$LOG 2>&1; then - FAIL=1 + LAST_TEST_FAILED=1 + ANY_TEST_FAILED=1 echo "(valgrind FAILED) " echo "Command: HWLOC_CPUID_PATH=./cpuid valgrind $OPTION $OPT_SUP --gen-suppressions=all ./$test $FILTER >$LOG 2>&1" echo "Output:" @@ -121,17 +146,19 @@ for test in $(ls -1 umf_test-*); do fi || true # grep for "ERROR SUMMARY" with errors (there can be many lines with "ERROR SUMMARY") grep -e "ERROR SUMMARY:" $LOG | grep -v -e "ERROR SUMMARY: 0 errors from 0 contexts" > $ERR || true - if [ $(cat $ERR | wc -l) -eq 0 ]; then + if [ $LAST_TEST_FAILED -eq 0 -a $(cat $ERR | wc -l) -eq 0 ]; then echo "- OK" rm -f $LOG $ERR else echo "- FAILED!" cat $ERR | cut -d' ' -f2- - FAIL=1 + ANY_TEST_FAILED=1 fi || true done -[ $FAIL -eq 0 ] && echo PASSED && exit 0 +rm -rf ${BUILD_DIR}/test/cpuid + +[ $ANY_TEST_FAILED -eq 0 ] && echo PASSED && exit 0 echo echo "======================================================================" diff --git a/test/utils/utils.cpp b/test/utils/utils.cpp index de4a71d65..302971f7e 100644 --- a/test/utils/utils.cpp +++ b/test/utils/utils.cpp @@ -8,52 +8,52 @@ using umf_test::test; -TEST_F(test, util_parse_var) { - EXPECT_FALSE(util_parse_var("", "test1", 0)); +TEST_F(test, utils_parse_var) { + EXPECT_FALSE(utils_parse_var("", "test1", 0)); - EXPECT_TRUE(util_parse_var("test1;test2;test3;test4", "test1", 0)); - EXPECT_TRUE(util_parse_var("test1;test2;test3;test4", "test2", 0)); - EXPECT_TRUE(util_parse_var("test1;test2;test3;test4", "test3", 0)); - EXPECT_TRUE(util_parse_var("test1;test2;test3;test4", "test4", 0)); + EXPECT_TRUE(utils_parse_var("test1;test2;test3;test4", "test1", 0)); + EXPECT_TRUE(utils_parse_var("test1;test2;test3;test4", "test2", 0)); + EXPECT_TRUE(utils_parse_var("test1;test2;test3;test4", "test3", 0)); + EXPECT_TRUE(utils_parse_var("test1;test2;test3;test4", "test4", 0)); - EXPECT_TRUE(util_parse_var(";test1;test2;test3;test4;", "test1", 0)); - 
EXPECT_TRUE(util_parse_var(";test1;test2;test3;test4;", "test2", 0)); - EXPECT_TRUE(util_parse_var(";test1;test2;test3;test4;", "test3", 0)); - EXPECT_TRUE(util_parse_var(";test1;test2;test3;test4;", "test4", 0)); + EXPECT_TRUE(utils_parse_var(";test1;test2;test3;test4;", "test1", 0)); + EXPECT_TRUE(utils_parse_var(";test1;test2;test3;test4;", "test2", 0)); + EXPECT_TRUE(utils_parse_var(";test1;test2;test3;test4;", "test3", 0)); + EXPECT_TRUE(utils_parse_var(";test1;test2;test3;test4;", "test4", 0)); - EXPECT_FALSE(util_parse_var("test1;test2;test3;test4", "test5", 0)); + EXPECT_FALSE(utils_parse_var("test1;test2;test3;test4", "test5", 0)); - EXPECT_FALSE(util_parse_var("test1test2test3test4", "test1", 0)); - EXPECT_FALSE(util_parse_var("test1test2test3test4", "test2", 0)); - EXPECT_FALSE(util_parse_var("test1test2test3test4", "test3", 0)); - EXPECT_FALSE(util_parse_var("test1test2test3test4", "test4", 0)); + EXPECT_FALSE(utils_parse_var("test1test2test3test4", "test1", 0)); + EXPECT_FALSE(utils_parse_var("test1test2test3test4", "test2", 0)); + EXPECT_FALSE(utils_parse_var("test1test2test3test4", "test3", 0)); + EXPECT_FALSE(utils_parse_var("test1test2test3test4", "test4", 0)); - EXPECT_FALSE(util_parse_var("test1:test2;test3:test4", "test1", 0)); - EXPECT_FALSE(util_parse_var("test1:test2;test3:test4", "test2", 0)); - EXPECT_FALSE(util_parse_var("test1:test2;test3:test4", "test3", 0)); - EXPECT_FALSE(util_parse_var("test1:test2;test3:test4", "test4", 0)); + EXPECT_FALSE(utils_parse_var("test1:test2;test3:test4", "test1", 0)); + EXPECT_FALSE(utils_parse_var("test1:test2;test3:test4", "test2", 0)); + EXPECT_FALSE(utils_parse_var("test1:test2;test3:test4", "test3", 0)); + EXPECT_FALSE(utils_parse_var("test1:test2;test3:test4", "test4", 0)); - EXPECT_TRUE(util_parse_var("test1:test2;test3:test4", "test1:test2", 0)); - EXPECT_TRUE(util_parse_var("test1:test2;test3:test4", "test3:test4", 0)); - EXPECT_FALSE(util_parse_var("test1:test2;test3:test4", "test2:test3'", 0)); 
+ EXPECT_TRUE(utils_parse_var("test1:test2;test3:test4", "test1:test2", 0)); + EXPECT_TRUE(utils_parse_var("test1:test2;test3:test4", "test3:test4", 0)); + EXPECT_FALSE(utils_parse_var("test1:test2;test3:test4", "test2:test3'", 0)); EXPECT_TRUE( - util_parse_var("test1;;test2;invalid;test3;;;test4", "test1", 0)); + utils_parse_var("test1;;test2;invalid;test3;;;test4", "test1", 0)); EXPECT_TRUE( - util_parse_var("test1;;test2;invalid;test3;;;test4", "test2", 0)); + utils_parse_var("test1;;test2;invalid;test3;;;test4", "test2", 0)); EXPECT_TRUE( - util_parse_var("test1;;test2;invalid;test3;;;test4", "test3", 0)); + utils_parse_var("test1;;test2;invalid;test3;;;test4", "test3", 0)); EXPECT_TRUE( - util_parse_var("test1;;test2;invalid;test3;;;test4", "test4", 0)); + utils_parse_var("test1;;test2;invalid;test3;;;test4", "test4", 0)); const char *arg; - EXPECT_FALSE(util_parse_var("test1;test2;test3;test4", "test1", &arg)); - EXPECT_FALSE(util_parse_var("test1;test2;test3;test4", "test2", &arg)); - EXPECT_FALSE(util_parse_var("test1;test2;test3;test4", "test3", &arg)); - EXPECT_FALSE(util_parse_var("test1;test2;test3;test4", "test4", &arg)); - - EXPECT_TRUE(util_parse_var("test1,abc;test2;test3;test4", "test1", &arg)); - EXPECT_TRUE(util_parse_var("test1;test2,abc;test3;test4", "test2", &arg)); - EXPECT_TRUE(util_parse_var("test1;test2;test3,abc;test4", "test3", &arg)); - EXPECT_TRUE(util_parse_var("test1;test2;test3;test4,abc", "test4", &arg)); + EXPECT_FALSE(utils_parse_var("test1;test2;test3;test4", "test1", &arg)); + EXPECT_FALSE(utils_parse_var("test1;test2;test3;test4", "test2", &arg)); + EXPECT_FALSE(utils_parse_var("test1;test2;test3;test4", "test3", &arg)); + EXPECT_FALSE(utils_parse_var("test1;test2;test3;test4", "test4", &arg)); + + EXPECT_TRUE(utils_parse_var("test1,abc;test2;test3;test4", "test1", &arg)); + EXPECT_TRUE(utils_parse_var("test1;test2,abc;test3;test4", "test2", &arg)); + EXPECT_TRUE(utils_parse_var("test1;test2;test3,abc;test4", "test3", &arg)); 
+ EXPECT_TRUE(utils_parse_var("test1;test2;test3;test4,abc", "test4", &arg)); } diff --git a/test/utils/utils_linux.cpp b/test/utils/utils_linux.cpp new file mode 100644 index 000000000..7aa0a9d83 --- /dev/null +++ b/test/utils/utils_linux.cpp @@ -0,0 +1,171 @@ +// Copyright (C) 2024 Intel Corporation +// Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include + +#include "base.hpp" +#include "utils/utils_common.h" + +using umf_test::test; +TEST_F(test, utils_translate_mem_visibility_flag) { + umf_memory_visibility_t in_flag = static_cast(0); + unsigned out_flag; + auto ret = utils_translate_mem_visibility_flag(in_flag, &out_flag); + EXPECT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); +} + +TEST_F(test, utils_shm_open_invalid_args) { + auto ret = utils_shm_open(NULL); + EXPECT_EQ(ret, -1); + + ret = utils_shm_open("invalid_path"); + EXPECT_EQ(ret, -1); +} + +TEST_F(test, utils_get_file_size_invalid_args) { + size_t size; + auto ret = utils_get_file_size(0xffffff, &size); + EXPECT_EQ(ret, -1); + + int fd = utils_create_anonymous_fd(); + ASSERT_GE(fd, 0); + + // Explicit condition for coverity + if (fd >= 0) { + ret = utils_get_file_size(fd, &size); + EXPECT_EQ(ret, 0); + EXPECT_EQ(size, 0); + } +} + +TEST_F(test, utils_set_file_size_invalid_args) { + auto ret = utils_set_file_size(0xffffff, 256); + EXPECT_EQ(ret, -1); +} + +TEST_F(test, utils_shm_create_invalid_args) { + auto ret = utils_shm_create(NULL, 0); + EXPECT_EQ(ret, -1); + + ret = utils_shm_create("", 256); + EXPECT_EQ(ret, -1); + + // Ensure that a valid size results in a success + ret = utils_shm_create("/abc", 256); + EXPECT_GE(ret, 0); + + ret = utils_shm_create("/abc", -1); + EXPECT_EQ(ret, -1); +} + +TEST_F(test, utils_get_size_threshold) { + // Expected input to utils_get_size_threshold(): + // char *str_threshold = utils_env_var_get_str("UMF_PROXY", "size.threshold="); + + // positive tests + 
EXPECT_EQ(utils_get_size_threshold((char *)"size.threshold=111"), 111); + EXPECT_EQ(utils_get_size_threshold((char *)"size.threshold=222;abcd"), 222); + EXPECT_EQ(utils_get_size_threshold((char *)"size.threshold=333;var=value"), + 333); + // LONG_MAX = 9223372036854775807 + EXPECT_EQ(utils_get_size_threshold( + (char *)"size.threshold=9223372036854775807;var=value"), + 9223372036854775807); + + // negative tests + EXPECT_EQ(utils_get_size_threshold(NULL), 0); + EXPECT_EQ(utils_get_size_threshold((char *)"size.threshold="), -1); + EXPECT_EQ(utils_get_size_threshold((char *)"size.threshold=abc"), -1); + EXPECT_EQ(utils_get_size_threshold((char *)"size.threshold=-111"), -1); +} + +TEST_F(test, utils_errno_to_umf_result) { + EXPECT_EQ(utils_errno_to_umf_result(EBADF), + UMF_RESULT_ERROR_INVALID_ARGUMENT); + EXPECT_EQ(utils_errno_to_umf_result(EINVAL), + UMF_RESULT_ERROR_INVALID_ARGUMENT); + EXPECT_EQ(utils_errno_to_umf_result(ESRCH), + UMF_RESULT_ERROR_INVALID_ARGUMENT); + EXPECT_EQ(utils_errno_to_umf_result(EPERM), + UMF_RESULT_ERROR_INVALID_ARGUMENT); + + EXPECT_EQ(utils_errno_to_umf_result(EMFILE), + UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY); + EXPECT_EQ(utils_errno_to_umf_result(ENOMEM), + UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY); + + EXPECT_EQ(utils_errno_to_umf_result(ENODEV), + UMF_RESULT_ERROR_NOT_SUPPORTED); + EXPECT_EQ(utils_errno_to_umf_result(ENOSYS), + UMF_RESULT_ERROR_NOT_SUPPORTED); + EXPECT_EQ(utils_errno_to_umf_result(ENOTSUP), + UMF_RESULT_ERROR_NOT_SUPPORTED); + + EXPECT_EQ(utils_errno_to_umf_result(E2BIG), UMF_RESULT_ERROR_UNKNOWN); +} + +TEST_F(test, utils_translate_mem_protection_flags) { + umf_result_t umf_result; + unsigned out_protection; + + umf_result = utils_translate_mem_protection_flags(UMF_PROTECTION_NONE, + &out_protection); + EXPECT_EQ(umf_result, UMF_RESULT_SUCCESS); + EXPECT_EQ(out_protection, PROT_NONE); + + umf_result = utils_translate_mem_protection_flags(UMF_PROTECTION_READ, + &out_protection); + EXPECT_EQ(umf_result, UMF_RESULT_SUCCESS); 
+ EXPECT_EQ(out_protection, PROT_READ); + + umf_result = utils_translate_mem_protection_flags(UMF_PROTECTION_WRITE, + &out_protection); + EXPECT_EQ(umf_result, UMF_RESULT_SUCCESS); + EXPECT_EQ(out_protection, PROT_WRITE); + + umf_result = utils_translate_mem_protection_flags(UMF_PROTECTION_EXEC, + &out_protection); + EXPECT_EQ(umf_result, UMF_RESULT_SUCCESS); + EXPECT_EQ(out_protection, PROT_EXEC); + + umf_result = utils_translate_mem_protection_flags( + UMF_PROTECTION_READ | UMF_PROTECTION_WRITE, &out_protection); + EXPECT_EQ(umf_result, UMF_RESULT_SUCCESS); + EXPECT_EQ(out_protection, PROT_READ | PROT_WRITE); + + umf_result = utils_translate_mem_protection_flags( + UMF_PROTECTION_READ | UMF_PROTECTION_WRITE | UMF_PROTECTION_EXEC, + &out_protection); + EXPECT_EQ(umf_result, UMF_RESULT_SUCCESS); + EXPECT_EQ(out_protection, PROT_READ | PROT_WRITE | PROT_EXEC); + + umf_result = utils_translate_mem_protection_flags( + UMF_PROTECTION_READ | UMF_PROTECTION_EXEC, &out_protection); + EXPECT_EQ(umf_result, UMF_RESULT_SUCCESS); + EXPECT_EQ(out_protection, PROT_READ | PROT_EXEC); + + umf_result = utils_translate_mem_protection_flags( + UMF_PROTECTION_WRITE | UMF_PROTECTION_EXEC, &out_protection); + EXPECT_EQ(umf_result, UMF_RESULT_SUCCESS); + EXPECT_EQ(out_protection, PROT_WRITE | PROT_EXEC); + + // see https://github.com/oneapi-src/unified-memory-framework/issues/923 + out_protection = 0; + umf_result = utils_translate_mem_protection_flags( + 0xFFFF & ~(((UMF_PROTECTION_MAX - 1) << 1) - 1), &out_protection); + EXPECT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + EXPECT_EQ(out_protection, 0); +} + +TEST_F(test, utils_translate_purge_advise) { + EXPECT_EQ(utils_translate_purge_advise(UMF_PURGE_LAZY), MADV_FREE); + EXPECT_EQ(utils_translate_purge_advise(UMF_PURGE_FORCE), MADV_DONTNEED); + EXPECT_EQ(utils_translate_purge_advise(UMF_PURGE_MAX), -1); +} + +TEST_F(test, utils_open) { + EXPECT_EQ(utils_devdax_open(NULL), -1); + EXPECT_EQ(utils_file_open(NULL), -1); + 
EXPECT_EQ(utils_file_open_or_create(NULL), -1); +} diff --git a/test/utils/utils_log.cpp b/test/utils/utils_log.cpp index 3e899e685..c0f81abf0 100644 --- a/test/utils/utils_log.cpp +++ b/test/utils/utils_log.cpp @@ -19,7 +19,7 @@ FILE *mock_fopen(const char *filename, const char *mode) { } const std::string MOCK_FN_NAME = "MOCK_FUNCTION_NAME"; -std::string expected_message = "[ERROR UMF] util_log_init: Logging output not " +std::string expected_message = "[ERROR UMF] utils_log_init: Logging output not " "set - logging disabled (UMF_LOG = \"\")\n"; // The expected_message (above) is printed to stderr. FILE *expected_stream = stderr; @@ -98,7 +98,7 @@ const char *env_variable = ""; #define fopen(A, B) mock_fopen(A, B) #define fputs(A, B) mock_fputs(A, B) #define fflush(A) mock_fflush(A) -#define util_env_var(A, B, C) mock_util_env_var(A, B, C) +#define utils_env_var(A, B, C) mock_utils_env_var(A, B, C) #if defined(__APPLE__) #define strerror_r(A, B, C) mock_strerror_posix(A, B, C) #else @@ -107,9 +107,11 @@ const char *env_variable = ""; #define strerror_s(A, B, C) mock_strerror_windows(A, B, C) //getenv returns 'char *' not 'const char *' so we need explicit cast to drop const #define getenv(X) strstr(X, "UMF_LOG") ? 
(char *)env_variable : getenv(X) +#ifndef UMF_VERSION #define UMF_VERSION "test version" +#endif #include "utils/utils_log.c" -#undef util_env_var +#undef utils_env_var #undef fopen #undef fputs #undef fflush @@ -120,13 +122,13 @@ void helper_log_init(const char *var) { env_variable = var; fopen_count = 0; fput_count = 0; - util_log_init(); + utils_log_init(); env_variable = NULL; EXPECT_EQ(fopen_count, expect_fopen_count); EXPECT_EQ(fput_count, expect_fput_count); } -void helper_checkConfig(util_log_config_t *expected, util_log_config_t *is) { +void helper_checkConfig(utils_log_config_t *expected, utils_log_config_t *is) { EXPECT_EQ(expected->level, is->level); EXPECT_EQ(expected->flushLevel, is->flushLevel); EXPECT_EQ(expected->output, is->output); @@ -140,7 +142,7 @@ TEST_F(test, parseEnv_errors) { expect_fput_count = 0; expected_stream = stderr; - util_log_config_t b = loggerConfig; + utils_log_config_t b = loggerConfig; helper_log_init(NULL); helper_checkConfig(&b, &loggerConfig); @@ -154,13 +156,13 @@ TEST_F(test, parseEnv_errors) { helper_log_init("_level:debug"); helper_checkConfig(&b, &loggerConfig); expected_message = - "[ERROR UMF] util_log_init: Cannot open output file - path too long\n"; + "[ERROR UMF] utils_log_init: Cannot open output file - path too long\n"; std::string test_env = "output:file," + std::string(300, 'x'); helper_log_init(test_env.c_str()); } TEST_F(test, parseEnv) { - util_log_config_t b = loggerConfig; + utils_log_config_t b = loggerConfig; expected_message = ""; std::vector> logLevels = { @@ -226,9 +228,9 @@ TEST_F(test, parseEnv) { expected_stream = output.second; b.timestamp = timestamp.second; b.pid = pid.second; - b.flushLevel = (util_log_level_t)flushLevel.second; + b.flushLevel = (utils_log_level_t)flushLevel.second; - b.level = (util_log_level_t)logLevel.second; + b.level = (utils_log_level_t)logLevel.second; if (logLevel.second <= LOG_INFO) { expect_fput_count = 1; } @@ -236,7 +238,7 @@ TEST_F(test, parseEnv) { 
expect_fput_count = 1; if (expected_filename.size() > MAX_FILE_PATH) { expected_message = - "[ERROR UMF] util_log_init: Cannot open " + "[ERROR UMF] utils_log_init: Cannot open " "output file - path too long\n"; } } @@ -252,7 +254,7 @@ TEST_F(test, parseEnv) { template void helper_test_log(Args... args) { fput_count = 0; fflush_count = 0; - util_log(args...); + utils_log(args...); EXPECT_EQ(fput_count, expect_fput_count); EXPECT_EQ(fflush_count, expect_fflush_count); } @@ -279,7 +281,7 @@ TEST_F(test, log_levels) { expected_stream = stderr; for (int i = LOG_DEBUG; i <= LOG_ERROR; i++) { for (int j = LOG_DEBUG; j <= LOG_ERROR; j++) { - loggerConfig = {0, 0, (util_log_level_t)i, LOG_DEBUG, stderr}; + loggerConfig = {0, 0, (utils_log_level_t)i, LOG_DEBUG, stderr}; if (i > j) { expect_fput_count = 0; expect_fflush_count = 0; @@ -290,7 +292,7 @@ TEST_F(test, log_levels) { } expected_message = "[" + helper_log_str(j) + " UMF] " + MOCK_FN_NAME + ": example log\n"; - helper_test_log((util_log_level_t)j, MOCK_FN_NAME.c_str(), "%s", + helper_test_log((utils_log_level_t)j, MOCK_FN_NAME.c_str(), "%s", "example log"); } } @@ -313,7 +315,7 @@ TEST_F(test, flush_levels) { expect_fput_count = 1; for (int i = LOG_DEBUG; i <= LOG_ERROR; i++) { for (int j = LOG_DEBUG; j <= LOG_ERROR; j++) { - loggerConfig = {0, 0, LOG_DEBUG, (util_log_level_t)i, stderr}; + loggerConfig = {0, 0, LOG_DEBUG, (utils_log_level_t)i, stderr}; if (i > j) { expect_fflush_count = 0; } else { @@ -321,7 +323,7 @@ TEST_F(test, flush_levels) { } expected_message = "[" + helper_log_str(j) + " UMF] " + MOCK_FN_NAME + ": example log\n"; - helper_test_log((util_log_level_t)j, MOCK_FN_NAME.c_str(), "%s", + helper_test_log((utils_log_level_t)j, MOCK_FN_NAME.c_str(), "%s", "example log"); } } @@ -418,7 +420,7 @@ TEST_F(test, log_macros) { template void helper_test_plog(Args... 
args) { fput_count = 0; fflush_count = 0; - util_plog(args...); + utils_plog(args...); EXPECT_EQ(fput_count, expect_fput_count); EXPECT_EQ(fflush_count, expect_fflush_count); } diff --git a/third_party/requirements.txt b/third_party/requirements.txt index 8a5f6c4cf..6a8be6e46 100644 --- a/third_party/requirements.txt +++ b/third_party/requirements.txt @@ -4,14 +4,14 @@ clang-format==15.0.7 cmake-format==0.6.13 black==24.3.0 # Tests -packaging==24.0 +packaging==24.2 # Generating HTML documentation -pygments==2.15.1 -sphinxcontrib_applehelp==1.0.4 -sphinxcontrib_devhelp==1.0.2 -sphinxcontrib_htmlhelp==2.0.1 -sphinxcontrib_serializinghtml==1.1.5 -sphinxcontrib_qthelp==1.0.3 +pygments==2.18.0 +sphinxcontrib_applehelp==2.0.0 +sphinxcontrib_devhelp==2.0.0 +sphinxcontrib_htmlhelp==2.1.0 +sphinxcontrib_serializinghtml==2.0.0 +sphinxcontrib_qthelp==2.0.0 breathe==4.35.0 -sphinx==4.5.0 -sphinx_book_theme==0.3.3 +sphinx==8.1.3 +sphinx_book_theme==1.1.3 pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy