diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index 5478023849..ea5ef1f89f 100644 --- a/.github/workflows/conda-package.yml +++ b/.github/workflows/conda-package.yml @@ -25,6 +25,10 @@ jobs: strategy: matrix: python: ['3.10', '3.11', '3.12', '3.13', '3.14'] + python_spec: [''] + include: + - python: '3.14' + python_spec: '3.14.* *_cp314' steps: - name: Cancel Previous Runs uses: styfle/cancel-workflow-action@3155a141048f8f89c06b4cdae32e7853e97536bc # 0.13.0 @@ -45,9 +49,9 @@ jobs: with: path: ~/.conda/pkgs key: - ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-python-${{ matrix.python }}-${{hashFiles('**/meta.yaml') }} + ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-python-${{ matrix.python }}-spec-${{ matrix.python == '3.14' && matrix.python_spec == '' && 't' || '' }}-${{hashFiles('**/meta.yaml') }} restore-keys: | - ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-python-${{ matrix.python }}- + ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-python-${{ matrix.python }}-spec-${{ matrix.python == '3.14' && matrix.python_spec == '' && 't' || '' }}- ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}- - name: Add conda to system path run: echo $CONDA/bin >> $GITHUB_PATH @@ -61,22 +65,31 @@ jobs: run: | # use bootstrap channel to pull NumPy linked with OpenBLAS CHANNELS="-c conda-forge --override-channels" - VERSIONS="--python ${{ matrix.python }} --numpy 2.0" TEST="--no-test" - conda build \ - $TEST \ - $VERSIONS \ - $CHANNELS \ - conda-recipe + if [ -n "${{ matrix.python_spec }}" ]; then + conda build \ + $TEST \ + --python "${{ matrix.python_spec }}" \ + --numpy 2.0 \ + $CHANNELS \ + conda-recipe + else + conda build \ + $TEST \ + --python ${{ matrix.python }} \ + --numpy 2.0 \ + $CHANNELS \ + conda-recipe + fi - name: Upload artifact uses: actions/upload-artifact@v6.0.0 with: - name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Python ${{ matrix.python }} + name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Python ${{ matrix.python == 
'3.14' && (matrix.python_spec != '' && '3.14' || '3.14t') || matrix.python }} path: /usr/share/miniconda/conda-bld/linux-64/${{ env.PACKAGE_NAME }}-*.conda - name: Upload wheels artifact uses: actions/upload-artifact@v6.0.0 with: - name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Wheels Python ${{ matrix.python }} + name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Wheels Python ${{ matrix.python == '3.14' && (matrix.python_spec != '' && '3.14' || '3.14t') || matrix.python }} path: ${{ env.WHEELS_OUTPUT_FOLDER }}${{ env.PACKAGE_NAME }}-*.whl build_windows: @@ -86,6 +99,10 @@ jobs: strategy: matrix: python: ['3.10', '3.11', '3.12', '3.13', '3.14'] + python_spec: [''] + include: + - python: '3.14' + python_spec: '3.14.* *_cp314' steps: - name: Cancel Previous Runs uses: styfle/cancel-workflow-action@3155a141048f8f89c06b4cdae32e7853e97536bc # 0.13.0 @@ -100,15 +117,15 @@ jobs: with: miniforge-variant: Miniforge3 miniforge-version: latest - activate-environment: build + auto-activate: true + activate-environment: base channels: conda-forge conda-remove-defaults: true python-version: ${{ matrix.python }} - name: Install conda build run: | - conda activate - conda install -y conda-build + conda install -n base -y conda-build conda list -n base - name: Cache conda packages @@ -118,9 +135,9 @@ jobs: with: path: /home/runner/conda_pkgs_dir key: - ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-python-${{ matrix.python }}-${{hashFiles('**/meta.yaml') }} + ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-python-${{ matrix.python }}-spec-${{ matrix.python == '3.14' && matrix.python_spec == '' && 't' || '' }}-${{hashFiles('**/meta.yaml') }} restore-keys: | - ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-python-${{ matrix.python }}- + ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-python-${{ matrix.python }}-spec-${{ matrix.python == '3.14' && matrix.python_spec == '' && 't' || '' }}- ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}- - name: Store conda paths as envs @@ -133,20 +150,24 
@@ jobs: env: OVERRIDE_INTEL_IPO: 1 # IPO requires more resources that GH actions VM provides run: | - conda activate # TODO: roll back use of Intel channel when 2025.1 is available on conda-forge - conda build --no-test --python ${{ matrix.python }} --numpy 2.0 -c ${{ env.INTEL_CHANNEL }} -c conda-forge --override-channels conda-recipe + $PYTHON_SPEC = "${{ matrix.python_spec }}" + if ($PYTHON_SPEC -ne "") { + conda build --no-test --python "$PYTHON_SPEC" --numpy 2.0 -c ${{ env.INTEL_CHANNEL }} -c conda-forge --override-channels conda-recipe + } else { + conda build --no-test --python ${{ matrix.python }} --numpy 2.0 -c ${{ env.INTEL_CHANNEL }} -c conda-forge --override-channels conda-recipe + } - name: Upload artifact uses: actions/upload-artifact@v6.0.0 with: - name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Python ${{ matrix.python }} + name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Python ${{ matrix.python == '3.14' && (matrix.python_spec != '' && '3.14' || '3.14t') || matrix.python }} path: ${{ env.CONDA_BLD }}${{ env.PACKAGE_NAME }}-*.conda - name: Upload wheels artifact uses: actions/upload-artifact@v6.0.0 with: - name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Wheels Python ${{ matrix.python }} + name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Wheels Python ${{ matrix.python == '3.14' && (matrix.python_spec != '' && '3.14' || '3.14t') || matrix.python }} path: ${{ env.WHEELS_OUTPUT_FOLDER }}${{ env.PACKAGE_NAME }}-*.whl test_linux: @@ -157,6 +178,12 @@ jobs: strategy: matrix: python: ['3.10', '3.11', '3.12', '3.13', '3.14'] + python_spec: [''] + include: + - python: '3.14' + python_spec: '3.14.* *_cp314' + experimental: false + runner: ubuntu-22.04 experimental: [false] runner: [ubuntu-22.04] continue-on-error: ${{ matrix.experimental }} @@ -171,7 +198,7 @@ jobs: - name: Download artifact uses: actions/download-artifact@v7 with: - name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Python ${{ matrix.python }} + name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Python 
${{ matrix.python == '3.14' && (matrix.python_spec != '' && '3.14' || '3.14t') || matrix.python }} - name: Add conda to system path run: echo $CONDA/bin >> $GITHUB_PATH - name: Install conda-index @@ -192,7 +219,11 @@ jobs: run: | CHANNELS="-c $GITHUB_WORKSPACE/channel ${{ env.CHANNELS }}" export PACKAGE_VERSION=$(python -c "${VER_SCRIPT1} ${VER_SCRIPT2}") - conda create -n ${{ env.TEST_ENV_NAME }} $PACKAGE_NAME=${PACKAGE_VERSION} python=${{ matrix.python }} $CHANNELS --only-deps --dry-run > lockfile + PYTHON_SPEC="${{ matrix.python_spec }}" + if [ -z "${PYTHON_SPEC}" ]; then + PYTHON_SPEC="${{ matrix.python }}" + fi + conda create -n ${{ env.TEST_ENV_NAME }} $PACKAGE_NAME=${PACKAGE_VERSION} python="${PYTHON_SPEC}" $CHANNELS --only-deps --dry-run > lockfile cat lockfile - name: Set pkgs_dirs run: | @@ -204,16 +235,20 @@ jobs: with: path: ~/.conda/pkgs key: - ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-python-${{ matrix.python }}-${{hashFiles('lockfile') }} + ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-python-${{ matrix.python }}-spec-${{ matrix.python == '3.14' && matrix.python_spec == '' && 't' || '' }}-${{hashFiles('lockfile') }} restore-keys: | - ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-python-${{ matrix.python }}- + ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-python-${{ matrix.python }}-spec-${{ matrix.python == '3.14' && matrix.python_spec == '' && 't' || '' }}- ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}- - name: Install dpctl run: | export CHANNELS="-c $GITHUB_WORKSPACE/channel ${{ env.CHANNELS }}" - export TEST_DEPENDENCIES="pytest pytest-cov cython setuptools" + export TEST_DEPENDENCIES="pytest cython setuptools" export PACKAGE_VERSION=$(python -c "${VER_SCRIPT1} ${VER_SCRIPT2}") - conda create -n ${{ env.TEST_ENV_NAME }} $PACKAGE_NAME=${PACKAGE_VERSION} ${TEST_DEPENDENCIES} python=${{ matrix.python }} ${CHANNELS} + PYTHON_SPEC="${{ matrix.python_spec }}" + if [ -z "${PYTHON_SPEC}" ]; then + PYTHON_SPEC="${{ matrix.python }}" + fi + conda 
create -n ${{ env.TEST_ENV_NAME }} $PACKAGE_NAME=${PACKAGE_VERSION} ${TEST_DEPENDENCIES} python="${PYTHON_SPEC}" ${CHANNELS} # Test installed packages conda list -n ${{ env.TEST_ENV_NAME }} - name: Smoke test @@ -253,6 +288,12 @@ jobs: strategy: matrix: python: ['3.10', '3.11', '3.12', '3.13', '3.14'] + python_spec: [''] + include: + - python: '3.14' + python_spec: '3.14.* *_cp314' + experimental: false + runner: windows-latest experimental: [false] runner: [windows-latest] continue-on-error: ${{ matrix.experimental }} @@ -272,7 +313,7 @@ jobs: - name: Download artifact uses: actions/download-artifact@v7 with: - name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Python ${{ matrix.python }} + name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Python ${{ matrix.python == '3.14' && (matrix.python_spec != '' && '3.14' || '3.14t') || matrix.python }} - uses: conda-incubator/setup-miniconda@v3 with: @@ -329,7 +370,9 @@ jobs: FOR /F "tokens=* USEBACKQ" %%F IN (`python -c "%SCRIPT%"`) DO ( SET PACKAGE_VERSION=%%F ) - conda install -n ${{ env.TEST_ENV_NAME }} ${{ env.PACKAGE_NAME }}=%PACKAGE_VERSION% python=${{ matrix.python }} -c ${{ env.workdir }}/channel ${{ env.CHANNELS }} --only-deps --dry-run > lockfile + SET "PYTHON_SPEC=${{ matrix.python_spec }}" + IF "%PYTHON_SPEC%"=="" SET "PYTHON_SPEC=${{ matrix.python }}" + conda install -n ${{ env.TEST_ENV_NAME }} ${{ env.PACKAGE_NAME }}=%PACKAGE_VERSION% python="%PYTHON_SPEC%" -c ${{ env.workdir }}/channel ${{ env.CHANNELS }} --only-deps --dry-run > lockfile - name: Display lockfile content shell: pwsh @@ -342,9 +385,9 @@ jobs: with: path: /home/runner/conda_pkgs_dir key: - ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-python-${{ matrix.python }}-${{hashFiles('lockfile') }} + ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-python-${{ matrix.python }}-spec-${{ matrix.python == '3.14' && matrix.python_spec == '' && 't' || '' }}-${{hashFiles('lockfile') }} restore-keys: | - ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-python-${{ 
matrix.python }}- + ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-python-${{ matrix.python }}-spec-${{ matrix.python == '3.14' && matrix.python_spec == '' && 't' || '' }}- ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}- - name: Install opencl_rt @@ -362,8 +405,10 @@ jobs: FOR /F "tokens=* USEBACKQ" %%F IN (`python -c "%SCRIPT%"`) DO ( SET PACKAGE_VERSION=%%F ) - SET TEST_DEPENDENCIES=pytest"<8" pytest-cov cython setuptools - conda install -n ${{ env.TEST_ENV_NAME }} ${{ env.PACKAGE_NAME }}=%PACKAGE_VERSION% %TEST_DEPENDENCIES% python=${{ matrix.python }} -c ${{ env.workdir }}/channel ${{ env.CHANNELS }} + SET "PYTHON_SPEC=${{ matrix.python_spec }}" + IF "%PYTHON_SPEC%"=="" SET "PYTHON_SPEC=${{ matrix.python }}" + SET TEST_DEPENDENCIES=pytest"<8" cython setuptools + conda install -n ${{ env.TEST_ENV_NAME }} ${{ env.PACKAGE_NAME }}=%PACKAGE_VERSION% %TEST_DEPENDENCIES% python="%PYTHON_SPEC%" -c ${{ env.workdir }}/channel ${{ env.CHANNELS }} - name: Report content of test environment shell: cmd /C CALL {0} @@ -424,16 +469,20 @@ jobs: strategy: matrix: python: ['3.10', '3.11', '3.12', '3.13', '3.14'] + python_spec: [''] + include: + - python: '3.14' + python_spec: '3.14.* *_cp314' steps: - name: Download conda artifact uses: actions/download-artifact@v7 with: - name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Python ${{ matrix.python }} + name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Python ${{ matrix.python == '3.14' && (matrix.python_spec != '' && '3.14' || '3.14t') || matrix.python }} - name: Download wheel artifact uses: actions/download-artifact@v7 with: - name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Wheels Python ${{ matrix.python }} + name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Wheels Python ${{ matrix.python == '3.14' && (matrix.python_spec != '' && '3.14' || '3.14t') || matrix.python }} - name: Install anaconda-client run: conda install anaconda-client -c conda-forge --override-channels @@ -468,24 +517,28 @@ jobs: strategy: matrix: python: ['3.10', 
'3.11', '3.12', '3.13', '3.14'] + python_spec: [''] + include: + - python: '3.14' + python_spec: '3.14.* *_cp314' steps: - name: Download artifact uses: actions/download-artifact@v7 with: - name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Python ${{ matrix.python }} + name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Python ${{ matrix.python == '3.14' && (matrix.python_spec != '' && '3.14' || '3.14t') || matrix.python }} - name: Download wheel artifact uses: actions/download-artifact@v7 with: - name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Wheels Python ${{ matrix.python }} + name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Wheels Python ${{ matrix.python == '3.14' && (matrix.python_spec != '' && '3.14' || '3.14t') || matrix.python }} - uses: conda-incubator/setup-miniconda@v3 with: miniforge-version: latest channels: conda-forge conda-remove-defaults: true - auto-activate-base: true - activate-environment: "" + auto-activate: true + activate-environment: base - name: Install anaconda-client run: conda install anaconda-client -c conda-forge --override-channels diff --git a/.github/workflows/run-tests-from-dppy-bits.yaml b/.github/workflows/run-tests-from-dppy-bits.yaml index 0ee861aea1..7a1768310c 100644 --- a/.github/workflows/run-tests-from-dppy-bits.yaml +++ b/.github/workflows/run-tests-from-dppy-bits.yaml @@ -28,6 +28,10 @@ jobs: strategy: matrix: python: ['3.10', '3.11', '3.12', '3.13', '3.14'] + python_spec: [''] + include: + - python: '3.14' + python_spec: '3.14.* *_cp314' experimental: [false] runner: [ubuntu-22.04, ubuntu-24.04] continue-on-error: ${{ matrix.experimental }} @@ -47,7 +51,11 @@ jobs: - name: Install dpctl run: | - conda create -n ${{ env.TEST_ENV_NAME }} -c dppy/label/dev ${{ env.CHANNELS }} dpctl pytest pytest-cov cython setuptools c-compiler cxx-compiler + PYTHON_SPEC="${{ matrix.python_spec }}" + if [ -z "${PYTHON_SPEC}" ]; then + PYTHON_SPEC="${{ matrix.python }}" + fi + conda create -n ${{ env.TEST_ENV_NAME }} -c dppy/label/dev ${{ 
env.CHANNELS }} dpctl pytest cython setuptools c-compiler cxx-compiler python="${PYTHON_SPEC}" - name: Smoke test run: | @@ -79,6 +87,10 @@ jobs: strategy: matrix: python: ['3.10', '3.11', '3.12', '3.13', '3.14'] + python_spec: [''] + include: + - python: '3.14' + python_spec: '3.14.* *_cp314' experimental: [false] runner: [windows-latest] @@ -106,7 +118,9 @@ jobs: - name: Install dpctl run: | - conda install -n ${{ env.TEST_ENV_NAME }} -c dppy/label/dev ${{ env.CHANNELS }} dpctl pytest pytest-cov cython setuptools c-compiler cxx-compiler + SET "PYTHON_SPEC=${{ matrix.python_spec }}" + IF "%PYTHON_SPEC%"=="" SET "PYTHON_SPEC=${{ matrix.python }}" + conda install -n ${{ env.TEST_ENV_NAME }} -c dppy/label/dev ${{ env.CHANNELS }} dpctl pytest cython setuptools c-compiler cxx-compiler python="%PYTHON_SPEC%" # intel-opencl-rt is not being installed when running conda install dpctl, so do it manually - name: Install intel-opencl-rt diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml index a5ad3bae93..99bfbfadbd 100644 --- a/conda-recipe/meta.yaml +++ b/conda-recipe/meta.yaml @@ -28,7 +28,6 @@ requirements: - {{ compiler('dpcpp') }} >={{ required_compiler_version }} host: - python - - python-gil # [py>=314] - pip >=24.0 - level-zero-devel >=1.16 - pybind11 >=2.12 @@ -52,7 +51,6 @@ requirements: - tomli # [py<311] run: - python - - python-gil # [py>=314] - {{ pin_compatible('intel-sycl-rt', min_pin='x.x', max_pin='x') }} - {{ pin_compatible('intel-cmplr-lib-rt', min_pin='x.x', max_pin='x') }} - numpy @@ -65,7 +63,6 @@ test: - cython - setuptools - pytest - - pytest-cov about: home: https://github.com/IntelPython/dpctl.git diff --git a/conda-recipe/run_test.sh b/conda-recipe/run_test.sh index 504977258b..de137e0caa 100644 --- a/conda-recipe/run_test.sh +++ b/conda-recipe/run_test.sh @@ -4,11 +4,4 @@ set -e ${PYTHON} -c "import dpctl; print(dpctl.__version__)" ${PYTHON} -m dpctl -f -# don't use coverage for Python 3.13 due to crashes related to -# Cython >= 3.1.0 
and Python >= 3.13 -# TODO: remove if crash is triaged -if ${PYTHON} --version 2>&1 | grep -q '^Python 3\.13'; then - ${PYTHON} -m pytest -q -ra --disable-warnings --pyargs dpctl -vv -else - ${PYTHON} -m pytest -q -ra --disable-warnings --cov dpctl --cov-report term-missing --pyargs dpctl -vv -fi +${PYTHON} -m pytest -q -ra --disable-warnings --pyargs dpctl -vv diff --git a/dpctl/_diagnostics.pyx b/dpctl/_diagnostics.pyx index 3723c23493..2d5879dfea 100644 --- a/dpctl/_diagnostics.pyx +++ b/dpctl/_diagnostics.pyx @@ -17,6 +17,7 @@ # distutils: language = c++ # cython: language_level=3 # cython: linetrace=True +# cython: freethreading_compatible = True """ Implements developer utilities. """ @@ -60,6 +61,10 @@ def _shutdown_logger(): def syclinterface_diagnostics(verbosity="warning", log_dir=None): """Context manager that activate verbosity of DPCTLSyclInterface function calls. + + .. warning:: + This context manager modifies the ``DPCTL_VERBOSITY`` environment + variable and should only be used from a single thread. """ _allowed_verbosity = ["warning", "error"] if verbosity not in _allowed_verbosity: diff --git a/dpctl/_sycl_context.pyx b/dpctl/_sycl_context.pyx index 1cc1a5ac3b..7a0074db61 100644 --- a/dpctl/_sycl_context.pyx +++ b/dpctl/_sycl_context.pyx @@ -17,6 +17,7 @@ # distutils: language = c++ # cython: language_level=3 # cython: linetrace=True +# cython: freethreading_compatible = True """ Implements SyclContext Cython extension type. """ diff --git a/dpctl/_sycl_device.pyx b/dpctl/_sycl_device.pyx index 419ed2b9fb..25b616c9ee 100644 --- a/dpctl/_sycl_device.pyx +++ b/dpctl/_sycl_device.pyx @@ -17,6 +17,7 @@ # distutils: language = c++ # cython: language_level=3 # cython: linetrace=True +# cython: freethreading_compatible = True """ Implements SyclDevice Cython extension type. 
""" diff --git a/dpctl/_sycl_device_factory.pyx b/dpctl/_sycl_device_factory.pyx index 1689249bc9..4579b420ee 100644 --- a/dpctl/_sycl_device_factory.pyx +++ b/dpctl/_sycl_device_factory.pyx @@ -17,6 +17,7 @@ # distutils: language = c++ # cython: language_level=3 # cython: linetrace=True +# cython: freethreading_compatible = True """ This module implements several device creation helper functions: @@ -438,12 +439,27 @@ cdef class _DefaultDeviceCache: return _copy +# no default, as would share a single mutable instance across threads and +# concurrent access to the cache would not be thread-safe. Using ContextVar +# without a default ensures each context gets its own instance. _global_default_device_cache = ContextVar( "global_default_device_cache", - default=_DefaultDeviceCache() ) +cdef _DefaultDeviceCache _get_default_device_cache(): + """ + Factory function to get or create a default device cache for the current + context + """ + try: + return _global_default_device_cache.get() + except LookupError: + cache = _DefaultDeviceCache() + _global_default_device_cache.set(cache) + return cache + + cpdef SyclDevice _cached_default_device(): """Returns a cached device selected by default selector. @@ -452,7 +468,7 @@ cpdef SyclDevice _cached_default_device(): A cached default-selected SYCL device. """ - cdef _DefaultDeviceCache _cache = _global_default_device_cache.get() + cdef _DefaultDeviceCache _cache = _get_default_device_cache() d_, changed_ = _cache.get_or_create() if changed_: _global_default_device_cache.set(_cache) diff --git a/dpctl/_sycl_event.pyx b/dpctl/_sycl_event.pyx index 8766408644..6aa6a49d51 100644 --- a/dpctl/_sycl_event.pyx +++ b/dpctl/_sycl_event.pyx @@ -17,6 +17,7 @@ # distutils: language = c++ # cython: language_level=3 # cython: linetrace=True +# cython: freethreading_compatible = True """ Implements SyclEvent Cython extension type. 
""" diff --git a/dpctl/_sycl_platform.pyx b/dpctl/_sycl_platform.pyx index 7ceb725083..8a2f7a6bee 100644 --- a/dpctl/_sycl_platform.pyx +++ b/dpctl/_sycl_platform.pyx @@ -17,6 +17,7 @@ # distutils: language = c++ # cython: language_level=3 # cython: linetrace=True +# cython: freethreading_compatible = True """ Implements SyclPlatform Cython extension type. """ diff --git a/dpctl/_sycl_queue.pxd b/dpctl/_sycl_queue.pxd index 9469415cb8..c72cfb78e2 100644 --- a/dpctl/_sycl_queue.pxd +++ b/dpctl/_sycl_queue.pxd @@ -43,6 +43,7 @@ cdef public api class _SyclQueue [ cdef DPCTLSyclQueueRef _queue_ref cdef SyclContext _context cdef SyclDevice _device + cdef object __weakref__ cdef public api class SyclQueue (_SyclQueue) [ diff --git a/dpctl/_sycl_queue.pyx b/dpctl/_sycl_queue.pyx index 48c882b860..c13f1f6d1e 100644 --- a/dpctl/_sycl_queue.pyx +++ b/dpctl/_sycl_queue.pyx @@ -17,6 +17,7 @@ # distutils: language = c++ # cython: language_level=3 # cython: linetrace=True +# cython: freethreading_compatible = True """ Implements SyclQueue Cython extension type. """ diff --git a/dpctl/_sycl_queue_manager.pyx b/dpctl/_sycl_queue_manager.pyx index bf720e9402..274de15a29 100644 --- a/dpctl/_sycl_queue_manager.pyx +++ b/dpctl/_sycl_queue_manager.pyx @@ -17,6 +17,7 @@ # distutils: language = c++ # cython: language_level=3 # cython: linetrace=True +# cython: freethreading_compatible = True import logging from contextvars import ContextVar @@ -74,12 +75,27 @@ cdef class _DeviceDefaultQueueCache: return _copy +# no default, as would share a single mutable instance across threads and +# concurrent access to the cache would not be thread-safe. Using ContextVar +# without a default ensures each context gets its own instance. 
_global_device_queue_cache = ContextVar( "global_device_queue_cache", - default=_DeviceDefaultQueueCache() ) +cdef _DeviceDefaultQueueCache _get_device_queue_cache(): + """ + Factory function to get or create a default device queue cache for the + current context + """ + try: + return _global_device_queue_cache.get() + except LookupError: + cache = _DeviceDefaultQueueCache() + _global_device_queue_cache.set(cache) + return cache + + cpdef object get_device_cached_queue(object key): """Returns a cached queue associated with given device. @@ -96,7 +112,7 @@ cpdef object get_device_cached_queue(object key): TypeError: If the input key is not one of the accepted types. """ - _cache = _global_device_queue_cache.get() + _cache = _get_device_queue_cache() q_, changed_ = _cache.get_or_create(key) if changed_: _global_device_queue_cache.set(_cache) diff --git a/dpctl/memory/_memory.pyx b/dpctl/memory/_memory.pyx index 0cc6394741..f59025e55f 100644 --- a/dpctl/memory/_memory.pyx +++ b/dpctl/memory/_memory.pyx @@ -17,6 +17,7 @@ # distutils: language = c++ # cython: language_level=3 # cython: linetrace=True +# cython: freethreading_compatible = True """This file implements Python buffer protocol using Sycl USM shared and host allocators. The USM device allocator is also exposed through this module for diff --git a/dpctl/program/_program.pyx b/dpctl/program/_program.pyx index 3859314505..4983e6e469 100644 --- a/dpctl/program/_program.pyx +++ b/dpctl/program/_program.pyx @@ -17,6 +17,7 @@ # distutils: language = c++ # cython: language_level=3 # cython: linetrace=True +# cython: freethreading_compatible = True """Implements a Python interface for SYCL's program and kernel runtime classes. 
diff --git a/dpctl/tests/_cython_api.pyx b/dpctl/tests/_cython_api.pyx index e694ecb9c3..84e981e040 100644 --- a/dpctl/tests/_cython_api.pyx +++ b/dpctl/tests/_cython_api.pyx @@ -16,6 +16,7 @@ # cython: language=c++ # cython: language_level=3 +# cython: freethreading_compatible = True cimport dpctl as c_dpctl diff --git a/dpctl/tests/test_sycl_usm.py b/dpctl/tests/test_sycl_usm.py index 7b79738231..1840ff2a8d 100644 --- a/dpctl/tests/test_sycl_usm.py +++ b/dpctl/tests/test_sycl_usm.py @@ -59,7 +59,7 @@ def test_memory_create(memory_ctor): assert mobj.sycl_queue == queue assert type(repr(mobj)) is str assert type(bytes(mobj)) is bytes - assert sys.getsizeof(mobj) > nbytes + assert sys.getsizeof(mobj) >= nbytes def test_memory_create_with_np(): diff --git a/dpctl/utils/_onetrace_context.py b/dpctl/utils/_onetrace_context.py index e35269ae91..bdc93d7dcd 100644 --- a/dpctl/utils/_onetrace_context.py +++ b/dpctl/utils/_onetrace_context.py @@ -14,6 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import threading from contextlib import contextmanager from os import environ, getenv from platform import system as sys_platform @@ -25,6 +26,7 @@ _UNCHECKED = sys_platform() == "Linux" del sys_platform +_unchecked_lock = threading.Lock() @contextmanager @@ -68,21 +70,25 @@ def onetrace_enabled(): """ global _UNCHECKED - if _UNCHECKED: - _UNCHECKED = False - if not ( - getenv("PTI_ENABLE", None) == "1" - and "onetrace_tool" in getenv("LD_PRELOAD", "") - ): - import warnings - - warnings.warn( - "It looks like Python interpreter was not started using " - "`onetrace` utility. Using `onetrace_enabled` may have " - "no effect. 
See `onetrace_enabled.__doc__` for usage.", - RuntimeWarning, - stacklevel=2, + with _unchecked_lock: + if _UNCHECKED: + _UNCHECKED = False + needs_warning = not ( + getenv("PTI_ENABLE", None) == "1" + and "onetrace_tool" in getenv("LD_PRELOAD", "") ) + else: + needs_warning = False + if needs_warning: + import warnings + + warnings.warn( + "It looks like Python interpreter was not started using " + "`onetrace` utility. Using `onetrace_enabled` may have " + "no effect. See `onetrace_enabled.__doc__` for usage.", + RuntimeWarning, + stacklevel=2, + ) _env_var_name = "PTI_ENABLE_COLLECTION" saved = getenv(_env_var_name, None) diff --git a/dpctl/utils/_order_manager.py b/dpctl/utils/_order_manager.py index fbe2c47763..d0ccf25985 100644 --- a/dpctl/utils/_order_manager.py +++ b/dpctl/utils/_order_manager.py @@ -16,7 +16,7 @@ class _SequentialOrderManager: def __init__(self): self._state = _OrderManager(16) - def __dealloc__(self): + def __del__(self): _local = self._state SyclEvent.wait_for(_local.get_submitted_events()) SyclEvent.wait_for(_local.get_host_task_events()) @@ -71,24 +71,31 @@ class SyclQueueToOrderManagerMap: def __init__(self): self._map = ContextVar( "global_order_manager_map", - default=defaultdict(_SequentialOrderManager), + # no default to avoid sharing a single defaultdict + # across threads ) + def _get_map(self): + """ + Factory method to get or create the order manager map for the + current context + """ + try: + return self._map.get() + except LookupError: + m = defaultdict(_SequentialOrderManager) + self._map.set(m) + return m + def __getitem__(self, q: SyclQueue) -> _SequentialOrderManager: """Get order manager for given SyclQueue""" - _local = self._map.get() if not isinstance(q, SyclQueue): raise TypeError(f"Expected `dpctl.SyclQueue`, got {type(q)}") - if q in _local: - return _local[q] - else: - v = _local[q] - _local[q] = v - return v + return self._get_map()[q] def clear(self): """Clear content of internal dictionary""" - _local = 
self._map.get() + _local = self._get_map() for v in _local.values(): v.wait() _local.clear() diff --git a/dpctl/utils/src/device_queries.cpp b/dpctl/utils/src/device_queries.cpp index c350a1f280..05fea90e2a 100644 --- a/dpctl/utils/src/device_queries.cpp +++ b/dpctl/utils/src/device_queries.cpp @@ -132,7 +132,7 @@ std::uint32_t py_intel_memory_bus_width(const sycl::device &d) }; // namespace -PYBIND11_MODULE(_device_queries, m) +PYBIND11_MODULE(_device_queries, m, py::mod_gil_not_used()) { m.def("intel_device_info_device_id", &py_intel_device_id, "Get ext_intel_device_id for the device, zero if not an intel device", diff --git a/dpctl/utils/src/order_keeper.cpp b/dpctl/utils/src/order_keeper.cpp index 62fc7e7e65..7f0074f91a 100644 --- a/dpctl/utils/src/order_keeper.cpp +++ b/dpctl/utils/src/order_keeper.cpp @@ -6,7 +6,7 @@ #include "sequential_order_keeper.hpp" #include -PYBIND11_MODULE(_seq_order_keeper, m) +PYBIND11_MODULE(_seq_order_keeper, m, py::mod_gil_not_used()) { py::class_(m, "_OrderManager") .def(py::init()) diff --git a/dpctl/utils/src/sequential_order_keeper.hpp b/dpctl/utils/src/sequential_order_keeper.hpp index a6cba662b0..f8c6feb107 100644 --- a/dpctl/utils/src/sequential_order_keeper.hpp +++ b/dpctl/utils/src/sequential_order_keeper.hpp @@ -3,6 +3,7 @@ #include #include +#include #include namespace @@ -21,10 +22,12 @@ inline bool is_event_complete(const sycl::event &e) class SequentialOrder { private: + mutable std::mutex mu_events; std::vector host_task_events; std::vector submitted_events; - void prune_complete() + // only called with mu_events held + void prune_complete_nolock() { const auto &ht_it = std::remove_if(host_task_events.begin(), host_task_events.end(), @@ -46,76 +49,78 @@ class SequentialOrder } SequentialOrder(const SequentialOrder &other) - : host_task_events(other.host_task_events), - submitted_events(other.submitted_events) { - prune_complete(); + std::lock_guard lock(other.mu_events); + host_task_events = other.host_task_events; 
+ submitted_events = other.submitted_events; + prune_complete_nolock(); } SequentialOrder(SequentialOrder &&other) : host_task_events{}, submitted_events{} { + std::lock_guard lock(other.mu_events); host_task_events = std::move(other.host_task_events); submitted_events = std::move(other.submitted_events); - prune_complete(); + prune_complete_nolock(); } SequentialOrder &operator=(const SequentialOrder &other) { - host_task_events = other.host_task_events; - submitted_events = other.submitted_events; - - prune_complete(); + if (this != &other) { + std::scoped_lock lock(mu_events, other.mu_events); + host_task_events = other.host_task_events; + submitted_events = other.submitted_events; + prune_complete_nolock(); + } return *this; } SequentialOrder &operator=(SequentialOrder &&other) { if (this != &other) { + std::scoped_lock lock(mu_events, other.mu_events); host_task_events = std::move(other.host_task_events); submitted_events = std::move(other.submitted_events); - prune_complete(); + prune_complete_nolock(); } return *this; } std::size_t get_num_submitted_events() const { + std::lock_guard lock(mu_events); return submitted_events.size(); } - const std::vector &get_host_task_events() + // returns a copy to avoid returning a reference that + // could be modified after the lock is released + std::vector get_host_task_events() { - prune_complete(); + std::lock_guard lock(mu_events); + prune_complete_nolock(); return host_task_events; } - /* - const std::vector & get_host_task_events() const { - return host_task_events; - } - */ - std::size_t get_num_host_task_events() const { + std::lock_guard lock(mu_events); return host_task_events.size(); } - const std::vector &get_submitted_events() + // returns a copy to avoid returning a reference that + // could be modified after the lock is released + std::vector get_submitted_events() { - prune_complete(); + std::lock_guard lock(mu_events); + prune_complete_nolock(); return submitted_events; } - /* - const std::vector & 
get_submitted_events() const { - return submitted_events; - } - */ - void add_to_both_events(const sycl::event &ht_ev, const sycl::event &comp_ev) { - prune_complete(); + std::lock_guard lock(mu_events); + prune_complete_nolock(); if (!is_event_complete(ht_ev)) host_task_events.push_back(ht_ev); if (!is_event_complete(comp_ev)) @@ -125,7 +130,8 @@ class SequentialOrder void add_vector_to_both_events(const std::vector &ht_evs, const std::vector &comp_evs) { - prune_complete(); + std::lock_guard lock(mu_events); + prune_complete_nolock(); for (const auto &e : ht_evs) { if (!is_event_complete(e)) host_task_events.push_back(e); @@ -138,7 +144,8 @@ class SequentialOrder void add_to_host_task_events(const sycl::event &ht_ev) { - prune_complete(); + std::lock_guard lock(mu_events); + prune_complete_nolock(); if (!is_event_complete(ht_ev)) { host_task_events.push_back(ht_ev); } @@ -146,7 +153,8 @@ class SequentialOrder void add_to_submitted_events(const sycl::event &comp_ev) { - prune_complete(); + std::lock_guard lock(mu_events); + prune_complete_nolock(); if (!is_event_complete(comp_ev)) { submitted_events.push_back(comp_ev); } @@ -155,7 +163,8 @@ class SequentialOrder template void add_list_to_host_task_events(const sycl::event (&ht_events)[num]) { - prune_complete(); + std::lock_guard lock(mu_events); + prune_complete_nolock(); for (std::size_t i = 0; i < num; ++i) { const auto &e = ht_events[i]; if (!is_event_complete(e)) @@ -166,7 +175,8 @@ class SequentialOrder template void add_list_to_submitted_events(const sycl::event (&comp_events)[num]) { - prune_complete(); + std::lock_guard lock(mu_events); + prune_complete_nolock(); for (std::size_t i = 0; i < num; ++i) { const auto &e = comp_events[i]; if (!is_event_complete(e)) @@ -176,8 +186,20 @@ class SequentialOrder void wait() { - sycl::event::wait(submitted_events); - sycl::event::wait(host_task_events); - prune_complete(); + // snapshot events outside of mutex to avoid + // calling wait inside mutex + std::vector 
sub_copy; + std::vector ht_copy; + { + std::lock_guard lock(mu_events); + sub_copy = submitted_events; + ht_copy = host_task_events; + } + sycl::event::wait(sub_copy); + sycl::event::wait(ht_copy); + { + std::lock_guard lock(mu_events); + prune_complete_nolock(); + } } }; diff --git a/pyproject.toml b/pyproject.toml index 92be4c0771..1b37a71ae5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,6 +29,7 @@ classifiers = [ "Programming Language :: Python :: 3.13", "Programming Language :: Python :: 3.14", "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Free Threading :: 2 - Beta", "Topic :: Software Development", "Topic :: Scientific/Engineering", "Operating System :: Microsoft :: Windows", @@ -60,7 +61,7 @@ readme = {file = "README.md", content-type = "text/markdown"} requires-python = ">=3.10" [project.optional-dependencies] -coverage = ["Cython", "pytest", "pytest-cov", "coverage", "tomli"] +coverage = ["Cython", "pytest", "coverage", "tomli"] docs = [ "Cython", "graphviz",