From 362e4b179ae2897042bc57ded4c5592f0c9da613 Mon Sep 17 00:00:00 2001 From: manfredss Date: Mon, 15 Jun 2026 14:48:21 +0800 Subject: [PATCH 01/43] migrate workflows to github actions --- .github/actions/rerun-workflow/action.yml | 31 ++++ .github/actions/rerun-workflow/rerun.sh | 77 +++++++++ .github/workflows/Install_check.yml | 92 +++++++++++ .github/workflows/Modeltest_check.yml | 135 ++++++++++++++++ .github/workflows/Unittest_check.yml | 126 +++++++++++++++ .../workflows/Unittest_check_distribute.yml | 148 ++++++++++++++++++ .github/workflows/Unittest_check_gpu.yml | 135 ++++++++++++++++ .github/workflows/codestyle.yml | 52 ++++++ .github/workflows/consistency.yml | 78 +++++++++ .github/workflows/rerun.yml | 107 +++++++++++++ 10 files changed, 981 insertions(+) create mode 100644 .github/actions/rerun-workflow/action.yml create mode 100644 .github/actions/rerun-workflow/rerun.sh create mode 100644 .github/workflows/Install_check.yml create mode 100644 .github/workflows/Modeltest_check.yml create mode 100644 .github/workflows/Unittest_check.yml create mode 100644 .github/workflows/Unittest_check_distribute.yml create mode 100644 .github/workflows/Unittest_check_gpu.yml create mode 100644 .github/workflows/codestyle.yml create mode 100644 .github/workflows/consistency.yml create mode 100644 .github/workflows/rerun.yml diff --git a/.github/actions/rerun-workflow/action.yml b/.github/actions/rerun-workflow/action.yml new file mode 100644 index 000000000..2cb4a9c85 --- /dev/null +++ b/.github/actions/rerun-workflow/action.yml @@ -0,0 +1,31 @@ +name: 'Rerun workflow' +description: 'Rerun a failed jobs or a specific named job for a pull request' + +inputs: + GITHUB_TOKEN: + description: 'Token with actions:write scope (pass secrets.GITHUB_TOKEN).' 
+    required: true
+  OWNER:
+    description: 'Repository owner'
+    required: true
+  REPO:
+    description: 'Repository name'
+    required: true
+  PR_ID:
+    description: 'Pull request ID'
+    required: true
+  JOB_NAME:
+    description: 'Name of the job to rerun'
+    required: true
+
+runs:
+  using: 'composite'
+  steps:
+    - run: bash ./.github/actions/rerun-workflow/rerun.sh
+      shell: bash
+      env:
+        GITHUB_TOKEN: ${{ inputs.GITHUB_TOKEN }}
+        OWNER: ${{ inputs.OWNER }}
+        REPO: ${{ inputs.REPO }}
+        PR_ID: ${{ inputs.PR_ID }}
+        JOB_NAME: ${{ inputs.JOB_NAME }}
diff --git a/.github/actions/rerun-workflow/rerun.sh b/.github/actions/rerun-workflow/rerun.sh
new file mode 100644
index 000000000..6473de10c
--- /dev/null
+++ b/.github/actions/rerun-workflow/rerun.sh
@@ -0,0 +1,111 @@
+#!/usr/bin/env bash
+# Copyright (c) 2026 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Rerun GitHub Actions jobs for the head commit of a pull request.
+#
+# Required environment variables:
+#   GITHUB_TOKEN  token sent in the Authorization header of every API call
+#   OWNER         repository owner
+#   REPO          repository name
+#   PR_ID         pull request number
+#   JOB_NAME      "all-failed" reruns the failed jobs of every workflow run;
+#                 any other value is the exact name of the job to rerun
+#
+# Exits non-zero when a variable is unset, an API read fails, or no workflow
+# run exists for the head commit. Rerun requests themselves are best effort:
+# a rejected request is logged and the loop moves on.
+
+set -euo pipefail
+
+: "${GITHUB_TOKEN:?GITHUB_TOKEN must be set}"
+: "${OWNER:?OWNER must be set}"
+: "${REPO:?REPO must be set}"
+: "${PR_ID:?PR_ID must be set}"
+: "${JOB_NAME:?JOB_NAME must be set}"
+
+API_ROOT="https://api.github.com/repos/$OWNER/$REPO"
+
+# GET "$API_ROOT/$1" and print the body. -f makes curl fail on HTTP errors so
+# an error document is never parsed as if it were a valid result.
+api_get() {
+  curl -fsS \
+    -H "Accept: application/vnd.github.v3+json" \
+    -H "Authorization: Bearer $GITHUB_TOKEN" \
+    "$API_ROOT/$1"
+}
+
+# POST to "$API_ROOT/$1" and print only the HTTP status code. curl reports
+# 000 when no response was received; "|| true" keeps that from aborting the
+# script so the caller can log the failure and continue.
+api_post_status() {
+  curl -sS -X POST -o /dev/null -w "%{http_code}" \
+    -H "Accept: application/vnd.github.v3+json" \
+    -H "Authorization: Bearer $GITHUB_TOKEN" \
+    "$API_ROOT/$1" || true
+}
+
+COMMIT_SHA=$(api_get "pulls/$PR_ID" | jq -r '.head.sha // empty')
+if [ -z "$COMMIT_SHA" ]; then
+  echo "Could not resolve the head commit of pull request $PR_ID." >&2
+  exit 1
+fi
+echo "Commit SHA: $COMMIT_SHA"
+
+response=$(api_get "actions/runs?head_sha=$COMMIT_SHA&per_page=100")
+echo "Response: $response"
+
+run_ids=$(echo "$response" | jq -r '.workflow_runs[].id')
+if [ -z "$run_ids" ]; then
+  echo "No matching workflow runs found for commit $COMMIT_SHA."
+  exit 1
+fi
+echo "Found run_ids for commit $COMMIT_SHA: $run_ids"
+
+# run_ids is a whitespace-separated list of numeric ids, so it is split on purpose.
+for run_id in $run_ids; do
+  if [ "$JOB_NAME" = "all-failed" ]; then
+    echo "Rerunning all failed jobs for run_id: $run_id"
+    status=$(api_post_status "actions/runs/$run_id/rerun-failed-jobs")
+    if [ "$status" = "201" ]; then
+      echo "Successfully requested rerun for all failed jobs in run_id: $run_id"
+    else
+      echo "Failed to request rerun for run_id: $run_id with status code $status"
+    fi
+    continue
+  fi
+
+  # per_page=100 is the API maximum; the default page holds only 30 jobs.
+  jobs_response=$(api_get "actions/runs/$run_id/jobs?per_page=100")
+  echo "Jobs Response for run_id $run_id: $jobs_response"
+
+  # Every job with the requested name is selected, whatever its conclusion.
+  block_jobs=$(echo "$jobs_response" | jq -r --arg job_name "$JOB_NAME" \
+    '.jobs[] | select(.name == $job_name) | .id')
+  if [ -z "$block_jobs" ]; then
+    echo "No block jobs found for run_id $run_id with name $JOB_NAME."
+    continue
+  fi
+  echo "Found block jobs for run_id $run_id: $block_jobs"
+
+  for job_id in $block_jobs; do
+    echo "Rerunning job_id: $job_id"
+    status=$(api_post_status "actions/jobs/$job_id/rerun")
+    if [ "$status" = "201" ]; then
+      echo "Successfully requested rerun for job_id: $job_id"
+    else
+      echo "Failed to request rerun for job_id: $job_id with status code $status"
+    fi
+  done
+done
diff --git a/.github/workflows/Install_check.yml b/.github/workflows/Install_check.yml
new file mode 100644
index 000000000..3b1756c8e
--- /dev/null
+++ b/.github/workflows/Install_check.yml
@@ -0,0 +1,92 @@
+name: Install Check
+
+on:
+  # Nightly: re-validate master against the freshly built nightly paddle
+  # Cron is UTC: '0 21 * * *' => 05:00 UTC+8
+  # schedule:
+  #   - cron: '0 21 * * *'
+  pull_request:
+    branches: [master, develop, "release/**"]
+    paths-ignore:
+      - '**/*.md'
+      - 'docs/**'
+      - 'LICENSE'
+      - '.gitignore'
+      - '.pre-commit-config.yaml'
+
+  push:
+    branches: [master, develop]
+    paths-ignore:
+      - '**/*.md'
+      - 'docs/**'
+      - 'LICENSE'
+      - '.gitignore'
+      - '.pre-commit-config.yaml'
+
+permissions:
+  contents: read
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  install-check:
+    name: Install Check
+    # TODO: replace with the actual self-hosted CPU runner group name
+    runs-on: ubuntu-24.04
+    timeout-minutes: 20
+
+    steps:
+      - name: Checkout Repo
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.10'
+          cache: pip
+          cache-dependency-path: |
+            requirements.txt
+            tests/requirements.txt
+
+      - name: Install Latest Release CPU Version Torch
+        run: |
+          python -m pip install --upgrade pip
+          python -m pip install -U torch torchvision --index-url https://download.pytorch.org/whl/cpu
+          python -c "import torch; print('torch version information:', torch.__version__)"
+
+      - name: Install Latest develop CPU Version Paddle
+        run: |
+          python -m pip uninstall -y paddlepaddle
+          python -m pip uninstall -y paddlepaddle-gpu
+          python -m pip install
--force-reinstall --no-deps --no-cache-dir -U --pre paddlepaddle \ + -i https://www.paddlepaddle.org.cn/packages/nightly/cpu/ \ + --timeout 120 --retries 3 + python -c "import paddle; print('paddle version information:', paddle.__version__); print('paddle commit information:', paddle.__git_commit__)" + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install -r requirements.txt + if [ -f tests/requirements.txt ]; then + python -m pip install -r tests/requirements.txt + fi + + - name: Install Check + run: | + python setup.py sdist bdist_wheel + python -m pip install dist/*.whl --force-reinstall + paconvert -V + paconvert --run_check + + - name: Upload dist on failure + if: failure() + uses: actions/upload-artifact@v4 + with: + name: install-dist-${{ github.run_id }} + path: dist/ + if-no-files-found: ignore + retention-days: 14 diff --git a/.github/workflows/Modeltest_check.yml b/.github/workflows/Modeltest_check.yml new file mode 100644 index 000000000..c21618b02 --- /dev/null +++ b/.github/workflows/Modeltest_check.yml @@ -0,0 +1,135 @@ +name: Modeltest Check + +on: + # Nightly: re-validate master against the freshly built nightly paddle + # Cron is UTC: '0 21 * * *' => 05:00 UTC+8 + # schedule: + # - cron: '0 21 * * *' + pull_request: + branches: [master, develop, 'release/**'] + paths-ignore: + - '**/*.md' + - 'docs/**' + - 'LICENSE' + - '.gitignore' + - '.pre-commit-config.yaml' + + push: + branches: [master, develop] + paths-ignore: + - '**/*.md' + - 'docs/**' + - 'LICENSE' + - '.gitignore' + - '.pre-commit-config.yaml' + +permissions: + contents: read + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + modeltest: + name: Modeltest Check + runs-on: + group: PaConvert + timeout-minutes: 120 + env: + TORCH_PROJECT_PATH: /workspace/torch_project + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + 
fetch-depth: 1 + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: '3.10' + cache: pip + cache-dependency-path: | + requirements.txt + tests/requirements.txt + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install -U torch --index-url https://download.pytorch.org/whl/cpu + python -m pip uninstall -y paddlepaddle paddlepaddle-gpu + + # Detect current Python ABI tag (e.g. cp310) to avoid installing a mismatched wheel + PY_ABI=$(python -c "import sys; print('cp{}{}'.format(*sys.version_info[:2]))") + echo "Current Python ABI: $PY_ABI" + + if ls paddlepaddle-0.0.0-*.whl >/dev/null 2>&1; then + # Validate the local wheel matches the current Python version before installing + MATCHED_WHL=$(ls paddlepaddle-0.0.0-*${PY_ABI}*.whl 2>/dev/null | head -1) + if [ -z "$MATCHED_WHL" ]; then + echo "WARNING: local wheel found but none matches ABI $PY_ABI; falling back to nightly." + python -m pip install --force-reinstall --no-cache-dir --pre paddlepaddle \ + -i https://www.paddlepaddle.org.cn/packages/nightly/cpu/ \ + --timeout 120 --retries 3 + else + echo "Installing local wheel: $MATCHED_WHL" + python -m pip install "$MATCHED_WHL" + fi + else + # No local wheel — install from nightly with retry to reduce transient failures + python -m pip install --force-reinstall --no-cache-dir --pre paddlepaddle \ + -i https://www.paddlepaddle.org.cn/packages/nightly/cpu/ \ + --timeout 120 --retries 3 + fi + python -m pip install -r requirements.txt + python -m pip install pandas openpyxl + python -c "import torch; print('torch version information:', torch.__version__)" + python -c "import paddle; print('paddle version information:', paddle.__version__); print('paddle commit information:', paddle.__git_commit__)" + + - name: Run code set convert check + shell: bash + run: | + if [ ! -d "$TORCH_PROJECT_PATH" ]; then + echo "$TORCH_PROJECT_PATH does not exist. 
Please prepare the model code set on the self-hosted runner." + exit 1 + fi + + shopt -s nullglob + projects=("$TORCH_PROJECT_PATH"/*) + if [ ${#projects[@]} -eq 0 ]; then + echo "$TORCH_PROJECT_PATH is empty. Please prepare the model code set on the self-hosted runner." + exit 1 + fi + + failed_projects=() + for project in "${projects[@]}"; do + if [ -d "$project" ]; then + project_name=$(basename "$project") + echo "[code-set-convert] Converting project: $project_name" + if ! python paconvert/main.py --in_dir "$project" --show_unsupport_api --calculate_speed; then + failed_projects+=("$project_name") + fi + fi + done + + if [ ${#failed_projects[@]} -ne 0 ]; then + printf '%s\n' "${failed_projects[@]}" > failed_projects.txt + echo "[code-set-convert] The following projects failed to convert:" + cat failed_projects.txt + exit 1 + fi + + - name: Run modeltest + run: python tools/modeltest/modeltest_check.py + + - name: Upload modeltest logs on failure + if: failure() + uses: actions/upload-artifact@v4 + with: + name: modeltest-log-${{ github.run_id }} + path: | + failed_projects.txt + tests/code_library/model_case/**/convert_paddle_code* + if-no-files-found: ignore + retention-days: 14 diff --git a/.github/workflows/Unittest_check.yml b/.github/workflows/Unittest_check.yml new file mode 100644 index 000000000..24abfdd1b --- /dev/null +++ b/.github/workflows/Unittest_check.yml @@ -0,0 +1,126 @@ +name: CI Unittest + +on: + # Nightly: re-validate master against the freshly built nightly paddle + # Cron is UTC: '0 21 * * *' => 05:00 UTC+8 + # schedule: + # - cron: '0 21 * * *' + pull_request: + branches: [master, develop, "release/**"] + paths-ignore: + - '**/*.md' + - 'docs/**' + - 'LICENSE' + - '.gitignore' + - '.pre-commit-config.yaml' + push: + branches: [master, develop] + paths-ignore: + - '**/*.md' + - 'docs/**' + - 'LICENSE' + - '.gitignore' + - '.pre-commit-config.yaml' + +permissions: + contents: read + +concurrency: + group: ${{ github.workflow }}-${{ 
github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + ci-unittest: + name: CI Unittest + # TODO: replace with the actual self-hosted CPU runner group name + runs-on: ubuntu-24.04 + timeout-minutes: 60 + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: '3.10' + cache: pip + cache-dependency-path: | + requirements.txt + tests/requirements.txt + + - name: Check Env + run: | + python --version + pwd + + - name: Install Latest Release CPU Version Torch + run: | + python -m pip install --upgrade pip + python -m pip install -U torch torchvision --index-url https://download.pytorch.org/whl/cpu + python -c "import torch; print('torch version information:', torch.__version__)" + + - name: Install Latest develop CPU Version Paddle + run: | + python -m pip uninstall -y paddlepaddle + python -m pip uninstall -y paddlepaddle-gpu + python -m pip install --force-reinstall --no-deps --no-cache-dir -U --pre paddlepaddle \ + -i https://www.paddlepaddle.org.cn/packages/nightly/cpu/ \ + --timeout 120 --retries 3 + python -c "import paddle; print('paddle version information:', paddle.__version__); print('paddle commit information:', paddle.__git_commit__)" + + - name: Install paconvert requirements + run: | + python -m pip install -r requirements.txt + if [ -f tests/requirements.txt ]; then + python -m pip install -r tests/requirements.txt + fi + + - name: Run CI Unittest + shell: bash + run: | + python -m pip install pytest-timeout pytest-xdist pytest-rerunfailures + + # Disable errexit so a failing run is captured instead of aborting immediately + # A passing retry can clear the error. + set +e + + echo "Checking code cpu unit test by pytest ..." + # tee output to pytest.log so it can be uploaded as an artifact on failure; + # PIPESTATUS[0] preserves pytest's exit code instead of tee's. 
+ python -m pytest -v -s -p no:warnings -n 1 --reruns=3 ./tests 2>&1 | tee pytest.log + check_error=${PIPESTATUS[0]} + if [ ${check_error} -ne 0 ]; then + echo "Rerun cpu unit test check." + python -m pytest -v -s -p no:warnings -n 1 --lf ./tests 2>&1 | tee -a pytest.log + check_error=${PIPESTATUS[0]} + fi + + echo '************************************************************************************************************' + if [ ${check_error} -ne 0 ]; then + echo "Your PR code cpu unit test check failed." + echo "Please run the following command:" + echo "" + echo " python -m pytest tests" + echo "" + echo "For more information, please refer to our check guide:" + echo "https://github.com/PaddlePaddle/PaConvert#readme." + else + echo "Your PR code cpu unit test check passed." + fi + echo '************************************************************************************************************' + + exit ${check_error} + + - name: Upload logs on failure + if: failure() + uses: actions/upload-artifact@v4 + with: + name: ci-unittest-log-${{ github.run_id }} + path: | + pytest.log + tests/**/pytest.log + if-no-files-found: ignore + retention-days: 14 diff --git a/.github/workflows/Unittest_check_distribute.yml b/.github/workflows/Unittest_check_distribute.yml new file mode 100644 index 000000000..57a2560e4 --- /dev/null +++ b/.github/workflows/Unittest_check_distribute.yml @@ -0,0 +1,148 @@ +name: Distributed Unittest Check + +on: + # Nightly: re-validate master against the freshly built nightly paddle + # Cron is UTC: '0 21 * * *' => 05:00 UTC+8 + # schedule: + # - cron: '0 21 * * *' + pull_request: + branches: [master, develop, "release/**"] + paths-ignore: + - '**/*.md' + - 'docs/**' + - 'LICENSE' + - '.gitignore' + - '.pre-commit-config.yaml' + push: + branches: [master, develop] + paths-ignore: + - '**/*.md' + - 'docs/**' + - 'LICENSE' + - '.gitignore' + - '.pre-commit-config.yaml' + +permissions: + contents: read + +concurrency: + group: ${{ github.workflow 
}}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + distributed-unittest: + name: Distributed Unittest Check + runs-on: + group: PaConvert + timeout-minutes: 120 + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: '3.10' + cache: pip + cache-dependency-path: | + requirements.txt + tests/requirements.txt + + - name: Check Env + run: | + python --version + pwd + + - name: Download GPU dependencies + working-directory: tests/distributed + run: | + wget --no-proxy -q https://paddle-paconvert.bj.bcebos.com/torch-2.7.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl + + - name: Install Latest Release GPU Version Torch + working-directory: tests/distributed + run: | + python -m pip install --upgrade pip + python -m pip uninstall -y torchaudio + python -m pip uninstall -y torchvision + python -m pip install torch-2.7.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl + python -c "import torch; print('torch version information:', torch.__version__)" + + - name: Install Latest develop GPU Version Paddle + run: | + python -m pip uninstall -y paddlepaddle + python -m pip uninstall -y paddlepaddle-gpu + python -m pip install --force-reinstall --no-deps --no-cache-dir -U --pre paddlepaddle-gpu \ + -i https://www.paddlepaddle.org.cn/packages/nightly/cu118/ \ + --timeout 120 --retries 3 + python -m pip install safetensors==0.6.2 + python -c "import paddle; print('paddle version information:', paddle.__version__); print('paddle commit information:', paddle.__git_commit__)" + + - name: Install paconvert requirements + run: | + python -m pip install -r requirements.txt + + - name: Convert torch code to paddle + working-directory: tests/distributed + run: | + python ../../paconvert/main.py -i . 
-o /tmp/paddle_dist --log_level "DEBUG" + + - name: Run distributed unit tests + working-directory: tests/distributed + shell: bash + env: + CUDA_VISIBLE_DEVICES: "0,1" + run: | + # Free port 29500 if any leftover process is occupying it + netstat -tulnp 2>/dev/null | grep ':29500' | awk '{print $7}' | cut -d/ -f1 | xargs -r kill -9 2>/dev/null || true + + # Disable errexit so every test runs and all failures are collected + # Instead of aborting on the first failing test. + set +e + + check_error=0 + failed_tests=() + test_list=$(ls *.py | grep -v run_and_compare.py) + for item in $test_list; do + cmd1="torchrun --nproc_per_node=2 ${item}" + cmd2="python -m paddle.distributed.launch /tmp/paddle_dist/${item}" + python run_and_compare.py "$cmd1" "$cmd2" + tmp_check_error=$? + if [ $tmp_check_error -ne 0 ]; then + check_error=1 + failed_tests+=("$item") + fi + done + + echo '************************************************************************************************************' + if [ ${#failed_tests[@]} -ne 0 ]; then + printf '%s\n' "${failed_tests[@]}" > failed_tests.txt + echo "Your PR code distributed unittest check FAILED" + echo "The following distributed tests failed:" + cat failed_tests.txt + echo "Please run the following command:" + echo "" + echo " cd tests/distributed && bash unittest_check_distribute.sh" + echo "" + echo "For more information, please refer to our check guide:" + echo "https://github.com/PaddlePaddle/PaConvert#readme" + else + echo "Your PR code distributed unit test check passed." 
+ fi + echo '************************************************************************************************************' + + exit ${check_error} + + - name: Upload logs on failure + if: failure() + uses: actions/upload-artifact@v4 + with: + name: distributed-unittest-log-${{ github.run_id }} + path: | + tests/distributed/failed_tests.txt + tests/distributed/*.log + /tmp/paddle_dist/** + if-no-files-found: ignore + retention-days: 14 diff --git a/.github/workflows/Unittest_check_gpu.yml b/.github/workflows/Unittest_check_gpu.yml new file mode 100644 index 000000000..78cceb5bd --- /dev/null +++ b/.github/workflows/Unittest_check_gpu.yml @@ -0,0 +1,135 @@ +name: GPU Unittest Check + +on: + # Nightly: re-validate master against the freshly built nightly paddle + # Cron is UTC: '0 21 * * *' => 05:00 UTC+8 + # schedule: + # - cron: '0 21 * * *' + pull_request: + branches: [master, develop, "release/**"] + paths-ignore: + - '**/*.md' + - 'docs/**' + - 'LICENSE' + - '.gitignore' + - '.pre-commit-config.yaml' + push: + branches: [master, develop] + paths-ignore: + - '**/*.md' + - 'docs/**' + - 'LICENSE' + - '.gitignore' + - '.pre-commit-config.yaml' + +permissions: + contents: read + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + gpu-unittest: + name: GPU Unittest Check + runs-on: + group: PaConvert + timeout-minutes: 120 + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: '3.10' + cache: pip + cache-dependency-path: | + requirements.txt + tests/requirements.txt + + - name: Check Env + run: | + python --version + pwd + + - name: Download GPU dependencies + run: | + wget --no-proxy -q https://paddle-paconvert.bj.bcebos.com/torch-2.7.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl + wget --no-proxy -q 
https://paddle-paconvert.bj.bcebos.com/torchvision-0.22.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl + + - name: Install Latest Release GPU Version Torch + run: | + python -m pip install --upgrade pip + python -m pip uninstall -y torchaudio + python -m pip install torch-2.7.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl + python -m pip install torchvision-0.22.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl + python -c "import torch; print('torch version information:', torch.__version__)" + + - name: Install Latest develop GPU Version Paddle + run: | + python -m pip uninstall -y paddlepaddle + python -m pip uninstall -y paddlepaddle-gpu + python -m pip install --force-reinstall --no-deps --no-cache-dir -U --pre paddlepaddle-gpu \ + -i https://www.paddlepaddle.org.cn/packages/nightly/cu118/ \ + --timeout 120 --retries 3 + python -m pip install safetensors==0.6.2 + python -c "import paddle; print('paddle version information:', paddle.__version__); print('paddle commit information:', paddle.__git_commit__)" + + - name: Install paconvert requirements + run: | + python -m pip install -r requirements.txt + if [ -f tests/requirements.txt ]; then + python -m pip install -r tests/requirements.txt + fi + + - name: Run GPU Unittest + shell: bash + run: | + python -m pip install pytest-timeout pytest-xdist pytest-rerunfailures + + # Disable errexit so a failing run is captured instead of aborting immediately + # A passing retry can clear the error. + set +e + + echo "Checking code gpu unit test by pytest ..." + # tee output to pytest.log so it can be uploaded as an artifact on failure; + # PIPESTATUS[0] preserves pytest's exit code instead of tee's. + python -m pytest -v -s -p no:warnings -n 1 --reruns=3 ./tests 2>&1 | tee pytest.log + check_error=${PIPESTATUS[0]} + if [ ${check_error} -ne 0 ]; then + echo "Rerun gpu unit test check." 
+ python -m pytest -v -s -p no:warnings -n 1 --lf ./tests 2>&1 | tee -a pytest.log + check_error=${PIPESTATUS[0]} + fi + + echo '************************************************************************************************************' + if [ ${check_error} -ne 0 ]; then + echo "Your PR code gpu unit test check failed." + echo "Please run the following command:" + echo "" + echo " python -m pytest tests" + echo "" + echo "For more information, please refer to our check guide:" + echo "https://github.com/PaddlePaddle/PaConvert#readme." + else + echo "Your PR code gpu unit test check passed." + fi + echo '************************************************************************************************************' + + exit ${check_error} + + - name: Upload logs on failure + if: failure() + uses: actions/upload-artifact@v4 + with: + name: gpu-unittest-log-${{ github.run_id }} + path: | + pytest.log + tests/**/pytest.log + if-no-files-found: ignore + retention-days: 14 + diff --git a/.github/workflows/codestyle.yml b/.github/workflows/codestyle.yml new file mode 100644 index 000000000..b599f0617 --- /dev/null +++ b/.github/workflows/codestyle.yml @@ -0,0 +1,52 @@ +name: CodeStyle Check + +on: + # Nightly: re-validate master against the freshly built nightly paddle + # Cron is UTC: '0 21 * * *' => 05:00 UTC+8 + # schedule: + # - cron: '0 21 * * *' + pull_request: + branches: [master, develop, "release/**"] + paths-ignore: + - '**/*.md' + - 'docs/**' + - 'LICENSE' + - '.gitignore' + push: + branches: [master, develop] + paths-ignore: + - '**/*.md' + - 'docs/**' + - 'LICENSE' + - '.gitignore' + +permissions: + contents: read + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + pre-commit: + # TODO: replace with the actual self-hosted CPU runner group name + runs-on: ubuntu-24.04 + timeout-minutes: 20 + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Set up 
Python + uses: actions/setup-python@v5 + with: + python-version: '3.10' + cache: pip + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install pre-commit==2.17.0 + + - name: Run pre-commit + run: pre-commit run --all-files --show-diff-on-failure \ No newline at end of file diff --git a/.github/workflows/consistency.yml b/.github/workflows/consistency.yml new file mode 100644 index 000000000..773f8ed31 --- /dev/null +++ b/.github/workflows/consistency.yml @@ -0,0 +1,78 @@ +name: Consistency Check + +on: + # Nightly: re-validate master against the freshly built nightly paddle + # Cron is UTC: '0 21 * * *' => 05:00 UTC+8 + # schedule: + # - cron: '0 21 * * *' + pull_request: + branches: [master, develop, "release/**"] + paths-ignore: + - '**/*.md' + - 'docs/**' + - 'LICENSE' + - '.gitignore' + - '.pre-commit-config.yaml' + + push: + branches: [master, develop] + paths-ignore: + - '**/*.md' + - 'docs/**' + - 'LICENSE' + - '.gitignore' + - '.pre-commit-config.yaml' + +permissions: + contents: read + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + common-api-consistency: + name: Common API Consistency Check + # TODO: replace with the actual self-hosted CPU runner group name + runs-on: ubuntu-24.04 + timeout-minutes: 20 + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 1 + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: '3.10' + cache: pip + cache-dependency-path: | + requirements.txt + tests/requirements.txt + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install -r requirements.txt + if [ -f tests/requirements.txt ]; then + python -m pip install -r tests/requirements.txt + fi + + - name: Run common API consistency check + run: | + set -o pipefail + python tools/consistency/consistency_check.py 2>&1 | tee consistency_check.log 
+ + - name : Upload log on failure + if: failure() + uses: actions/upload-artifact@v4 + with: + name: consistency-log-${{ github.run_id }} + path: | + consistency_check.log + tools/consistency/*.log + tools/consistency/*.txt + if-no-files-found: ignore + retention-days: 14 diff --git a/.github/workflows/rerun.yml b/.github/workflows/rerun.yml new file mode 100644 index 000000000..22f82f09b --- /dev/null +++ b/.github/workflows/rerun.yml @@ -0,0 +1,107 @@ +name: Rerun Workflows + +on: + issue_comment: + types: [created] + +permissions: + actions: write + contents: read + pull-requests: read + +concurrency: + group: rerun-${{ github.event.issue.number }} + cancel-in-progress: true + +jobs: + re-run: + name: rerun workflows + if: ${{ github.event.issue.pull_request && contains(github.event.comment.body, '/re-run') && github.event.comment.user.login == github.event.issue.user.login }} + runs-on: ubuntu-latest + timeout-minutes: 10 + + steps: + - name: Checkout code + uses: actions/checkout@v4 + with: + fetch-depth: 1 + + - name: Rerun all failed jobs + if: ${{ contains(github.event.comment.body, 'all-failed') }} + uses: ./.github/actions/rerun-workflow + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + OWNER: ${{ github.repository_owner }} + REPO: ${{ github.event.repository.name }} + PR_ID: ${{ github.event.issue.number }} + JOB_NAME: 'all-failed' + + - name: Rerun Codestyle + if: ${{ contains(github.event.comment.body, 'codestyle') }} + uses: ./.github/actions/rerun-workflow + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + OWNER: ${{ github.repository_owner }} + REPO: ${{ github.event.repository.name }} + PR_ID: ${{ github.event.issue.number }} + JOB_NAME: 'pre-commit' + + - name: Rerun Common API Consistency Check + if: ${{ contains(github.event.comment.body, 'consistency') }} + uses: ./.github/actions/rerun-workflow + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + OWNER: ${{ github.repository_owner }} + REPO: ${{ github.event.repository.name }} + 
PR_ID: ${{ github.event.issue.number }} + JOB_NAME: 'Common API Consistency Check' + + - name: Rerun Install Check + if: ${{ contains(github.event.comment.body, 'install-check') }} + uses: ./.github/actions/rerun-workflow + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + OWNER: ${{ github.repository_owner }} + REPO: ${{ github.event.repository.name }} + PR_ID: ${{ github.event.issue.number }} + JOB_NAME: 'Install Check' + + - name: Rerun Modeltest Check + if: ${{ contains(github.event.comment.body, 'model-test') }} + uses: ./.github/actions/rerun-workflow + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + OWNER: ${{ github.repository_owner }} + REPO: ${{ github.event.repository.name }} + PR_ID: ${{ github.event.issue.number }} + JOB_NAME: 'Modeltest Check' + + - name: Rerun Distributed Unittest Check + if: ${{ contains(github.event.comment.body, 'distributed-unittest') }} + uses: ./.github/actions/rerun-workflow + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + OWNER: ${{ github.repository_owner }} + REPO: ${{ github.event.repository.name }} + PR_ID: ${{ github.event.issue.number }} + JOB_NAME: 'Distributed Unittest Check' + + - name: Rerun CI Unittest + if: ${{ contains(github.event.comment.body, 'cpu-unittest') }} + uses: ./.github/actions/rerun-workflow + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + OWNER: ${{ github.repository_owner }} + REPO: ${{ github.event.repository.name }} + PR_ID: ${{ github.event.issue.number }} + JOB_NAME: 'CI Unittest' + + - name: Rerun GPU Unittest Check + if: ${{ contains(github.event.comment.body, 'gpu-unittest') }} + uses: ./.github/actions/rerun-workflow + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + OWNER: ${{ github.repository_owner }} + REPO: ${{ github.event.repository.name }} + PR_ID: ${{ github.event.issue.number }} + JOB_NAME: 'GPU Unittest Check' From b315d25fb1a866eba12d3514f2c2672d3b8a56d6 Mon Sep 17 00:00:00 2001 From: manfredss Date: Mon, 15 Jun 2026 15:30:12 +0800 Subject: [PATCH 02/43] allow 
collaborator to rerun --- .github/workflows/rerun.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/rerun.yml b/.github/workflows/rerun.yml index 22f82f09b..78ed4a978 100644 --- a/.github/workflows/rerun.yml +++ b/.github/workflows/rerun.yml @@ -16,7 +16,7 @@ concurrency: jobs: re-run: name: rerun workflows - if: ${{ github.event.issue.pull_request && contains(github.event.comment.body, '/re-run') && github.event.comment.user.login == github.event.issue.user.login }} + if: ${{ github.event.issue.pull_request && contains(github.event.comment.body, '/re-run') && (github.event.comment.user.login == github.event.issue.user.login || contains(fromJSON('["OWNER", "MEMBER", "COLLABORATOR"]'), github.event.comment.author_association)) }} runs-on: ubuntu-latest timeout-minutes: 10 From b59b79df4eb52d49dfc9db2be584b6a1e012a4bf Mon Sep 17 00:00:00 2001 From: manfredss Date: Mon, 15 Jun 2026 15:51:14 +0800 Subject: [PATCH 03/43] fix install --- .github/workflows/Unittest_check.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/Unittest_check.yml b/.github/workflows/Unittest_check.yml index 24abfdd1b..f4b38f114 100644 --- a/.github/workflows/Unittest_check.yml +++ b/.github/workflows/Unittest_check.yml @@ -66,8 +66,9 @@ jobs: run: | python -m pip uninstall -y paddlepaddle python -m pip uninstall -y paddlepaddle-gpu - python -m pip install --force-reinstall --no-deps --no-cache-dir -U --pre paddlepaddle \ + python -m pip install --force-reinstall --no-cache-dir -U --pre paddlepaddle \ -i https://www.paddlepaddle.org.cn/packages/nightly/cpu/ \ + --extra-index-url https://pypi.org/simple \ --timeout 120 --retries 3 python -c "import paddle; print('paddle version information:', paddle.__version__); print('paddle commit information:', paddle.__git_commit__)" From 5e763e21117874665ac41e20153dbcc13cfef806 Mon Sep 17 00:00:00 2001 From: manfredss Date: Mon, 15 Jun 2026 16:22:07 +0800 Subject: [PATCH 04/43] fix 
other install issues --- .github/workflows/Install_check.yml | 3 ++- .github/workflows/Modeltest_check.yml | 1 + .github/workflows/Unittest_check_distribute.yml | 3 ++- .github/workflows/Unittest_check_gpu.yml | 3 ++- 4 files changed, 7 insertions(+), 3 deletions(-) diff --git a/.github/workflows/Install_check.yml b/.github/workflows/Install_check.yml index 3b1756c8e..67ff49c6b 100644 --- a/.github/workflows/Install_check.yml +++ b/.github/workflows/Install_check.yml @@ -62,8 +62,9 @@ jobs: run: | python -m pip uninstall -y paddlepaddle python -m pip uninstall -y paddlepaddle-gpu - python -m pip install --force-reinstall --no-deps --no-cache-dir -U --pre paddlepaddle \ + python -m pip install --force-reinstall --no-cache-dir -U --pre paddlepaddle \ -i https://www.paddlepaddle.org.cn/packages/nightly/cpu/ \ + --extra-index-url https://pypi.org/simple \ --timeout 120 --retries 3 python -c "import paddle; print('paddle version information:', paddle.__version__); print('paddle commit information:', paddle.__git_commit__)" diff --git a/.github/workflows/Modeltest_check.yml b/.github/workflows/Modeltest_check.yml index c21618b02..dfaaa51db 100644 --- a/.github/workflows/Modeltest_check.yml +++ b/.github/workflows/Modeltest_check.yml @@ -71,6 +71,7 @@ jobs: echo "WARNING: local wheel found but none matches ABI $PY_ABI; falling back to nightly." 
python -m pip install --force-reinstall --no-cache-dir --pre paddlepaddle \ -i https://www.paddlepaddle.org.cn/packages/nightly/cpu/ \ + --extra-index-url https://pypi.org/simple \ --timeout 120 --retries 3 else echo "Installing local wheel: $MATCHED_WHL" diff --git a/.github/workflows/Unittest_check_distribute.yml b/.github/workflows/Unittest_check_distribute.yml index 57a2560e4..ed8eda679 100644 --- a/.github/workflows/Unittest_check_distribute.yml +++ b/.github/workflows/Unittest_check_distribute.yml @@ -74,8 +74,9 @@ jobs: run: | python -m pip uninstall -y paddlepaddle python -m pip uninstall -y paddlepaddle-gpu - python -m pip install --force-reinstall --no-deps --no-cache-dir -U --pre paddlepaddle-gpu \ + python -m pip install --force-reinstall --no-cache-dir -U --pre paddlepaddle-gpu \ -i https://www.paddlepaddle.org.cn/packages/nightly/cu118/ \ + --extra-index-url https://pypi.org/simple \ --timeout 120 --retries 3 python -m pip install safetensors==0.6.2 python -c "import paddle; print('paddle version information:', paddle.__version__); print('paddle commit information:', paddle.__git_commit__)" diff --git a/.github/workflows/Unittest_check_gpu.yml b/.github/workflows/Unittest_check_gpu.yml index 78cceb5bd..4d6b42888 100644 --- a/.github/workflows/Unittest_check_gpu.yml +++ b/.github/workflows/Unittest_check_gpu.yml @@ -73,8 +73,9 @@ jobs: run: | python -m pip uninstall -y paddlepaddle python -m pip uninstall -y paddlepaddle-gpu - python -m pip install --force-reinstall --no-deps --no-cache-dir -U --pre paddlepaddle-gpu \ + python -m pip install --force-reinstall --no-cache-dir -U --pre paddlepaddle-gpu \ -i https://www.paddlepaddle.org.cn/packages/nightly/cu118/ \ + --extra-index-url https://pypi.org/simple \ --timeout 120 --retries 3 python -m pip install safetensors==0.6.2 python -c "import paddle; print('paddle version information:', paddle.__version__); print('paddle commit information:', paddle.__git_commit__)" From 
db372678e8660620584df6e8885eaf042e8dd95a Mon Sep 17 00:00:00 2001 From: Manfredss Date: Mon, 15 Jun 2026 16:57:05 +0800 Subject: [PATCH 05/43] fix codestyle --- .gitattributes | 7 +++++++ .github/workflows/Install_check.yml | 4 ++-- .github/workflows/Modeltest_check.yml | 2 +- .github/workflows/Unittest_check.yml | 2 +- .github/workflows/Unittest_check_distribute.yml | 2 +- .github/workflows/Unittest_check_gpu.yml | 3 +-- .github/workflows/codestyle.yml | 4 ++-- .github/workflows/consistency.yml | 6 +++--- 8 files changed, 18 insertions(+), 12 deletions(-) create mode 100644 .gitattributes diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 000000000..f1c416fbf --- /dev/null +++ b/.gitattributes @@ -0,0 +1,7 @@ +# Enforce LF line endings for line-ending-sensitive files on every platform, +# so editors on Windows can't introduce CRLF (which breaks shell scripts and +# trips the codestyle / pre-commit checks). +*.sh text eol=lf +*.yml text eol=lf +*.yaml text eol=lf +*.py text eol=lf diff --git a/.github/workflows/Install_check.yml b/.github/workflows/Install_check.yml index 67ff49c6b..a49aef111 100644 --- a/.github/workflows/Install_check.yml +++ b/.github/workflows/Install_check.yml @@ -4,7 +4,7 @@ on: # Nightly: re-validate master against the freshly built nightly paddle # Cron is UTC: '0 21 * * *' => 05:00 UTC+8 # schedule: - # - cron: '0 21 * * *' + # - cron: '0 21 * * *' pull_request: branches: [master, develop, "release/**"] paths-ignore: @@ -67,7 +67,7 @@ jobs: --extra-index-url https://pypi.org/simple \ --timeout 120 --retries 3 python -c "import paddle; print('paddle version information:', paddle.__version__); print('paddle commit information:', paddle.__git_commit__)" - + - name: Install dependencies run: | python -m pip install --upgrade pip diff --git a/.github/workflows/Modeltest_check.yml b/.github/workflows/Modeltest_check.yml index dfaaa51db..b7408f405 100644 --- a/.github/workflows/Modeltest_check.yml +++ 
b/.github/workflows/Modeltest_check.yml @@ -4,7 +4,7 @@ on: # Nightly: re-validate master against the freshly built nightly paddle # Cron is UTC: '0 21 * * *' => 05:00 UTC+8 # schedule: - # - cron: '0 21 * * *' + # - cron: '0 21 * * *' pull_request: branches: [master, develop, 'release/**'] paths-ignore: diff --git a/.github/workflows/Unittest_check.yml b/.github/workflows/Unittest_check.yml index f4b38f114..3ebb8bd12 100644 --- a/.github/workflows/Unittest_check.yml +++ b/.github/workflows/Unittest_check.yml @@ -4,7 +4,7 @@ on: # Nightly: re-validate master against the freshly built nightly paddle # Cron is UTC: '0 21 * * *' => 05:00 UTC+8 # schedule: - # - cron: '0 21 * * *' + # - cron: '0 21 * * *' pull_request: branches: [master, develop, "release/**"] paths-ignore: diff --git a/.github/workflows/Unittest_check_distribute.yml b/.github/workflows/Unittest_check_distribute.yml index ed8eda679..116c3df40 100644 --- a/.github/workflows/Unittest_check_distribute.yml +++ b/.github/workflows/Unittest_check_distribute.yml @@ -4,7 +4,7 @@ on: # Nightly: re-validate master against the freshly built nightly paddle # Cron is UTC: '0 21 * * *' => 05:00 UTC+8 # schedule: - # - cron: '0 21 * * *' + # - cron: '0 21 * * *' pull_request: branches: [master, develop, "release/**"] paths-ignore: diff --git a/.github/workflows/Unittest_check_gpu.yml b/.github/workflows/Unittest_check_gpu.yml index 4d6b42888..dcddf6236 100644 --- a/.github/workflows/Unittest_check_gpu.yml +++ b/.github/workflows/Unittest_check_gpu.yml @@ -4,7 +4,7 @@ on: # Nightly: re-validate master against the freshly built nightly paddle # Cron is UTC: '0 21 * * *' => 05:00 UTC+8 # schedule: - # - cron: '0 21 * * *' + # - cron: '0 21 * * *' pull_request: branches: [master, develop, "release/**"] paths-ignore: @@ -133,4 +133,3 @@ jobs: tests/**/pytest.log if-no-files-found: ignore retention-days: 14 - diff --git a/.github/workflows/codestyle.yml b/.github/workflows/codestyle.yml index b599f0617..48feb08ea 100644 
--- a/.github/workflows/codestyle.yml +++ b/.github/workflows/codestyle.yml @@ -4,7 +4,7 @@ on: # Nightly: re-validate master against the freshly built nightly paddle # Cron is UTC: '0 21 * * *' => 05:00 UTC+8 # schedule: - # - cron: '0 21 * * *' + # - cron: '0 21 * * *' pull_request: branches: [master, develop, "release/**"] paths-ignore: @@ -49,4 +49,4 @@ jobs: pip install pre-commit==2.17.0 - name: Run pre-commit - run: pre-commit run --all-files --show-diff-on-failure \ No newline at end of file + run: pre-commit run --all-files --show-diff-on-failure diff --git a/.github/workflows/consistency.yml b/.github/workflows/consistency.yml index 773f8ed31..accd5d3fa 100644 --- a/.github/workflows/consistency.yml +++ b/.github/workflows/consistency.yml @@ -4,10 +4,10 @@ on: # Nightly: re-validate master against the freshly built nightly paddle # Cron is UTC: '0 21 * * *' => 05:00 UTC+8 # schedule: - # - cron: '0 21 * * *' + # - cron: '0 21 * * *' pull_request: branches: [master, develop, "release/**"] - paths-ignore: + paths-ignore: - '**/*.md' - 'docs/**' - 'LICENSE' @@ -51,7 +51,7 @@ jobs: cache-dependency-path: | requirements.txt tests/requirements.txt - + - name: Install dependencies run: | python -m pip install --upgrade pip From a0719391deb37249193ae6bb7a7d0b17731d9376 Mon Sep 17 00:00:00 2001 From: manfredss Date: Tue, 16 Jun 2026 06:19:21 +0000 Subject: [PATCH 06/43] fix the EOF with pre-commit --- tests/test_block_diag.py | 214 +++++++++++++++---------------- tests/test_cartesian_prod.py | 178 ++++++++++++------------- tests/test_nn_ConvTranspose2d.py | 1 - tests/test_nn_ConvTranspose3d.py | 1 - 4 files changed, 196 insertions(+), 198 deletions(-) diff --git a/tests/test_block_diag.py b/tests/test_block_diag.py index c38b5d9cc..2e2952c8f 100644 --- a/tests/test_block_diag.py +++ b/tests/test_block_diag.py @@ -1,108 +1,108 @@ -# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import textwrap - -from apibase import APIBase - -obj = APIBase("torch.block_diag") - - -def test_case_1(): - pytorch_code = textwrap.dedent( - """ - import torch - A = torch.tensor([[0, 1], [1, 0]]) - B = torch.tensor([[3, 4, 5], [6, 7, 8]]) - C = torch.tensor(7) - D = torch.tensor([1, 2, 3]) - E = torch.tensor([[4], [5], [6]]) - result = torch.block_diag(A, B, C, D, E) - """ - ) - obj.run(pytorch_code, ["result"]) - - -def test_case_2(): - pytorch_code = textwrap.dedent( - """ - import torch - A = torch.tensor([[4], [3], [2]]) - B = torch.tensor([7, 6, 5]) - C = torch.tensor(1) - result = torch.block_diag(torch.tensor([[4], [3], [2]]), - torch.tensor([7, 6, 5]), - torch.tensor(1)) - """ - ) - obj.run(pytorch_code, ["result"]) - - -def test_case_3(): - pytorch_code = textwrap.dedent( - """ - import torch - A = torch.tensor([[4], [3], [2]]) - B = torch.tensor([[5, 6], [9, 1]]) - C = torch.tensor([1, 2, 3]) - result = torch.block_diag(A) - """ - ) - obj.run(pytorch_code, ["result"]) - - -def test_case_4(): - pytorch_code = textwrap.dedent( - """ - import torch - A = torch.tensor([[4], [3], [2]]) - B = torch.tensor([[5], [6]]) - result = torch.block_diag(A, B, torch.tensor([1, 2, 3])) - """ - ) - obj.run(pytorch_code, ["result"]) - - -def test_case_5(): - pytorch_code = textwrap.dedent( - """ - import torch - tensors = torch.tensor([[0,1,2]]), torch.tensor([[0],[1]]), torch.tensor([[20]]) - result = 
torch.block_diag(*tensors) - """ - ) - obj.run(pytorch_code, ["result"]) - - -def test_case_6(): - pytorch_code = textwrap.dedent( - """ - import torch - result = torch.block_diag(torch.tensor([[4], [3], [2]])) - """ - ) - obj.run(pytorch_code, ["result"]) - - -def test_case_7(): - """Test with expression arguments""" - pytorch_code = textwrap.dedent( - """ - import torch - A = torch.tensor([[1, 2], [3, 4]]) - B = torch.tensor([[5, 6], [7, 8]]) - result = torch.block_diag(A + 1, B * 2) - """ - ) +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import textwrap + +from apibase import APIBase + +obj = APIBase("torch.block_diag") + + +def test_case_1(): + pytorch_code = textwrap.dedent( + """ + import torch + A = torch.tensor([[0, 1], [1, 0]]) + B = torch.tensor([[3, 4, 5], [6, 7, 8]]) + C = torch.tensor(7) + D = torch.tensor([1, 2, 3]) + E = torch.tensor([[4], [5], [6]]) + result = torch.block_diag(A, B, C, D, E) + """ + ) + obj.run(pytorch_code, ["result"]) + + +def test_case_2(): + pytorch_code = textwrap.dedent( + """ + import torch + A = torch.tensor([[4], [3], [2]]) + B = torch.tensor([7, 6, 5]) + C = torch.tensor(1) + result = torch.block_diag(torch.tensor([[4], [3], [2]]), + torch.tensor([7, 6, 5]), + torch.tensor(1)) + """ + ) + obj.run(pytorch_code, ["result"]) + + +def test_case_3(): + pytorch_code = textwrap.dedent( + """ + import torch + A = torch.tensor([[4], [3], [2]]) + B = torch.tensor([[5, 6], [9, 1]]) + C = torch.tensor([1, 2, 3]) + result = torch.block_diag(A) + """ + ) + obj.run(pytorch_code, ["result"]) + + +def test_case_4(): + pytorch_code = textwrap.dedent( + """ + import torch + A = torch.tensor([[4], [3], [2]]) + B = torch.tensor([[5], [6]]) + result = torch.block_diag(A, B, torch.tensor([1, 2, 3])) + """ + ) + obj.run(pytorch_code, ["result"]) + + +def test_case_5(): + pytorch_code = textwrap.dedent( + """ + import torch + tensors = torch.tensor([[0,1,2]]), torch.tensor([[0],[1]]), torch.tensor([[20]]) + result = torch.block_diag(*tensors) + """ + ) + obj.run(pytorch_code, ["result"]) + + +def test_case_6(): + pytorch_code = textwrap.dedent( + """ + import torch + result = torch.block_diag(torch.tensor([[4], [3], [2]])) + """ + ) + obj.run(pytorch_code, ["result"]) + + +def test_case_7(): + """Test with expression arguments""" + pytorch_code = textwrap.dedent( + """ + import torch + A = torch.tensor([[1, 2], [3, 4]]) + B = torch.tensor([[5, 6], [7, 8]]) + result = torch.block_diag(A + 1, B * 2) + """ + ) obj.run(pytorch_code, ["result"]) diff --git 
a/tests/test_cartesian_prod.py b/tests/test_cartesian_prod.py index 625094d1c..f8113cfea 100644 --- a/tests/test_cartesian_prod.py +++ b/tests/test_cartesian_prod.py @@ -1,90 +1,90 @@ -# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import textwrap - -from apibase import APIBase - -obj = APIBase("torch.cartesian_prod") - - -def test_case_1(): - pytorch_code = textwrap.dedent( - """ - import torch - a = torch.tensor([1, 2, 3]) - b = torch.tensor([5, 6]) - result = torch.cartesian_prod(a, b) - """ - ) - obj.run(pytorch_code, ["result"]) - - -def test_case_2(): - pytorch_code = textwrap.dedent( - """ - import torch - a = torch.tensor([1, 2, 3]) - result = torch.cartesian_prod(a) - """ - ) - obj.run(pytorch_code, ["result"]) - - -def test_case_3(): - pytorch_code = textwrap.dedent( - """ - import torch - result = torch.cartesian_prod(torch.tensor([1, 2, 4, 5]), torch.tensor([5, 6]), torch.tensor([7])) - """ - ) - obj.run(pytorch_code, ["result"]) - - -def test_case_4(): - pytorch_code = textwrap.dedent( - """ - import torch - a = torch.tensor([1, 2, 3]) - b = torch.tensor([5, 6]) - c = (a, b) - result = torch.cartesian_prod(*c) - """ - ) - obj.run(pytorch_code, ["result"]) - - -def test_case_5(): - pytorch_code = textwrap.dedent( - """ - import torch - a = torch.tensor([1, 2, 3]) - b = torch.tensor([5, 6]) - result = torch.cartesian_prod(*[a, b]) - """ - ) - obj.run(pytorch_code, ["result"]) - - -def 
test_case_6(): - """Test with expression arguments""" - pytorch_code = textwrap.dedent( - """ - import torch - a = torch.tensor([1, 2]) - b = torch.tensor([3, 4]) - result = torch.cartesian_prod(a + 1, b * 2) - """ - ) +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import textwrap + +from apibase import APIBase + +obj = APIBase("torch.cartesian_prod") + + +def test_case_1(): + pytorch_code = textwrap.dedent( + """ + import torch + a = torch.tensor([1, 2, 3]) + b = torch.tensor([5, 6]) + result = torch.cartesian_prod(a, b) + """ + ) + obj.run(pytorch_code, ["result"]) + + +def test_case_2(): + pytorch_code = textwrap.dedent( + """ + import torch + a = torch.tensor([1, 2, 3]) + result = torch.cartesian_prod(a) + """ + ) + obj.run(pytorch_code, ["result"]) + + +def test_case_3(): + pytorch_code = textwrap.dedent( + """ + import torch + result = torch.cartesian_prod(torch.tensor([1, 2, 4, 5]), torch.tensor([5, 6]), torch.tensor([7])) + """ + ) + obj.run(pytorch_code, ["result"]) + + +def test_case_4(): + pytorch_code = textwrap.dedent( + """ + import torch + a = torch.tensor([1, 2, 3]) + b = torch.tensor([5, 6]) + c = (a, b) + result = torch.cartesian_prod(*c) + """ + ) + obj.run(pytorch_code, ["result"]) + + +def test_case_5(): + pytorch_code = textwrap.dedent( + """ + import torch + a = torch.tensor([1, 2, 3]) + b = torch.tensor([5, 6]) + result = torch.cartesian_prod(*[a, b]) + """ + ) + 
obj.run(pytorch_code, ["result"]) + + +def test_case_6(): + """Test with expression arguments""" + pytorch_code = textwrap.dedent( + """ + import torch + a = torch.tensor([1, 2]) + b = torch.tensor([3, 4]) + result = torch.cartesian_prod(a + 1, b * 2) + """ + ) obj.run(pytorch_code, ["result"]) diff --git a/tests/test_nn_ConvTranspose2d.py b/tests/test_nn_ConvTranspose2d.py index d1947e5bb..b2f5c4567 100644 --- a/tests/test_nn_ConvTranspose2d.py +++ b/tests/test_nn_ConvTranspose2d.py @@ -256,4 +256,3 @@ def test_case_15(): """ ) obj.run(pytorch_code, ["result"], check_value=False) - diff --git a/tests/test_nn_ConvTranspose3d.py b/tests/test_nn_ConvTranspose3d.py index e7e0e9184..8db588815 100644 --- a/tests/test_nn_ConvTranspose3d.py +++ b/tests/test_nn_ConvTranspose3d.py @@ -239,4 +239,3 @@ def test_case_14(): """ ) obj.run(pytorch_code, ["result"], check_value=False) - From d6b09c6beec15a91720be010206587cd9e135d0f Mon Sep 17 00:00:00 2001 From: manfredss Date: Wed, 17 Jun 2026 03:23:17 +0000 Subject: [PATCH 07/43] fix collection error --- .github/workflows/Unittest_check.yml | 4 ++-- .github/workflows/Unittest_check_gpu.yml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/Unittest_check.yml b/.github/workflows/Unittest_check.yml index 3ebb8bd12..a2d8ce393 100644 --- a/.github/workflows/Unittest_check.yml +++ b/.github/workflows/Unittest_check.yml @@ -91,11 +91,11 @@ jobs: echo "Checking code cpu unit test by pytest ..." # tee output to pytest.log so it can be uploaded as an artifact on failure; # PIPESTATUS[0] preserves pytest's exit code instead of tee's. - python -m pytest -v -s -p no:warnings -n 1 --reruns=3 ./tests 2>&1 | tee pytest.log + python -m pytest -v -s -p no:warnings --reruns=3 ./tests 2>&1 | tee pytest.log check_error=${PIPESTATUS[0]} if [ ${check_error} -ne 0 ]; then echo "Rerun cpu unit test check." 
- python -m pytest -v -s -p no:warnings -n 1 --lf ./tests 2>&1 | tee -a pytest.log + python -m pytest -v -s -p no:warnings --lf ./tests 2>&1 | tee -a pytest.log check_error=${PIPESTATUS[0]} fi diff --git a/.github/workflows/Unittest_check_gpu.yml b/.github/workflows/Unittest_check_gpu.yml index dcddf6236..5191dbdc5 100644 --- a/.github/workflows/Unittest_check_gpu.yml +++ b/.github/workflows/Unittest_check_gpu.yml @@ -99,11 +99,11 @@ jobs: echo "Checking code gpu unit test by pytest ..." # tee output to pytest.log so it can be uploaded as an artifact on failure; # PIPESTATUS[0] preserves pytest's exit code instead of tee's. - python -m pytest -v -s -p no:warnings -n 1 --reruns=3 ./tests 2>&1 | tee pytest.log + python -m pytest -v -s -p no:warnings --reruns=3 ./tests 2>&1 | tee pytest.log check_error=${PIPESTATUS[0]} if [ ${check_error} -ne 0 ]; then echo "Rerun gpu unit test check." - python -m pytest -v -s -p no:warnings -n 1 --lf ./tests 2>&1 | tee -a pytest.log + python -m pytest -v -s -p no:warnings --lf ./tests 2>&1 | tee -a pytest.log check_error=${PIPESTATUS[0]} fi From e73b051757240ff195e609aed1255ea6bba2b742 Mon Sep 17 00:00:00 2001 From: manfredss Date: Wed, 17 Jun 2026 07:04:17 +0000 Subject: [PATCH 08/43] add __init__.py to allow tests import apibase --- tests/flash_attn_tests/__init__.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 tests/flash_attn_tests/__init__.py diff --git a/tests/flash_attn_tests/__init__.py b/tests/flash_attn_tests/__init__.py new file mode 100644 index 000000000..8766bb689 --- /dev/null +++ b/tests/flash_attn_tests/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) 2026 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# From 5c2738a3d3814041d533fba4189bc4aa858e42ed Mon Sep 17 00:00:00 2001 From: manfredss Date: Wed, 17 Jun 2026 07:59:52 +0000 Subject: [PATCH 09/43] change download source and change runing machine, as github hosted machine can't reach baidu internal network --- .github/workflows/Unittest_check.yml | 3 ++- .github/workflows/Unittest_check_distribute.yml | 4 +++- .github/workflows/Unittest_check_gpu.yml | 6 ++++-- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/.github/workflows/Unittest_check.yml b/.github/workflows/Unittest_check.yml index a2d8ce393..2499ffde6 100644 --- a/.github/workflows/Unittest_check.yml +++ b/.github/workflows/Unittest_check.yml @@ -33,7 +33,8 @@ jobs: ci-unittest: name: CI Unittest # TODO: replace with the actual self-hosted CPU runner group name - runs-on: ubuntu-24.04 + runs-on: + group: PaConvert timeout-minutes: 60 steps: diff --git a/.github/workflows/Unittest_check_distribute.yml b/.github/workflows/Unittest_check_distribute.yml index 116c3df40..a82c4d283 100644 --- a/.github/workflows/Unittest_check_distribute.yml +++ b/.github/workflows/Unittest_check_distribute.yml @@ -67,7 +67,9 @@ jobs: python -m pip install --upgrade pip python -m pip uninstall -y torchaudio python -m pip uninstall -y torchvision - python -m pip install torch-2.7.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl + python -m pip install torch-2.7.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl \ + -i https://pypi.tuna.tsinghua.edu.cn/simple \ + --timeout 120 --retries 3 python -c "import torch; print('torch version information:', 
torch.__version__)" - name: Install Latest develop GPU Version Paddle diff --git a/.github/workflows/Unittest_check_gpu.yml b/.github/workflows/Unittest_check_gpu.yml index 5191dbdc5..796722d71 100644 --- a/.github/workflows/Unittest_check_gpu.yml +++ b/.github/workflows/Unittest_check_gpu.yml @@ -65,8 +65,10 @@ jobs: run: | python -m pip install --upgrade pip python -m pip uninstall -y torchaudio - python -m pip install torch-2.7.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl - python -m pip install torchvision-0.22.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl + python -m pip install torch-2.7.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl \ + torchvision-0.22.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl \ + -i https://pypi.tuna.tsinghua.edu.cn/simple \ + --timeout 120 --retries 3 python -c "import torch; print('torch version information:', torch.__version__)" - name: Install Latest develop GPU Version Paddle From b902e61b28e1546e7def7f1f9ff30c6615f69fc1 Mon Sep 17 00:00:00 2001 From: manfredss Date: Wed, 17 Jun 2026 09:03:40 +0000 Subject: [PATCH 10/43] add no proxy to gpu related workflow, enable downloading --- .github/workflows/Unittest_check_distribute.yml | 3 ++- .github/workflows/Unittest_check_gpu.yml | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/Unittest_check_distribute.yml b/.github/workflows/Unittest_check_distribute.yml index a82c4d283..1eb2ddb2f 100644 --- a/.github/workflows/Unittest_check_distribute.yml +++ b/.github/workflows/Unittest_check_distribute.yml @@ -67,7 +67,8 @@ jobs: python -m pip install --upgrade pip python -m pip uninstall -y torchaudio python -m pip uninstall -y torchvision - python -m pip install torch-2.7.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl \ + NO_PROXY='*' no_proxy='*' python -m pip install \ + torch-2.7.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl \ -i https://pypi.tuna.tsinghua.edu.cn/simple \ --timeout 120 --retries 3 python -c "import torch; print('torch version 
information:', torch.__version__)" diff --git a/.github/workflows/Unittest_check_gpu.yml b/.github/workflows/Unittest_check_gpu.yml index 796722d71..82ff54a75 100644 --- a/.github/workflows/Unittest_check_gpu.yml +++ b/.github/workflows/Unittest_check_gpu.yml @@ -65,7 +65,8 @@ jobs: run: | python -m pip install --upgrade pip python -m pip uninstall -y torchaudio - python -m pip install torch-2.7.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl \ + NO_PROXY='*' no_proxy='*' python -m pip install \ + torch-2.7.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl \ torchvision-0.22.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl \ -i https://pypi.tuna.tsinghua.edu.cn/simple \ --timeout 120 --retries 3 From 601ac9ad630ce69b82bbe293612887eea1717c55 Mon Sep 17 00:00:00 2001 From: manfredss Date: Wed, 17 Jun 2026 09:48:49 +0000 Subject: [PATCH 11/43] add test info --- .github/workflows/Unittest_check_gpu.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/.github/workflows/Unittest_check_gpu.yml b/.github/workflows/Unittest_check_gpu.yml index 82ff54a75..43a6b2ad1 100644 --- a/.github/workflows/Unittest_check_gpu.yml +++ b/.github/workflows/Unittest_check_gpu.yml @@ -37,6 +37,16 @@ jobs: timeout-minutes: 120 steps: + - name: Diagnose network before checkout + run: | + echo "user=$(whoami)" + echo "home=$HOME" + echo "host=$(hostname)" + env | grep -i proxy || true + git config --global --list || true + curl -I https://github.com || true + curl -v https://github.com 2>&1 | head -120 || true + - name: Checkout repository uses: actions/checkout@v4 with: From 28994aa5d5a9a2e49105a90019c6fda58801eb0c Mon Sep 17 00:00:00 2001 From: manfredss Date: Wed, 17 Jun 2026 13:20:55 +0000 Subject: [PATCH 12/43] fix gpu proxy issue --- .github/workflows/Unittest_check_gpu.yml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/Unittest_check_gpu.yml b/.github/workflows/Unittest_check_gpu.yml index 43a6b2ad1..7da237b94 100644 --- 
a/.github/workflows/Unittest_check_gpu.yml +++ b/.github/workflows/Unittest_check_gpu.yml @@ -86,11 +86,12 @@ jobs: run: | python -m pip uninstall -y paddlepaddle python -m pip uninstall -y paddlepaddle-gpu - python -m pip install --force-reinstall --no-cache-dir -U --pre paddlepaddle-gpu \ + NO_PROXY='*' no_proxy='*' python -m pip install --force-reinstall --no-cache-dir -U --pre paddlepaddle-gpu \ -i https://www.paddlepaddle.org.cn/packages/nightly/cu118/ \ - --extra-index-url https://pypi.org/simple \ + --extra-index-url https://pypi.tuna.tsinghua.edu.cn/simple \ --timeout 120 --retries 3 - python -m pip install safetensors==0.6.2 + NO_PROXY='*' no_proxy='*' python -m pip install safetensors==0.6.2 \ + -i https://pypi.tuna.tsinghua.edu.cn/simple python -c "import paddle; print('paddle version information:', paddle.__version__); print('paddle commit information:', paddle.__git_commit__)" - name: Install paconvert requirements From 693c3987f4e4eefc86b971aeafd434e0e504a920 Mon Sep 17 00:00:00 2001 From: manfredss Date: Thu, 18 Jun 2026 03:19:37 +0000 Subject: [PATCH 13/43] delete debug job --- .github/workflows/Unittest_check_gpu.yml | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/.github/workflows/Unittest_check_gpu.yml b/.github/workflows/Unittest_check_gpu.yml index 7da237b94..dd26b9af3 100644 --- a/.github/workflows/Unittest_check_gpu.yml +++ b/.github/workflows/Unittest_check_gpu.yml @@ -37,16 +37,6 @@ jobs: timeout-minutes: 120 steps: - - name: Diagnose network before checkout - run: | - echo "user=$(whoami)" - echo "home=$HOME" - echo "host=$(hostname)" - env | grep -i proxy || true - git config --global --list || true - curl -I https://github.com || true - curl -v https://github.com 2>&1 | head -120 || true - - name: Checkout repository uses: actions/checkout@v4 with: From 60afd0f8e6602c06e6d4d7dc5f7f78b3eb3078d1 Mon Sep 17 00:00:00 2001 From: manfredss Date: Thu, 18 Jun 2026 08:11:56 +0000 Subject: [PATCH 14/43] remove --no-proxy when download 
from bcebos.com --- .github/workflows/Unittest_check_distribute.yml | 2 +- .github/workflows/Unittest_check_gpu.yml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/Unittest_check_distribute.yml b/.github/workflows/Unittest_check_distribute.yml index 1eb2ddb2f..43b013162 100644 --- a/.github/workflows/Unittest_check_distribute.yml +++ b/.github/workflows/Unittest_check_distribute.yml @@ -59,7 +59,7 @@ jobs: - name: Download GPU dependencies working-directory: tests/distributed run: | - wget --no-proxy -q https://paddle-paconvert.bj.bcebos.com/torch-2.7.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl + wget -q https://paddle-paconvert.bj.bcebos.com/torch-2.7.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl - name: Install Latest Release GPU Version Torch working-directory: tests/distributed diff --git a/.github/workflows/Unittest_check_gpu.yml b/.github/workflows/Unittest_check_gpu.yml index dd26b9af3..37587e579 100644 --- a/.github/workflows/Unittest_check_gpu.yml +++ b/.github/workflows/Unittest_check_gpu.yml @@ -58,8 +58,8 @@ jobs: - name: Download GPU dependencies run: | - wget --no-proxy -q https://paddle-paconvert.bj.bcebos.com/torch-2.7.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl - wget --no-proxy -q https://paddle-paconvert.bj.bcebos.com/torchvision-0.22.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl + wget -q https://paddle-paconvert.bj.bcebos.com/torch-2.7.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl + wget -q https://paddle-paconvert.bj.bcebos.com/torchvision-0.22.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl - name: Install Latest Release GPU Version Torch run: | From 3502f38b25e0efc5ea20d28b6a33f5c0606cb3ff Mon Sep 17 00:00:00 2001 From: manfredss Date: Thu, 18 Jun 2026 08:43:19 +0000 Subject: [PATCH 15/43] delete setup python, use python on self-hosted --- .github/workflows/Unittest_check_gpu.yml | 52 ++++++++++++------------ 1 file changed, 26 insertions(+), 26 deletions(-) diff --git 
a/.github/workflows/Unittest_check_gpu.yml b/.github/workflows/Unittest_check_gpu.yml index 37587e579..9113b35a0 100644 --- a/.github/workflows/Unittest_check_gpu.yml +++ b/.github/workflows/Unittest_check_gpu.yml @@ -42,59 +42,59 @@ jobs: with: fetch-depth: 0 - - name: Setup Python - uses: actions/setup-python@v5 - with: - python-version: '3.10' - cache: pip - cache-dependency-path: | - requirements.txt - tests/requirements.txt + # - name: Setup Python + # uses: actions/setup-python@v5 + # with: + # python-version: '3.10' + # cache: pip + # cache-dependency-path: | + # requirements.txt + # tests/requirements.txt - name: Check Env run: | - python --version + python3 --version pwd - name: Download GPU dependencies run: | - wget -q https://paddle-paconvert.bj.bcebos.com/torch-2.7.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl - wget -q https://paddle-paconvert.bj.bcebos.com/torchvision-0.22.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl + wget --no-proxy -q https://paddle-paconvert.bj.bcebos.com/torch-2.7.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl + wget --no-proxy -q https://paddle-paconvert.bj.bcebos.com/torchvision-0.22.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl - name: Install Latest Release GPU Version Torch run: | - python -m pip install --upgrade pip - python -m pip uninstall -y torchaudio - NO_PROXY='*' no_proxy='*' python -m pip install \ + python3 -m pip install --upgrade pip + python3 -m pip uninstall -y torchaudio + NO_PROXY='*' no_proxy='*' python3 -m pip install \ torch-2.7.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl \ torchvision-0.22.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl \ -i https://pypi.tuna.tsinghua.edu.cn/simple \ --timeout 120 --retries 3 - python -c "import torch; print('torch version information:', torch.__version__)" + python3 -c "import torch; print('torch version information:', torch.__version__)" - name: Install Latest develop GPU Version Paddle run: | - python -m pip uninstall -y paddlepaddle - python -m pip uninstall -y 
paddlepaddle-gpu - NO_PROXY='*' no_proxy='*' python -m pip install --force-reinstall --no-cache-dir -U --pre paddlepaddle-gpu \ + python3 -m pip uninstall -y paddlepaddle + python3 -m pip uninstall -y paddlepaddle-gpu + NO_PROXY='*' no_proxy='*' python3 -m pip install --force-reinstall --no-cache-dir -U --pre paddlepaddle-gpu \ -i https://www.paddlepaddle.org.cn/packages/nightly/cu118/ \ --extra-index-url https://pypi.tuna.tsinghua.edu.cn/simple \ --timeout 120 --retries 3 - NO_PROXY='*' no_proxy='*' python -m pip install safetensors==0.6.2 \ + NO_PROXY='*' no_proxy='*' python3 -m pip install safetensors==0.6.2 \ -i https://pypi.tuna.tsinghua.edu.cn/simple - python -c "import paddle; print('paddle version information:', paddle.__version__); print('paddle commit information:', paddle.__git_commit__)" + python3 -c "import paddle; print('paddle version information:', paddle.__version__); print('paddle commit information:', paddle.__git_commit__)" - name: Install paconvert requirements run: | - python -m pip install -r requirements.txt + python3 -m pip install -r requirements.txt if [ -f tests/requirements.txt ]; then - python -m pip install -r tests/requirements.txt + python3 -m pip install -r tests/requirements.txt fi - name: Run GPU Unittest shell: bash run: | - python -m pip install pytest-timeout pytest-xdist pytest-rerunfailures + python3 -m pip install pytest-timeout pytest-xdist pytest-rerunfailures # Disable errexit so a failing run is captured instead of aborting immediately # A passing retry can clear the error. @@ -103,11 +103,11 @@ jobs: echo "Checking code gpu unit test by pytest ..." # tee output to pytest.log so it can be uploaded as an artifact on failure; # PIPESTATUS[0] preserves pytest's exit code instead of tee's. 
- python -m pytest -v -s -p no:warnings --reruns=3 ./tests 2>&1 | tee pytest.log + python3 -m pytest -v -s -p no:warnings --reruns=3 ./tests 2>&1 | tee pytest.log check_error=${PIPESTATUS[0]} if [ ${check_error} -ne 0 ]; then echo "Rerun gpu unit test check." - python -m pytest -v -s -p no:warnings --lf ./tests 2>&1 | tee -a pytest.log + python3 -m pytest -v -s -p no:warnings --lf ./tests 2>&1 | tee -a pytest.log check_error=${PIPESTATUS[0]} fi @@ -116,7 +116,7 @@ jobs: echo "Your PR code gpu unit test check failed." echo "Please run the following command:" echo "" - echo " python -m pytest tests" + echo " python3 -m pytest tests" echo "" echo "For more information, please refer to our check guide:" echo "https://github.com/PaddlePaddle/PaConvert#readme." From 7943042893be651191aea08dac947c2a85fe30f8 Mon Sep 17 00:00:00 2001 From: manfredss Date: Thu, 18 Jun 2026 08:54:38 +0000 Subject: [PATCH 16/43] remove --no-proxy when download from bcebos.com --- .github/workflows/Unittest_check_gpu.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/Unittest_check_gpu.yml b/.github/workflows/Unittest_check_gpu.yml index 9113b35a0..f910f69d2 100644 --- a/.github/workflows/Unittest_check_gpu.yml +++ b/.github/workflows/Unittest_check_gpu.yml @@ -58,8 +58,8 @@ jobs: - name: Download GPU dependencies run: | - wget --no-proxy -q https://paddle-paconvert.bj.bcebos.com/torch-2.7.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl - wget --no-proxy -q https://paddle-paconvert.bj.bcebos.com/torchvision-0.22.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl + wget -q https://paddle-paconvert.bj.bcebos.com/torch-2.7.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl + wget -q https://paddle-paconvert.bj.bcebos.com/torchvision-0.22.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl - name: Install Latest Release GPU Version Torch run: | From 543caaf320761b08caef6bcd63478b4e2b23a612 Mon Sep 17 00:00:00 2001 From: manfredss Date: Thu, 18 Jun 2026 09:15:54 +0000 Subject: 
[PATCH 17/43] add non-confidential info output --- .github/workflows/Unittest_check_distribute.yml | 2 +- .github/workflows/Unittest_check_gpu.yml | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/Unittest_check_distribute.yml b/.github/workflows/Unittest_check_distribute.yml index 43b013162..bcb74acea 100644 --- a/.github/workflows/Unittest_check_distribute.yml +++ b/.github/workflows/Unittest_check_distribute.yml @@ -59,7 +59,7 @@ jobs: - name: Download GPU dependencies working-directory: tests/distributed run: | - wget -q https://paddle-paconvert.bj.bcebos.com/torch-2.7.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl + wget --no-verbose https://paddle-paconvert.bj.bcebos.com/torch-2.7.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl - name: Install Latest Release GPU Version Torch working-directory: tests/distributed diff --git a/.github/workflows/Unittest_check_gpu.yml b/.github/workflows/Unittest_check_gpu.yml index f910f69d2..7d746ecf0 100644 --- a/.github/workflows/Unittest_check_gpu.yml +++ b/.github/workflows/Unittest_check_gpu.yml @@ -58,8 +58,9 @@ jobs: - name: Download GPU dependencies run: | - wget -q https://paddle-paconvert.bj.bcebos.com/torch-2.7.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl - wget -q https://paddle-paconvert.bj.bcebos.com/torchvision-0.22.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl + wget --no-verbose https://paddle-paconvert.bj.bcebos.com/torch-2.7.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl + wget --no-verbose https://paddle-paconvert.bj.bcebos.com/torchvision-0.22.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl + echo "Downloaded GPU dependencies successfully!" 
- name: Install Latest Release GPU Version Torch run: | From 4f087f04d83871bc2f28a67c9ec1f0d0bb8cf049 Mon Sep 17 00:00:00 2001 From: manfredss Date: Thu, 18 Jun 2026 09:32:45 +0000 Subject: [PATCH 18/43] print even more info --- .../workflows/Unittest_check_distribute.yml | 46 +++++++++---------- .github/workflows/Unittest_check_gpu.yml | 6 +++ 2 files changed, 29 insertions(+), 23 deletions(-) diff --git a/.github/workflows/Unittest_check_distribute.yml b/.github/workflows/Unittest_check_distribute.yml index bcb74acea..5ea54bad7 100644 --- a/.github/workflows/Unittest_check_distribute.yml +++ b/.github/workflows/Unittest_check_distribute.yml @@ -42,18 +42,18 @@ jobs: with: fetch-depth: 0 - - name: Setup Python - uses: actions/setup-python@v5 - with: - python-version: '3.10' - cache: pip - cache-dependency-path: | - requirements.txt - tests/requirements.txt + # - name: Setup Python + # uses: actions/setup-python@v5 + # with: + # python-version: '3.10' + # cache: pip + # cache-dependency-path: | + # requirements.txt + # tests/requirements.txt - name: Check Env run: | - python --version + python3 --version pwd - name: Download GPU dependencies @@ -64,34 +64,34 @@ jobs: - name: Install Latest Release GPU Version Torch working-directory: tests/distributed run: | - python -m pip install --upgrade pip - python -m pip uninstall -y torchaudio - python -m pip uninstall -y torchvision - NO_PROXY='*' no_proxy='*' python -m pip install \ + python3 -m pip install --upgrade pip + python3 -m pip uninstall -y torchaudio + python3 -m pip uninstall -y torchvision + NO_PROXY='*' no_proxy='*' python3 -m pip install \ torch-2.7.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl \ -i https://pypi.tuna.tsinghua.edu.cn/simple \ --timeout 120 --retries 3 - python -c "import torch; print('torch version information:', torch.__version__)" + python3 -c "import torch; print('torch version information:', torch.__version__)" - name: Install Latest develop GPU Version Paddle run: | - python -m pip 
uninstall -y paddlepaddle - python -m pip uninstall -y paddlepaddle-gpu - python -m pip install --force-reinstall --no-cache-dir -U --pre paddlepaddle-gpu \ + python3 -m pip uninstall -y paddlepaddle + python3 -m pip uninstall -y paddlepaddle-gpu + python3 -m pip install --force-reinstall --no-cache-dir -U --pre paddlepaddle-gpu \ -i https://www.paddlepaddle.org.cn/packages/nightly/cu118/ \ --extra-index-url https://pypi.org/simple \ --timeout 120 --retries 3 - python -m pip install safetensors==0.6.2 - python -c "import paddle; print('paddle version information:', paddle.__version__); print('paddle commit information:', paddle.__git_commit__)" + python3 -m pip install safetensors==0.6.2 + python3 -c "import paddle; print('paddle version information:', paddle.__version__); print('paddle commit information:', paddle.__git_commit__)" - name: Install paconvert requirements run: | - python -m pip install -r requirements.txt + python3 -m pip install -r requirements.txt - name: Convert torch code to paddle working-directory: tests/distributed run: | - python ../../paconvert/main.py -i . -o /tmp/paddle_dist --log_level "DEBUG" + python3 ../../paconvert/main.py -i . -o /tmp/paddle_dist --log_level "DEBUG" - name: Run distributed unit tests working-directory: tests/distributed @@ -111,8 +111,8 @@ jobs: test_list=$(ls *.py | grep -v run_and_compare.py) for item in $test_list; do cmd1="torchrun --nproc_per_node=2 ${item}" - cmd2="python -m paddle.distributed.launch /tmp/paddle_dist/${item}" - python run_and_compare.py "$cmd1" "$cmd2" + cmd2="python3 -m paddle.distributed.launch /tmp/paddle_dist/${item}" + python3 run_and_compare.py "$cmd1" "$cmd2" tmp_check_error=$? 
if [ $tmp_check_error -ne 0 ]; then check_error=1 diff --git a/.github/workflows/Unittest_check_gpu.yml b/.github/workflows/Unittest_check_gpu.yml index 7d746ecf0..d6c7c0a7e 100644 --- a/.github/workflows/Unittest_check_gpu.yml +++ b/.github/workflows/Unittest_check_gpu.yml @@ -58,8 +58,14 @@ jobs: - name: Download GPU dependencies run: | + echo "Downloading torch wheel..." wget --no-verbose https://paddle-paconvert.bj.bcebos.com/torch-2.7.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl + ls -lh torch-2.7.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl + + echo "Downloading torchvision wheel..." wget --no-verbose https://paddle-paconvert.bj.bcebos.com/torchvision-0.22.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl + ls -lh torchvision-0.22.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl + echo "Downloaded GPU dependencies successfully!" - name: Install Latest Release GPU Version Torch From b2f836cb98123e4ac768403b4494477afd71e5fb Mon Sep 17 00:00:00 2001 From: manfredss Date: Thu, 18 Jun 2026 09:54:51 +0000 Subject: [PATCH 19/43] pre-install torch and torchvision to avoid long time waiting --- .../workflows/Unittest_check_distribute.yml | 35 ++++++++------- .github/workflows/Unittest_check_gpu.yml | 44 +++++++++---------- 2 files changed, 41 insertions(+), 38 deletions(-) diff --git a/.github/workflows/Unittest_check_distribute.yml b/.github/workflows/Unittest_check_distribute.yml index 5ea54bad7..df9cfacaa 100644 --- a/.github/workflows/Unittest_check_distribute.yml +++ b/.github/workflows/Unittest_check_distribute.yml @@ -56,22 +56,25 @@ jobs: python3 --version pwd - - name: Download GPU dependencies - working-directory: tests/distributed - run: | - wget --no-verbose https://paddle-paconvert.bj.bcebos.com/torch-2.7.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl - - - name: Install Latest Release GPU Version Torch - working-directory: tests/distributed - run: | - python3 -m pip install --upgrade pip - python3 -m pip uninstall -y torchaudio - python3 -m pip uninstall -y 
torchvision - NO_PROXY='*' no_proxy='*' python3 -m pip install \ - torch-2.7.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl \ - -i https://pypi.tuna.tsinghua.edu.cn/simple \ - --timeout 120 --retries 3 - python3 -c "import torch; print('torch version information:', torch.__version__)" + # - name: Download GPU dependencies + # working-directory: tests/distributed + # run: | + # echo "Download torch wheel..." + # wget --no-verbose https://paddle-paconvert.bj.bcebos.com/torch-2.7.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl + # ls -lh torch-2.7.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl + # echo "Download torch wheel successful!" + + # - name: Install Latest Release GPU Version Torch + # working-directory: tests/distributed + # run: | + # python3 -m pip install --upgrade pip + # python3 -m pip uninstall -y torchaudio + # python3 -m pip uninstall -y torchvision + # NO_PROXY='*' no_proxy='*' python3 -m pip install \ + # torch-2.7.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl \ + # -i https://pypi.tuna.tsinghua.edu.cn/simple \ + # --timeout 120 --retries 3 + # python3 -c "import torch; print('torch version information:', torch.__version__)" - name: Install Latest develop GPU Version Paddle run: | diff --git a/.github/workflows/Unittest_check_gpu.yml b/.github/workflows/Unittest_check_gpu.yml index d6c7c0a7e..cc9cf0166 100644 --- a/.github/workflows/Unittest_check_gpu.yml +++ b/.github/workflows/Unittest_check_gpu.yml @@ -56,28 +56,28 @@ jobs: python3 --version pwd - - name: Download GPU dependencies - run: | - echo "Downloading torch wheel..." - wget --no-verbose https://paddle-paconvert.bj.bcebos.com/torch-2.7.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl - ls -lh torch-2.7.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl - - echo "Downloading torchvision wheel..." 
- wget --no-verbose https://paddle-paconvert.bj.bcebos.com/torchvision-0.22.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl - ls -lh torchvision-0.22.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl - - echo "Downloaded GPU dependencies successfully!" - - - name: Install Latest Release GPU Version Torch - run: | - python3 -m pip install --upgrade pip - python3 -m pip uninstall -y torchaudio - NO_PROXY='*' no_proxy='*' python3 -m pip install \ - torch-2.7.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl \ - torchvision-0.22.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl \ - -i https://pypi.tuna.tsinghua.edu.cn/simple \ - --timeout 120 --retries 3 - python3 -c "import torch; print('torch version information:', torch.__version__)" + # - name: Download GPU dependencies + # run: | + # echo "Downloading torch wheel..." + # wget --no-verbose https://paddle-paconvert.bj.bcebos.com/torch-2.7.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl + # ls -lh torch-2.7.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl + + # echo "Downloading torchvision wheel..." + # wget --no-verbose https://paddle-paconvert.bj.bcebos.com/torchvision-0.22.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl + # ls -lh torchvision-0.22.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl + + # echo "Downloaded GPU dependencies successfully!" 
+ + # - name: Install Latest Release GPU Version Torch + # run: | + # python3 -m pip install --upgrade pip + # python3 -m pip uninstall -y torchaudio + # NO_PROXY='*' no_proxy='*' python3 -m pip install \ + # torch-2.7.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl \ + # torchvision-0.22.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl \ + # -i https://pypi.tuna.tsinghua.edu.cn/simple \ + # --timeout 120 --retries 3 + # python3 -c "import torch; print('torch version information:', torch.__version__)" - name: Install Latest develop GPU Version Paddle run: | From d19cb4b4f29949fd541863e90ef49fe2ed172df3 Mon Sep 17 00:00:00 2001 From: manfredss Date: Mon, 22 Jun 2026 07:02:14 +0000 Subject: [PATCH 20/43] move the CPU CI to docker, test if it works. --- .github/workflows/Unittest_check.yml | 180 +++++++++++++++----------- .github/workflows/build_ci_images.yml | 42 ++++++ scripts/ci/run_cpu_unittest.sh | 62 +++++++++ tools/docker/Dockerfile | 18 ++- 4 files changed, 220 insertions(+), 82 deletions(-) create mode 100644 .github/workflows/build_ci_images.yml create mode 100644 scripts/ci/run_cpu_unittest.sh diff --git a/.github/workflows/Unittest_check.yml b/.github/workflows/Unittest_check.yml index 2499ffde6..18c551478 100644 --- a/.github/workflows/Unittest_check.yml +++ b/.github/workflows/Unittest_check.yml @@ -32,7 +32,6 @@ concurrency: jobs: ci-unittest: name: CI Unittest - # TODO: replace with the actual self-hosted CPU runner group name runs-on: group: PaConvert timeout-minutes: 60 @@ -43,86 +42,111 @@ jobs: with: fetch-depth: 0 - - name: Setup Python - uses: actions/setup-python@v5 - with: - python-version: '3.10' - cache: pip - cache-dependency-path: | - requirements.txt - tests/requirements.txt - - - name: Check Env - run: | - python --version - pwd - - - name: Install Latest Release CPU Version Torch + - name: Ensure CI image Exists run: | - python -m pip install --upgrade pip - python -m pip install -U torch torchvision --index-url 
https://download.pytorch.org/whl/cpu - python -c "import torch; print('torch version information:', torch.__version__)" + docker image inspect paconvert-ci:cu118 >/dev/null 2>&1 || \ + docker build --pull -t paconvert-ci:cu118 -f tools/docker/Dockerfile tools/docker - - name: Install Latest develop CPU Version Paddle + - name: Start Container run: | - python -m pip uninstall -y paddlepaddle - python -m pip uninstall -y paddlepaddle-gpu - python -m pip install --force-reinstall --no-cache-dir -U --pre paddlepaddle \ - -i https://www.paddlepaddle.org.cn/packages/nightly/cpu/ \ - --extra-index-url https://pypi.org/simple \ - --timeout 120 --retries 3 - python -c "import paddle; print('paddle version information:', paddle.__version__); print('paddle commit information:', paddle.__git_commit__)" - - - name: Install paconvert requirements + container_name="paconvert-cpu-unittest-${{ github.run_id }}-${{ github.run_attempt }}" + echo "container_name=${container_name}" >> "$GITHUB_ENV" + docker run -d --name "${container_name}" \ + -e NVIDIA_VISIBLE_DEVICES=void \ + --network host \ + -v /dev/shm:/dev/shm \ + -v "${{ github.workspace }}:/ws" \ + -w /ws \ + paconvert-ci:cu118 sleep infinity + + - name: Cleanup Container + if: always() run: | - python -m pip install -r requirements.txt - if [ -f tests/requirements.txt ]; then - python -m pip install -r tests/requirements.txt + if [ -n "${container_name:-}" ]; then + docker exec "${container_name}" chown -R "$(id -u):$(id -g)" /ws 2>/dev/null || true + docker rm -f "${container_name}" || true fi - - name: Run CI Unittest - shell: bash - run: | - python -m pip install pytest-timeout pytest-xdist pytest-rerunfailures - - # Disable errexit so a failing run is captured instead of aborting immediately - # A passing retry can clear the error. - set +e - - echo "Checking code cpu unit test by pytest ..." 
- # tee output to pytest.log so it can be uploaded as an artifact on failure; - # PIPESTATUS[0] preserves pytest's exit code instead of tee's. - python -m pytest -v -s -p no:warnings --reruns=3 ./tests 2>&1 | tee pytest.log - check_error=${PIPESTATUS[0]} - if [ ${check_error} -ne 0 ]; then - echo "Rerun cpu unit test check." - python -m pytest -v -s -p no:warnings --lf ./tests 2>&1 | tee -a pytest.log - check_error=${PIPESTATUS[0]} - fi - - echo '************************************************************************************************************' - if [ ${check_error} -ne 0 ]; then - echo "Your PR code cpu unit test check failed." - echo "Please run the following command:" - echo "" - echo " python -m pytest tests" - echo "" - echo "For more information, please refer to our check guide:" - echo "https://github.com/PaddlePaddle/PaConvert#readme." - else - echo "Your PR code cpu unit test check passed." - fi - echo '************************************************************************************************************' - - exit ${check_error} - - - name: Upload logs on failure - if: failure() - uses: actions/upload-artifact@v4 - with: - name: ci-unittest-log-${{ github.run_id }} - path: | - pytest.log - tests/**/pytest.log - if-no-files-found: ignore - retention-days: 14 + # - name: Setup Python + # uses: actions/setup-python@v5 + # with: + # python-version: '3.10' + # cache: pip + # cache-dependency-path: | + # requirements.txt + # tests/requirements.txt + + # - name: Check Env + # run: | + # python --version + # pwd + + # - name: Install Latest Release CPU Version Torch + # run: | + # python -m pip install --upgrade pip + # python -m pip install -U torch torchvision --index-url https://download.pytorch.org/whl/cpu + # python -c "import torch; print('torch version information:', torch.__version__)" + + # - name: Install Latest develop CPU Version Paddle + # run: | + # python -m pip uninstall -y paddlepaddle + # python -m pip uninstall -y 
paddlepaddle-gpu + # python -m pip install --force-reinstall --no-cache-dir -U --pre paddlepaddle \ + # -i https://www.paddlepaddle.org.cn/packages/nightly/cpu/ \ + # --extra-index-url https://pypi.org/simple \ + # --timeout 120 --retries 3 + # python -c "import paddle; print('paddle version information:', paddle.__version__); print('paddle commit information:', paddle.__git_commit__)" + + # - name: Install paconvert requirements + # run: | + # python -m pip install -r requirements.txt + # if [ -f tests/requirements.txt ]; then + # python -m pip install -r tests/requirements.txt + # fi + + # - name: Run CI Unittest + # shell: bash + # run: | + # python -m pip install pytest-timeout pytest-xdist pytest-rerunfailures + + # # Disable errexit so a failing run is captured instead of aborting immediately + # # A passing retry can clear the error. + # set +e + + # echo "Checking code cpu unit test by pytest ..." + # # tee output to pytest.log so it can be uploaded as an artifact on failure; + # # PIPESTATUS[0] preserves pytest's exit code instead of tee's. + # python -m pytest -v -s -p no:warnings --reruns=3 ./tests 2>&1 | tee pytest.log + # check_error=${PIPESTATUS[0]} + # if [ ${check_error} -ne 0 ]; then + # echo "Rerun cpu unit test check." + # python -m pytest -v -s -p no:warnings --lf ./tests 2>&1 | tee -a pytest.log + # check_error=${PIPESTATUS[0]} + # fi + + # echo '************************************************************************************************************' + # if [ ${check_error} -ne 0 ]; then + # echo "Your PR code cpu unit test check failed." + # echo "Please run the following command:" + # echo "" + # echo " python -m pytest tests" + # echo "" + # echo "For more information, please refer to our check guide:" + # echo "https://github.com/PaddlePaddle/PaConvert#readme." + # else + # echo "Your PR code cpu unit test check passed." 
+ # fi + # echo '************************************************************************************************************' + + # exit ${check_error} + + # - name: Upload logs on failure + # if: failure() + # uses: actions/upload-artifact@v4 + # with: + # name: ci-unittest-log-${{ github.run_id }} + # path: | + # pytest.log + # tests/**/pytest.log + # if-no-files-found: ignore + # retention-days: 14 diff --git a/.github/workflows/build_ci_images.yml b/.github/workflows/build_ci_images.yml new file mode 100644 index 000000000..f55fd5eba --- /dev/null +++ b/.github/workflows/build_ci_images.yml @@ -0,0 +1,42 @@ +name : Build CI Image + +on: + push: + branches: [master, develop, migrate_workflow] + paths: + - 'tools/docker/**' + - '.github/workflows/build_ci_images.yml' # rebuild when this workflow file itself changes + workflow_dispatch: + +permissions: + contents: read + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + build-ci-image: + name: Build CI Image + runs-on: + group: PaConvert + timeout-minutes: 100 + + steps: + - name: Checkout Repo + uses: actions/checkout@v4 + with: + fetch-depth: 1 + + - name: Build Image + run: | + docker build --pull -t paconvert-ci:cu118 -f tools/docker/Dockerfile tools/docker + + - name: GPU Test + run: | + docker run --rm --gpus all paconvert-ci:cu118 nvidia-smi + + - name: Image Build Summary + run: | + docker image inspect paconvert-ci:cu118 \ + --format 'paconvert-ci:cu118 size={{.Size}} created={{.Created}}' diff --git a/scripts/ci/run_cpu_unittest.sh b/scripts/ci/run_cpu_unittest.sh new file mode 100644 index 000000000..4a8e45089 --- /dev/null +++ b/scripts/ci/run_cpu_unittest.sh @@ -0,0 +1,62 @@ +# Copyright (c) 2026 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Install nightly CPU paddle and the paconvert requirements, then run the pytest suite, retrying failed tests once. +set -eo pipefail # abort on any failing setup command + +echo '******************************************************************************' +echo "Installing develop CPU version paddle" +python -m pip uninstall -y paddlepaddle paddlepaddle-gpu || true +python -m pip install --force-reinstall --no-cache-dir -U --pre paddlepaddle \ + -i https://www.paddlepaddle.org.cn/packages/nightly/cpu/ \ + --extra-index-url https://pypi.org/simple \ + --timeout 120 --retries 3 +python -c "import paddle; print('paddle version: ', paddle.__version__); print('paddle commit info: ', paddle.__git_commit__)" + +echo '******************************************************************************' +echo "Installing paconvert requirements" +python -m pip install -r requirements.txt +if [ -f tests/requirements.txt ]; then + python -m pip install -r tests/requirements.txt +fi + +echo '******************************************************************************' +python -c "import torch; print('torch version: ', torch.__version__, '| cuda available: ', torch.cuda.is_available())" + +echo '******************************************************************************' +echo "Checking code cpu unit test by pytest ..." 
+set +e # keep going on test failures so they can be retried and summarised +python -m pytest -v -s -p no:warnings --reruns=3 ./tests 2>&1 | tee pytest.log +check_errors=${PIPESTATUS[0]} # pytest's exit status, not tee's +if [ "${check_errors}" -ne 0 ]; then + echo "Rerun CPU unit test" + python -m pytest -v -s -p no:warnings --lf ./tests 2>&1 | tee -a pytest.log + check_errors=${PIPESTATUS[0]} +fi + +echo '******************************************************************************' +if [ "${check_errors}" -ne 0 ]; then + echo "Your PR code CPU unit test check FAILED" + echo "Please run the following command:" + echo "" + echo " python -m pytest tests" + echo "" + echo "For more information, please refer to our check guides:" + echo "https://github.com/paddlepaddle/paconvert#readme" +else + echo "All tests PASSED!" +fi +echo '******************************************************************************' + +exit "${check_errors}" diff --git a/tools/docker/Dockerfile b/tools/docker/Dockerfile index e617424da..a22dd1981 100644 --- a/tools/docker/Dockerfile +++ b/tools/docker/Dockerfile @@ -8,14 +8,24 @@ RUN ln -sf `which python3.10` /usr/bin/python RUN ln -sf `which pip3.10` /usr/local/bin/pip -RUN python -m pip install astor +RUN python -m pip install --no-cache-dir \ + astor \ + pandas \ + openpyxl \ + pytest-timeout \ + pytest-rerunfailures -RUN python -m pip install -U torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118 +RUN wget -q https://paddle-paconvert.bj.bcebos.com/torch-2.7.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl \ + https://paddle-paconvert.bj.bcebos.com/torchvision-0.22.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl && \ + python -m pip install --no-cache-dir \ + torch-2.7.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl \ + torchvision-0.22.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl && \ + rm -f torch-2.7.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl \ + torchvision-0.22.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl RUN wget 
https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.1-1_all.deb && \ dpkg 
-i cuda-keyring_1.1-1_all.deb && \ rm cuda-keyring_1.1-1_all.deb RUN apt-get update && \ - apt-get install -y \ - cudnn9-cuda-11 + apt-get install -y cudnn9-cuda-11 From f7d9696c27fbe206431ef1c6e2cc40595d3ec234 Mon Sep 17 00:00:00 2001 From: manfredss Date: Mon, 22 Jun 2026 08:42:47 +0000 Subject: [PATCH 21/43] add tsinghua source --- tools/docker/Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/docker/Dockerfile b/tools/docker/Dockerfile index a22dd1981..64a3bd6ed 100644 --- a/tools/docker/Dockerfile +++ b/tools/docker/Dockerfile @@ -1,6 +1,8 @@ # A image for testing PaConvert FROM registry.baidubce.com/paddlepaddle/paddle:latest-dev-cuda11.8-cudnn8.6-trt8.5-gcc82 +ENV PIP_INDEX_URL=https://pypi.tuna.tsinghua.edu.cn/simple + RUN apt-get update && \ apt-get install -y net-tools From ba6e2cab3dadf8b53e9d13b489e392180a89ffe5 Mon Sep 17 00:00:00 2001 From: manfredss Date: Mon, 22 Jun 2026 08:58:17 +0000 Subject: [PATCH 22/43] fix downloading error --- .github/workflows/Unittest_check.yml | 2 +- .github/workflows/build_ci_images.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/Unittest_check.yml b/.github/workflows/Unittest_check.yml index 18c551478..d46e01f22 100644 --- a/.github/workflows/Unittest_check.yml +++ b/.github/workflows/Unittest_check.yml @@ -45,7 +45,7 @@ jobs: - name: Ensure CI image Exists run: | docker image inspect paconvert-ci:cu118 >/dev/null 2>&1 || \ - docker build --pull -t paconvert-ci:cu118 -f tools/docker/Dockerfile tools/docker + docker build --pull --network=host -t paconvert-ci:cu118 -f tools/docker/Dockerfile tools/docker - name: Start Container run: | diff --git a/.github/workflows/build_ci_images.yml b/.github/workflows/build_ci_images.yml index f55fd5eba..3fa5bdac9 100644 --- a/.github/workflows/build_ci_images.yml +++ b/.github/workflows/build_ci_images.yml @@ -30,7 +30,7 @@ jobs: - name: Build Image run: | - docker build --pull -t paconvert-ci:cu118 -f 
tools/docker/Dockerfile tools/docker + docker build --pull --network=host -t paconvert-ci:cu118 -f tools/docker/Dockerfile tools/docker - name: GPU Test run: | From 02684db379504e9d083a01e74ea6c4c11d90352b Mon Sep 17 00:00:00 2001 From: manfredss Date: Mon, 22 Jun 2026 09:12:59 +0000 Subject: [PATCH 23/43] fix and test --- .github/workflows/Unittest_check.yml | 6 +++++- .github/workflows/build_ci_images.yml | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/.github/workflows/Unittest_check.yml b/.github/workflows/Unittest_check.yml index d46e01f22..02e578f28 100644 --- a/.github/workflows/Unittest_check.yml +++ b/.github/workflows/Unittest_check.yml @@ -45,7 +45,11 @@ jobs: - name: Ensure CI image Exists run: | docker image inspect paconvert-ci:cu118 >/dev/null 2>&1 || \ - docker build --pull --network=host -t paconvert-ci:cu118 -f tools/docker/Dockerfile tools/docker + docker build --pull --network=host \ + --build-arg http_proxy=$http_proxy \ + --build-arg https_proxy=$https_proxy \ + --build-arg no_proxy=".baidu.com,.bcebos.com,.tuna.tsinghua.edu.cn" \ + -t paconvert-ci:cu118 -f tools/docker/Dockerfile tools/docker - name: Start Container run: | diff --git a/.github/workflows/build_ci_images.yml b/.github/workflows/build_ci_images.yml index 3fa5bdac9..021cd6559 100644 --- a/.github/workflows/build_ci_images.yml +++ b/.github/workflows/build_ci_images.yml @@ -30,7 +30,11 @@ jobs: - name: Build Image run: | - docker build --pull --network=host -t paconvert-ci:cu118 -f tools/docker/Dockerfile tools/docker + docker build --pull --network=host \ + --build-arg http_proxy=$http_proxy \ + --build-arg https_proxy=$https_proxy \ + --build-arg no_proxy=".baidu.com,.bcebos.com,.tuna.tsinghua.edu.cn" \ + -t paconvert-ci:cu118 -f tools/docker/Dockerfile tools/docker - name: GPU Test run: | From a8b298e6cc6c768b13ecb9e8289dfc74f081d1a9 Mon Sep 17 00:00:00 2001 From: manfredss Date: Mon, 22 Jun 2026 09:15:13 +0000 Subject: [PATCH 24/43] remove tsinghua 
--- .github/workflows/Unittest_check.yml | 2 +- .github/workflows/build_ci_images.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/Unittest_check.yml b/.github/workflows/Unittest_check.yml index 02e578f28..fdb394b17 100644 --- a/.github/workflows/Unittest_check.yml +++ b/.github/workflows/Unittest_check.yml @@ -48,7 +48,7 @@ jobs: docker build --pull --network=host \ --build-arg http_proxy=$http_proxy \ --build-arg https_proxy=$https_proxy \ - --build-arg no_proxy=".baidu.com,.bcebos.com,.tuna.tsinghua.edu.cn" \ + --build-arg no_proxy=".baidu.com,.bcebos.com" \ -t paconvert-ci:cu118 -f tools/docker/Dockerfile tools/docker - name: Start Container diff --git a/.github/workflows/build_ci_images.yml b/.github/workflows/build_ci_images.yml index 021cd6559..ca03ba0b9 100644 --- a/.github/workflows/build_ci_images.yml +++ b/.github/workflows/build_ci_images.yml @@ -33,7 +33,7 @@ jobs: docker build --pull --network=host \ --build-arg http_proxy=$http_proxy \ --build-arg https_proxy=$https_proxy \ - --build-arg no_proxy=".baidu.com,.bcebos.com,.tuna.tsinghua.edu.cn" \ + --build-arg no_proxy=".baidu.com,.bcebos.com" \ -t paconvert-ci:cu118 -f tools/docker/Dockerfile tools/docker - name: GPU Test From d863553f66a8c06cb4000b519df2dec74a02d30d Mon Sep 17 00:00:00 2001 From: manfredss Date: Mon, 22 Jun 2026 09:51:09 +0000 Subject: [PATCH 25/43] add retries and fallback --- tools/docker/Dockerfile | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/docker/Dockerfile b/tools/docker/Dockerfile index 64a3bd6ed..878d8b2fe 100644 --- a/tools/docker/Dockerfile +++ b/tools/docker/Dockerfile @@ -3,8 +3,8 @@ FROM registry.baidubce.com/paddlepaddle/paddle:latest-dev-cuda11.8-cudnn8.6-trt8 ENV PIP_INDEX_URL=https://pypi.tuna.tsinghua.edu.cn/simple -RUN apt-get update && \ - apt-get install -y net-tools +RUN apt-get -o Acquire::Retries=5 update || true; \ + apt-get -o Acquire::Retries=5 install -y net-tools || true RUN 
ln -sf `which python3.10` /usr/bin/python @@ -25,9 +25,9 @@ RUN wget -q https://paddle-paconvert.bj.bcebos.com/torch-2.7.1+cu118-cp310-cp310 rm -f torch-2.7.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl \ torch-2.7.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl -RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.1-1_all.deb && \ +RUN wget --retries=5 --timeout=60 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.1-1_all.deb && \ dpkg -i cuda-keyring_1.1-1_all.deb && \ rm cuda-keyring_1.1-1_all.deb -RUN apt-get update && \ - apt-get install -y cudnn9-cuda-11 +RUN apt-get -o Acquire::Retries=5 update && \ + apt-get -o Acquire::Retries=5 install -y cudnn9-cuda-11 From a4847a89736a2f5df34d6b6d315dc1cc97a58c71 Mon Sep 17 00:00:00 2001 From: manfredss Date: Mon, 22 Jun 2026 11:08:28 +0000 Subject: [PATCH 26/43] add no proxy to exempt download bosbce --- tools/docker/Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/docker/Dockerfile b/tools/docker/Dockerfile index 878d8b2fe..610143577 100644 --- a/tools/docker/Dockerfile +++ b/tools/docker/Dockerfile @@ -17,8 +17,8 @@ RUN python -m pip install --no-cache-dir \ pytest-timeout \ pytest-rerunfailures -RUN wget -q https://paddle-paconvert.bj.bcebos.com/torch-2.7.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl \ - https://paddle-paconvert.bj.bcebos.com/torchvision-0.22.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl && \ +RUN wget --no-proxy -q https://paddle-paconvert.bj.bcebos.com/torch-2.7.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl \ + https://paddle-paconvert.bj.bcebos.com/torchvision-0.22.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl && \ python -m pip install --no-cache-dir \ torch-2.7.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl \ torchvision-0.22.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl && \ From 3ad52d360386ad7895bd5af95104dcc4fbcdb4ef Mon Sep 17 00:00:00 2001 From: manfredss Date: Mon, 22 
Jun 2026 11:28:23 +0000 Subject: [PATCH 27/43] adjust proxy setting --- .github/workflows/Unittest_check.yml | 2 +- tools/docker/Dockerfile | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/Unittest_check.yml b/.github/workflows/Unittest_check.yml index fdb394b17..0517ecaf6 100644 --- a/.github/workflows/Unittest_check.yml +++ b/.github/workflows/Unittest_check.yml @@ -48,7 +48,7 @@ jobs: docker build --pull --network=host \ --build-arg http_proxy=$http_proxy \ --build-arg https_proxy=$https_proxy \ - --build-arg no_proxy=".baidu.com,.bcebos.com" \ + # --build-arg no_proxy="localhost,127.0.0.1" \ -t paconvert-ci:cu118 -f tools/docker/Dockerfile tools/docker - name: Start Container diff --git a/tools/docker/Dockerfile b/tools/docker/Dockerfile index 610143577..20360b15d 100644 --- a/tools/docker/Dockerfile +++ b/tools/docker/Dockerfile @@ -17,15 +17,15 @@ RUN python -m pip install --no-cache-dir \ pytest-timeout \ pytest-rerunfailures -RUN wget --no-proxy -q https://paddle-paconvert.bj.bcebos.com/torch-2.7.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl \ - https://paddle-paconvert.bj.bcebos.com/torchvision-0.22.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl && \ +RUN wget -q https://paddle-paconvert.bj.bcebos.com/torch-2.7.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl \ + https://paddle-paconvert.bj.bcebos.com/torchvision-0.22.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl && \ python -m pip install --no-cache-dir \ torch-2.7.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl \ torchvision-0.22.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl && \ rm -f torch-2.7.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl \ torch-2.7.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl -RUN wget --retries=5 --timeout=60 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.1-1_all.deb && \ +RUN wget --tries=5 --timeout=60 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.1-1_all.deb 
&& \ dpkg -i cuda-keyring_1.1-1_all.deb && \ rm cuda-keyring_1.1-1_all.deb From 128f56ee9d612eb1d8ff6b28409db5eea3bf3234 Mon Sep 17 00:00:00 2001 From: manfredss Date: Mon, 22 Jun 2026 11:33:38 +0000 Subject: [PATCH 28/43] fix --- .github/workflows/Unittest_check.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/Unittest_check.yml b/.github/workflows/Unittest_check.yml index 0517ecaf6..9fd90ee23 100644 --- a/.github/workflows/Unittest_check.yml +++ b/.github/workflows/Unittest_check.yml @@ -48,7 +48,7 @@ jobs: docker build --pull --network=host \ --build-arg http_proxy=$http_proxy \ --build-arg https_proxy=$https_proxy \ - # --build-arg no_proxy="localhost,127.0.0.1" \ + --build-arg no_proxy="localhost,127.0.0.1" \ -t paconvert-ci:cu118 -f tools/docker/Dockerfile tools/docker - name: Start Container From 7c6a2d2548f32cb0085e44776e46e2e2e744876c Mon Sep 17 00:00:00 2001 From: manfredss Date: Tue, 23 Jun 2026 01:49:52 +0000 Subject: [PATCH 29/43] pass the update --- tools/docker/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/docker/Dockerfile b/tools/docker/Dockerfile index 20360b15d..ca109dfbe 100644 --- a/tools/docker/Dockerfile +++ b/tools/docker/Dockerfile @@ -29,5 +29,5 @@ RUN wget --tries=5 --timeout=60 https://developer.download.nvidia.com/compute/cu dpkg -i cuda-keyring_1.1-1_all.deb && \ rm cuda-keyring_1.1-1_all.deb -RUN apt-get -o Acquire::Retries=5 update && \ +RUN apt-get -o Acquire::Retries=5 update || true; \ apt-get -o Acquire::Retries=5 install -y cudnn9-cuda-11 From 590df4dd328d8811e994f0e31507459347870b34 Mon Sep 17 00:00:00 2001 From: manfredss Date: Tue, 23 Jun 2026 03:52:23 +0000 Subject: [PATCH 30/43] run the cpu test --- .github/workflows/Unittest_check.yml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/.github/workflows/Unittest_check.yml b/.github/workflows/Unittest_check.yml index 9fd90ee23..5b8aefc8e 100644 --- 
a/.github/workflows/Unittest_check.yml +++ b/.github/workflows/Unittest_check.yml @@ -57,12 +57,30 @@ jobs: echo "container_name=${container_name}" >> "$GITHUB_ENV" docker run -d --name "${container_name}" \ -e NVIDIA_VISIBLE_DEVICES=void \ + -e http_proxy=$http_proxy \ + -e https_proxy=$https_proxy \ + -e no_proxy="localhost,127.0.0.1" \ --network host \ -v /dev/shm:/dev/shm \ -v "${{ github.workspace }}:/ws" \ -w /ws \ paconvert-ci:cu118 sleep infinity + - name: Run CI Unittest + shell: bash + run: docker exec "$container_name" bash /scripts/ci/run_cpu_unittest.sh + + - name: Upload logs on failing + if: failure() + uses: actions/upload-artifact@v4 + with: + name: ci-unittest-log-${{ github.run_id }} + path: | + pytest.log + tests/**/pytest.log + if-no-files-found: ignore + retention-days: 14 + - name: Cleanup Container if: always() run: | From e6737a1b0f84b05c00442bfd42821c7ed4649671 Mon Sep 17 00:00:00 2001 From: manfredss Date: Tue, 23 Jun 2026 04:00:21 +0000 Subject: [PATCH 31/43] fix dir --- .github/workflows/Unittest_check.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/Unittest_check.yml b/.github/workflows/Unittest_check.yml index 5b8aefc8e..ec3394cb6 100644 --- a/.github/workflows/Unittest_check.yml +++ b/.github/workflows/Unittest_check.yml @@ -68,7 +68,7 @@ jobs: - name: Run CI Unittest shell: bash - run: docker exec "$container_name" bash /scripts/ci/run_cpu_unittest.sh + run: docker exec "$container_name" bash scripts/ci/run_cpu_unittest.sh - name: Upload logs on failing if: failure() From ae92ec08436a5c581174b60fa76868e9e75774f3 Mon Sep 17 00:00:00 2001 From: manfredss Date: Tue, 23 Jun 2026 04:56:35 +0000 Subject: [PATCH 32/43] fix cmd --- scripts/ci/run_cpu_unittest.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/ci/run_cpu_unittest.sh b/scripts/ci/run_cpu_unittest.sh index 4a8e45089..62a320238 100644 --- a/scripts/ci/run_cpu_unittest.sh +++ b/scripts/ci/run_cpu_unittest.sh @@ 
-20,7 +20,7 @@ echo "Installing develop CPU version paddle" python -m pip uninstall -y paddlepaddle paddlepaddle-gpu || true python -m pip install --force-reinstall --no-cache-dir -U --pre paddlepaddle \ -i https://www.paddlepaddle.org.cn/packages/nightly/cpu/ \ - -extra-index-url https://pypi.org/simple \ + --extra-index-url https://pypi.org/simple \ --timeout 120 --retries 3 python -c "import paddle; print('paddle version: ', paddle.__version__); print('paddle commit info: ', paddle.__git_commit__)" From edac24d4c92cfce5b6c7a34c0accaa13d51a664a Mon Sep 17 00:00:00 2001 From: manfredss Date: Tue, 23 Jun 2026 05:52:58 +0000 Subject: [PATCH 33/43] add tsinghua --- scripts/ci/run_cpu_unittest.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/ci/run_cpu_unittest.sh b/scripts/ci/run_cpu_unittest.sh index 62a320238..fe807eec5 100644 --- a/scripts/ci/run_cpu_unittest.sh +++ b/scripts/ci/run_cpu_unittest.sh @@ -20,7 +20,7 @@ echo "Installing develop CPU version paddle" python -m pip uninstall -y paddlepaddle paddlepaddle-gpu || true python -m pip install --force-reinstall --no-cache-dir -U --pre paddlepaddle \ -i https://www.paddlepaddle.org.cn/packages/nightly/cpu/ \ - --extra-index-url https://pypi.org/simple \ + --extra-index-url https://pypi.tuna.tsinghua.edu.cn/simple \ --timeout 120 --retries 3 python -c "import paddle; print('paddle version: ', paddle.__version__); print('paddle commit info: ', paddle.__git_commit__)" From 64bd74fee061f3adab14b399af2e0d9ec43964a0 Mon Sep 17 00:00:00 2001 From: manfredss Date: Tue, 23 Jun 2026 08:10:49 +0000 Subject: [PATCH 34/43] add cpu docker, fix cpu CI --- .github/workflows/Unittest_check.yml | 6 +++--- scripts/ci/run_cpu_unittest.sh | 4 ++-- tools/docker/Dockerfile_CPU | 22 ++++++++++++++++++++++ 3 files changed, 27 insertions(+), 5 deletions(-) create mode 100644 tools/docker/Dockerfile_CPU diff --git a/.github/workflows/Unittest_check.yml b/.github/workflows/Unittest_check.yml index 
ec3394cb6..ab88cab98 100644 --- a/.github/workflows/Unittest_check.yml +++ b/.github/workflows/Unittest_check.yml @@ -44,12 +44,12 @@ jobs: - name: Ensure CI image Exists run: | - docker image inspect paconvert-ci:cu118 >/dev/null 2>&1 || \ + docker image inspect paconvert-ci:cpu >/dev/null 2>&1 || \ docker build --pull --network=host \ --build-arg http_proxy=$http_proxy \ --build-arg https_proxy=$https_proxy \ --build-arg no_proxy="localhost,127.0.0.1" \ - -t paconvert-ci:cu118 -f tools/docker/Dockerfile tools/docker + -t paconvert-ci:cpu -f tools/docker/Dockerfile_CPU tools/docker - name: Start Container run: | @@ -64,7 +64,7 @@ jobs: -v /dev/shm:/dev/shm \ -v "${{ github.workspace }}:/ws" \ -w /ws \ - paconvert-ci:cu118 sleep infinity + paconvert-ci:cpu sleep infinity - name: Run CI Unittest shell: bash diff --git a/scripts/ci/run_cpu_unittest.sh b/scripts/ci/run_cpu_unittest.sh index fe807eec5..508ca19cc 100644 --- a/scripts/ci/run_cpu_unittest.sh +++ b/scripts/ci/run_cpu_unittest.sh @@ -41,12 +41,12 @@ python -m pytest -v -s -p no:warnings --reruns=3 ./tests 2>&1 | tee pytest.log check_errors=${PIPESTATUS[0]} if [ ${check_errors} -ne 0 ]; then echo "Rerun CPU unit test" - python -m pytest -v -s -p no:warnings ./tests 2>&1 | tee -a pytest.log + python -m pytest -v -s -p no:warnings --lf ./tests 2>&1 | tee -a pytest.log check_errors=${PIPESTATUS[0]} fi echo '******************************************************************************' -if [ ${check_errors} -eq 0 ]; then +if [ ${check_errors} -ne 0 ]; then echo "Your PR code CPU unit test check FAILED" echo "Please run the following command:" echo "" diff --git a/tools/docker/Dockerfile_CPU b/tools/docker/Dockerfile_CPU new file mode 100644 index 000000000..b5a2db228 --- /dev/null +++ b/tools/docker/Dockerfile_CPU @@ -0,0 +1,22 @@ +# A image for testing PaConvert +FROM registry.baidubce.com/paddlepaddle/paddle:latest-dev-cuda11.8-cudnn8.6-trt8.5-gcc82 + +ENV 
PIP_INDEX_URL=https://pypi.tuna.tsinghua.edu.cn/simple + +RUN apt-get -o Acquire::Retries=5 update || true; \ + apt-get -o Acquire::Retries=5 install -y net-tools || true + +RUN ln -sf `which python3.10` /usr/bin/python + +RUN ln -sf `which pip3.10` /usr/local/bin/pip + +RUN python -m pip install --no-cache-dir \ + astor \ + pandas \ + openpyxl \ + pytest-timeout \ + pytest-rerunfailures + +RUN python -m pip install --no-cache-dir \ + torch==2.7.1 torchvision==0.22.1 \ + --index-url https://download.pytorch.org/whl/cpu From 0c8091b922e6b04e0fd6fc81f09cefc170a7c04a Mon Sep 17 00:00:00 2001 From: manfredss Date: Tue, 23 Jun 2026 08:34:52 +0000 Subject: [PATCH 35/43] download from aliyun --- tools/docker/Dockerfile_CPU | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/docker/Dockerfile_CPU b/tools/docker/Dockerfile_CPU index b5a2db228..07f0a7c4e 100644 --- a/tools/docker/Dockerfile_CPU +++ b/tools/docker/Dockerfile_CPU @@ -19,4 +19,4 @@ RUN python -m pip install --no-cache-dir \ RUN python -m pip install --no-cache-dir \ torch==2.7.1 torchvision==0.22.1 \ - --index-url https://download.pytorch.org/whl/cpu + --index-url https://mirrors.aliyun.com/pytorch-wheels/cpu From fda60be530ead185ed4b4a5d38193b58c7562f8e Mon Sep 17 00:00:00 2001 From: manfredss Date: Tue, 23 Jun 2026 09:23:08 +0000 Subject: [PATCH 36/43] implement full CI --- .github/workflows/Modeltest_check.yml | 192 +++++++++------- .github/workflows/Unittest_check.yml | 6 +- .../workflows/Unittest_check_distribute.yml | 206 +++++++++++------- .github/workflows/Unittest_check_gpu.yml | 166 ++++++++------ scripts/ci/run_cpu_unittest.sh | 12 +- scripts/ci/run_distribute.sh | 74 +++++++ scripts/ci/run_gpu_unittest.sh | 64 ++++++ scripts/ci/run_modeltest.sh | 81 +++++++ tools/docker/Dockerfile_CPU | 22 -- 9 files changed, 576 insertions(+), 247 deletions(-) create mode 100644 scripts/ci/run_distribute.sh create mode 100644 scripts/ci/run_gpu_unittest.sh create mode 100644 
scripts/ci/run_modeltest.sh delete mode 100644 tools/docker/Dockerfile_CPU diff --git a/.github/workflows/Modeltest_check.yml b/.github/workflows/Modeltest_check.yml index b7408f405..bd4bfbe22 100644 --- a/.github/workflows/Modeltest_check.yml +++ b/.github/workflows/Modeltest_check.yml @@ -45,84 +45,27 @@ jobs: with: fetch-depth: 1 - - name: Setup Python - uses: actions/setup-python@v5 - with: - python-version: '3.10' - cache: pip - cache-dependency-path: | - requirements.txt - tests/requirements.txt - - - name: Install dependencies + - name: Ensure CI image Exists run: | - python -m pip install --upgrade pip - python -m pip install -U torch --index-url https://download.pytorch.org/whl/cpu - python -m pip uninstall -y paddlepaddle paddlepaddle-gpu - - # Detect current Python ABI tag (e.g. cp310) to avoid installing a mismatched wheel - PY_ABI=$(python -c "import sys; print('cp{}{}'.format(*sys.version_info[:2]))") - echo "Current Python ABI: $PY_ABI" - - if ls paddlepaddle-0.0.0-*.whl >/dev/null 2>&1; then - # Validate the local wheel matches the current Python version before installing - MATCHED_WHL=$(ls paddlepaddle-0.0.0-*${PY_ABI}*.whl 2>/dev/null | head -1) - if [ -z "$MATCHED_WHL" ]; then - echo "WARNING: local wheel found but none matches ABI $PY_ABI; falling back to nightly." 
- python -m pip install --force-reinstall --no-cache-dir --pre paddlepaddle \ - -i https://www.paddlepaddle.org.cn/packages/nightly/cpu/ \ - --extra-index-url https://pypi.org/simple \ - --timeout 120 --retries 3 - else - echo "Installing local wheel: $MATCHED_WHL" - python -m pip install "$MATCHED_WHL" - fi - else - # No local wheel — install from nightly with retry to reduce transient failures - python -m pip install --force-reinstall --no-cache-dir --pre paddlepaddle \ - -i https://www.paddlepaddle.org.cn/packages/nightly/cpu/ \ - --timeout 120 --retries 3 - fi - python -m pip install -r requirements.txt - python -m pip install pandas openpyxl - python -c "import torch; print('torch version information:', torch.__version__)" - python -c "import paddle; print('paddle version information:', paddle.__version__); print('paddle commit information:', paddle.__git_commit__)" + docker image inspect paconvert-ci:cu118 >/dev/null 2>&1 || \ + docker build --pull -t paconvert-ci:cu118 -f tools/docker/Dockerfile tools/docker - - name: Run code set convert check - shell: bash + - name: Start Container run: | - if [ ! -d "$TORCH_PROJECT_PATH" ]; then - echo "$TORCH_PROJECT_PATH does not exist. Please prepare the model code set on the self-hosted runner." - exit 1 - fi - - shopt -s nullglob - projects=("$TORCH_PROJECT_PATH"/*) - if [ ${#projects[@]} -eq 0 ]; then - echo "$TORCH_PROJECT_PATH is empty. Please prepare the model code set on the self-hosted runner." - exit 1 - fi - - failed_projects=() - for project in "${projects[@]}"; do - if [ -d "$project" ]; then - project_name=$(basename "$project") - echo "[code-set-convert] Converting project: $project_name" - if ! 
python paconvert/main.py --in_dir "$project" --show_unsupport_api --calculate_speed; then - failed_projects+=("$project_name") - fi - fi - done - - if [ ${#failed_projects[@]} -ne 0 ]; then - printf '%s\n' "${failed_projects[@]}" > failed_projects.txt - echo "[code-set-convert] The following projects failed to convert:" - cat failed_projects.txt - exit 1 - fi - - - name: Run modeltest - run: python tools/modeltest/modeltest_check.py + container_name="paconvert-modeltest-${{ github.run_id }}-${{ github.run_attempt }}" + echo "container_name=${container_name}" >> "$GITHUB_ENV" + docker run -d --name "${container_name}" \ + -e NVIDIA_VISIBLE_DEVICES=void \ + -e TORCH_PROJECT_PATH="${TORCH_PROEJCT_PATH}" \ + --network host \ + -v /dev/shm:/dev/shm \ + -v "${{ github.workspace }}:/ws" \ + -v "${TORCH_PROJECT_PATH}:${TORCH_PROJECT_PATH}:ro" \ + -w /ws \ + paconvert-ci:cu118 sleep infinity + + - name: Run Modeltest + run: docker exec "${container_name}" bash scripts/ci/run_modeltest.sh - name: Upload modeltest logs on failure if: failure() @@ -134,3 +77,102 @@ jobs: tests/code_library/model_case/**/convert_paddle_code* if-no-files-found: ignore retention-days: 14 + + - name: Cleanup Container + if: always() + run: | + if [ -n "${container_name:-}" ]; then + docker exec "${container_name}" chown -R "$(id -u):$(id -g)" /ws 2>/dev/null || true + docker rm -f "${container_name}" || true + fi + + + # - name: Setup Python + # uses: actions/setup-python@v5 + # with: + # python-version: '3.10' + # cache: pip + # cache-dependency-path: | + # requirements.txt + # tests/requirements.txt + + # - name: Install dependencies + # run: | + # python -m pip install --upgrade pip + # python -m pip install -U torch --index-url https://download.pytorch.org/whl/cpu + # python -m pip uninstall -y paddlepaddle paddlepaddle-gpu + + # # Detect current Python ABI tag (e.g. 
cp310) to avoid installing a mismatched wheel + # PY_ABI=$(python -c "import sys; print('cp{}{}'.format(*sys.version_info[:2]))") + # echo "Current Python ABI: $PY_ABI" + + # if ls paddlepaddle-0.0.0-*.whl >/dev/null 2>&1; then + # # Validate the local wheel matches the current Python version before installing + # MATCHED_WHL=$(ls paddlepaddle-0.0.0-*${PY_ABI}*.whl 2>/dev/null | head -1) + # if [ -z "$MATCHED_WHL" ]; then + # echo "WARNING: local wheel found but none matches ABI $PY_ABI; falling back to nightly." + # python -m pip install --force-reinstall --no-cache-dir --pre paddlepaddle \ + # -i https://www.paddlepaddle.org.cn/packages/nightly/cpu/ \ + # --extra-index-url https://pypi.org/simple \ + # --timeout 120 --retries 3 + # else + # echo "Installing local wheel: $MATCHED_WHL" + # python -m pip install "$MATCHED_WHL" + # fi + # else + # # No local wheel — install from nightly with retry to reduce transient failures + # python -m pip install --force-reinstall --no-cache-dir --pre paddlepaddle \ + # -i https://www.paddlepaddle.org.cn/packages/nightly/cpu/ \ + # --timeout 120 --retries 3 + # fi + # python -m pip install -r requirements.txt + # python -m pip install pandas openpyxl + # python -c "import torch; print('torch version information:', torch.__version__)" + # python -c "import paddle; print('paddle version information:', paddle.__version__); print('paddle commit information:', paddle.__git_commit__)" + + # - name: Run code set convert check + # shell: bash + # run: | + # if [ ! -d "$TORCH_PROJECT_PATH" ]; then + # echo "$TORCH_PROJECT_PATH does not exist. Please prepare the model code set on the self-hosted runner." + # exit 1 + # fi + + # shopt -s nullglob + # projects=("$TORCH_PROJECT_PATH"/*) + # if [ ${#projects[@]} -eq 0 ]; then + # echo "$TORCH_PROJECT_PATH is empty. Please prepare the model code set on the self-hosted runner." 
+ # exit 1 + # fi + + # failed_projects=() + # for project in "${projects[@]}"; do + # if [ -d "$project" ]; then + # project_name=$(basename "$project") + # echo "[code-set-convert] Converting project: $project_name" + # if ! python paconvert/main.py --in_dir "$project" --show_unsupport_api --calculate_speed; then + # failed_projects+=("$project_name") + # fi + # fi + # done + + # if [ ${#failed_projects[@]} -ne 0 ]; then + # printf '%s\n' "${failed_projects[@]}" > failed_projects.txt + # echo "[code-set-convert] The following projects failed to convert:" + # cat failed_projects.txt + # exit 1 + # fi + + # - name: Run modeltest + # run: python tools/modeltest/modeltest_check.py + + # - name: Upload modeltest logs on failure + # if: failure() + # uses: actions/upload-artifact@v4 + # with: + # name: modeltest-log-${{ github.run_id }} + # path: | + # failed_projects.txt + # tests/code_library/model_case/**/convert_paddle_code* + # if-no-files-found: ignore + # retention-days: 14 diff --git a/.github/workflows/Unittest_check.yml b/.github/workflows/Unittest_check.yml index ab88cab98..ec3394cb6 100644 --- a/.github/workflows/Unittest_check.yml +++ b/.github/workflows/Unittest_check.yml @@ -44,12 +44,12 @@ jobs: - name: Ensure CI image Exists run: | - docker image inspect paconvert-ci:cpu >/dev/null 2>&1 || \ + docker image inspect paconvert-ci:cu118 >/dev/null 2>&1 || \ docker build --pull --network=host \ --build-arg http_proxy=$http_proxy \ --build-arg https_proxy=$https_proxy \ --build-arg no_proxy="localhost,127.0.0.1" \ - -t paconvert-ci:cpu -f tools/docker/Dockerfile_CPU tools/docker + -t paconvert-ci:cu118 -f tools/docker/Dockerfile tools/docker - name: Start Container run: | @@ -64,7 +64,7 @@ jobs: -v /dev/shm:/dev/shm \ -v "${{ github.workspace }}:/ws" \ -w /ws \ - paconvert-ci:cpu sleep infinity + paconvert-ci:cu118 sleep infinity - name: Run CI Unittest shell: bash diff --git a/.github/workflows/Unittest_check_distribute.yml 
b/.github/workflows/Unittest_check_distribute.yml index df9cfacaa..4bb8d98a7 100644 --- a/.github/workflows/Unittest_check_distribute.yml +++ b/.github/workflows/Unittest_check_distribute.yml @@ -35,6 +35,9 @@ jobs: runs-on: group: PaConvert timeout-minutes: 120 + concurrency: + group: paconvert-gpu-runner + cancel-in-progress: false steps: - name: Checkout repository @@ -42,6 +45,53 @@ jobs: with: fetch-depth: 0 + - name: Ensure CI image Exists + run: | + docker image inspect paconvert-ci:cu118 >/dev/null 2>&1 || \ + docker build --pull -t paconvert-ci:cu118 -f tools/docker/Dockerfile tools/docker + + - name: Free port + run: | + ss -ltnp 2>/dev/null | grep ':29500' | grep -oP 'pid=\K[0-9]+' | xargs -r kill -9 2>/dev/null || true + + - name: Start Container + run: | + container_name="paconvert-distribute-${{ github.run_id }}-${{ github.run_attempt }}" + echo "container_name=${container_name}" >> "$GITHUB_ENV" + docker run -d --name "${container_name}" \ + --gpus all \ + --network host \ + --ipc host \ + -e CUDA_VISIBLE_DEVICES=0,1 \ + -v /dev/shm:/dev/shm \ + -v "${{ github.workspace }}:/ws" \ + -w /ws \ + paconvert-ci:cu118 sleep infinity + + - name: Run Distributed Unittest + run: | + docker exec "${container_name}" bash scripts/ci/run_distributed.sh + + - name: Upload logs on failure + if: failure() + uses: actions/upload-artifact@v4 + with: + name: distributed-unittest-log-${{ github.run_id }} + path: | + tests/distributed/failed_tests.txt + tests/distributed/*.log + paddle_dist/** + if-no-files-found: ignore + retention-days: 14 + + - name: Cleanup Container + if: always() + run: | + if [ -n "${container_name:-}" ]; then + docker exec "${container_name}" chown -R "$(id -u):$(id -g)" /ws 2>/dev/null || true + docker rm -f "${container_name}" || true + fi + # - name: Setup Python # uses: actions/setup-python@v5 # with: @@ -51,10 +101,10 @@ jobs: # requirements.txt # tests/requirements.txt - - name: Check Env - run: | - python3 --version - pwd + # - name: Check 
Env + # run: | + # python3 --version + # pwd # - name: Download GPU dependencies # working-directory: tests/distributed @@ -76,80 +126,80 @@ jobs: # --timeout 120 --retries 3 # python3 -c "import torch; print('torch version information:', torch.__version__)" - - name: Install Latest develop GPU Version Paddle - run: | - python3 -m pip uninstall -y paddlepaddle - python3 -m pip uninstall -y paddlepaddle-gpu - python3 -m pip install --force-reinstall --no-cache-dir -U --pre paddlepaddle-gpu \ - -i https://www.paddlepaddle.org.cn/packages/nightly/cu118/ \ - --extra-index-url https://pypi.org/simple \ - --timeout 120 --retries 3 - python3 -m pip install safetensors==0.6.2 - python3 -c "import paddle; print('paddle version information:', paddle.__version__); print('paddle commit information:', paddle.__git_commit__)" - - - name: Install paconvert requirements - run: | - python3 -m pip install -r requirements.txt - - - name: Convert torch code to paddle - working-directory: tests/distributed - run: | - python3 ../../paconvert/main.py -i . 
-o /tmp/paddle_dist --log_level "DEBUG" + # - name: Install Latest develop GPU Version Paddle + # run: | + # python3 -m pip uninstall -y paddlepaddle + # python3 -m pip uninstall -y paddlepaddle-gpu + # python3 -m pip install --force-reinstall --no-cache-dir -U --pre paddlepaddle-gpu \ + # -i https://www.paddlepaddle.org.cn/packages/nightly/cu118/ \ + # --extra-index-url https://pypi.org/simple \ + # --timeout 120 --retries 3 + # python3 -m pip install safetensors==0.6.2 + # python3 -c "import paddle; print('paddle version information:', paddle.__version__); print('paddle commit information:', paddle.__git_commit__)" - - name: Run distributed unit tests - working-directory: tests/distributed - shell: bash - env: - CUDA_VISIBLE_DEVICES: "0,1" - run: | - # Free port 29500 if any leftover process is occupying it - netstat -tulnp 2>/dev/null | grep ':29500' | awk '{print $7}' | cut -d/ -f1 | xargs -r kill -9 2>/dev/null || true - - # Disable errexit so every test runs and all failures are collected - # Instead of aborting on the first failing test. - set +e - - check_error=0 - failed_tests=() - test_list=$(ls *.py | grep -v run_and_compare.py) - for item in $test_list; do - cmd1="torchrun --nproc_per_node=2 ${item}" - cmd2="python3 -m paddle.distributed.launch /tmp/paddle_dist/${item}" - python3 run_and_compare.py "$cmd1" "$cmd2" - tmp_check_error=$? 
- if [ $tmp_check_error -ne 0 ]; then - check_error=1 - failed_tests+=("$item") - fi - done - - echo '************************************************************************************************************' - if [ ${#failed_tests[@]} -ne 0 ]; then - printf '%s\n' "${failed_tests[@]}" > failed_tests.txt - echo "Your PR code distributed unittest check FAILED" - echo "The following distributed tests failed:" - cat failed_tests.txt - echo "Please run the following command:" - echo "" - echo " cd tests/distributed && bash unittest_check_distribute.sh" - echo "" - echo "For more information, please refer to our check guide:" - echo "https://github.com/PaddlePaddle/PaConvert#readme" - else - echo "Your PR code distributed unit test check passed." - fi - echo '************************************************************************************************************' + # - name: Install paconvert requirements + # run: | + # python3 -m pip install -r requirements.txt - exit ${check_error} + # - name: Convert torch code to paddle + # working-directory: tests/distributed + # run: | + # python3 ../../paconvert/main.py -i . -o /tmp/paddle_dist --log_level "DEBUG" - - name: Upload logs on failure - if: failure() - uses: actions/upload-artifact@v4 - with: - name: distributed-unittest-log-${{ github.run_id }} - path: | - tests/distributed/failed_tests.txt - tests/distributed/*.log - /tmp/paddle_dist/** - if-no-files-found: ignore - retention-days: 14 + # - name: Run distributed unit tests + # working-directory: tests/distributed + # shell: bash + # env: + # CUDA_VISIBLE_DEVICES: "0,1" + # run: | + # # Free port 29500 if any leftover process is occupying it + # netstat -tulnp 2>/dev/null | grep ':29500' | awk '{print $7}' | cut -d/ -f1 | xargs -r kill -9 2>/dev/null || true + + # # Disable errexit so every test runs and all failures are collected + # # Instead of aborting on the first failing test. 
+ # set +e + + # check_error=0 + # failed_tests=() + # test_list=$(ls *.py | grep -v run_and_compare.py) + # for item in $test_list; do + # cmd1="torchrun --nproc_per_node=2 ${item}" + # cmd2="python3 -m paddle.distributed.launch /tmp/paddle_dist/${item}" + # python3 run_and_compare.py "$cmd1" "$cmd2" + # tmp_check_error=$? + # if [ $tmp_check_error -ne 0 ]; then + # check_error=1 + # failed_tests+=("$item") + # fi + # done + + # echo '************************************************************************************************************' + # if [ ${#failed_tests[@]} -ne 0 ]; then + # printf '%s\n' "${failed_tests[@]}" > failed_tests.txt + # echo "Your PR code distributed unittest check FAILED" + # echo "The following distributed tests failed:" + # cat failed_tests.txt + # echo "Please run the following command:" + # echo "" + # echo " cd tests/distributed && bash unittest_check_distribute.sh" + # echo "" + # echo "For more information, please refer to our check guide:" + # echo "https://github.com/PaddlePaddle/PaConvert#readme" + # else + # echo "Your PR code distributed unit test check passed." 
+ # fi + # echo '************************************************************************************************************' + + # exit ${check_error} + + # - name: Upload logs on failure + # if: failure() + # uses: actions/upload-artifact@v4 + # with: + # name: distributed-unittest-log-${{ github.run_id }} + # path: | + # tests/distributed/failed_tests.txt + # tests/distributed/*.log + # /tmp/paddle_dist/** + # if-no-files-found: ignore + # retention-days: 14 diff --git a/.github/workflows/Unittest_check_gpu.yml b/.github/workflows/Unittest_check_gpu.yml index cc9cf0166..82b4cf4c6 100644 --- a/.github/workflows/Unittest_check_gpu.yml +++ b/.github/workflows/Unittest_check_gpu.yml @@ -35,6 +35,9 @@ jobs: runs-on: group: PaConvert timeout-minutes: 120 + concurrency: + group: paconvert-gpu-runner + cancel-in-progress: false steps: - name: Checkout repository @@ -42,6 +45,35 @@ jobs: with: fetch-depth: 0 + - name: Ensure CI image Exists + run: | + docker image inspect paconvert-ci:cu118 >/dev/null 2>&1 || \ + docker build --pull -t paconvert-ci:cu118 -f tools/docker/Dockerfile tools/docker + + - name: Start Container + run: | + container_name="paconvert-gpu-unittest-${{ github.run_id }}-${{ github.run_attempt }}" + echo "container_name=${container_name}" >> "$GITHUB_ENV" + docker run -d --name "${container_name}" \ + --gpus all \ + --network host \ + -v /dev/shm:/dev/shm \ + -v "${{ github.workspace }}:/ws" \ + -w /ws \ + paconvert-ci:cu118 sleep infinity + + - name: Run GPU Unittest + run: | + docker exec "$container_name" bash scripts/ci/run_gpu_unittest.sh + + - name: Cleanup Container + if: always() + run: | + if [ -n "${container_name:-}" ]; then + docker exec "${container_name}" chown -R "$(id -u):$(id -g)" /ws 2>/dev/null || true + docker rm -f "${container_name}" || true + fi + # - name: Setup Python # uses: actions/setup-python@v5 # with: @@ -51,10 +83,10 @@ jobs: # requirements.txt # tests/requirements.txt - - name: Check Env - run: | - python3 --version - 
pwd + # - name: Check Env + # run: | + # python3 --version + # pwd # - name: Download GPU dependencies # run: | @@ -79,68 +111,68 @@ jobs: # --timeout 120 --retries 3 # python3 -c "import torch; print('torch version information:', torch.__version__)" - - name: Install Latest develop GPU Version Paddle - run: | - python3 -m pip uninstall -y paddlepaddle - python3 -m pip uninstall -y paddlepaddle-gpu - NO_PROXY='*' no_proxy='*' python3 -m pip install --force-reinstall --no-cache-dir -U --pre paddlepaddle-gpu \ - -i https://www.paddlepaddle.org.cn/packages/nightly/cu118/ \ - --extra-index-url https://pypi.tuna.tsinghua.edu.cn/simple \ - --timeout 120 --retries 3 - NO_PROXY='*' no_proxy='*' python3 -m pip install safetensors==0.6.2 \ - -i https://pypi.tuna.tsinghua.edu.cn/simple - python3 -c "import paddle; print('paddle version information:', paddle.__version__); print('paddle commit information:', paddle.__git_commit__)" - - - name: Install paconvert requirements - run: | - python3 -m pip install -r requirements.txt - if [ -f tests/requirements.txt ]; then - python3 -m pip install -r tests/requirements.txt - fi - - - name: Run GPU Unittest - shell: bash - run: | - python3 -m pip install pytest-timeout pytest-xdist pytest-rerunfailures - - # Disable errexit so a failing run is captured instead of aborting immediately - # A passing retry can clear the error. - set +e - - echo "Checking code gpu unit test by pytest ..." - # tee output to pytest.log so it can be uploaded as an artifact on failure; - # PIPESTATUS[0] preserves pytest's exit code instead of tee's. - python3 -m pytest -v -s -p no:warnings --reruns=3 ./tests 2>&1 | tee pytest.log - check_error=${PIPESTATUS[0]} - if [ ${check_error} -ne 0 ]; then - echo "Rerun gpu unit test check." 
- python3 -m pytest -v -s -p no:warnings --lf ./tests 2>&1 | tee -a pytest.log - check_error=${PIPESTATUS[0]} - fi - - echo '************************************************************************************************************' - if [ ${check_error} -ne 0 ]; then - echo "Your PR code gpu unit test check failed." - echo "Please run the following command:" - echo "" - echo " python3 -m pytest tests" - echo "" - echo "For more information, please refer to our check guide:" - echo "https://github.com/PaddlePaddle/PaConvert#readme." - else - echo "Your PR code gpu unit test check passed." - fi - echo '************************************************************************************************************' + # - name: Install Latest develop GPU Version Paddle + # run: | + # python3 -m pip uninstall -y paddlepaddle + # python3 -m pip uninstall -y paddlepaddle-gpu + # NO_PROXY='*' no_proxy='*' python3 -m pip install --force-reinstall --no-cache-dir -U --pre paddlepaddle-gpu \ + # -i https://www.paddlepaddle.org.cn/packages/nightly/cu118/ \ + # --extra-index-url https://pypi.tuna.tsinghua.edu.cn/simple \ + # --timeout 120 --retries 3 + # NO_PROXY='*' no_proxy='*' python3 -m pip install safetensors==0.6.2 \ + # -i https://pypi.tuna.tsinghua.edu.cn/simple + # python3 -c "import paddle; print('paddle version information:', paddle.__version__); print('paddle commit information:', paddle.__git_commit__)" - exit ${check_error} + # - name: Install paconvert requirements + # run: | + # python3 -m pip install -r requirements.txt + # if [ -f tests/requirements.txt ]; then + # python3 -m pip install -r tests/requirements.txt + # fi - - name: Upload logs on failure - if: failure() - uses: actions/upload-artifact@v4 - with: - name: gpu-unittest-log-${{ github.run_id }} - path: | - pytest.log - tests/**/pytest.log - if-no-files-found: ignore - retention-days: 14 + # - name: Run GPU Unittest + # shell: bash + # run: | + # python3 -m pip install pytest-timeout pytest-xdist 
pytest-rerunfailures + + # # Disable errexit so a failing run is captured instead of aborting immediately + # # A passing retry can clear the error. + # set +e + + # echo "Checking code gpu unit test by pytest ..." + # # tee output to pytest.log so it can be uploaded as an artifact on failure; + # # PIPESTATUS[0] preserves pytest's exit code instead of tee's. + # python3 -m pytest -v -s -p no:warnings --reruns=3 ./tests 2>&1 | tee pytest.log + # check_error=${PIPESTATUS[0]} + # if [ ${check_error} -ne 0 ]; then + # echo "Rerun gpu unit test check." + # python3 -m pytest -v -s -p no:warnings --lf ./tests 2>&1 | tee -a pytest.log + # check_error=${PIPESTATUS[0]} + # fi + + # echo '************************************************************************************************************' + # if [ ${check_error} -ne 0 ]; then + # echo "Your PR code gpu unit test check failed." + # echo "Please run the following command:" + # echo "" + # echo " python3 -m pytest tests" + # echo "" + # echo "For more information, please refer to our check guide:" + # echo "https://github.com/PaddlePaddle/PaConvert#readme." + # else + # echo "Your PR code gpu unit test check passed." 
+ # fi + # echo '************************************************************************************************************' + + # exit ${check_error} + + # - name: Upload logs on failure + # if: failure() + # uses: actions/upload-artifact@v4 + # with: + # name: gpu-unittest-log-${{ github.run_id }} + # path: | + # pytest.log + # tests/**/pytest.log + # if-no-files-found: ignore + # retention-days: 14 diff --git a/scripts/ci/run_cpu_unittest.sh b/scripts/ci/run_cpu_unittest.sh index 508ca19cc..378a2debd 100644 --- a/scripts/ci/run_cpu_unittest.sh +++ b/scripts/ci/run_cpu_unittest.sh @@ -37,11 +37,19 @@ python -c "import torch; print('torch version: ', torch.__version__, '| cuda ava echo '******************************************************************************' echo "Checking code cpu unit test by pytest ..." set +e -python -m pytest -v -s -p no:warnings --reruns=3 ./tests 2>&1 | tee pytest.log + +PYTEST_IGNORE="\ +--ignore=tests/test_backend_cpu_is_built.py \ +--ignore=tests/test_backend_cudnn_is_available.py \ +--ignore=tests/test_cuda_is_bf16_supported.py \ +--ignore=tests/test_distributed_is_nccl_available.py\ +" + +python -m pytest -v -s -p no:warnings $PYTEST_IGNORE --reruns=3 ./tests 2>&1 | tee pytest.log check_errors=${PIPESTATUS[0]} if [ ${check_errors} -ne 0 ]; then echo "Rerun CPU unit test" - python -m pytest -v -s -p no:warnings --lf ./tests 2>&1 | tee -a pytest.log + python -m pytest -v -s -p no:warnings $PYTEST_IGNORE --lf ./tests 2>&1 | tee -a pytest.log check_errors=${PIPESTATUS[0]} fi diff --git a/scripts/ci/run_distribute.sh b/scripts/ci/run_distribute.sh new file mode 100644 index 000000000..b896d3be6 --- /dev/null +++ b/scripts/ci/run_distribute.sh @@ -0,0 +1,74 @@ +# Copyright (c) 2026 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +set -eo pipefail + +DIST_OUT="$(pwd)/paddle_dist" + +echo '******************************************************************************' +echo "Installing develop GPU version paddle" +python -m pip uninstall -y paddlepaddle paddlepaddle-gpu || true +python -m pip install --force-reinstall --no-cache-dir -U --pre paddlepaddle-gpu \ + -i https://www.paddlepaddle.org.cn/packages/nightly/cu118/ \ + --extra-index-url https://pypi.tuna.tsinghua.edu.cn/simple \ + --timeout 120 --retries 3 +python -m pip install safetensors==0.6.2 +python -c "import paddle; print('paddle version: ', paddle.__version__); print('paddle commit info: ', paddle.__git_commit__)" + +echo '******************************************************************************' +echo "Installing paconvert requirements" + +cd tests/distributed + +echo '******************************************************************************' +echo 'Converting torch code to paddle -> ${DIST_OUT}' +rm -rf "${DIST_OUT}" +python ../../paconvert/main.py -i . -o "${DIST_OUT}" --log_level "DEBUG" + +echo '******************************************************************************' +echo "Running Distribute Unit Tests" +set +e + +check_errors=0 +failed_tests=() +test_list=$(ls *.py | grep -v run_and_compare.py) +for item in $test_list; do + cmd1="torchrun --nproc_per_node=2 ${item}" + cmd2="python -m paddle.distributed.launch ${DIST_OUT}/${item}" + python run_and_compare.py "$cmd1" "$cmd2" + if [ $? 
-ne 0 ]; then + failed_tests+=("${item}") + check_errors=1 + fi +done + +echo '******************************************************************************' +if [ ${#failed_tests[@]} -ne 0 ]; then + printf '%s\n' "${failed_tests[@]}" > failed_tests.txt + echo "Your PR code Distributed unit test check FAILED" + echo "The following distributed tests failed:" + cat failed_tests.txt + echo "Please run the following command:" + echo "" + echo " cd tests/distributed && bash unittest_check_distribute.sh" + echo "" + echo "For more information, please refer to our check guides:" + echo "https://github.com/paddlepaddle/paconvert#readme" +else + echo "All tests PASSED!" +fi +echo '******************************************************************************' + +exit ${check_errors} diff --git a/scripts/ci/run_gpu_unittest.sh b/scripts/ci/run_gpu_unittest.sh new file mode 100644 index 000000000..02f4ce544 --- /dev/null +++ b/scripts/ci/run_gpu_unittest.sh @@ -0,0 +1,64 @@ +# Copyright (c) 2026 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +set -eo pipefail + +echo '******************************************************************************' +echo "Installing develop GPU version paddle" +python -m pip uninstall -y paddlepaddle paddlepaddle-gpu || true +python -m pip install --force-reinstall --no-cache-dir -U --pre paddlepaddle-gpu \ + -i https://www.paddlepaddle.org.cn/packages/nightly/cu118/ \ + --extra-index-url https://pypi.tuna.tsinghua.edu.cn/simple \ + --timeout 120 --retries 3 +python -m pip install safetensors==0.6.2 +python -c "import paddle; print('paddle version: ', paddle.__version__); print('paddle commit info: ', paddle.__git_commit__)" + +echo '******************************************************************************' +echo "Installing paconvert requirements" +python -m pip install -r requirements.txt +if [ -f tests/requirements.txt ]; then + python -m pip install -r tests/requirements.txt +fi + +echo '******************************************************************************' +python -c "import torch; print('torch version: ', torch.__version__, '| cuda available: ', torch.cuda.is_available())" + +echo '******************************************************************************' +echo "Checking code gpu unit test by pytest ..." +set +e + +python -m pytest -v -s -p no:warnings --reruns=3 ./tests 2>&1 | tee pytest.log +check_errors=${PIPESTATUS[0]} +if [ ${check_errors} -ne 0 ]; then + echo "Rerun CPU unit test" + python -m pytest -v -s -p no:warnings --lf ./tests 2>&1 | tee -a pytest.log + check_errors=${PIPESTATUS[0]} +fi + +echo '******************************************************************************' +if [ ${check_errors} -ne 0 ]; then + echo "Your PR code GPU unit test check FAILED" + echo "Please run the following command:" + echo "" + echo " pytest -m pytest tests" + echo "" + echo "For more information, please refer to our check guides:" + echo "https://github.com/paddlepaddle/paconvert#readme" +else + echo "All tests PASSED!" 
+fi +echo '******************************************************************************' + +exit ${check_errors} diff --git a/scripts/ci/run_modeltest.sh b/scripts/ci/run_modeltest.sh new file mode 100644 index 000000000..619d892a1 --- /dev/null +++ b/scripts/ci/run_modeltest.sh @@ -0,0 +1,81 @@ +# Copyright (c) 2026 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +set -eo pipefail + +TORCH_PROJECT_PATH="${TORCH_PROJECT_PATH:-/workspace/torch_project}" + +echo '******************************************************************************' +echo "Installing develop CPU version paddle" +python -m pip uninstall -y paddlepaddle paddlepaddle-gpu || true +python -m pip install --force-reinstall --no-cache-dir -U --pre paddlepaddle \ + -i https://www.paddlepaddle.org.cn/packages/nightly/cpu/ \ + --extra-index-url https://pypi.tuna.tsinghua.edu.cn/simple \ + --timeout 120 --retries 3 +python -c "import paddle; print('paddle version: ', paddle.__version__); print('paddle commit info: ', paddle.__git_commit__)" + +echo '******************************************************************************' +echo "Installing paconvert requirements" +python -m pip install -r requirements.txt +python -m pip install pandas openpyxl || true + +set +e + +echo '******************************************************************************' +echo "[code-set-convert] Start converting code set under ${TORCH_PROJECT_PATH}" +if [ ! 
-d "${TORCH_PROJECT_PATH}" ]; then + echo "${TORCH_PROJECT_PATH} is not a valid directory. Please stage the model code set on the runner host." + exit 1 +fi + +shopt -s nullglob +projects=("${TORCH_PROJECT_PATH}"/*) +if [ ${#projects[@]} -eq 0 ]; then + echo "${TORCH_PROJECT_PATH} is empty. Please stage the model code set on the runner host." + exit 1 +fi + +failed_project=() +for project in "${projects[@]}"; do + if [ -d "$project" ]; then + project_name=$(basename "$project") + echo "[code-set-convert] Converting project: $project_name" + if ! python paconvert/main.py --in_dir "$project" --show_unsupport_api --calculate_speed; then + failed_project+=("$project_name") + fi + fi +done + +if [ ${#failed_project[@]} -ne 0 ]; then + printf '%s\n' "${failed_project[@]}" > failed_projects.txt + echo "[code-set-convert] The following projects fail to convert:" + cat failed_projects.txt + exit 1 +fi + +echo '******************************************************************************' +echo "[modeltest] Start modeltest" +python tools/modeltest/modeltest_check.py +check_errors=$? + +echo '******************************************************************************' +if [ ${check_errors} -ne 0 ]; then + echo "Your PR code modeltest check FAILED" +else + echo "All Modeltest PASSED!" 
+fi +echo '******************************************************************************' + +exit ${check_errors} diff --git a/tools/docker/Dockerfile_CPU b/tools/docker/Dockerfile_CPU deleted file mode 100644 index 07f0a7c4e..000000000 --- a/tools/docker/Dockerfile_CPU +++ /dev/null @@ -1,22 +0,0 @@ -# A image for testing PaConvert -FROM registry.baidubce.com/paddlepaddle/paddle:latest-dev-cuda11.8-cudnn8.6-trt8.5-gcc82 - -ENV PIP_INDEX_URL=https://pypi.tuna.tsinghua.edu.cn/simple - -RUN apt-get -o Acquire::Retries=5 update || true; \ - apt-get -o Acquire::Retries=5 install -y net-tools || true - -RUN ln -sf `which python3.10` /usr/bin/python - -RUN ln -sf `which pip3.10` /usr/local/bin/pip - -RUN python -m pip install --no-cache-dir \ - astor \ - pandas \ - openpyxl \ - pytest-timeout \ - pytest-rerunfailures - -RUN python -m pip install --no-cache-dir \ - torch==2.7.1 torchvision==0.22.1 \ - --index-url https://mirrors.aliyun.com/pytorch-wheels/cpu From 1106da014bee39bf369b16d7230487db20d156e1 Mon Sep 17 00:00:00 2001 From: manfredss Date: Tue, 23 Jun 2026 09:54:46 +0000 Subject: [PATCH 37/43] align the file name and fix typo --- .github/workflows/Modeltest_check.yml | 2 +- scripts/ci/{run_distribute.sh => run_distributed.sh} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename scripts/ci/{run_distribute.sh => run_distributed.sh} (100%) diff --git a/.github/workflows/Modeltest_check.yml b/.github/workflows/Modeltest_check.yml index bd4bfbe22..1c856b7af 100644 --- a/.github/workflows/Modeltest_check.yml +++ b/.github/workflows/Modeltest_check.yml @@ -56,7 +56,7 @@ jobs: echo "container_name=${container_name}" >> "$GITHUB_ENV" docker run -d --name "${container_name}" \ -e NVIDIA_VISIBLE_DEVICES=void \ - -e TORCH_PROJECT_PATH="${TORCH_PROEJCT_PATH}" \ + -e TORCH_PROJECT_PATH="${TORCH_PROJECT_PATH}" \ --network host \ -v /dev/shm:/dev/shm \ -v "${{ github.workspace }}:/ws" \ diff --git a/scripts/ci/run_distribute.sh b/scripts/ci/run_distributed.sh similarity 
index 100% rename from scripts/ci/run_distribute.sh rename to scripts/ci/run_distributed.sh From 7edf38b6a41e0a90d8822f617c3f7f689e0bbdb5 Mon Sep 17 00:00:00 2001 From: manfredss Date: Tue, 23 Jun 2026 11:34:58 +0000 Subject: [PATCH 38/43] fix proxy for modeltest --- .github/workflows/Modeltest_check.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/Modeltest_check.yml b/.github/workflows/Modeltest_check.yml index 1c856b7af..479b15da8 100644 --- a/.github/workflows/Modeltest_check.yml +++ b/.github/workflows/Modeltest_check.yml @@ -55,6 +55,9 @@ jobs: container_name="paconvert-modeltest-${{ github.run_id }}-${{ github.run_attempt }}" echo "container_name=${container_name}" >> "$GITHUB_ENV" docker run -d --name "${container_name}" \ + -e http_proxy="http_proxy" \ + -e https_proxy="https_proxy" \ + -e no_proxy="localhost,127.0.0.1" \ -e NVIDIA_VISIBLE_DEVICES=void \ -e TORCH_PROJECT_PATH="${TORCH_PROJECT_PATH}" \ --network host \ From 6c11caff14ddb43ee9b1d20c9c3fe53d90504ac4 Mon Sep 17 00:00:00 2001 From: manfredss Date: Tue, 23 Jun 2026 12:20:08 +0000 Subject: [PATCH 39/43] test fallback to run on physical machine for gpu --- .github/workflows/Modeltest_check.yml | 4 +- .github/workflows/Unittest_check_gpu.yml | 60 ++++++++++++++---------- 2 files changed, 37 insertions(+), 27 deletions(-) diff --git a/.github/workflows/Modeltest_check.yml b/.github/workflows/Modeltest_check.yml index 479b15da8..568d11764 100644 --- a/.github/workflows/Modeltest_check.yml +++ b/.github/workflows/Modeltest_check.yml @@ -55,8 +55,8 @@ jobs: container_name="paconvert-modeltest-${{ github.run_id }}-${{ github.run_attempt }}" echo "container_name=${container_name}" >> "$GITHUB_ENV" docker run -d --name "${container_name}" \ - -e http_proxy="http_proxy" \ - -e https_proxy="https_proxy" \ + -e http_proxy=$http_proxy \ + -e https_proxy=$https_proxy \ -e no_proxy="localhost,127.0.0.1" \ -e NVIDIA_VISIBLE_DEVICES=void \ -e TORCH_PROJECT_PATH="${TORCH_PROJECT_PATH}" \ 
diff --git a/.github/workflows/Unittest_check_gpu.yml b/.github/workflows/Unittest_check_gpu.yml index 82b4cf4c6..87fa0da52 100644 --- a/.github/workflows/Unittest_check_gpu.yml +++ b/.github/workflows/Unittest_check_gpu.yml @@ -45,34 +45,44 @@ jobs: with: fetch-depth: 0 - - name: Ensure CI image Exists + - name: GPU unittest on host run: | - docker image inspect paconvert-ci:cu118 >/dev/null 2>&1 || \ - docker build --pull -t paconvert-ci:cu118 -f tools/docker/Dockerfile tools/docker + set -eo pipefail + python3 -m venv /tmp/gpuci && source /tmp/gpuci/bin/activate + python -m pip install --upgrade pip + python -m pip install --no-cache-dir \ + https://paddle-paconvert.bj.bcebos.com/torch-2.7.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl \ + https://paddle-paconvert.bj.bcebos.com/torchvision-0.22.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl + bash scripts/ci/run_gpu_unittest.sh + + # - name: Ensure CI image Exists + # run: | + # docker image inspect paconvert-ci:cu118 >/dev/null 2>&1 || \ + # docker build --pull -t paconvert-ci:cu118 -f tools/docker/Dockerfile tools/docker - - name: Start Container - run: | - container_name="paconvert-gpu-unittest-${{ github.run_id }}-${{ github.run_attempt }}" - echo "container_name=${container_name}" >> "$GITHUB_ENV" - docker run -d --name "${container_name}" \ - --gpus all \ - --network host \ - -v /dev/shm:/dev/shm \ - -v "${{ github.workspace }}:/ws" \ - -w /ws \ - paconvert-ci:cu118 sleep infinity - - - name: Run GPU Unittest - run: | - docker exec "$container_name" bash scripts/ci/run_gpu_unittest.sh + # - name: Start Container + # run: | + # container_name="paconvert-gpu-unittest-${{ github.run_id }}-${{ github.run_attempt }}" + # echo "container_name=${container_name}" >> "$GITHUB_ENV" + # docker run -d --name "${container_name}" \ + # --gpus all \ + # --network host \ + # -v /dev/shm:/dev/shm \ + # -v "${{ github.workspace }}:/ws" \ + # -w /ws \ + # paconvert-ci:cu118 sleep infinity - - name: Cleanup Container - if: 
always() - run: | - if [ -n "${container_name:-}" ]; then - docker exec "${container_name}" chown -R "$(id -u):$(id -g)" /ws 2>/dev/null || true - docker rm -f "${container_name}" || true - fi + # - name: Run GPU Unittest + # run: | + # docker exec "$container_name" bash scripts/ci/run_gpu_unittest.sh + + # - name: Cleanup Container + # if: always() + # run: | + # if [ -n "${container_name:-}" ]; then + # docker exec "${container_name}" chown -R "$(id -u):$(id -g)" /ws 2>/dev/null || true + # docker rm -f "${container_name}" || true + # fi # - name: Setup Python # uses: actions/setup-python@v5 From 73801f3cadf687eaf41a51424a8288f4f300c45e Mon Sep 17 00:00:00 2001 From: manfredss Date: Wed, 24 Jun 2026 01:50:56 +0000 Subject: [PATCH 40/43] fix the correct file entry --- scripts/ci/run_cpu_unittest.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/ci/run_cpu_unittest.sh b/scripts/ci/run_cpu_unittest.sh index 378a2debd..bcc4eb2ba 100644 --- a/scripts/ci/run_cpu_unittest.sh +++ b/scripts/ci/run_cpu_unittest.sh @@ -40,8 +40,8 @@ set +e PYTEST_IGNORE="\ --ignore=tests/test_backend_cpu_is_built.py \ ---ignore=tests/test_backend_cudnn_is_available.py \ --ignore=tests/test_cuda_is_bf16_supported.py \ +--ignore=tests/test_backends_cudnn_is_available.py \ --ignore=tests/test_distributed_is_nccl_available.py\ " From fa931b38b5351dd85af519cf7d8892461cf92698 Mon Sep 17 00:00:00 2001 From: manfredss Date: Wed, 24 Jun 2026 06:18:00 +0000 Subject: [PATCH 41/43] enable gpu to run on docker --- .../workflows/Unittest_check_distribute.yml | 3 + .github/workflows/Unittest_check_gpu.yml | 63 +++++++++---------- 2 files changed, 31 insertions(+), 35 deletions(-) diff --git a/.github/workflows/Unittest_check_distribute.yml b/.github/workflows/Unittest_check_distribute.yml index 4bb8d98a7..3e594db50 100644 --- a/.github/workflows/Unittest_check_distribute.yml +++ b/.github/workflows/Unittest_check_distribute.yml @@ -59,6 +59,9 @@ jobs: 
container_name="paconvert-distribute-${{ github.run_id }}-${{ github.run_attempt }}" echo "container_name=${container_name}" >> "$GITHUB_ENV" docker run -d --name "${container_name}" \ + -e http_proxy=$http_proxy \ + -e https_proxy=$https_proxy \ + -e no_proxy="localhost,127.0.0.1" \ --gpus all \ --network host \ --ipc host \ diff --git a/.github/workflows/Unittest_check_gpu.yml b/.github/workflows/Unittest_check_gpu.yml index 87fa0da52..f0c9ebbc7 100644 --- a/.github/workflows/Unittest_check_gpu.yml +++ b/.github/workflows/Unittest_check_gpu.yml @@ -45,44 +45,37 @@ jobs: with: fetch-depth: 0 - - name: GPU unittest on host + - name: Ensure CI image Exists run: | - set -eo pipefail - python3 -m venv /tmp/gpuci && source /tmp/gpuci/bin/activate - python -m pip install --upgrade pip - python -m pip install --no-cache-dir \ - https://paddle-paconvert.bj.bcebos.com/torch-2.7.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl \ - https://paddle-paconvert.bj.bcebos.com/torchvision-0.22.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl - bash scripts/ci/run_gpu_unittest.sh - - # - name: Ensure CI image Exists - # run: | - # docker image inspect paconvert-ci:cu118 >/dev/null 2>&1 || \ - # docker build --pull -t paconvert-ci:cu118 -f tools/docker/Dockerfile tools/docker - - # - name: Start Container - # run: | - # container_name="paconvert-gpu-unittest-${{ github.run_id }}-${{ github.run_attempt }}" - # echo "container_name=${container_name}" >> "$GITHUB_ENV" - # docker run -d --name "${container_name}" \ - # --gpus all \ - # --network host \ - # -v /dev/shm:/dev/shm \ - # -v "${{ github.workspace }}:/ws" \ - # -w /ws \ - # paconvert-ci:cu118 sleep infinity + docker image inspect paconvert-ci:cu118 >/dev/null 2>&1 || \ + docker build --pull -t paconvert-ci:cu118 -f tools/docker/Dockerfile tools/docker - # - name: Run GPU Unittest - # run: | - # docker exec "$container_name" bash scripts/ci/run_gpu_unittest.sh + - name: Start Container + run: | + 
container_name="paconvert-gpu-unittest-${{ github.run_id }}-${{ github.run_attempt }}" + echo "container_name=${container_name}" >> "$GITHUB_ENV" + docker run -d --name "${container_name}" \ + -e http_proxy=$http_proxy \ + -e https_proxy=$https_proxy \ + -e no_proxy="localhost,127.0.0.1" \ + --gpus all \ + --network host \ + -v /dev/shm:/dev/shm \ + -v "${{ github.workspace }}:/ws" \ + -w /ws \ + paconvert-ci:cu118 sleep infinity + + - name: Run GPU Unittest + run: | + docker exec "$container_name" bash scripts/ci/run_gpu_unittest.sh - # - name: Cleanup Container - # if: always() - # run: | - # if [ -n "${container_name:-}" ]; then - # docker exec "${container_name}" chown -R "$(id -u):$(id -g)" /ws 2>/dev/null || true - # docker rm -f "${container_name}" || true - # fi + - name: Cleanup Container + if: always() + run: | + if [ -n "${container_name:-}" ]; then + docker exec "${container_name}" chown -R "$(id -u):$(id -g)" /ws 2>/dev/null || true + docker rm -f "${container_name}" || true + fi # - name: Setup Python # uses: actions/setup-python@v5 From 19549cf405c65799f5fc73fc34a85f75bd9fefae Mon Sep 17 00:00:00 2001 From: manfredss Date: Wed, 24 Jun 2026 09:32:00 +0000 Subject: [PATCH 42/43] fix file dir --- scripts/ci/run_cpu_unittest.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/ci/run_cpu_unittest.sh b/scripts/ci/run_cpu_unittest.sh index bcc4eb2ba..8e195899c 100644 --- a/scripts/ci/run_cpu_unittest.sh +++ b/scripts/ci/run_cpu_unittest.sh @@ -39,7 +39,7 @@ echo "Checking code cpu unit test by pytest ..." 
set +e PYTEST_IGNORE="\ ---ignore=tests/test_backend_cpu_is_built.py \ +--ignore=tests/test_backends_cpu_is_built.py \ --ignore=tests/test_cuda_is_bf16_supported.py \ --ignore=tests/test_backends_cudnn_is_available.py \ --ignore=tests/test_distributed_is_nccl_available.py\ From a27ec4c561aa84cc4aab0d5152f38e5f84215c1e Mon Sep 17 00:00:00 2001 From: manfredss Date: Wed, 24 Jun 2026 12:02:07 +0000 Subject: [PATCH 43/43] fix --- scripts/ci/run_cpu_unittest.sh | 9 +++++++-- scripts/ci/run_distributed.sh | 1 + scripts/ci/run_gpu_unittest.sh | 6 +++--- tools/docker/Dockerfile | 3 ++- 4 files changed, 13 insertions(+), 6 deletions(-) diff --git a/scripts/ci/run_cpu_unittest.sh b/scripts/ci/run_cpu_unittest.sh index 8e195899c..88d2555a8 100644 --- a/scripts/ci/run_cpu_unittest.sh +++ b/scripts/ci/run_cpu_unittest.sh @@ -39,10 +39,15 @@ echo "Checking code cpu unit test by pytest ..." set +e PYTEST_IGNORE="\ ---ignore=tests/test_backends_cpu_is_built.py \ +--ignore=tests/test_backends_cuda_is_built.py \ --ignore=tests/test_cuda_is_bf16_supported.py \ --ignore=tests/test_backends_cudnn_is_available.py \ ---ignore=tests/test_distributed_is_nccl_available.py\ +--ignore=tests/test_distributed_is_nccl_available.py \ +--ignore=tests/test_hub_download_url_to_file.py \ +--ignore=tests/test_hub_help.py \ +--ignore=tests/test_hub_list.py \ +--ignore=tests/test_hub_load.py \ +--ignore=tests/test_hub_load_state_dict_from_url.py \ " python -m pytest -v -s -p no:warnings $PYTEST_IGNORE --reruns=3 ./tests 2>&1 | tee pytest.log diff --git a/scripts/ci/run_distributed.sh b/scripts/ci/run_distributed.sh index b896d3be6..f4de5ea6c 100644 --- a/scripts/ci/run_distributed.sh +++ b/scripts/ci/run_distributed.sh @@ -29,6 +29,7 @@ python -c "import paddle; print('paddle version: ', paddle.__version__); print(' echo '******************************************************************************' echo "Installing paconvert requirements" +python -m pip install -r requirements.txt cd tests/distributed 
diff --git a/scripts/ci/run_gpu_unittest.sh b/scripts/ci/run_gpu_unittest.sh index 02f4ce544..18cec854c 100644 --- a/scripts/ci/run_gpu_unittest.sh +++ b/scripts/ci/run_gpu_unittest.sh @@ -39,11 +39,11 @@ echo '************************************************************************** echo "Checking code gpu unit test by pytest ..." set +e -python -m pytest -v -s -p no:warnings --reruns=3 ./tests 2>&1 | tee pytest.log +python -m pytest -v -s -p no:warnings -n 1 --reruns=3 ./tests 2>&1 | tee pytest.log check_errors=${PIPESTATUS[0]} if [ ${check_errors} -ne 0 ]; then - echo "Rerun CPU unit test" - python -m pytest -v -s -p no:warnings --lf ./tests 2>&1 | tee -a pytest.log + echo "Rerun GPU unit test" + python -m pytest -v -s -p no:warnings -n 1 --lf ./tests 2>&1 | tee -a pytest.log check_errors=${PIPESTATUS[0]} fi diff --git a/tools/docker/Dockerfile b/tools/docker/Dockerfile index ca109dfbe..0ffc700f6 100644 --- a/tools/docker/Dockerfile +++ b/tools/docker/Dockerfile @@ -15,7 +15,8 @@ RUN python -m pip install --no-cache-dir \ pandas \ openpyxl \ pytest-timeout \ - pytest-rerunfailures + pytest-rerunfailures \ + pytest-xdist RUN wget -q https://paddle-paconvert.bj.bcebos.com/torch-2.7.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl \ https://paddle-paconvert.bj.bcebos.com/torchvision-0.22.1+cu118-cp310-cp310-manylinux_2_28_x86_64.whl && \