diff --git a/frameworks/peft/0.18.0/Dockerfile b/frameworks/peft/0.18.0/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..a5f232b9730c5b47dc028afe441cc2927bf07264
--- /dev/null
+++ b/frameworks/peft/0.18.0/Dockerfile
@@ -0,0 +1,91 @@
+# syntax=docker/dockerfile:1.6
+#
+# Dockerfile for Peft 0.18.0
+# --------------------------------------------------------------------
+# Base image: OpenCloudOS 9 (RHEL 9 compatible) + CUDA 12.8 devel
+# Python    : 3.11 (pre-installed in the base image)
+# PyTorch   : 2.11.0 + cu128 (matches Peft 0.18.0 requirements)
+# --------------------------------------------------------------------
+
+ARG CUDA_IMAGE=opencloudos/opencloudos9-cuda-devel:12.8
+FROM ${CUDA_IMAGE} AS base
+
+LABEL maintainer="harrywu0913"
+LABEL org.opencontainers.image.source="https://gitee.com/OpenCloudOS/ai-agent-container"
+LABEL org.opencontainers.image.description="Peft (GPU) on OpenCloudOS 9"
+
+# ---- Build-time arguments (override with --build-arg) --------------
+# Declared after FROM so they are in scope for the instructions below.
+ARG PEFT_VERSION=0.18.0
+ARG TORCH_VERSION=2.11.0
+ARG TORCH_CUDA=cu128
+ARG PYTHON_VERSION=3.11
+ARG APP_USER=peft
+ARG APP_UID=1000
+ARG APP_GID=1000
+
+# ---- Runtime environment ------------------------------------------
+# HF_HOME and TORCH_HOME point below /workspace, which is handed to the
+# unprivileged runtime user further down.
+ENV LANG=C.UTF-8 \
+    LC_ALL=C.UTF-8 \
+    PYTHONDONTWRITEBYTECODE=1 \
+    PYTHONUNBUFFERED=1 \
+    PIP_NO_CACHE_DIR=1 \
+    PIP_DISABLE_PIP_VERSION_CHECK=1 \
+    HF_HOME=/workspace/.cache/huggingface \
+    TORCH_HOME=/workspace/.cache/torch \
+    NVIDIA_VISIBLE_DEVICES=all \
+    NVIDIA_DRIVER_CAPABILITIES=compute,utility
+
+# ---- System packages ----------------------------------------------
+# The base image already ships Python 3.11 and pip3; we only add git (VCS),
+# curl and CA certificates (HTTPS downloads), and tini (PID 1).
+# /usr/bin/python is linked to the versioned interpreter so that every later
+# "python" call (pip installs, HEALTHCHECK, CMD) runs Python ${PYTHON_VERSION}.
+RUN dnf install -y --setopt=install_weak_deps=False \
+        ca-certificates \
+        curl \
+        git \
+        tini \
+    && ln -sf /usr/bin/python${PYTHON_VERSION} /usr/bin/python \
+    && dnf clean all \
+    && rm -rf /var/cache/dnf
+
+# ---- Non-root user -------------------------------------------------
+# /workspace, including the cache directory used by HF_HOME and TORCH_HOME,
+# is owned by the unprivileged user so it stays writable at runtime.
+RUN groupadd --gid ${APP_GID} ${APP_USER} \
+    && useradd --uid ${APP_UID} --gid ${APP_GID} --create-home --shell /bin/bash ${APP_USER} \
+    && mkdir -p /workspace /workspace/.cache \
+    && chown -R ${APP_USER}:${APP_USER} /workspace
+
+WORKDIR /workspace
+
+# ---- PyTorch (pinned to a CUDA 12.8 wheel) ------------------------
+# Installed from the PyTorch wheel index selected by TORCH_CUDA, in its own layer.
+RUN python -m pip install --no-cache-dir \
+        --index-url https://download.pytorch.org/whl/${TORCH_CUDA} \
+        "torch==${TORCH_VERSION}"
+
+# ---- PEFT (parameter-efficient fine-tuning) -----------------------
+# Installed after torch, so the pinned CUDA build is already in place when pip
+# resolves PEFT's dependencies.
+RUN python -m pip install --no-cache-dir \
+        "peft==${PEFT_VERSION}"
+
+# ---- Final wiring --------------------------------------------------
+# Drop privileges by numeric UID:GID (the IDs given to ${APP_USER} above) so
+# that runtimes which must verify a non-root UID, such as Kubernetes
+# runAsNonRoot, can do so; the account created above still owns that UID.
+USER ${APP_UID}:${APP_GID}
+
+# Healthcheck: import the library; fails fast if the install broke.
+# "|| exit 1" folds every failure into status 1, the only "unhealthy" code
+# Docker defines for health probes.
+HEALTHCHECK --interval=30s --timeout=10s --start-period=20s --retries=3 \
+    CMD python -c "import peft; print(peft.__version__)" || exit 1
+
+# tini as PID 1 to reap zombies and forward signals cleanly.
+ENTRYPOINT ["/usr/bin/tini", "--"]
+CMD ["python", "-c", "import peft; print('peft', peft.__version__)"]
diff --git a/frameworks/peft/0.18.0/README.md b/frameworks/peft/0.18.0/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..758a1c6d0f336d116e6f6880837865399916af56
--- /dev/null
+++ b/frameworks/peft/0.18.0/README.md
@@ -0,0 +1,43 @@
+# PEFT on OpenCloudOS 9
+
+## 基本信息
+- **框架版本**:v0.18.0
+- **基础镜像**:opencloudos/opencloudos9-cuda-devel:12.8
+- **Python 版本**:3.11
+- **CUDA 版本**:12.8
+
+## 构建
+
+```bash
+docker build -f Dockerfile -t oc9-peft:0.18.0 .
+```
+
+## 使用示例
+
+```bash
+docker run --rm oc9-peft:0.18.0 \
+  python -c "import peft; print(peft.__version__)"
+```
+
+运行 LoRA 微调示例(需要 GPU 与 HuggingFace 网络访问):
+
+```bash
+docker run --rm --gpus all \
+  -e HF_TOKEN="${HF_TOKEN}" \
+  -v "$PWD/peft_demo.py:/workspace/peft_demo.py:ro" \
+  -v "$PWD/output:/workspace/output" \
+  -w /workspace/output \
+  oc9-peft:0.18.0 \
+  python /workspace/peft_demo.py
+```
+
+构建后验证:
+
+```bash
+chmod +x test.sh
+./test.sh "oc9-peft:0.18.0"
+```
+
+## 已知问题
+- 运行示例会从 HuggingFace 拉取 `Qwen/Qwen2.5-0.5B-Instruct`,离线或受限网络环境下需预先准备本地模型缓存并挂载到 `/workspace/.cache/huggingface`。
+- 容器以非 root 用户(UID 1000)运行;挂载到 `/workspace/output` 的宿主机目录需提前创建并保证该用户可写,否则保存 LoRA 适配器时会因权限不足而失败。
diff --git a/frameworks/peft/0.18.0/build.conf b/frameworks/peft/0.18.0/build.conf
new file mode 100644
index 0000000000000000000000000000000000000000..be4b4b813e939791a70c6b6399b5f99e281b8cdf
--- /dev/null
+++ b/frameworks/peft/0.18.0/build.conf
@@ -0,0 +1,3 @@
+IMAGE_NAME=oc9-peft
+IMAGE_TAG=0.18.0
+GPU_TEST=true
diff --git a/frameworks/peft/0.18.0/test-success.png b/frameworks/peft/0.18.0/test-success.png
new file mode 100644
index 0000000000000000000000000000000000000000..23a54eabf24ba869d0fa7b74c476ebbe1420effe
Binary files /dev/null and b/frameworks/peft/0.18.0/test-success.png differ
diff --git a/frameworks/peft/0.18.0/test.sh b/frameworks/peft/0.18.0/test.sh
new file mode 100755
index 0000000000000000000000000000000000000000..245f58bca23b3410e77e25e0a6d3ff83eadf0664
--- /dev/null
+++ b/frameworks/peft/0.18.0/test.sh
@@ -0,0 +1,166 @@
+#!/bin/bash
+# ---------------------------------------------------------------
+# test.sh - Post-build verification for oc9-peft:0.18.0
+#
+# Usage:
+#   ./test.sh [IMAGE_TAG]
+#
+# Exit code: 0 on success, non-zero on any failed check.
+# ---------------------------------------------------------------
+set -euo pipefail
+
+IMAGE="${1:-oc9-peft:0.18.0}"
+EXPECTED_PEFT_VERSION="0.18.0"
+EXPECTED_TORCH_MAJOR="2"
+EXPECTED_USER="peft"
+
+# Console helpers: pass and info only report; fail reports and aborts the run.
+pass() { printf " \033[32m[✓ PASS]\033[0m %s\n" "$1"; }
+fail() { printf " \033[31m[✗ FAIL]\033[0m %s\n" "$1"; exit 1; }
+info() { printf "\033[34m==>\033[0m %s\n" "$1"; }
+
+# check_versions OK_MSG FAIL_MSG GREP_ARGS...
+# Reports OK_MSG when a line of VERS_OUTPUT matches GREP_ARGS, otherwise aborts
+# with FAIL_MSG. The report is fed through a here-string rather than
+# `echo | grep -q`, so grep exiting early cannot surface as a SIGPIPE failure
+# under `set -o pipefail`.
+check_versions() {
+  local ok_msg="$1" fail_msg="$2"
+  shift 2
+  if grep -q "$@" <<<"${VERS_OUTPUT}"; then
+    pass "${ok_msg}"
+  else
+    fail "${fail_msg}"
+  fi
+}
+
+# --- 0. Prerequisites ------------------------------------------
+info "Checking prerequisites"
+command -v docker >/dev/null 2>&1 || fail "docker is not installed"
+pass "docker binary found: $(docker --version)"
+
+# --- 1. Image exists -------------------------------------------
+info "Checking image '${IMAGE}' exists"
+if ! docker image inspect "${IMAGE}" >/dev/null 2>&1; then
+  fail "image '${IMAGE}' not found. Build it first: docker build -f Dockerfile -t ${IMAGE} ."
+fi
+pass "image is present locally"
+
+# Detect whether the host exposes a GPU to Docker by starting a throwaway
+# container with --gpus all; if that fails, the GPU-only checks are skipped.
+# GPU_FLAG is expanded unquoted below on purpose: it must split into two words
+# when set and vanish entirely when empty.
+GPU_FLAG=""
+if docker run --rm --gpus all "${IMAGE}" true >/dev/null 2>&1; then
+  GPU_FLAG="--gpus all"
+  info "GPU detected - running full test matrix"
+else
+  info "No GPU available - running CPU-only subset"
+fi
+
+# --- 2. Python & framework versions ----------------------------
+info "Verifying Python / PyTorch / PEFT / Transformers versions"
+# shellcheck disable=SC2086
+VERS_OUTPUT="$(docker run --rm -i ${GPU_FLAG} "${IMAGE}" python - <<'PY'
+import sys, torch, peft
+try:
+    import transformers
+    tfm = transformers.__version__
+except Exception:
+    tfm = "not-installed"
+print(f"python={sys.version.split()[0]}")
+print(f"torch={torch.__version__}")
+print(f"peft={peft.__version__}")
+print(f"transformers={tfm}")
+print(f"cuda_available={torch.cuda.is_available()}")
+PY
+)"
+sed 's/^/ /' <<<"${VERS_OUTPUT}"
+
+# Accept any 3.11 patch release, but not e.g. 3.110.
+check_versions "python 3.11 OK" "unexpected python version" \
+  -E -e '^python=3\.11(\.|$)'
+
+check_versions "torch ${EXPECTED_TORCH_MAJOR}.x OK" "unexpected torch version" \
+  -e "^torch=${EXPECTED_TORCH_MAJOR}\."
+
+# Whole-line, fixed-string match: the dots are literal and suffixed builds
+# such as 0.18.0.dev0 are rejected.
+check_versions "peft ${EXPECTED_PEFT_VERSION} OK" "unexpected peft version" \
+  -x -F -e "peft=${EXPECTED_PEFT_VERSION}"
+
+# --- 3. CUDA visibility (GPU hosts only) -----------------------
+if [[ -n "${GPU_FLAG}" ]]; then
+  info "Verifying CUDA is visible to PyTorch"
+  check_versions "torch.cuda.is_available() == True" \
+    "CUDA not visible inside container" -x -F -e "cuda_available=True"
+fi
+
+# --- 4. PEFT API smoke test ------------------------------------
+info "Importing core PEFT symbols"
+# The interpreter output is captured instead of discarded so that a failing
+# import shows its traceback.
+# shellcheck disable=SC2086
+if SMOKE_OUTPUT="$(docker run --rm -i ${GPU_FLAG} "${IMAGE}" python - 2>&1 <<'PY'
+from peft import LoraConfig, TaskType, get_peft_model, PeftModel
+cfg = LoraConfig(r=8, lora_alpha=16, task_type=TaskType.CAUSAL_LM)
+assert cfg.r == 8 and cfg.lora_alpha == 16
+PY
+)"; then
+  pass "peft LoraConfig / get_peft_model importable"
+else
+  sed 's/^/ /' <<<"${SMOKE_OUTPUT}"
+  fail "peft core symbols failed to import"
+fi
+
+# --- 5. Non-root user ------------------------------------------
+info "Verifying the container runs as non-root"
+# shellcheck disable=SC2086
+WHOAMI="$(docker run --rm ${GPU_FLAG} "${IMAGE}" whoami)"
+if [[ "${WHOAMI}" == "${EXPECTED_USER}" ]]; then
+  pass "runtime user is '${WHOAMI}'"
+else
+  fail "expected '${EXPECTED_USER}', got '${WHOAMI}'"
+fi
+
+# # --- 6. End-to-end LoRA smoke test -----------------------------
+# # Small model used for the end-to-end LoRA smoke test.
+# # Override with: LORA_MODEL=org/name ./test.sh
+# # Requires GPU + network egress (pulls the model from HuggingFace).
+# # Runs the repository's peft_demo.py inside the container and checks that
+# # the LoRA adapter artifacts are produced.
+# LORA_MODEL="${LORA_MODEL:-Qwen/Qwen2.5-0.5B-Instruct}"
+# LORA_SCRIPT="${LORA_SCRIPT:-$(cd "$(dirname "$0")" && pwd)/peft_demo.py}"
+# if [[ -n "${GPU_FLAG}" ]]; then
+#   if [[ ! -f "${LORA_SCRIPT}" ]]; then
+#     fail "LORA_SCRIPT not found: ${LORA_SCRIPT}"
+#   fi
+#   info "Running LoRA fine-tune smoke test with '${LORA_MODEL}'"
+#   OUT_DIR="$(mktemp -d -t peft-smoke-XXXXXX)"
+#   chmod 777 "${OUT_DIR}"
+#   trap 'rm -rf "${OUT_DIR}"' EXIT
+
+#   docker run --rm ${GPU_FLAG} \
+#     -e HF_TOKEN="${HF_TOKEN:-}" \
+#     -v "${LORA_SCRIPT}:/workspace/peft_demo.py:ro" \
+#     -v "${OUT_DIR}:/workspace/out" \
+#     -w /workspace/out \
+#     "${IMAGE}" \
+#     python /workspace/peft_demo.py \
+#     || fail "peft_demo.py exited non-zero"
+
+#   if [[ -f "${OUT_DIR}/qwen2.5-0.5b-instruct-lora/adapter_config.json" ]]; then
+#     pass "LoRA adapter saved (adapter_config.json present)"
+#   else
+#     ls -la "${OUT_DIR}" || true
+#     fail "expected LoRA adapter artifacts were not produced"
+#   fi
+
+#   rm -rf "${OUT_DIR}"
+#   trap - EXIT
+# else
+#   info "Skipping LoRA smoke test (no GPU detected)"
+# fi
+
+
+printf "\n\033[32mAll checks passed for %s\033[0m\n" "${IMAGE}"