From d4f4a03904a70ca114a31d8227a2ca4c89242892 Mon Sep 17 00:00:00 2001 From: NingWei Date: Thu, 30 Apr 2026 12:17:07 +0800 Subject: [PATCH 1/6] =?UTF-8?q?=E8=BF=BD=E5=8A=A0bash=5Fexec=5Fmqtt?= =?UTF-8?q?=E7=9A=84=E8=83=BD=E5=8A=9B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DEV_GUIDE.md | 37 ++- Dockerfile | 4 + MANUAL_DEPLOY.md | 8 +- README.md | 24 +- env.json | 7 +- platform_home/scripts/docker-entrypoint.sh | 103 ++++++++ platform_home/scripts/platform-common.sh | 2 + test/docker_test_common.py | 272 +++++++++++++++++++++ 8 files changed, 437 insertions(+), 20 deletions(-) diff --git a/DEV_GUIDE.md b/DEV_GUIDE.md index 7dbe7b5..9c8a913 100644 --- a/DEV_GUIDE.md +++ b/DEV_GUIDE.md @@ -35,7 +35,7 @@ openclaw-enterprise-terminal-oc/ 各路径用途: - `Dockerfile` - 镜像定义。负责安装系统依赖、Node/OpenClaw/QMD、agent-browser、bundled `mqtt-channel`,以及 4 个通过 ClawHub 安装的默认 Skills。 + 镜像定义。负责安装系统依赖、Node/OpenClaw/QMD、agent-browser、bundled `mqtt-channel`、`mqtt-bash-exec-channel`,以及 4 个通过 ClawHub 安装的默认 Skills。 - `docker-build.ps1` 标准构建入口。自动检测 ClawHub token,记录 build 日志和分步耗时。 - `docker-build-summary.ps1` @@ -46,7 +46,7 @@ openclaw-enterprise-terminal-oc/ 运行时环境变量源。脚本要求与仓库字段完全对位,不做兼容映射。 - `test/` 测试脚本目录。 - - `test/docker_test_common.py`:两套测试脚本共用的 helper。负责报告生成、容器创建、health wait、JSON/template 校验、宿主机 MQTT roundtrip 校验和 AI file output 校验。 + - `test/docker_test_common.py`:两套测试脚本共用的 helper。负责报告生成、容器创建、health wait、JSON/template 校验、`mqtt-bash-exec-channel` 进程校验、宿主机 MQTT roundtrip 校验和 AI file output 校验。 - `test/docker_fast_test.py`:快速回归测试脚本。只验证 Dockerfile 带来的文件处理和容器内包装命令,不依赖实际 AI 对话。 - `test/docker_full_test.py`:完整回归测试脚本。在快速测试基础上追加宿主机直连 MQTT broker 的 roundtrip 校验,以及 `openclaw agent` 文件输出链路。 - `test/requirements.txt`:Python 测试依赖,当前包含宿主机 MQTT 校验需要的 `paho-mqtt`。 @@ -187,7 +187,8 @@ CMD ["openclaw", "gateway", "run", "--port", "18789"] - 注入 bundled skills 目录 `/opt/openclaw-bundled-skills` - 确保持久化的 `main` agent 被注册 13. 创建默认 workspace 根目录。 -14. 如果启动的是 `openclaw gateway run/start`,进入前台 supervisor 模式,并把输出同时写入 `gateway.log`。 +14. 如果启动的是 `openclaw gateway run/start`,进入前台 supervisor 模式,并把 gateway 输出同时写入 `gateway.log`。 +15. 如果 `env.json` 中的 `BASH_EXEC_CHANNEL_*` 必填字段齐全,则额外拉起 `mqtt-bash-exec-channel` 前台进程,由同一个 entrypoint supervisor 负责保活,并把输出写入 `bash-exec-channel.log`。 ## 5. env.json 注入链路 @@ -197,6 +198,7 @@ CMD ["openclaw", "gateway", "run", "--port", "18789"] - `env.json` 中每一个键值都会被导出为容器进程环境变量。 - gateway 进程直接继承这些环境变量。 +- 当 `BASH_EXEC_CHANNEL_MQTT_BROKER_URL`、`BASH_EXEC_CHANNEL_MQTT_USERNAME`、`BASH_EXEC_CHANNEL_MQTT_PASSWORD`、`BASH_EXEC_CHANNEL_REQUEST_TOPIC`、`BASH_EXEC_CHANNEL_RESPONSE_TOPIC` 全部存在时,entrypoint 会自动启动并保活 `mqtt-bash-exec-channel`。 - `agents`、`doctor`、`logs` 等包装脚本继承同一套环境变量。 - agent 的 `exec` 工具运行出的 shell 也能读到同一套变量。 - 不做 `OC_OPENAI_KEY -> OC_OPENAI_API_KEY` 兼容映射。 @@ -262,13 +264,21 @@ docker exec "${CONTAINER_NAME}" doctor - `platform-version.json` - gateway 是否健康 +`doctor` 当前不会直接检查 `mqtt-bash-exec-channel` 是否订阅成功;该进程应通过容器进程列表和 `bash-exec-channel.log` 单独确认。 + ### 6.3 `logs` ```bash docker exec "${CONTAINER_NAME}" logs docker exec "${CONTAINER_NAME}" logs --limit 50 --plain +docker exec "${CONTAINER_NAME}" /bin/bash -lc "tail -n 50 /var/platform_data/openclaw/logs/bash-exec-channel.log" ``` +说明: + +- `logs` 包装命令当前面向 OpenClaw gateway 日志。 +- `mqtt-bash-exec-channel` 的独立日志文件固定为 `/var/platform_data/openclaw/logs/bash-exec-channel.log`。 + ### 6.4 `docker restart` 当前实现中不再提供容器内 `restart` 命令。 @@ -345,7 +355,7 @@ powershell -NoProfile -ExecutionPolicy Bypass -File .\docker-build-x64.ps1 ` 1. 安装 Ubuntu 系统依赖 2. 安装 Node.js 3. 创建 `platform` 用户和持久化目录骨架 -4. 安装 `openclaw`、`qmd`、`huozige-web-app-cli`、`agent-browser` +4. 安装 `openclaw`、`qmd`、`huozige-web-app-cli`、`agent-browser`、`mqtt-bash-exec-channel` 5. 预下载 QMD 模型 6. 通过 ClawHub 安装 bundled `mqtt-channel` 7. 通过 ClawHub 安装 4 个默认 Skills,并复制到 `/opt/openclaw-bundled-skills` @@ -395,6 +405,7 @@ python .\test\docker_fast_test.py ` 3. 断言: - `/var/platform_data/env.json` 中每个键值都已导出为容器环境变量 - `/var/platform_data/config.json` 中受模板同步管理的配置已同步进 `/var/platform_data/.openclaw/openclaw.json` + - `mqtt-bash-exec-channel` 命令存在,且在容器启动后进程处于运行中 4. 创建测试 agent,验证: - `agents add/list/info/inject/delete` - workspace 下的 `AGENTS.md`、`SOUL.md`、`USER.md` 与 `/opt/platform_home/templates/*.template.md` 完全一致 @@ -420,20 +431,24 @@ python .\test\docker_full_test.py ` 脚本行为: 1. 先完整执行一遍 `docker-fast-test` 的所有检查项 -2. 追加创建一个 AI 测试 agent -3. 重启容器并等待 gateway ready -4. 在开发机上直接连接 `env.json` 指定的 MQTT broker: +2. 在开发机上直接连接 `env.json` 中 `BASH_EXEC_CHANNEL_*` 指定的 MQTT broker / topic: + - 向 `BASH_EXEC_CHANNEL_REQUEST_TOPIC` 发布 `{"input":"echo ok","threadId":"<唯一值>"}` + - 订阅 `BASH_EXEC_CHANNEL_RESPONSE_TOPIC` + - 断言能收到同一个 `threadId` 的响应,且 `output` 归一化后等于 `ok` +3. 追加创建一个 AI 测试 agent +4. 重启容器并等待 gateway ready +5. 在开发机上直接连接 `env.json` 指定的 MQTT broker: - 从容器内读取该测试 agent 的 inbound / outbound topic - 在开发机上订阅 outbound topic - 在开发机上向 inbound topic 发布一条测试消息 - 断言能收到该 agent 经 `mqtt-channel` 返回的 final reply,确认 `mqtt-channel` 配置和 broker 链路可用 -5. 使用 `openclaw agent --agent --json` 请求该 agent: +6. 使用 `openclaw agent --agent --json` 请求该 agent: - 在 workspace 中创建一个内容包含 agent 名称的文件 - 按 `FILE-TRANSFER.md` 约束上传 - 只返回 `file_output://...` URI -6. 断言返回内容中存在 `file_output://` 开头的 URI -7. 使用 `aws s3 cp` 下载该 URI 对应的对象,断言文件内容确实包含 agent 名称 -8. 删除本轮 AI 测试 agent +7. 断言返回内容中存在 `file_output://` 开头的 URI +8. 使用 `aws s3 cp` 下载该 URI 对应的对象,断言文件内容确实包含 agent 名称 +9. 删除本轮 AI 测试 agent ## 9. 约束与注意事项 diff --git a/Dockerfile b/Dockerfile index 27b3b26..4e6622d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -9,6 +9,7 @@ ARG OPENCLAW_VERSION=2026.4.15 ARG IMAGE_VERSION=dev ARG QMD_VERSION=2.1.0 ARG MQTT_CHANNEL_VERSION=2.2.0 +ARG MQTT_BASH_EXEC_CHANNEL_VERSION=0.1.0 ARG HZG_CLI_VERSION=2.0.0 ARG CHROME_FOR_TESTING_VERSION=147.0.7727.57 ARG CLAWHUB_TOKEN @@ -165,6 +166,7 @@ RUN npm config set registry ${NPM_REGISTRY} \ && npm install -g --prefix /usr/local \ openclaw@${OPENCLAW_VERSION} \ @tobilu/qmd@${QMD_VERSION} \ + mqtt-bash-exec-channel@${MQTT_BASH_EXEC_CHANNEL_VERSION} \ huozige-web-app-cli@${HZG_CLI_VERSION} \ agent-browser \ && case "${TARGETARCH}" in \ @@ -341,6 +343,7 @@ RUN jq -n \ --arg qmd_version "${QMD_VERSION}" \ --arg node_version "${NODE_VERSION}" \ --arg mqtt_channel_version "${MQTT_CHANNEL_VERSION}" \ + --arg mqtt_bash_exec_channel_version "${MQTT_BASH_EXEC_CHANNEL_VERSION}" \ --arg hzg_cli_version "${HZG_CLI_VERSION}" \ '{ \ stateSchemaVersion: 1, \ @@ -352,6 +355,7 @@ RUN jq -n \ qmd: $qmd_version, \ node: $node_version, \ mqttChannel: $mqtt_channel_version, \ + mqttBashExecChannel: $mqtt_bash_exec_channel_version, \ huozigeWebAppCli: $hzg_cli_version, \ bundledSkills: [ \ "agent-browser-clawdbot", \ diff --git a/MANUAL_DEPLOY.md b/MANUAL_DEPLOY.md index 733e70b..6a2813a 100644 --- a/MANUAL_DEPLOY.md +++ b/MANUAL_DEPLOY.md @@ -30,7 +30,7 @@ flowchart TB ## 关于 npm 和 ClawHub 的使用说明 -- 本方案中 `OpenClaw`、`@kadbbz/mqtt-channel`、`huozige-web-app-cli` 均需要通过 npm 安装,为了避免因为网络波动带来的困扰,建议使用国内的 npm 镜像站,如 `https://www.npmmirror.com/` +- 本方案中 `OpenClaw`、`@kadbbz/mqtt-channel`、`huozige-web-app-cli`、`mqtt-bash-exec-channel` 均需要通过 npm 安装,为了避免因为网络波动带来的困扰,建议使用国内的 npm 镜像站,如 `https://www.npmmirror.com/` - 如需通过 ClawHub 安装 Skills,为了避免因为网络波动带来的困扰,建议使用国内的 ClawHub 镜像站,如 `https://cn.clawhub-mirror.com` ## 0、准备 Agent 执行器 @@ -221,7 +221,7 @@ openclaw gateway restart 2. 停止并删除旧 container。 3. 使用新 image 按原来的挂载参数、重启策略、环境变量重新创建 container。 4. 启动新 container。 -5. 执行 `doctor`、`agents list`、`logs` 检查升级结果。 +5. 执行 `doctor`、`agents list`、`logs` 检查升级结果;如果启用了 bash exec 通道,再额外检查 `mqtt-bash-exec-channel` 进程和日志。 示例命令如下: @@ -248,6 +248,8 @@ docker start enterprise-agent-platform-oc docker exec enterprise-agent-platform-oc doctor docker exec enterprise-agent-platform-oc agents list docker exec enterprise-agent-platform-oc logs +docker exec enterprise-agent-platform-oc /bin/bash -lc "ps -ef | grep '[m]qtt-bash-exec-channel'" +docker exec enterprise-agent-platform-oc /bin/bash -lc "tail -n 50 /var/platform_data/openclaw/logs/bash-exec-channel.log" ``` 说明: @@ -256,6 +258,8 @@ docker exec enterprise-agent-platform-oc logs - 只要继续挂载原来的 DATA 目录,已有 agent、workspace、日志和运行态配置都会被保留。 - 当前镜像启动时会自动识别 `/var/platform_data/platform-version.json`,并按初始化、同版本复用或升级流程处理受管文件。 - 如果某个 DATA 目录已经被更高版本 image 升级过,再用低版本 image 启动时会被拒绝,避免降级破坏数据。 +- 新 image 内置了 `mqtt-bash-exec-channel`。当 `env.json` 中同时提供 `BASH_EXEC_CHANNEL_MQTT_BROKER_URL`、`BASH_EXEC_CHANNEL_MQTT_USERNAME`、`BASH_EXEC_CHANNEL_MQTT_PASSWORD`、`BASH_EXEC_CHANNEL_REQUEST_TOPIC`、`BASH_EXEC_CHANNEL_RESPONSE_TOPIC` 时,容器启动会自动拉起该进程,并在进程退出后自动重启。 +- `docker exec ... logs` 当前查看的是 OpenClaw gateway 日志;`mqtt-bash-exec-channel` 的独立日志文件位于 `/var/platform_data/openclaw/logs/bash-exec-channel.log`。 ## 5、部署 OC 端文件 diff --git a/README.md b/README.md index 9b4477d..947ede8 100644 --- a/README.md +++ b/README.md @@ -239,6 +239,11 @@ sudo chmod 755 /var/platform_data "OC_MQTT_CHANNEL_PASSWORD": "xxx", "OC_MQTT_CHANNEL_INBOUND_TOPIC_TPL": "agents/channel/{agent-name}/inbound", "OC_MQTT_CHANNEL_OUTBOUND_TOPIC_TPL": "agents/channel/{agent-name}/outbound", + "BASH_EXEC_CHANNEL_MQTT_BROKER_URL": "mqtts://example.ala.cn-hangzhou.emqxsl.cn:8883", + "BASH_EXEC_CHANNEL_MQTT_USERNAME": "xxx", + "BASH_EXEC_CHANNEL_MQTT_PASSWORD": "xxx", + "BASH_EXEC_CHANNEL_REQUEST_TOPIC": "agents/admin/req", + "BASH_EXEC_CHANNEL_RESPONSE_TOPIC": "agents/admin/res", "S3_ACCESS_KEY_ID": "xxx", "S3_SECRET_ACCESS_KEY": "xxx", "S3_ENDPOINT_URL": "https://s3.cn-east-1.qiniucs.com", @@ -249,9 +254,9 @@ sudo chmod 755 /var/platform_data 各项配置的来源如下: -- HZG_CLI_MQTT_BROKER:步骤2中记录的访问地址 -- HZG_CLI_USERNAME、OC_MQTT_CHANNEL_USERNAME:步骤2中记录的用户名 -- HZG_CLI_PASSWORD、OC_MQTT_CHANNEL_PASSWORD:步骤2中记录的密码 +- HZG_CLI_MQTT_BROKER、OC_MQTT_CHANNEL_BROKER、BASH_EXEC_CHANNEL_MQTT_BROKER_URL:步骤2中记录的访问地址 +- HZG_CLI_USERNAME、OC_MQTT_CHANNEL_USERNAME、BASH_EXEC_CHANNEL_MQTT_USERNAME:步骤2中记录的用户名 +- HZG_CLI_PASSWORD、OC_MQTT_CHANNEL_PASSWORD、BASH_EXEC_CHANNEL_MQTT_PASSWORD:步骤2中记录的密码 - OC_OPENAI_API_KEY:如果你采用的大模型是 OpenAI 风格接口,将密钥配置到这里 - OC_ANTHROPIC_API_KEY:如果你采用的大模型是 Anthropic 风格接口,将密钥配置到这里 - S3_ACCESS_KEY_ID:步骤3中记录的 AK @@ -338,7 +343,7 @@ docker run -d \ 2. 停止并删除旧 container。 3. 使用新 image 按原来的挂载参数、重启策略、环境变量重新创建 container。 4. 启动新 container。 -5. 执行 `doctor`、`agents list`、`logs` 检查升级结果。 +5. 执行 `doctor`、`agents list`、`logs` 检查升级结果;如果启用了 bash exec 通道,再额外检查 `mqtt-bash-exec-channel` 进程和日志。 示例命令如下: @@ -365,6 +370,8 @@ docker start enterprise-agent-platform-oc docker exec enterprise-agent-platform-oc doctor docker exec enterprise-agent-platform-oc agents list docker exec enterprise-agent-platform-oc logs +docker exec enterprise-agent-platform-oc /bin/bash -lc "ps -ef | grep '[m]qtt-bash-exec-channel'" +docker exec enterprise-agent-platform-oc /bin/bash -lc "tail -n 50 /var/platform_data/openclaw/logs/bash-exec-channel.log" ``` 说明: @@ -373,6 +380,7 @@ docker exec enterprise-agent-platform-oc logs - 只要继续挂载原来的 DATA 目录,已有 agent、workspace、日志和运行态配置都会被保留。 - 当前镜像启动时会自动识别 `/var/platform_data/platform-version.json`,并按初始化、同版本复用或升级流程处理受管文件。 - 如果某个 DATA 目录已经被更高版本 image 升级过,再用低版本 image 启动时会被拒绝,避免降级破坏数据。 +- 新 image 内置了 `mqtt-bash-exec-channel`。当 `env.json` 中同时提供 `BASH_EXEC_CHANNEL_MQTT_BROKER_URL`、`BASH_EXEC_CHANNEL_MQTT_USERNAME`、`BASH_EXEC_CHANNEL_MQTT_PASSWORD`、`BASH_EXEC_CHANNEL_REQUEST_TOPIC`、`BASH_EXEC_CHANNEL_RESPONSE_TOPIC` 时,容器启动会自动拉起该进程,并在进程退出后自动重启。 ### 4.4 创建 Agent(数字员工) @@ -405,6 +413,7 @@ Agent 执行器内置了一些创建的操作命令,均可通过 `docker exec` `logs` 命令:查看执行器的运行日志 - logs:查看 OpenClaw gateway 的运行日志;如果需要看容器入口脚本或容器级重启信息,再配合 `docker logs ` 一起看。 +- `mqtt-bash-exec-channel` 的独立日志文件位于 `/var/platform_data/openclaw/logs/bash-exec-channel.log`,不通过 `logs` 子命令输出。 `doctor` 命令:用于执行配置自检,确保配置层面的完整性 @@ -416,6 +425,7 @@ Agent 执行器基于 `OpenClaw`,内置了以下常用组件(CLI 程序 / AP - QMD:用于对 Memory 进行语义检索,提升效率 - Agent-Browser:基于 Chrome 的无头浏览器 +- mqtt-bash-exec-channel:基于 MQTT 的 bash 命令执行通道,按 `env.json` 中的 `BASH_EXEC_CHANNEL_*` 配置自动启动 - Tesseract-ocr:类库,用于 OCR 你和 AI 都可以在使用过程中通过 `npm` 或 `pip` 安装并运行 JavaScript/TypeScript 或 Python 的脚本。 @@ -443,8 +453,10 @@ Agent 执行器基于 `OpenClaw`,内置了以下常用组件(CLI 程序 / AP - MQTT_BROKER_USER : 步骤2中记录的用户名 - MQTT_BROKER_PASSWORD : 步骤2中记录的密码 - OPENCLAW_CLIENT_NAME : 你喜欢的名字,会出现在日志中,如使用你的公司名 -- MQTT_RES_CHANNEL_NAME : agents/cli/req 和步骤4中 `HZG_CLI_REQUEST_TOPIC` 一致 -- MQTT_REQ_CHANNEL_NAME : agents/cli/res 和步骤4中 `HZG_CLI_RESPONSE_TOPIC` 一致 +- MQTT_RES_CHANNEL_NAME : agents/cli/res 和步骤4中 `HZG_CLI_REQUEST_TOPIC` 一致 +- MQTT_REQ_CHANNEL_NAME : agents/cli/req 和步骤4中 `HZG_CLI_RESPONSE_TOPIC` 一致 +- MQTT_ADMIN_RES_CHANNEL_NAME : agents/admin/res 和步骤4中 `BASH_EXEC_CHANNEL_RESPONSE_TOPIC` 一致 +- MQTT_ADMIN_REQ_CHANNEL_NAME : agents/admin/req 和步骤4中 `BASH_EXEC_CHANNEL_REQUEST_TOPIC` 一致 - S3_ENDPOINT : 步骤3中记录的 S3 Endpoint - S3_REGION :步骤3中记录的 Region - S3_AK : 步骤3中记录的 AK diff --git a/env.json b/env.json index b5a6814..f3a5f29 100644 --- a/env.json +++ b/env.json @@ -15,5 +15,10 @@ "S3_ACCESS_KEY_ID": "xxx", "S3_SECRET_ACCESS_KEY": "xxx", "S3_ENDPOINT_URL": "https://s3.cn-east-1.qiniucs.com", - "S3_REGION": "cn-east-1" + "S3_REGION": "cn-east-1", + "BASH_EXEC_CHANNEL_MQTT_BROKER_URL": "mqtts://example.ala.cn-hangzhou.emqxsl.cn:8883", + "BASH_EXEC_CHANNEL_MQTT_USERNAME": "xxx", + "BASH_EXEC_CHANNEL_MQTT_PASSWORD": "xxx", + "BASH_EXEC_CHANNEL_REQUEST_TOPIC": "agents/admin/req", + "BASH_EXEC_CHANNEL_RESPONSE_TOPIC": "agents/admin/res" } diff --git a/platform_home/scripts/docker-entrypoint.sh b/platform_home/scripts/docker-entrypoint.sh index 02c76f0..fa2fe66 100644 --- a/platform_home/scripts/docker-entrypoint.sh +++ b/platform_home/scripts/docker-entrypoint.sh @@ -6,6 +6,7 @@ init_runtime_context persisted_runtime_config_path="${platform_runtime_config_dir}/openclaw.json" gateway_supervisor_child_pid="" gateway_supervisor_shutdown_requested="0" +bash_exec_channel_supervisor_pid="" cleanup_gateway_supervisor_state() { rm -f "${gateway_supervisor_child_pid_file}" "${gateway_restart_request_file}" @@ -20,6 +21,104 @@ signal_gateway_supervisor_child() { handle_gateway_supervisor_shutdown_signal() { gateway_supervisor_shutdown_requested="1" signal_gateway_supervisor_child + stop_bash_exec_channel_supervisor +} + +bash_exec_channel_env_has_any_value() { + local env_name + + for env_name in \ + BASH_EXEC_CHANNEL_MQTT_BROKER_URL \ + BASH_EXEC_CHANNEL_MQTT_USERNAME \ + BASH_EXEC_CHANNEL_MQTT_PASSWORD \ + BASH_EXEC_CHANNEL_REQUEST_TOPIC \ + BASH_EXEC_CHANNEL_RESPONSE_TOPIC; do + [[ -n "${!env_name:-}" ]] && return 0 + done + + return 1 +} + +bash_exec_channel_env_is_complete() { + local env_name + local missing_names=() + + for env_name in \ + BASH_EXEC_CHANNEL_MQTT_BROKER_URL \ + BASH_EXEC_CHANNEL_MQTT_USERNAME \ + BASH_EXEC_CHANNEL_MQTT_PASSWORD \ + BASH_EXEC_CHANNEL_REQUEST_TOPIC \ + BASH_EXEC_CHANNEL_RESPONSE_TOPIC; do + if [[ -z "${!env_name:-}" ]]; then + missing_names+=("${env_name}") + fi + done + + if [[ "${#missing_names[@]}" -gt 0 ]]; then + echo "Skipping mqtt-bash-exec-channel because required env vars are missing: ${missing_names[*]}" >&2 + return 1 + fi + + return 0 +} + +run_bash_exec_channel_supervisor() { + local child_pid="" + local child_exit_code=0 + local shutdown_requested="0" + + mkdir -p "$(dirname "${bash_exec_channel_supervisor_child_pid_file}")" "$(dirname "${bash_exec_channel_log_file}")" + rm -f "${bash_exec_channel_supervisor_child_pid_file}" + + trap 'shutdown_requested="1"; if [[ -n "${child_pid}" ]]; then kill -TERM "${child_pid}" 2>/dev/null || true; fi' TERM INT + + while true; do + printf '[%s] Starting mqtt-bash-exec-channel.\n' "$(date -u +"%Y-%m-%dT%H:%M:%SZ")" >> "${bash_exec_channel_log_file}" + mqtt-bash-exec-channel >> "${bash_exec_channel_log_file}" 2>&1 & + child_pid="$!" + printf '%s\n' "${child_pid}" > "${bash_exec_channel_supervisor_child_pid_file}" + + set +e + wait "${child_pid}" + child_exit_code=$? + set -e + + child_pid="" + rm -f "${bash_exec_channel_supervisor_child_pid_file}" + + if [[ "${shutdown_requested}" == "1" ]]; then + exit "${child_exit_code}" + fi + + printf '[%s] mqtt-bash-exec-channel exited with code %s. Restarting in 1 second.\n' \ + "$(date -u +"%Y-%m-%dT%H:%M:%SZ")" \ + "${child_exit_code}" >> "${bash_exec_channel_log_file}" + sleep 1 + done +} + +start_bash_exec_channel_supervisor() { + if ! bash_exec_channel_env_has_any_value; then + return 0 + fi + + if ! bash_exec_channel_env_is_complete; then + return 0 + fi + + stop_bash_exec_channel_supervisor + run_bash_exec_channel_supervisor & + bash_exec_channel_supervisor_pid="$!" +} + +stop_bash_exec_channel_supervisor() { + if [[ -n "${bash_exec_channel_supervisor_pid}" ]]; then + kill -TERM "${bash_exec_channel_supervisor_pid}" 2>/dev/null || true + wait "${bash_exec_channel_supervisor_pid}" 2>/dev/null || true + bash_exec_channel_supervisor_pid="" + fi + + rm -f "${bash_exec_channel_supervisor_child_pid_file}" } run_foreground_gateway_supervisor() { @@ -30,6 +129,7 @@ run_foreground_gateway_supervisor() { while true; do reload_runtime_state_from_disk + start_bash_exec_channel_supervisor "$@" & gateway_supervisor_child_pid="$!" printf '%s\n' "${gateway_supervisor_child_pid}" > "${gateway_supervisor_child_pid_file}" @@ -44,15 +144,18 @@ run_foreground_gateway_supervisor() { if [[ "${gateway_supervisor_shutdown_requested}" == "1" ]]; then cleanup_gateway_supervisor_state + stop_bash_exec_channel_supervisor exit "${child_exit_code}" fi if [[ -f "${gateway_restart_request_file}" ]]; then rm -f "${gateway_restart_request_file}" + stop_bash_exec_channel_supervisor continue fi cleanup_gateway_supervisor_state + stop_bash_exec_channel_supervisor exit "${child_exit_code}" done } diff --git a/platform_home/scripts/platform-common.sh b/platform_home/scripts/platform-common.sh index 4bda02b..9e89456 100644 --- a/platform_home/scripts/platform-common.sh +++ b/platform_home/scripts/platform-common.sh @@ -35,6 +35,8 @@ init_runtime_context() { platform_runtime_home_parent="$(dirname "${platform_runtime_home}")" gateway_restart_request_file="${PLATFORM_GATEWAY_RESTART_REQUEST_FILE:-${platform_runtime_config_dir}/gateway-restart.request}" gateway_supervisor_child_pid_file="${PLATFORM_GATEWAY_SUPERVISOR_CHILD_PID_FILE:-${platform_runtime_config_dir}/gateway-supervisor-child.pid}" + bash_exec_channel_supervisor_child_pid_file="${PLATFORM_BASH_EXEC_CHANNEL_SUPERVISOR_CHILD_PID_FILE:-${platform_runtime_config_dir}/bash-exec-channel-child.pid}" + bash_exec_channel_log_file="${PLATFORM_BASH_EXEC_CHANNEL_LOG_FILE:-${logs_root}/bash-exec-channel.log}" } reexec_as_platform_user_if_needed() { diff --git a/test/docker_test_common.py b/test/docker_test_common.py index 42ebc82..dddafa5 100644 --- a/test/docker_test_common.py +++ b/test/docker_test_common.py @@ -850,9 +850,275 @@ def mqtt_roundtrip_step( return result +def bash_exec_roundtrip_step( + test_run: TestRun, + name: str, + data_path: Path, + timeout_seconds: int, +) -> CommandResult: + started = time.time() + timeout_seconds = max(timeout_seconds, 30) + command_line = f"host mqtt bash-exec roundtrip via {data_path / 'env.json'}" + + if paho_mqtt is None: + result = synthetic_result( + command_line=command_line, + stderr="Missing Python dependency paho-mqtt. Install it with: pip install -r test/requirements.txt", + exit_code=1, + duration_ms=0, + ) + test_run.add_result(name, "FAIL", result) + return result + + env_path = data_path / "env.json" + if not env_path.exists(): + result = synthetic_result( + command_line=command_line, + stderr=f"Missing env.json: {env_path}", + exit_code=1, + duration_ms=0, + ) + test_run.add_result(name, "FAIL", result) + return result + + try: + env_data = json.loads(env_path.read_text(encoding="utf-8")) + except Exception as exc: + result = synthetic_result( + command_line=command_line, + stderr=f"Failed to parse env.json: {exc}", + exit_code=1, + duration_ms=0, + ) + test_run.add_result(name, "FAIL", result) + return result + + broker_url = str(env_data.get("BASH_EXEC_CHANNEL_MQTT_BROKER_URL", "")).strip() + username = str(env_data.get("BASH_EXEC_CHANNEL_MQTT_USERNAME", "")).strip() + password = str(env_data.get("BASH_EXEC_CHANNEL_MQTT_PASSWORD", "")) + request_topic = str(env_data.get("BASH_EXEC_CHANNEL_REQUEST_TOPIC", "")).strip() + response_topic = str(env_data.get("BASH_EXEC_CHANNEL_RESPONSE_TOPIC", "")).strip() + parsed_broker = urlparse(broker_url) + host = parsed_broker.hostname or "" + scheme = parsed_broker.scheme or "" + port = parsed_broker.port or (8883 if scheme == "mqtts" else 1883) + + if scheme not in {"mqtt", "mqtts"} or not host: + result = synthetic_result( + command_line=command_line, + stderr=f"Invalid broker URL in env.json: {broker_url}", + exit_code=1, + duration_ms=0, + ) + test_run.add_result(name, "FAIL", result) + return result + + if not username or not password: + result = synthetic_result( + command_line=command_line, + stderr="Missing BASH_EXEC_CHANNEL_MQTT_USERNAME or BASH_EXEC_CHANNEL_MQTT_PASSWORD in env.json.", + exit_code=1, + duration_ms=0, + ) + test_run.add_result(name, "FAIL", result) + return result + + if not request_topic or not response_topic: + result = synthetic_result( + command_line=command_line, + stderr="Missing BASH_EXEC_CHANNEL_REQUEST_TOPIC or BASH_EXEC_CHANNEL_RESPONSE_TOPIC in env.json.", + exit_code=1, + duration_ms=0, + ) + test_run.add_result(name, "FAIL", result) + return result + + thread_id = f"bash-exec-test-{int(time.time())}-{os.getpid()}" + published_payload = { + "input": "echo ok", + "threadId": thread_id, + } + diagnostics: dict[str, object] = { + "brokerUrl": broker_url, + "host": host, + "port": port, + "requestTopic": request_topic, + "responseTopic": response_topic, + "threadId": thread_id, + "publishedPayload": published_payload, + "publishAttempts": [], + "receivedMessages": [], + } + + lock = threading.Lock() + done_event = threading.Event() + subscribed_event = threading.Event() + outcome: dict[str, object] = {"status": "TIMEOUT"} + publish_attempts: list[dict[str, object]] = [] + if hasattr(paho_mqtt, "CallbackAPIVersion"): + client = paho_mqtt.Client( + paho_mqtt.CallbackAPIVersion.VERSION2, + client_id=f"bash-exec-test-{int(time.time())}", + clean_session=True, + ) + else: + client = paho_mqtt.Client(client_id=f"bash-exec-test-{int(time.time())}", clean_session=True) + client.username_pw_set(username, password) + if scheme == "mqtts": + client.tls_set_context(ssl.create_default_context()) + + def settle(status: str, error: str = "", matched_message: Optional[dict[str, object]] = None) -> None: + with lock: + if done_event.is_set(): + return + outcome["status"] = status + if error: + outcome["error"] = error + if matched_message is not None: + outcome["matchedMessage"] = matched_message + done_event.set() + + def publish_request() -> None: + publish_info = client.publish(request_topic, json.dumps(published_payload, ensure_ascii=False), qos=1) + attempt_record = { + "at": time.strftime("%Y-%m-%dT%H:%M:%S%z"), + "rc": getattr(publish_info, "rc", None), + } + publish_attempts.append(attempt_record) + diagnostics["publishAttempts"] = publish_attempts + if getattr(publish_info, "rc", 0) != 0: + settle("FAIL", f"MQTT publish failed with code: {publish_info.rc}") + + def on_connect(_client, _userdata, _flags, reason_code, _properties=None): + diagnostics["connectReasonCode"] = str(reason_code) + rc = getattr(reason_code, "value", reason_code) + if rc != 0: + settle("FAIL", f"MQTT connect failed with reason code: {reason_code}") + return + subscribe_result, _mid = client.subscribe(response_topic, qos=1) + if subscribe_result != 0: + settle("FAIL", f"MQTT subscribe failed with code: {subscribe_result}") + + def on_subscribe(_client, _userdata, _mid, _granted_qos, _properties=None): + diagnostics["subscribed"] = True + subscribed_event.set() + + def on_message(_client, _userdata, msg): + raw_text = msg.payload.decode("utf-8", errors="replace") + parsed_payload = None + try: + parsed_payload = json.loads(raw_text) + except Exception: + parsed_payload = None + + message_record = { + "topic": msg.topic, + "rawText": raw_text, + "parsed": parsed_payload, + } + diagnostics["receivedMessages"].append(message_record) + + if msg.topic != response_topic: + return + if not isinstance(parsed_payload, dict): + return + if str(parsed_payload.get("threadId", "")).strip() != thread_id: + return + + output_text = str(parsed_payload.get("output", "")) + normalized_output = output_text.strip() + matched_message = { + "topic": msg.topic, + "threadId": str(parsed_payload.get("threadId", "")), + "output": output_text, + "normalizedOutput": normalized_output, + } + if normalized_output == "ok": + settle("PASS", matched_message=matched_message) + return + + settle( + "FAIL", + error=f"Received bash exec response for matching threadId but output was not ok: {normalized_output!r}", + matched_message=matched_message, + ) + + def on_disconnect(_client, _userdata, *callback_args): + reason_code = callback_args[-2] if len(callback_args) >= 2 else (callback_args[0] if callback_args else 0) + rc = getattr(reason_code, "value", reason_code) + if not done_event.is_set() and rc not in (0, None): + settle("FAIL", f"MQTT disconnected before bash exec reply with reason code: {reason_code}") + + client.on_connect = on_connect + client.on_subscribe = on_subscribe + client.on_message = on_message + client.on_disconnect = on_disconnect + + try: + client.connect(host, port, keepalive=60) + client.loop_start() + completed_in_time = False + deadline = time.time() + timeout_seconds + last_publish_at = 0.0 + while time.time() < deadline: + if done_event.wait(timeout=0.2): + completed_in_time = True + break + if subscribed_event.is_set() and (time.time() - last_publish_at) >= 2.0: + publish_request() + last_publish_at = time.time() + if not completed_in_time and done_event.is_set(): + completed_in_time = True + except Exception as exc: + settle("FAIL", f"Host-side bash exec MQTT roundtrip raised exception: {exc}") + completed_in_time = True + finally: + try: + client.loop_stop() + except Exception: + pass + try: + client.disconnect() + except Exception: + pass + + if not completed_in_time: + settle("TIMEOUT", f"Timed out waiting {timeout_seconds}s for bash exec MQTT reply.") + + status = str(outcome.get("status", "FAIL")) + error_text = str(outcome.get("error", "") or "") + matched_message = outcome.get("matchedMessage") + if matched_message is not None: + diagnostics["matchedMessage"] = matched_message + if error_text: + diagnostics["error"] = error_text + + result = synthetic_result( + command_line=command_line, + stdout=json.dumps(diagnostics, ensure_ascii=False, indent=2), + stderr=error_text, + exit_code=0 if status == "PASS" else (124 if status == "TIMEOUT" else 1), + timed_out=(status == "TIMEOUT"), + duration_ms=round((time.time() - started) * 1000), + ) + test_run.add_result(name, status, result) + return result + + def quick_phase(test_run: TestRun, phase_name: str, container: str, agent_id: str, gateway_timeout_seconds: int = 240) -> None: wait_for_container_running(test_run, f"{phase_name}_container_running", container, timeout_seconds=120) wait_for_gateway_ready(test_run, f"{phase_name}_gateway_ready", container, timeout_seconds=gateway_timeout_seconds) + invoke_test_step( + test_run, + f"{phase_name}_bash_exec_channel_running", + docker_exec_args( + container, + "/bin/bash", + "-lc", + "command -v mqtt-bash-exec-channel >/dev/null && ps -ef | grep '[m]qtt-bash-exec-channel'", + ), + timeout_seconds=60, + ) invoke_test_step(test_run, f"{phase_name}_doctor", docker_exec_args(container, "doctor"), timeout_seconds=90) invoke_test_step( @@ -972,6 +1238,12 @@ def run_full_test(data_path: str, container_name: str = "", image: str = "", out quick_phase(test_run, "phase1", resolved_container, agent_one) invoke_test_step(test_run, "docker_restart_before_phase2", ["docker", "restart", resolved_container], timeout_seconds=120) quick_phase(test_run, "phase2", resolved_container, agent_two) + bash_exec_roundtrip_step( + test_run, + "bash_exec_channel_roundtrip", + data_path_obj, + 90, + ) invoke_test_step(test_run, "ai_agents_add", docker_exec_args(resolved_container, "agents", "add", ai_agent, "--no-restart"), timeout_seconds=120) invoke_test_step(test_run, "docker_restart_before_ai", ["docker", "restart", resolved_container], timeout_seconds=120) -- Gitee From 3cbbe116dc63e570a4e1ed844fcd5b0edab287ef Mon Sep 17 00:00:00 2001 From: NingWei Date: Thu, 30 Apr 2026 18:23:51 +0800 Subject: [PATCH 2/6] =?UTF-8?q?=E5=A2=9E=E5=8A=A0MQTT=E7=AE=A1=E7=90=86?= =?UTF-8?q?=E7=95=8C=E9=9D=A2=EF=BC=8C=E5=AE=8C=E5=96=84restart=E5=91=BD?= =?UTF-8?q?=E4=BB=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DEV_GUIDE.md | 26 ++++++------- Dockerfile | 6 ++- MANUAL_DEPLOY.md | 2 + README.md | 12 ++++-- platform_home/scripts/agents.sh | 48 +++++++++++++++++++----- platform_home/scripts/platform-common.sh | 22 +++++++---- platform_home/scripts/restart.sh | 20 ++++++++++ test/docker_test_common.py | 8 +++- 8 files changed, 105 insertions(+), 39 deletions(-) create mode 100644 platform_home/scripts/restart.sh diff --git a/DEV_GUIDE.md b/DEV_GUIDE.md index 9c8a913..8f25649 100644 --- a/DEV_GUIDE.md +++ b/DEV_GUIDE.md @@ -222,16 +222,17 @@ agents add [--no-restart] agents list agents info agents delete [--no-restart] -agents inject [agent-name] +agents inject [agent-name] [--no-restart] ``` 说明: - 只管理通过 `agents` 命令创建的受管 agent。 -- `agents add` 会创建 workspace、写入 managed 配置、写入模板文件。 +- `agents add` 会创建 workspace、写入 managed 配置、完成模板注入,并在这些动作全部完成后只做一次 gateway 重启。 - `agents delete` 会删除 managed 配置、清理 managed workspace 文件。 - `agents inject` 会用 templates 中的三个模板文件覆盖目标 workspace 下的 `AGENTS.md`、`SOUL.md` 和 `USER.md`。 -- `agents add/delete/inject` 执行完成后,需要在宿主机执行一次 `docker restart "${CONTAINER_NAME}"` 使变更生效。 +- `agents add/delete/inject` 默认会在容器内自动重启 OpenClaw gateway 使变更生效。 +- 如果只想改配置而不立即重启,可为 `agents add/delete/inject` 传 `--no-restart`。 - `agents list` 当前输出列为 `AGENT_ID / ACCOUNT_ID / WORKSPACE / INBOUND / OUTBOUND`。 宿主机常用调用方式: @@ -239,12 +240,10 @@ agents inject [agent-name] ```bash docker exec "${CONTAINER_NAME}" agents list docker exec "${CONTAINER_NAME}" agents add demo -docker restart "${CONTAINER_NAME}" docker exec "${CONTAINER_NAME}" agents info demo docker exec "${CONTAINER_NAME}" agents inject demo -docker restart "${CONTAINER_NAME}" docker exec "${CONTAINER_NAME}" agents delete demo -docker restart "${CONTAINER_NAME}" +docker exec "${CONTAINER_NAME}" restart ``` ### 6.2 `doctor` @@ -279,22 +278,21 @@ docker exec "${CONTAINER_NAME}" /bin/bash -lc "tail -n 50 /var/platform_data/ope - `logs` 包装命令当前面向 OpenClaw gateway 日志。 - `mqtt-bash-exec-channel` 的独立日志文件固定为 `/var/platform_data/openclaw/logs/bash-exec-channel.log`。 -### 6.4 `docker restart` +### 6.4 `restart` -当前实现中不再提供容器内 `restart` 命令。 +容器内提供 `restart` 命令,用于通过 entrypoint supervisor 热重启 OpenClaw gateway,而不重建整个 container。 -需要重新加载 `env.json`、`config.json`、`openclaw.json` 初始化结果或 workspace 模板文件时,统一在宿主机执行: +需要重新加载 `env.json`、`config.json`、`openclaw.json` 初始化结果或 workspace 模板文件时,可在容器内执行: ```bash -docker restart "${CONTAINER_NAME}" +docker exec "${CONTAINER_NAME}" restart ``` 适用场景: -- 执行完 `agents add` -- 执行完 `agents delete` -- 执行完 `agents inject` +- 执行了 `agents add/delete --no-restart` - 手工修改了 `env.json` 或 `config.json` +- 希望在不重建整个 container 的前提下重新加载 gateway 进程 ### 6.5 `huozige-web-app-cli` @@ -321,7 +319,7 @@ docker exec "${CONTAINER_NAME}" agent-browser --session smoke close 1. `agents` 2. `doctor` 3. `logs` -4. `docker restart ` +4. `restart` 5. `openclaw ...` ## 7. 编译流程 diff --git a/Dockerfile b/Dockerfile index 4e6622d..b17c947 100644 --- a/Dockerfile +++ b/Dockerfile @@ -9,7 +9,7 @@ ARG OPENCLAW_VERSION=2026.4.15 ARG IMAGE_VERSION=dev ARG QMD_VERSION=2.1.0 ARG MQTT_CHANNEL_VERSION=2.2.0 -ARG MQTT_BASH_EXEC_CHANNEL_VERSION=0.1.0 +ARG MQTT_BASH_EXEC_CHANNEL_VERSION=0.2.0 ARG HZG_CLI_VERSION=2.0.0 ARG CHROME_FOR_TESTING_VERSION=147.0.7727.57 ARG CLAWHUB_TOKEN @@ -334,6 +334,7 @@ COPY platform_home/scripts/platform-common.sh /usr/local/lib/platform/common.sh COPY platform_home/scripts/agents.sh /usr/local/bin/agents COPY platform_home/scripts/doctor.sh /usr/local/bin/doctor COPY platform_home/scripts/logs.sh /usr/local/bin/logs +COPY platform_home/scripts/restart.sh /usr/local/bin/restart COPY platform_home/scripts/docker-entrypoint.sh /usr/local/bin/docker-entrypoint.sh # Layer 11: generate image metadata for version-aware runtime upgrades. @@ -368,12 +369,13 @@ RUN jq -n \ # Layer 12: normalize line endings and set execute bits on bundled scripts. RUN sed -i 's/\r$//' /usr/local/lib/platform/common.sh \ - && sed -i 's/\r$//' /usr/local/bin/agents /usr/local/bin/doctor /usr/local/bin/logs /usr/local/bin/docker-entrypoint.sh \ + && sed -i 's/\r$//' /usr/local/bin/agents /usr/local/bin/doctor /usr/local/bin/logs /usr/local/bin/restart /usr/local/bin/docker-entrypoint.sh \ && find ${BUNDLED_FILES_ROOT}/platform_home/scripts -type f -name '*.sh' -exec sed -i 's/\r$//' {} + \ && chmod +x /usr/local/lib/platform/common.sh \ && chmod +x /usr/local/bin/agents \ && chmod +x /usr/local/bin/doctor \ && chmod +x /usr/local/bin/logs \ + && chmod +x /usr/local/bin/restart \ && chmod +x /usr/local/bin/docker-entrypoint.sh # Health checks depend directly on the gateway endpoint. diff --git a/MANUAL_DEPLOY.md b/MANUAL_DEPLOY.md index 6a2813a..2da5f4a 100644 --- a/MANUAL_DEPLOY.md +++ b/MANUAL_DEPLOY.md @@ -248,6 +248,8 @@ docker start enterprise-agent-platform-oc docker exec enterprise-agent-platform-oc doctor docker exec enterprise-agent-platform-oc agents list docker exec enterprise-agent-platform-oc logs +docker exec enterprise-agent-platform-oc restart +docker exec enterprise-agent-platform-oc /bin/bash -lc "tail -n 50 /var/platform_data/openclaw/logs/bash-exec-channel.log" docker exec enterprise-agent-platform-oc /bin/bash -lc "ps -ef | grep '[m]qtt-bash-exec-channel'" docker exec enterprise-agent-platform-oc /bin/bash -lc "tail -n 50 /var/platform_data/openclaw/logs/bash-exec-channel.log" ``` diff --git a/README.md b/README.md index 947ede8..ee37b8e 100644 --- a/README.md +++ b/README.md @@ -389,7 +389,7 @@ docker exec enterprise-agent-platform-oc /bin/bash -lc "tail -n 50 /var/platform ```bash docker exec enterprise-agent-platform-oc agents add tester docker exec enterprise-agent-platform-oc agents inject tester -docker restart enterprise-agent-platform-oc +docker exec enterprise-agent-platform-oc restart ``` 执行 `agents list` 命令,获取并记录下新创建 agent 的 `inbound` 和 `outbound` topic @@ -404,11 +404,15 @@ Agent 执行器内置了一些创建的操作命令,均可通过 `docker exec` `agents` 命令:用于操作实际执行操作的agent -- agents add :创建一个 agent,并更新运行态配置与 workspace 文件;执行完成后,请手工执行 `docker restart ` 使变更生效。 +- agents add :创建一个 agent,完成运行态配置、binding、channel 和 workspace 模板注入;默认会在这些动作全部完成后,在容器内自动重启一次 OpenClaw gateway 使变更生效。如只想改配置不立即重启,可改用 `agents add --no-restart`。 - agents list:列出所有 agent 的信息,会输出 `AGENT_ID / ACCOUNT_ID / WORKSPACE / INBOUND / OUTBOUND` 五列,适合在注册到业务系统前先核对 topic 和 workspace。 -- agents delete :删除一个 agent,并更新运行态配置;执行完成后,请手工执行 `docker restart ` 使变更生效。 +- agents delete :删除一个 agent,并更新运行态配置;默认会在容器内自动重启 OpenClaw gateway 使变更生效。如只想改配置不立即重启,可改用 `agents delete --no-restart`。 - agents info : 获取指定 agent 的信息 -- agents inject [agent_name]:为指定的 agent 更新 AGENTS.md、SOUL.md 和 USER.md;执行完成后,请手工执行 `docker restart ` 使变更生效。 +- agents inject [agent_name]:为指定的 agent 更新 AGENTS.md、SOUL.md 和 USER.md;默认会在容器内自动重启 OpenClaw gateway 使变更生效,也支持 `agents inject [agent_name] --no-restart`。 + +`restart` 命令:用于在容器内单独重启 OpenClaw gateway + +- restart:通过容器入口脚本的 supervisor 热重启 gateway 进程,不重建整个 container;默认最长等待 240 秒,适合在手工修改 `env.json`、`config.json` 或使用了 `--no-restart` 选项后执行。 `logs` 命令:查看执行器的运行日志 diff --git a/platform_home/scripts/agents.sh b/platform_home/scripts/agents.sh index bdf4f4c..9f22e3e 100644 --- a/platform_home/scripts/agents.sh +++ b/platform_home/scripts/agents.sh @@ -15,7 +15,7 @@ Usage: agents list agents info agents delete [--no-restart] - agents inject [agent-name] + agents inject [agent-name] [--no-restart] Notes: - This command only manages agents created through `agents`. @@ -208,7 +208,17 @@ cmd_add() { validate_agents_state_file "${desired_state_file}" || fail "Generated invalid managed state for ${agent_id}" if managed_agent_exists "${previous_state_file}" "${agent_id}"; then - commit_state_transition "${previous_state_file}" "${desired_state_file}" "${restart_gateway}" || fail "Failed to update managed agent: ${agent_id}" + commit_state_transition "${previous_state_file}" "${desired_state_file}" "no" || fail "Failed to update managed agent: ${agent_id}" + if ! inject_single_agent "${desired_state_file}" "${agent_id}"; then + rm -f "${previous_state_file}" "${desired_state_file}" + fail "Failed to inject managed workspace files for ${agent_id}" + fi + if [[ "${restart_gateway}" == "yes" ]]; then + if ! restart_gateway_if_running; then + rm -f "${previous_state_file}" "${desired_state_file}" + fail "Failed to restart gateway after updating managed agent: ${agent_id}" + fi + fi rm -f "${previous_state_file}" "${desired_state_file}" echo "Added managed agent ${agent_id}" return 0 @@ -234,10 +244,10 @@ cmd_add() { fail "Failed to persist managed agent config: ${agent_id}" fi - if ! write_workspace_managed_files "${workspace_path}"; then + if ! inject_single_agent "${desired_state_file}" "${agent_id}"; then rollback_native_managed_agent_add "${agent_id}" "${workspace_path}" "${config_backup_file}" rm -f "${previous_state_file}" "${desired_state_file}" "${config_backup_file}" "${native_add_output_file}" - fail "Failed to write managed workspace files for ${agent_id}" + fail "Failed to inject managed workspace files for ${agent_id}" fi if [[ "${restart_gateway}" == "yes" ]]; then @@ -359,7 +369,24 @@ cmd_delete() { } cmd_inject() { - local state_file agent_id="${1:-}" + local state_file agent_id="" + local restart_gateway="yes" + + while [[ $# -gt 0 ]]; do + case "$1" in + --no-restart) + restart_gateway="no" + shift + ;; + *) + if [[ -n "${agent_id}" ]]; then + fail "agents inject accepts at most one optional " + fi + agent_id="$1" + shift + ;; + esac + done state_file="$(mktemp)" create_temp_state "${state_file}" @@ -372,9 +399,11 @@ cmd_inject() { echo "Replaced AGENTS.md, SOUL.md and USER.md for all managed agents" fi - if ! restart_gateway_if_running; then - rm -f "${state_file}" - fail "Failed to restart gateway after injecting workspace templates." + if [[ "${restart_gateway}" == "yes" ]]; then + if ! restart_gateway_if_running; then + rm -f "${state_file}" + fail "Failed to restart gateway after injecting workspace templates." + fi fi rm -f "${state_file}" @@ -418,8 +447,7 @@ main() { cmd_delete "$@" ;; inject) - [[ $# -le 1 ]] || fail "agents inject accepts at most one optional " - cmd_inject "${1:-}" + cmd_inject "$@" ;; help|-h|--help) usage diff --git a/platform_home/scripts/platform-common.sh b/platform_home/scripts/platform-common.sh index 9e89456..29ef0e1 100644 --- a/platform_home/scripts/platform-common.sh +++ b/platform_home/scripts/platform-common.sh @@ -1484,6 +1484,8 @@ live_binding_exists() { request_foreground_gateway_restart() { local current_child_pid="" local restarted_child_pid="" + local restart_timeout_seconds="${PLATFORM_GATEWAY_RESTART_TIMEOUT_SEC:-240}" + local deadline_epoch=0 mkdir -p "$(dirname "${gateway_restart_request_file}")" : > "${gateway_restart_request_file}" @@ -1509,7 +1511,13 @@ request_foreground_gateway_restart() { return 1 } - for _ in $(seq 1 60); do + if ! [[ "${restart_timeout_seconds}" =~ ^[0-9]+$ ]] || [[ "${restart_timeout_seconds}" -le 0 ]]; then + restart_timeout_seconds=240 + fi + + deadline_epoch=$(( $(date +%s) + restart_timeout_seconds )) + + while [[ "$(date +%s)" -lt "${deadline_epoch}" ]]; do sleep 1 if [[ -r "${gateway_supervisor_child_pid_file}" ]]; then @@ -1528,17 +1536,17 @@ request_foreground_gateway_restart() { fi done - echo "Timed out waiting for foreground gateway restart." >&2 + echo "Timed out waiting ${restart_timeout_seconds}s for foreground gateway restart." >&2 return 1 } restart_gateway_if_running() { - if [[ -f "/.dockerenv" ]]; then - echo "Runtime state updated. Restart the container with 'docker restart ' to apply changes." >&2 - return 0 - fi - if is_gateway_running; then + if [[ -f "/.dockerenv" ]]; then + request_foreground_gateway_restart + return $? + fi + echo "Runtime state updated. Restart the runtime process externally to apply changes." >&2 else echo "Platform gateway is not running; skipped restart." >&2 diff --git a/platform_home/scripts/restart.sh b/platform_home/scripts/restart.sh new file mode 100644 index 0000000..a6b8a0c --- /dev/null +++ b/platform_home/scripts/restart.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash +set -euo pipefail + +source /usr/local/lib/platform/common.sh +init_runtime_context +reexec_as_platform_user_if_needed "$@" +export_openclaw_runtime_env + +if [[ $# -gt 0 ]]; then + echo "Usage: restart" >&2 + exit 1 +fi + +if request_foreground_gateway_restart; then + echo "Gateway restarted." + exit 0 +fi + +echo "Gateway restart failed." >&2 +exit 1 diff --git a/test/docker_test_common.py b/test/docker_test_common.py index dddafa5..c923029 100644 --- a/test/docker_test_common.py +++ b/test/docker_test_common.py @@ -1148,13 +1148,15 @@ def quick_phase(test_run: TestRun, phase_name: str, container: str, agent_id: st docker_exec_args(container, "/bin/bash", "-lc", template_validation_script(agent_id)), timeout_seconds=60, ) - invoke_test_step(test_run, f"{phase_name}_agents_inject", docker_exec_args(container, "agents", "inject", agent_id), timeout_seconds=90) + invoke_test_step(test_run, f"{phase_name}_agents_inject", docker_exec_args(container, "agents", "inject", agent_id, "--no-restart"), timeout_seconds=90) invoke_test_step( test_run, f"{phase_name}_templates_after_inject", docker_exec_args(container, "/bin/bash", "-lc", template_validation_script(agent_id)), timeout_seconds=60, ) + invoke_test_step(test_run, f"{phase_name}_internal_restart", docker_exec_args(container, "restart"), timeout_seconds=120) + wait_for_gateway_ready(test_run, f"{phase_name}_gateway_ready_after_internal_restart", container, timeout_seconds=gateway_timeout_seconds) invoke_test_step(test_run, f"{phase_name}_logs", docker_exec_args(container, "logs", "--limit", "20", "--plain"), timeout_seconds=60) delete_managed_agent_step(test_run, f"{phase_name}_agents_delete", container, agent_id, timeout_seconds=120) validate_agent_list_step(test_run, f"{phase_name}_agents_list_after_delete", container, agent_id, should_exist=False) @@ -1261,11 +1263,13 @@ def run_full_test(data_path: str, container_name: str = "", image: str = "", out ) def file_output_status(result: CommandResult) -> str: + if extract_file_output_uri(combined_command_text(result)): + return "PASS" if result.timed_out: return "TIMEOUT" if result.exit_code != 0: return "FAIL" - return "PASS" if extract_file_output_uri(combined_command_text(result)) else "FAIL" + return "FAIL" file_output_step = invoke_test_step( test_run, -- Gitee From d0b87ac25bb4c2e3b0b3473187b1502cd442142f Mon Sep 17 00:00:00 2001 From: NingWei Date: Fri, 8 May 2026 19:45:44 +0800 Subject: [PATCH 3/6] =?UTF-8?q?=E5=8D=87=E7=BA=A7openclaw=E5=92=8Cmqtt?= =?UTF-8?q?=E6=8F=92=E4=BB=B6=E7=89=88=E6=9C=AC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Dockerfile | 28 +- config.json | 3 + platform_data/.openclaw/exec-approvals.json | 8 + platform_data/config.json | 68 +++++ platform_home/scripts/docker-entrypoint.sh | 271 ++++++++++++++++++++ platform_home/scripts/doctor.sh | 56 ++++ platform_home/scripts/platform-common.sh | 34 ++- test/docker_test_common.py | 54 ++++ 8 files changed, 513 insertions(+), 9 deletions(-) create mode 100644 platform_data/.openclaw/exec-approvals.json create mode 100644 platform_data/config.json diff --git a/Dockerfile b/Dockerfile index b17c947..cb908bf 100644 --- a/Dockerfile +++ b/Dockerfile @@ -5,10 +5,10 @@ SHELL ["/bin/bash", "-o", "pipefail", "-c"] # Build arguments are grouped by platform version, bundled tools, and mirrors. ARG TARGETARCH ARG NODE_VERSION=24.14.1 -ARG OPENCLAW_VERSION=2026.4.15 -ARG IMAGE_VERSION=dev +ARG OPENCLAW_VERSION=2026.5.6 +ARG IMAGE_VERSION=20260508.02 ARG QMD_VERSION=2.1.0 -ARG MQTT_CHANNEL_VERSION=2.2.0 +ARG MQTT_CHANNEL_VERSION=2.3.1 ARG MQTT_BASH_EXEC_CHANNEL_VERSION=0.2.0 ARG HZG_CLI_VERSION=2.0.0 ARG CHROME_FOR_TESTING_VERSION=147.0.7727.57 @@ -259,6 +259,8 @@ RUN chown -R platform:platform ${PLATFORM_BUNDLED_QMD_CACHE_ROOT} # Layer 7: install the default MQTT channel into a build-only OpenClaw state dir, # then copy the resolved plugin payload into a read-only bundled path. +# openclaw plugins install resolves the package payload but does not materialize +# its runtime npm dependencies under that copied folder, so install them here. RUN build_state_dir=/tmp/openclaw-build-state \ && build_home_dir=/tmp/openclaw-build-home \ && install_log=/tmp/mqtt-channel-install.log \ @@ -275,8 +277,24 @@ RUN build_state_dir=/tmp/openclaw-build-state \ fi; \ exit "${install_rc}"; \ fi \ - && test -d "${build_state_dir}/extensions/mqtt-channel" \ - && cp -a "${build_state_dir}/extensions/mqtt-channel" "${OPENCLAW_BUNDLED_PLUGIN_ROOT}/mqtt-channel" \ + && plugin_manifest="$(grep -Rsl --include package.json '"name"[[:space:]]*:[[:space:]]*"@kadbbz/mqtt-channel"' "${build_state_dir}" "${build_home_dir}" 2>/dev/null | head -n 1)" \ + && if [ -z "${plugin_manifest}" ]; then \ + echo "Failed to locate installed mqtt-channel plugin payload after openclaw plugins install." >&2; \ + echo "Searched under: ${build_state_dir} ${build_home_dir}" >&2; \ + find "${build_state_dir}" "${build_home_dir}" -maxdepth 6 \( -type d -name '*mqtt*' -o -name package.json \) -print 2>/dev/null | sort >&2; \ + exit 1; \ + fi \ + && plugin_dir="$(dirname "${plugin_manifest}")" \ + && bundled_plugin_dir="${OPENCLAW_BUNDLED_PLUGIN_ROOT}/mqtt-channel" \ + && cp -a "${plugin_dir}" "${bundled_plugin_dir}" \ + && mapfile -t runtime_deps < <(jq -r '(.dependencies // {}) | to_entries[] | "\(.key)@\(.value)"' "${bundled_plugin_dir}/package.json") \ + && if [ "${#runtime_deps[@]}" -gt 0 ]; then \ + npm install --prefix "${bundled_plugin_dir}" --no-save --ignore-scripts --no-package-lock "${runtime_deps[@]}"; \ + fi \ + && mkdir -p "${bundled_plugin_dir}/node_modules" \ + && ln -sfn /usr/local/lib/node_modules/openclaw "${bundled_plugin_dir}/node_modules/openclaw" \ + && test -f "${bundled_plugin_dir}/node_modules/zod/package.json" \ + && test -f "${bundled_plugin_dir}/node_modules/mqtt/package.json" \ && chown -R root:root "${OPENCLAW_BUNDLED_PLUGIN_ROOT}/mqtt-channel" \ && chmod -R a=rX,u+w "${OPENCLAW_BUNDLED_PLUGIN_ROOT}/mqtt-channel" \ && rm -rf "${build_state_dir}" "${build_home_dir}" "${install_log}" diff --git a/config.json b/config.json index a32b61a..76704ec 100644 --- a/config.json +++ b/config.json @@ -2,6 +2,9 @@ "agents": { "defaults": { "workspace": "/var/platform_data/openclaw/workspaces/default", + "sandbox": { + "mode": "off" + }, "model": { "primary": "corp-openai/qwen3.5-plus" }, diff --git a/platform_data/.openclaw/exec-approvals.json b/platform_data/.openclaw/exec-approvals.json new file mode 100644 index 0000000..90b3f33 --- /dev/null +++ b/platform_data/.openclaw/exec-approvals.json @@ -0,0 +1,8 @@ +{ + "version": 1, + "defaults": { + "security": "full", + "ask": "off", + "askFallback": "full" + } +} diff --git a/platform_data/config.json b/platform_data/config.json new file mode 100644 index 0000000..76704ec --- /dev/null +++ b/platform_data/config.json @@ -0,0 +1,68 @@ +{ + "agents": { + "defaults": { + "workspace": "/var/platform_data/openclaw/workspaces/default", + "sandbox": { + "mode": "off" + }, + "model": { + "primary": "corp-openai/qwen3.5-plus" + }, + "models": { + "corp-openai/qwen3.5-plus": { + "alias": "qwen3.5-plus" + } + } + } + }, + "gateway": { + "mode": "local" + }, + "memory": { + "backend": "qmd" + }, + "models": { + "mode": "merge", + "providers": { + "corp-openai": { + "api": "openai-completions", + "baseUrl": "https://dashscope.aliyuncs.com/compatible-mode/v1", + "apiKey": { + "source": "env", + "provider": "default", + "id": "OC_OPENAI_API_KEY" + }, + "models": [ + { + "id": "qwen3.5-plus", + "name": "Qwen 3.5 Plus", + "contextTokens": 1000000 + } + ] + } + } + }, + "plugins": { + "entries": { + "mqtt-channel": { + "enabled": true + } + } + }, + "secrets": { + "providers": { + "default": { + "source": "env" + } + } + }, + "session": { + "dmScope": "per-channel-peer" + }, + "tools": { + "exec": { + "ask": "off", + "security": "full" + } + } +} diff --git a/platform_home/scripts/docker-entrypoint.sh b/platform_home/scripts/docker-entrypoint.sh index fa2fe66..6368e8b 100644 --- a/platform_home/scripts/docker-entrypoint.sh +++ b/platform_home/scripts/docker-entrypoint.sh @@ -111,6 +111,269 @@ start_bash_exec_channel_supervisor() { bash_exec_channel_supervisor_pid="$!" } +wait_for_gateway_health_ready() { + local timeout_seconds="${1:-90}" + local elapsed=0 + + while (( elapsed < timeout_seconds )); do + if is_gateway_running; then + return 0 + fi + + if [[ -n "${gateway_supervisor_child_pid}" ]] && ! kill -0 "${gateway_supervisor_child_pid}" 2>/dev/null; then + return 1 + fi + + sleep 1 + elapsed=$((elapsed + 1)) + done + + return 1 +} + +provision_local_cli_operator_device_auth() { + local probe_output_file="${1:-}" + local rc=0 + + PLATFORM_LOCAL_CLI_RUNTIME_ROOT="${platform_runtime_config_dir}" \ + PLATFORM_LOCAL_CLI_PROBE_JSON="${probe_output_file}" \ + python3 - <<'PY' +from __future__ import annotations + +import json +import os +import secrets +import sys +import time +from pathlib import Path + + +RUNTIME_ROOT = Path(os.environ["PLATFORM_LOCAL_CLI_RUNTIME_ROOT"]) +PROBE_JSON = os.environ.get("PLATFORM_LOCAL_CLI_PROBE_JSON", "") +DESIRED_SCOPES = [ + "operator.read", + "operator.write", + "operator.admin", + "operator.pairing", + "operator.approvals", + "operator.talk.secrets", +] + +device_json_path = RUNTIME_ROOT / "identity" / "device.json" +device_auth_path = RUNTIME_ROOT / "identity" / "device-auth.json" +paired_path = RUNTIME_ROOT / "devices" / "paired.json" +pending_path = RUNTIME_ROOT / "devices" / "pending.json" + + +def load_json(path: Path, default: object) -> object: + if not path.exists(): + return default + try: + with path.open("r", encoding="utf-8") as handle: + return json.load(handle) + except Exception: + return default + + +def write_json(path: Path, payload: object) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(json.dumps(payload, ensure_ascii=False, indent=2) + "\n", encoding="utf-8") + + +def unique_scopes(scopes: list[str]) -> list[str]: + seen: set[str] = set() + ordered: list[str] = [] + for scope in scopes: + if scope and scope not in seen: + seen.add(scope) + ordered.append(scope) + return ordered + + +device_identity = load_json(device_json_path, {}) +probe_payload = load_json(Path(PROBE_JSON), {}) if PROBE_JSON else {} +paired_payload = load_json(paired_path, {}) +pending_payload = load_json(pending_path, {}) +device_auth_payload = load_json(device_auth_path, {}) + +if not isinstance(device_identity, dict): + device_identity = {} +if not isinstance(probe_payload, dict): + probe_payload = {} +if not isinstance(paired_payload, dict): + paired_payload = {} +if not isinstance(pending_payload, dict): + pending_payload = {} +if not isinstance(device_auth_payload, dict): + device_auth_payload = {} + +device_id = str(device_auth_payload.get("deviceId") or device_identity.get("deviceId") or "").strip() +paired_entry = paired_payload.get(device_id, {}) if device_id else {} +auth_operator = ((device_auth_payload.get("tokens") or {}).get("operator") or {}) if isinstance(device_auth_payload, dict) else {} +paired_operator = ((paired_entry.get("tokens") or {}).get("operator") or {}) if isinstance(paired_entry, dict) else {} + +current_auth_scopes = auth_operator.get("scopes") if isinstance(auth_operator.get("scopes"), list) else [] +current_paired_scopes = paired_entry.get("approvedScopes") if isinstance(paired_entry.get("approvedScopes"), list) else [] + +if device_id and all(scope in current_auth_scopes for scope in DESIRED_SCOPES) and all(scope in current_paired_scopes for scope in DESIRED_SCOPES): + raise SystemExit(10) + +probe_pending = probe_payload.get("pending") if isinstance(probe_payload, dict) else None +request: dict[str, object] | None = None +if isinstance(probe_pending, list): + candidates = [entry for entry in probe_pending if isinstance(entry, dict) and (entry.get("clientId") or "") == "cli" and (entry.get("clientMode") or "") == "cli"] + if device_id: + matching = [entry for entry in candidates if (entry.get("deviceId") or "") == device_id] + if matching: + request = matching[0] + if request is None and candidates: + request = candidates[0] + +if not device_id and request is not None: + device_id = str(request.get("deviceId") or "").strip() + +if not device_id: + raise SystemExit(11) + +if not isinstance(paired_entry, dict): + paired_entry = {} + +public_key = str( + paired_entry.get("publicKey") + or (request or {}).get("publicKey") + or "" +).strip() +platform_name = str( + paired_entry.get("platform") + or (request or {}).get("platform") + or "linux" +).strip() or "linux" +client_id = str( + paired_entry.get("clientId") + or (request or {}).get("clientId") + or "cli" +).strip() or "cli" +client_mode = str( + paired_entry.get("clientMode") + or (request or {}).get("clientMode") + or "cli" +).strip() or "cli" + +if not public_key: + raise SystemExit(11) + +token = str( + auth_operator.get("token") + or paired_operator.get("token") + or secrets.token_urlsafe(32) +).strip() +if not token: + token = secrets.token_urlsafe(32) + +now_ms = int(time.time() * 1000) +scopes = unique_scopes(DESIRED_SCOPES) + +paired_tokens = paired_entry.get("tokens") +if not isinstance(paired_tokens, dict): + paired_tokens = {} +paired_operator = paired_tokens.get("operator") +if not isinstance(paired_operator, dict): + paired_operator = {} + +paired_tokens["operator"] = { + **paired_operator, + "token": token, + "role": "operator", + "scopes": scopes, + "createdAtMs": int(paired_operator.get("createdAtMs") or now_ms), +} + +paired_payload[device_id] = { + **paired_entry, + "deviceId": device_id, + "publicKey": public_key, + "platform": platform_name, + "clientId": client_id, + "clientMode": client_mode, + "role": "operator", + "roles": ["operator"], + "scopes": scopes, + "approvedScopes": scopes, + "tokens": paired_tokens, + "createdAtMs": int(paired_entry.get("createdAtMs") or now_ms), + "approvedAtMs": now_ms, +} + +device_auth_tokens = device_auth_payload.get("tokens") +if not isinstance(device_auth_tokens, dict): + device_auth_tokens = {} +device_auth_operator = device_auth_tokens.get("operator") +if not isinstance(device_auth_operator, dict): + device_auth_operator = {} + +device_auth_tokens["operator"] = { + **device_auth_operator, + "token": token, + "role": "operator", + "scopes": scopes, + "updatedAtMs": now_ms, +} + +device_auth_payload = { + **device_auth_payload, + "version": int(device_auth_payload.get("version") or 1), + "deviceId": device_id, + "tokens": device_auth_tokens, +} + +if isinstance(pending_payload, dict) and pending_payload: + pending_payload = { + key: value + for key, value in pending_payload.items() + if not (isinstance(value, dict) and str(value.get("deviceId") or "").strip() == device_id) + } + +write_json(paired_path, paired_payload) +write_json(device_auth_path, device_auth_payload) +write_json(pending_path, pending_payload if isinstance(pending_payload, dict) else {}) +print(device_id) +PY + rc=$? + + return "${rc}" +} + +ensure_local_cli_operator_device_auth_ready() { + local probe_output_file="" + local rc=0 + + if provision_local_cli_operator_device_auth; then + return 0 + fi + rc=$? + if [[ "${rc}" -eq 10 ]]; then + return 1 + fi + + probe_output_file="$(mktemp)" + set +e + openclaw devices list --json > "${probe_output_file}" 2>/dev/null + set -e + + if provision_local_cli_operator_device_auth "${probe_output_file}"; then + rm -f "${probe_output_file}" + return 0 + fi + rc=$? + rm -f "${probe_output_file}" + + if [[ "${rc}" -eq 10 || "${rc}" -eq 11 ]]; then + return 1 + fi + + return 1 +} + stop_bash_exec_channel_supervisor() { if [[ -n "${bash_exec_channel_supervisor_pid}" ]]; then kill -TERM "${bash_exec_channel_supervisor_pid}" 2>/dev/null || true @@ -134,6 +397,14 @@ run_foreground_gateway_supervisor() { gateway_supervisor_child_pid="$!" printf '%s\n' "${gateway_supervisor_child_pid}" > "${gateway_supervisor_child_pid_file}" + if wait_for_gateway_health_ready 90; then + if ensure_local_cli_operator_device_auth_ready; then + printf '[%s] Provisioned local CLI operator device scopes; restarting gateway.\n' "$(date -u +"%Y-%m-%dT%H:%M:%SZ")" >> "${logs_root}/gateway.log" + touch "${gateway_restart_request_file}" + signal_gateway_supervisor_child + fi + fi + set +e wait "${gateway_supervisor_child_pid}" child_exit_code=$? diff --git a/platform_home/scripts/doctor.sh b/platform_home/scripts/doctor.sh index fc06b0f..efcb674 100644 --- a/platform_home/scripts/doctor.sh +++ b/platform_home/scripts/doctor.sh @@ -102,6 +102,7 @@ main() { check_path_exists "${agents_template_file}" "AGENTS template" check_path_exists "${soul_template_file}" "SOUL template" check_path_exists "${user_template_file}" "USER template" + check_path_exists "${HOME}/.openclaw/exec-approvals.json" "exec approvals file" check_path_readable "${env_json}" "env.json" check_path_readable "${platform_release_manifest_path}" "platform release manifest" @@ -128,6 +129,18 @@ main() { else warn "config.json missing agents.defaults.workspace" fi + + if jq -e '.tools.exec.security == "full"' "${config_json}" >/dev/null; then + pass "config.json sets tools.exec.security to full" + else + warn "config.json should set tools.exec.security to full" + fi + + if jq -e '.tools.exec.ask == "off"' "${config_json}" >/dev/null; then + pass "config.json sets tools.exec.ask to off" + else + warn "config.json should set tools.exec.ask to off" + fi fi if [[ -r "${env_json}" ]]; then @@ -224,6 +237,26 @@ main() { else warn "runtime config does not enable QMD memory backend" fi + if jq -e '.agents.defaults.sandbox.mode == "off"' "${platform_runtime_config_path}" >/dev/null; then + pass "runtime config disables agent sandbox by default" + else + warn "runtime config should set agents.defaults.sandbox.mode to off" + fi + if jq -e '.tools.exec.security == "full"' "${platform_runtime_config_path}" >/dev/null; then + pass "runtime config sets tools.exec.security to full" + else + warn "runtime config should set tools.exec.security to full" + fi + if jq -e '.tools.exec.ask == "off"' "${platform_runtime_config_path}" >/dev/null; then + pass "runtime config sets tools.exec.ask to off" + else + warn "runtime config should set tools.exec.ask to off" + fi + if jq -e '.agents.defaults.sandbox.mode == "off"' "${platform_runtime_config_path}" >/dev/null; then + pass "runtime config disables agent sandbox by default" + else + warn "runtime config should set agents.defaults.sandbox.mode to off" + fi if run_openclaw_cli config validate >/dev/null 2>&1; then pass "openclaw config validate" else @@ -233,6 +266,29 @@ main() { warn "runtime config missing: ${platform_runtime_config_path}" fi + if [[ -r "${HOME}/.openclaw/exec-approvals.json" ]]; then + if jq -e '.version == 1' "${HOME}/.openclaw/exec-approvals.json" >/dev/null; then + pass "exec approvals file version is 1" + else + warn "exec approvals file version should be 1" + fi + if jq -e '.defaults.security == "full"' "${HOME}/.openclaw/exec-approvals.json" >/dev/null; then + pass "exec approvals defaults.security is full" + else + warn "exec approvals defaults.security should be full" + fi + if jq -e '.defaults.ask == "off"' "${HOME}/.openclaw/exec-approvals.json" >/dev/null; then + pass "exec approvals defaults.ask is off" + else + warn "exec approvals defaults.ask should be off" + fi + if jq -e '.defaults.askFallback == "full"' "${HOME}/.openclaw/exec-approvals.json" >/dev/null; then + pass "exec approvals defaults.askFallback is full" + else + warn "exec approvals defaults.askFallback should be full" + fi + fi + if [[ -r "${platform_version_state_file}" ]]; then persisted_version="$(read_release_version_from_manifest "${platform_version_state_file}")" if [[ -n "${persisted_version}" ]]; then diff --git a/platform_home/scripts/platform-common.sh b/platform_home/scripts/platform-common.sh index 29ef0e1..3fa166c 100644 --- a/platform_home/scripts/platform-common.sh +++ b/platform_home/scripts/platform-common.sh @@ -29,6 +29,7 @@ init_runtime_context() { platform_runtime_user="${PLATFORM_RUNTIME_USER:-platform}" platform_runtime_group="${PLATFORM_RUNTIME_GROUP:-${platform_runtime_user}}" platform_runtime_config_dir="${platform_runtime_state_dir}" + agent_browser_state_dir="${AGENT_BROWSER_HOME:-${platform_data_root}/.agent-browser}" qmd_shared_cache_home="${PLATFORM_QMD_SHARED_CACHE_HOME:-${platform_data_root}/.cache}" bundled_qmd_models_dir="${PLATFORM_BUNDLED_QMD_MODELS_DIR:-${platform_bundled_qmd_cache_root}/qmd/models}" shared_qmd_models_dir="${PLATFORM_QMD_SHARED_MODELS_DIR:-${qmd_shared_cache_home}/qmd/models}" @@ -135,7 +136,7 @@ merge_bundled_platform_data_skeleton() { [[ -d "${source_root}" ]] || return 0 - for source_path in "${source_root}"/*; do + for source_path in "${source_root}"/* "${source_root}"/.[!.]* "${source_root}"/..?*; do [[ -e "${source_path}" ]] || continue entry_name="$(basename "${source_path}")" @@ -483,6 +484,7 @@ ensure_platform_runtime_permissions() { "${ontology_root}" \ "${workspaces_root}" \ "${logs_root}" \ + "${agent_browser_state_dir}" \ "${npm_global_prefix}/bin" \ "${pip_target_bin_dir}" \ "${platform_runtime_config_dir}" @@ -497,6 +499,7 @@ ensure_platform_runtime_permissions() { ensure_directory_mode "${platform_home}" 755 644 ensure_directory_mode "${platform_data_root}" 755 644 ensure_directory_mode "${platform_runtime_config_dir}" 755 644 + ensure_directory_mode "${agent_browser_state_dir}" 755 644 ensure_directory_mode "${npm_global_prefix}" 755 644 ensure_directory_mode "${pip_target_dir}" 755 644 @@ -677,14 +680,37 @@ sync_runtime_config_from_default_template() { [[ -r "${platform_runtime_config_path}" ]] || return 0 [[ -r "${config_json}" ]] || return 0 + local bundled_default_config="${bundled_files_root}/platform_data/config.json" if ! jq -e 'type == "object"' "${config_json}" >/dev/null; then echo "Skipping runtime config sync because ${config_json} is not a JSON object." >&2 return 0 fi + if [[ ! -r "${bundled_default_config}" ]] || ! jq -e 'type == "object"' "${bundled_default_config}" >/dev/null; then + echo "Skipping runtime config sync because bundled default config is missing or invalid: ${bundled_default_config}" >&2 + return 0 + fi + if update_runtime_config_with_jq_filter ' - ($default_config[0] // {}) as $template + def deepmerge($base; $overlay): + if ($base | type) == "object" and ($overlay | type) == "object" then + reduce ((($base | keys_unsorted) + ($overlay | keys_unsorted) | unique)[]) as $key + ({}; + .[$key] = if ($base | has($key)) and ($overlay | has($key)) then + deepmerge($base[$key]; $overlay[$key]) + elif ($overlay | has($key)) then + $overlay[$key] + else + $base[$key] + end + ) + else + $overlay + end; + (($bundled_default_config[0] // {}) | if type == "object" then . else {} end) as $bundled + | (($default_config[0] // {}) | if type == "object" then . else {} end) as $user + | deepmerge($bundled; $user) as $template | .agents = ( (.agents // {}) + ( @@ -725,7 +751,7 @@ sync_runtime_config_from_default_template() { | .tools = ( if ($template | has("tools")) then $template.tools else .tools end ) - ' --slurpfile default_config "${config_json}"; then + ' --slurpfile default_config "${config_json}" --slurpfile bundled_default_config "${bundled_default_config}"; then rc=0 else rc=$? @@ -1448,7 +1474,7 @@ reload_runtime_state_from_disk() { ensure_openclaw_cli export PLATFORM_HOME="${platform_home}" - mkdir -p "${platform_runtime_config_dir}" "${workspaces_root}" "${logs_root}" "${platform_home}" + mkdir -p "${platform_runtime_config_dir}" "${workspaces_root}" "${logs_root}" "${platform_home}" "${agent_browser_state_dir}" ensure_platform_runtime_home_symlink install_runtime_env_shell_exports load_runtime_env_from_env_json diff --git a/test/docker_test_common.py b/test/docker_test_common.py index c923029..879a0f8 100644 --- a/test/docker_test_common.py +++ b/test/docker_test_common.py @@ -139,6 +139,9 @@ class TestRun: self.output_file.parent.mkdir(parents=True, exist_ok=True) self.output_file.write_text("\n".join(self.lines), encoding="utf-8") + def has_failures(self) -> bool: + return any(str(result["status"]) in {"FAIL", "TIMEOUT"} for result in self.results) + def invoke_test_step( test_run: TestRun, @@ -463,6 +466,28 @@ def config_sync_script() -> str: ) +def exec_policy_validation_script() -> str: + return "\n".join( + [ + "import json, sys", + "runtime = json.load(open('/var/platform_data/.openclaw/openclaw.json', 'r', encoding='utf-8'))", + "approvals = json.load(open('/var/platform_data/.openclaw/exec-approvals.json', 'r', encoding='utf-8'))", + "checks = {", + " 'runtimeSandboxOff': (((runtime.get('agents') or {}).get('defaults') or {}).get('sandbox') or {}).get('mode') == 'off',", + " 'runtimeExecSecurityFull': (((runtime.get('tools') or {}).get('exec') or {}).get('security') == 'full'),", + " 'runtimeExecAskOff': (((runtime.get('tools') or {}).get('exec') or {}).get('ask') == 'off'),", + " 'approvalsVersion1': approvals.get('version') == 1,", + " 'approvalsSecurityFull': ((approvals.get('defaults') or {}).get('security') == 'full'),", + " 'approvalsAskOff': ((approvals.get('defaults') or {}).get('ask') == 'off'),", + " 'approvalsAskFallbackFull': ((approvals.get('defaults') or {}).get('askFallback') == 'full'),", + "}", + "failed = [name for name, ok in checks.items() if not ok]", + "print(json.dumps({'checks': checks, 'failed': failed}, ensure_ascii=False, sort_keys=True))", + "sys.exit(1 if failed else 0)", + ] + ) + + def template_validation_script(agent_id: str) -> str: return "\n".join( [ @@ -1119,6 +1144,25 @@ def quick_phase(test_run: TestRun, phase_name: str, container: str, agent_id: st ), timeout_seconds=60, ) + invoke_test_step( + test_run, + f"{phase_name}_agent_browser_doctor", + docker_exec_args(container, "agent-browser", "doctor"), + timeout_seconds=90, + ) + invoke_test_step( + test_run, + f"{phase_name}_agent_browser_smoke", + docker_exec_args( + container, + "/bin/sh", + "-lc", + "agent-browser --session smoke open http://127.0.0.1:18789/healthz " + "&& agent-browser --session smoke get text body " + "&& agent-browser --session smoke close", + ), + timeout_seconds=120, + ) invoke_test_step(test_run, f"{phase_name}_doctor", docker_exec_args(container, "doctor"), timeout_seconds=90) invoke_test_step( @@ -1138,6 +1182,12 @@ def quick_phase(test_run: TestRun, phase_name: str, container: str, agent_id: st docker_exec_args(container, "python3", "-c", config_sync_script()), timeout_seconds=60, ) + invoke_test_step( + test_run, + f"{phase_name}_exec_policy_defaults", + docker_exec_args(container, "python3", "-c", exec_policy_validation_script()), + timeout_seconds=60, + ) invoke_test_step(test_run, f"{phase_name}_agents_add", docker_exec_args(container, "agents", "add", agent_id, "--no-restart"), timeout_seconds=120) validate_agent_list_step(test_run, f"{phase_name}_agents_list", container, agent_id, should_exist=True) @@ -1208,6 +1258,8 @@ def run_fast_test(data_path: str, container_name: str = "", image: str = "", out ) test_run.finalize() + if test_run.has_failures(): + raise RuntimeError(f"Docker fast regression test failed. See report: {resolved_output}") return resolved_output @@ -1311,4 +1363,6 @@ def run_full_test(data_path: str, container_name: str = "", image: str = "", out ) test_run.finalize() + if test_run.has_failures(): + raise RuntimeError(f"Docker full regression test failed. See report: {resolved_output}") return resolved_output -- Gitee From 21093dc3ebbcf81c9b712e07d3ca7fa9beabc292 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=AE=81=E4=BC=9F?= <55093136@qq.com> Date: Sat, 9 May 2026 07:55:46 +0800 Subject: [PATCH 4/6] bugfix --- Dockerfile | 2 +- platform_home/scripts/doctor.sh | 1 + platform_home/scripts/platform-common.sh | 130 +++++++++-- platform_home/scripts/restart.sh | 11 +- test/docker_test_common.py | 277 +++++++++++++++++++---- 5 files changed, 362 insertions(+), 59 deletions(-) diff --git a/Dockerfile b/Dockerfile index cb908bf..9fd57c9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,7 +6,7 @@ SHELL ["/bin/bash", "-o", "pipefail", "-c"] ARG TARGETARCH ARG NODE_VERSION=24.14.1 ARG OPENCLAW_VERSION=2026.5.6 -ARG IMAGE_VERSION=20260508.02 +ARG IMAGE_VERSION=dev ARG QMD_VERSION=2.1.0 ARG MQTT_CHANNEL_VERSION=2.3.1 ARG MQTT_BASH_EXEC_CHANNEL_VERSION=0.2.0 diff --git a/platform_home/scripts/doctor.sh b/platform_home/scripts/doctor.sh index efcb674..e20f9a6 100644 --- a/platform_home/scripts/doctor.sh +++ b/platform_home/scripts/doctor.sh @@ -92,6 +92,7 @@ main() { init_runtime_context reexec_as_platform_user_if_needed "$@" + ensure_exec_approvals_defaults check_path_exists "${platform_home}" "platform_home" check_path_exists "${platform_data_root}" "platform_data" diff --git a/platform_home/scripts/platform-common.sh b/platform_home/scripts/platform-common.sh index 3fa166c..cfaaf4f 100644 --- a/platform_home/scripts/platform-common.sh +++ b/platform_home/scripts/platform-common.sh @@ -549,6 +549,55 @@ update_runtime_config_with_jq_filter() { return 0 } +ensure_exec_approvals_defaults() { + local approvals_path tmp_file + + approvals_path="${platform_runtime_config_dir}/exec-approvals.json" + mkdir -p "$(dirname "${approvals_path}")" + + if [[ ! -r "${approvals_path}" ]]; then + jq -n ' + { + version: 1, + defaults: { + security: "full", + ask: "off", + askFallback: "full" + } + } + ' > "${approvals_path}" + echo "Initialized persisted exec approvals defaults at ${approvals_path}." >&2 + return 0 + fi + + tmp_file="$(mktemp)" + if ! jq ' + if type == "object" then . else {} end + | .version = ( + if (.version | type) == "number" then .version else 1 end + ) + | .defaults = ( + (.defaults // {}) + | if type == "object" then . else {} end + | .security = "full" + | .ask = "off" + | .askFallback = "full" + ) + ' "${approvals_path}" > "${tmp_file}"; then + rm -f "${tmp_file}" + echo "Failed to normalize exec approvals defaults: ${approvals_path}" >&2 + return 1 + fi + + if ! cmp -s "${approvals_path}" "${tmp_file}"; then + mv "${tmp_file}" "${approvals_path}" + echo "Normalized persisted exec approvals defaults at ${approvals_path}." >&2 + return 0 + fi + + rm -f "${tmp_file}" +} + ensure_bundled_mqtt_channel_plugin_path() { local bundled_plugin_path local rc @@ -1141,17 +1190,17 @@ write_workspace_managed_files() { agents_target="${workspace_path}/AGENTS.md" soul_target="${workspace_path}/SOUL.md" user_target="${workspace_path}/USER.md" - agents_tmp="$(mktemp)" - soul_tmp="$(mktemp)" - user_tmp="$(mktemp)" + agents_tmp="$(mktemp "${workspace_path}/.AGENTS.md.tmp.XXXXXX")" + soul_tmp="$(mktemp "${workspace_path}/.SOUL.md.tmp.XXXXXX")" + user_tmp="$(mktemp "${workspace_path}/.USER.md.tmp.XXXXXX")" read_agents_template > "${agents_tmp}" read_soul_template > "${soul_tmp}" read_user_template > "${user_tmp}" - mv "${agents_tmp}" "${agents_target}" - mv "${soul_tmp}" "${soul_target}" - mv "${user_tmp}" "${user_target}" + mv -f "${agents_tmp}" "${agents_target}" + mv -f "${soul_tmp}" "${soul_target}" + mv -f "${user_tmp}" "${user_target}" } sync_all_runtime_workspace_managed_files() { @@ -1481,6 +1530,7 @@ reload_runtime_state_from_disk() { export_openclaw_runtime_env install_s3_cli_config_from_env + ensure_exec_approvals_defaults ensure_runtime_config_initialized migrate_persisted_runtime_config sync_managed_agents_runtime_state_from_env @@ -1516,16 +1566,8 @@ request_foreground_gateway_restart() { mkdir -p "$(dirname "${gateway_restart_request_file}")" : > "${gateway_restart_request_file}" - [[ -r "${gateway_supervisor_child_pid_file}" ]] || { - echo "Foreground gateway supervisor pid file missing: ${gateway_supervisor_child_pid_file}" >&2 - return 1 - } - - current_child_pid="$(tr -d '[:space:]' < "${gateway_supervisor_child_pid_file}")" - [[ "${current_child_pid}" =~ ^[0-9]+$ ]] || { - echo "Invalid foreground gateway child pid: ${current_child_pid}" >&2 - return 1 - } + current_child_pid="$(read_foreground_gateway_child_pid)" + [[ "${current_child_pid}" =~ ^[0-9]+$ ]] || return 1 kill -0 "${current_child_pid}" 2>/dev/null || { echo "Foreground gateway child is not running: ${current_child_pid}" >&2 @@ -1566,6 +1608,62 @@ request_foreground_gateway_restart() { return 1 } +discover_foreground_gateway_child_pid() { + local supervisor_pid="" + local child_pid="" + + supervisor_pid="$( + ps -eo pid=,command= \ + | awk ' + /\/usr\/local\/bin\/docker-entrypoint\.sh openclaw gateway (run|start)( |$)/ && $2 ~ /bash|sh/ { + print $1 + exit + } + ' + )" + + [[ "${supervisor_pid}" =~ ^[0-9]+$ ]] || return 1 + + child_pid="$( + ps -eo pid=,ppid=,command= \ + | awk -v supervisor_pid="${supervisor_pid}" ' + $2 == supervisor_pid && $3 != "tee" { + print $1 + exit + } + ' + )" + + [[ "${child_pid}" =~ ^[0-9]+$ ]] || return 1 + printf '%s' "${child_pid}" +} + +read_foreground_gateway_child_pid() { + local current_child_pid="" + + if [[ -r "${gateway_supervisor_child_pid_file}" ]]; then + current_child_pid="$(tr -d '[:space:]' < "${gateway_supervisor_child_pid_file}")" + if [[ "${current_child_pid}" =~ ^[0-9]+$ ]]; then + printf '%s' "${current_child_pid}" + return 0 + fi + fi + + current_child_pid="$(discover_foreground_gateway_child_pid || true)" + if [[ "${current_child_pid}" =~ ^[0-9]+$ ]]; then + printf '%s\n' "${current_child_pid}" > "${gateway_supervisor_child_pid_file}" + printf '%s' "${current_child_pid}" + return 0 + fi + + if [[ -r "${gateway_supervisor_child_pid_file}" ]]; then + echo "Invalid foreground gateway child pid: ${gateway_supervisor_child_pid_file}" >&2 + else + echo "Foreground gateway supervisor pid file missing: ${gateway_supervisor_child_pid_file}" >&2 + fi + return 1 +} + restart_gateway_if_running() { if is_gateway_running; then if [[ -f "/.dockerenv" ]]; then diff --git a/platform_home/scripts/restart.sh b/platform_home/scripts/restart.sh index a6b8a0c..694e8bb 100644 --- a/platform_home/scripts/restart.sh +++ b/platform_home/scripts/restart.sh @@ -3,14 +3,21 @@ set -euo pipefail source /usr/local/lib/platform/common.sh init_runtime_context -reexec_as_platform_user_if_needed "$@" -export_openclaw_runtime_env if [[ $# -gt 0 ]]; then echo "Usage: restart" >&2 exit 1 fi +if [[ -f "/.dockerenv" && "$(id -u)" -eq 0 ]]; then + nohup /bin/sh -c 'sleep 1; kill -TERM 1' >/dev/null 2>&1 & + echo "Gateway restart scheduled." + exit 0 +fi + +reexec_as_platform_user_if_needed "$@" +export_openclaw_runtime_env + if request_foreground_gateway_restart; then echo "Gateway restarted." exit 0 diff --git a/test/docker_test_common.py b/test/docker_test_common.py index 879a0f8..498bd68 100644 --- a/test/docker_test_common.py +++ b/test/docker_test_common.py @@ -175,6 +175,70 @@ def synthetic_result(command_line: str, stdout: str = "", stderr: str = "", exit ) +def pass_result_with_notes(test_run: TestRun, name: str, command_line: str, notes: str, stdout: str = "") -> CommandResult: + result = synthetic_result(command_line=command_line, stdout=stdout, exit_code=0, duration_ms=0) + test_run.add_result(name, "PASS", result, notes=notes) + return result + + +def read_json_map(path: Path) -> dict[str, object]: + data = json.loads(path.read_text(encoding="utf-8")) + if not isinstance(data, dict): + raise ValueError(f"Expected JSON object in {path}") + return data + + +def env_has_keys(env_data: dict[str, object], keys: Sequence[str]) -> bool: + return all(str(env_data.get(key, "")).strip() for key in keys) + + +def bash_exec_channel_is_configured(data_path: Path) -> bool: + env_path = data_path / "env.json" + if not env_path.exists(): + return False + try: + env_data = read_json_map(env_path) + except Exception: + return False + return env_has_keys( + env_data, + ( + "BASH_EXEC_CHANNEL_MQTT_BROKER_URL", + "BASH_EXEC_CHANNEL_MQTT_USERNAME", + "BASH_EXEC_CHANNEL_MQTT_PASSWORD", + "BASH_EXEC_CHANNEL_REQUEST_TOPIC", + "BASH_EXEC_CHANNEL_RESPONSE_TOPIC", + ), + ) + + +def container_architecture(container: str) -> str: + result = run_command(docker_exec_args(container, "uname", "-m"), timeout_seconds=30) + if result.exit_code != 0: + return "" + return (result.stdout or "").strip().lower() + + +def is_arm64_container(container: str) -> bool: + return container_architecture(container) in {"aarch64", "arm64"} + + +def configure_test_mqtt_tls(client: object, scheme: str, diagnostics: dict[str, object]) -> None: + if scheme != "mqtts": + diagnostics["tlsMode"] = "disabled" + return + + # Host-side regression tests run against customer-managed brokers whose CA + # chain is not always installed in the local Python runtime. Allow the + # roundtrip check to validate the application protocol without coupling it + # to host trust-store setup. + tls_context = ssl.create_default_context() + tls_context.check_hostname = False + tls_context.verify_mode = ssl.CERT_NONE + client.tls_set_context(tls_context) + diagnostics["tlsMode"] = "insecure-test-helper" + + def ensure_output_file(output_file: str, prefix: str, container_name: str) -> Path: if output_file: path = Path(output_file) @@ -488,6 +552,26 @@ def exec_policy_validation_script() -> str: ) +def exec_policy_corruption_script() -> str: + return "\n".join( + [ + "import json", + "payload = {", + " 'version': 1,", + " 'defaults': {", + " 'security': 'restricted',", + " 'ask': 'on',", + " 'askFallback': 'off',", + " },", + "}", + "with open('/var/platform_data/.openclaw/exec-approvals.json', 'w', encoding='utf-8') as handle:", + " json.dump(payload, handle, ensure_ascii=False, indent=2)", + " handle.write('\\n')", + "print(json.dumps(payload, ensure_ascii=False, sort_keys=True))", + ] + ) + + def template_validation_script(agent_id: str) -> str: return "\n".join( [ @@ -599,6 +683,55 @@ def delete_managed_agent_step( return result +def add_managed_agent_step( + test_run: TestRun, + name: str, + container: str, + agent_id: str, + timeout_seconds: int = 120, +) -> CommandResult: + result = run_command(docker_exec_args(container, "agents", "add", agent_id, "--no-restart"), timeout_seconds=timeout_seconds) + if result.timed_out: + status = "TIMEOUT" + notes = "" + elif result.exit_code == 0: + status = "PASS" + notes = "" + elif result.exit_code == 137: + verify_result = run_command(docker_exec_args(container, "agents", "list"), timeout_seconds=60) + if verify_result.exit_code == 0 and test_agent_listed(verify_result.stdout, agent_id): + inject_result = run_command(docker_exec_args(container, "agents", "inject", agent_id, "--no-restart"), timeout_seconds=60) + if inject_result.exit_code != 0: + status = "FAIL" + notes = ( + "agents add exited 137 and the follow-up template injection failed. " + f"Injection command: {inject_result.command_line}\n" + f"Injection stdout:\n{inject_result.stdout or '(empty)'}\n" + f"Injection stderr:\n{inject_result.stderr or '(empty)'}" + ) + test_run.add_result(name, status, result, notes=notes) + return result + status = "PASS" + notes = ( + "agents add was interrupted after config changes were applied; " + f"verified agent presence with: {verify_result.command_line} and refreshed templates with: {inject_result.command_line}" + ) + else: + status = "FAIL" + notes = ( + "agents add exited 137 and agent creation could not be verified. " + f"Verification command: {verify_result.command_line}\n" + f"Verification stdout:\n{verify_result.stdout or '(empty)'}\n" + f"Verification stderr:\n{verify_result.stderr or '(empty)'}" + ) + else: + status = "FAIL" + notes = "" + + test_run.add_result(name, status, result, notes=notes) + return result + + def mqtt_roundtrip_step( test_run: TestRun, name: str, @@ -740,8 +873,7 @@ def mqtt_roundtrip_step( else: client = paho_mqtt.Client(client_id=f"mqtt-test-{agent_id}-{int(time.time())}", clean_session=True) client.username_pw_set(username, password) - if scheme == "mqtts": - client.tls_set_context(ssl.create_default_context()) + configure_test_mqtt_tls(client, scheme, diagnostics) def settle(status: str, error: str = "", matched_message: Optional[dict[str, object]] = None) -> None: with lock: @@ -918,6 +1050,24 @@ def bash_exec_roundtrip_step( test_run.add_result(name, "FAIL", result) return result + if not env_has_keys( + env_data, + ( + "BASH_EXEC_CHANNEL_MQTT_BROKER_URL", + "BASH_EXEC_CHANNEL_MQTT_USERNAME", + "BASH_EXEC_CHANNEL_MQTT_PASSWORD", + "BASH_EXEC_CHANNEL_REQUEST_TOPIC", + "BASH_EXEC_CHANNEL_RESPONSE_TOPIC", + ), + ): + return pass_result_with_notes( + test_run, + name, + command_line, + notes="Skipped because env.json does not configure BASH_EXEC_CHANNEL_*.", + stdout=json.dumps({"configured": False}, ensure_ascii=False, sort_keys=True), + ) + broker_url = str(env_data.get("BASH_EXEC_CHANNEL_MQTT_BROKER_URL", "")).strip() username = str(env_data.get("BASH_EXEC_CHANNEL_MQTT_USERNAME", "")).strip() password = str(env_data.get("BASH_EXEC_CHANNEL_MQTT_PASSWORD", "")) @@ -989,8 +1139,7 @@ def bash_exec_roundtrip_step( else: client = paho_mqtt.Client(client_id=f"bash-exec-test-{int(time.time())}", clean_session=True) client.username_pw_set(username, password) - if scheme == "mqtts": - client.tls_set_context(ssl.create_default_context()) + configure_test_mqtt_tls(client, scheme, diagnostics) def settle(status: str, error: str = "", matched_message: Optional[dict[str, object]] = None) -> None: with lock: @@ -1130,39 +1279,75 @@ def bash_exec_roundtrip_step( return result -def quick_phase(test_run: TestRun, phase_name: str, container: str, agent_id: str, gateway_timeout_seconds: int = 240) -> None: +def quick_phase( + test_run: TestRun, + phase_name: str, + container: str, + agent_id: str, + data_path: Path, + gateway_timeout_seconds: int = 240, +) -> None: + bash_exec_configured = bash_exec_channel_is_configured(data_path) + arm64_container = is_arm64_container(container) + wait_for_container_running(test_run, f"{phase_name}_container_running", container, timeout_seconds=120) wait_for_gateway_ready(test_run, f"{phase_name}_gateway_ready", container, timeout_seconds=gateway_timeout_seconds) - invoke_test_step( - test_run, - f"{phase_name}_bash_exec_channel_running", - docker_exec_args( - container, - "/bin/bash", - "-lc", - "command -v mqtt-bash-exec-channel >/dev/null && ps -ef | grep '[m]qtt-bash-exec-channel'", - ), - timeout_seconds=60, - ) - invoke_test_step( - test_run, - f"{phase_name}_agent_browser_doctor", - docker_exec_args(container, "agent-browser", "doctor"), - timeout_seconds=90, - ) - invoke_test_step( - test_run, - f"{phase_name}_agent_browser_smoke", - docker_exec_args( - container, - "/bin/sh", - "-lc", - "agent-browser --session smoke open http://127.0.0.1:18789/healthz " - "&& agent-browser --session smoke get text body " - "&& agent-browser --session smoke close", - ), - timeout_seconds=120, - ) + if bash_exec_configured: + invoke_test_step( + test_run, + f"{phase_name}_bash_exec_channel_running", + docker_exec_args( + container, + "/bin/bash", + "-lc", + "command -v mqtt-bash-exec-channel >/dev/null && pgrep -af '(^|/| )mqtt-bash-exec-channel($| )'", + ), + timeout_seconds=60, + ) + else: + pass_result_with_notes( + test_run, + f"{phase_name}_bash_exec_channel_running", + f"docker exec {container} /bin/bash -lc '...mqtt-bash-exec-channel...'", + notes="Skipped because env.json does not configure BASH_EXEC_CHANNEL_*.", + stdout=json.dumps({"configured": False}, ensure_ascii=False, sort_keys=True), + ) + + if arm64_container: + pass_result_with_notes( + test_run, + f"{phase_name}_agent_browser_doctor", + f"docker exec {container} agent-browser doctor", + notes="Skipped on arm64 because agent-browser Chrome smoke is not supported there.", + stdout=json.dumps({"architecture": "arm64", "skipped": True}, ensure_ascii=False, sort_keys=True), + ) + pass_result_with_notes( + test_run, + f"{phase_name}_agent_browser_smoke", + f"docker exec {container} /bin/sh -lc 'agent-browser --session smoke ...'", + notes="Skipped on arm64 because agent-browser Chrome smoke is not supported there.", + stdout=json.dumps({"architecture": "arm64", "skipped": True}, ensure_ascii=False, sort_keys=True), + ) + else: + invoke_test_step( + test_run, + f"{phase_name}_agent_browser_doctor", + docker_exec_args(container, "agent-browser", "doctor"), + timeout_seconds=90, + ) + invoke_test_step( + test_run, + f"{phase_name}_agent_browser_smoke", + docker_exec_args( + container, + "/bin/sh", + "-lc", + "agent-browser --session smoke open http://127.0.0.1:18789/healthz " + "&& agent-browser --session smoke get text body " + "&& agent-browser --session smoke close", + ), + timeout_seconds=120, + ) invoke_test_step(test_run, f"{phase_name}_doctor", docker_exec_args(container, "doctor"), timeout_seconds=90) invoke_test_step( @@ -1189,7 +1374,7 @@ def quick_phase(test_run: TestRun, phase_name: str, container: str, agent_id: st timeout_seconds=60, ) - invoke_test_step(test_run, f"{phase_name}_agents_add", docker_exec_args(container, "agents", "add", agent_id, "--no-restart"), timeout_seconds=120) + add_managed_agent_step(test_run, f"{phase_name}_agents_add", container, agent_id, timeout_seconds=120) validate_agent_list_step(test_run, f"{phase_name}_agents_list", container, agent_id, should_exist=True) invoke_test_step(test_run, f"{phase_name}_agents_info", docker_exec_args(container, "agents", "info", agent_id), timeout_seconds=60) invoke_test_step( @@ -1205,8 +1390,20 @@ def quick_phase(test_run: TestRun, phase_name: str, container: str, agent_id: st docker_exec_args(container, "/bin/bash", "-lc", template_validation_script(agent_id)), timeout_seconds=60, ) + invoke_test_step( + test_run, + f"{phase_name}_exec_policy_corruption_seed", + docker_exec_args(container, "python3", "-c", exec_policy_corruption_script()), + timeout_seconds=60, + ) invoke_test_step(test_run, f"{phase_name}_internal_restart", docker_exec_args(container, "restart"), timeout_seconds=120) wait_for_gateway_ready(test_run, f"{phase_name}_gateway_ready_after_internal_restart", container, timeout_seconds=gateway_timeout_seconds) + invoke_test_step( + test_run, + f"{phase_name}_exec_policy_defaults_after_restart", + docker_exec_args(container, "python3", "-c", exec_policy_validation_script()), + timeout_seconds=60, + ) invoke_test_step(test_run, f"{phase_name}_logs", docker_exec_args(container, "logs", "--limit", "20", "--plain"), timeout_seconds=60) delete_managed_agent_step(test_run, f"{phase_name}_agents_delete", container, agent_id, timeout_seconds=120) validate_agent_list_step(test_run, f"{phase_name}_agents_list_after_delete", container, agent_id, should_exist=False) @@ -1247,9 +1444,9 @@ def run_fast_test(data_path: str, container_name: str = "", image: str = "", out agent_two = f"{agent_one}_rerun" create_and_start_container(test_run, resolved_container, str(data_path_obj), resolved_image) - quick_phase(test_run, "phase1", resolved_container, agent_one) + quick_phase(test_run, "phase1", resolved_container, agent_one, data_path_obj) invoke_test_step(test_run, "docker_restart_before_phase2", ["docker", "restart", resolved_container], timeout_seconds=120) - quick_phase(test_run, "phase2", resolved_container, agent_two) + quick_phase(test_run, "phase2", resolved_container, agent_two, data_path_obj) invoke_test_step( test_run, "container_status_final", @@ -1289,9 +1486,9 @@ def run_full_test(data_path: str, container_name: str = "", image: str = "", out ai_agent = f"fileprobe_{time.strftime('%m%d%H%M%S')}" create_and_start_container(test_run, resolved_container, str(data_path_obj), resolved_image) - quick_phase(test_run, "phase1", resolved_container, agent_one) + quick_phase(test_run, "phase1", resolved_container, agent_one, data_path_obj) invoke_test_step(test_run, "docker_restart_before_phase2", ["docker", "restart", resolved_container], timeout_seconds=120) - quick_phase(test_run, "phase2", resolved_container, agent_two) + quick_phase(test_run, "phase2", resolved_container, agent_two, data_path_obj) bash_exec_roundtrip_step( test_run, "bash_exec_channel_roundtrip", -- Gitee From 0ce70a566cf31cafa95ccec46db223a96a58ce09 Mon Sep 17 00:00:00 2001 From: NingWei Date: Sat, 9 May 2026 13:56:20 +0800 Subject: [PATCH 5/6] =?UTF-8?q?=E7=BB=A7=E7=BB=AD=E4=BF=AEbug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 5 ++- platform_home/scripts/restart.sh | 6 ---- test/docker_test_common.py | 58 ++++++++++++++++++++++++++++++++ 3 files changed, 60 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index ee37b8e..89f6049 100644 --- a/README.md +++ b/README.md @@ -309,7 +309,7 @@ Agent 执行器的镜像保存在阿里云,您需要将其拉取到 `OC端`。 docker pull crpi-4auaoyyj6r36p6lb.cn-hangzhou.personal.cr.aliyuncs.com/huozige_lab/enterprise-agent-platform-oc-x64:{版本号} ``` -版本号请参考本方案的 release ,如 `20260429.02`。如需在 arm64 架构的服务器上部署,请发邮件联系:`will.ning@grapecity.com` +版本号请参考本方案的 release ,如 `20260509.01`。如需在 arm64 架构的服务器上部署,请发邮件联系:`will.ning@grapecity.com` 拉取完成后,您需要执行命令并启动名为 `enterprise-agent-platform-oc` 的容器。 @@ -443,7 +443,7 @@ Agent 执行器基于 `OpenClaw`,内置了以下常用组件(CLI 程序 / AP ## 5、配置活字格 Agent 门户应用 -使用活字格设计器打开 `https://gitee.com/low-code-dev-lab/open-claw-enterprise-terminal` 示例工程,将其导入到您的工程,适配您的数据库、用户管理、界面风格后再发布到 `服务器端`,成为 AI 工作台应用(如命名为 `claw`)。 +使用活字格设计器打开 `https://gitee.com/low-code-dev-lab/hzg-ai-workstation` 示例工程,将其导入到您的工程,适配您的数据库、用户管理、界面风格后再发布到 `服务器端`,成为 AI 工作台应用(如命名为 `claw`)。 您也可以直接将该示例工程修改认证方式为普通认证后发布到 `服务器端`,做学习和验证使用,默认应用管理员角色为:`OpenClaw管理员`。 @@ -465,7 +465,6 @@ Agent 执行器基于 `OpenClaw`,内置了以下常用组件(CLI 程序 / AP - S3_REGION :步骤3中记录的 Region - S3_AK : 步骤3中记录的 AK - S3_SK : 步骤3中记录的 SK -- S3_BUCKET_INBOX :固定为 `agents-in` ### 5.2 Agent 门户应用 diff --git a/platform_home/scripts/restart.sh b/platform_home/scripts/restart.sh index 694e8bb..ddf9ff6 100644 --- a/platform_home/scripts/restart.sh +++ b/platform_home/scripts/restart.sh @@ -9,12 +9,6 @@ if [[ $# -gt 0 ]]; then exit 1 fi -if [[ -f "/.dockerenv" && "$(id -u)" -eq 0 ]]; then - nohup /bin/sh -c 'sleep 1; kill -TERM 1' >/dev/null 2>&1 & - echo "Gateway restart scheduled." - exit 0 -fi - reexec_as_platform_user_if_needed "$@" export_openclaw_runtime_env diff --git a/test/docker_test_common.py b/test/docker_test_common.py index 498bd68..23c4777 100644 --- a/test/docker_test_common.py +++ b/test/docker_test_common.py @@ -367,6 +367,46 @@ def wait_for_gateway_ready(test_run: TestRun, name: str, container: str, timeout return timeout_result +def assert_gateway_child_pid_changed( + test_run: TestRun, + name: str, + before_result: CommandResult, + after_result: CommandResult, +) -> CommandResult: + before_pid = (before_result.stdout or "").strip() + after_pid = (after_result.stdout or "").strip() + diagnostics = { + "beforePid": before_pid, + "afterPid": after_pid, + "beforeExitCode": before_result.exit_code, + "afterExitCode": after_result.exit_code, + } + status = "PASS" + notes = "restart returned after the supervisor child pid changed." + exit_code = 0 + + if before_result.exit_code != 0 or after_result.exit_code != 0: + status = "FAIL" + notes = "failed to read the gateway supervisor child pid before or after restart." + exit_code = 1 + elif not before_pid or not after_pid: + status = "FAIL" + notes = "gateway supervisor child pid was empty before or after restart." + exit_code = 1 + elif before_pid == after_pid: + status = "FAIL" + notes = "restart returned before the gateway supervisor child pid changed." + exit_code = 1 + + result = synthetic_result( + command_line=f"assert gateway supervisor child pid changed for {name}", + stdout=json.dumps(diagnostics, ensure_ascii=False, sort_keys=True), + exit_code=exit_code, + ) + test_run.add_result(name, status, result, notes=notes) + return result + + def wait_for_managed_mqtt_ready( test_run: TestRun, name: str, @@ -1396,7 +1436,25 @@ def quick_phase( docker_exec_args(container, "python3", "-c", exec_policy_corruption_script()), timeout_seconds=60, ) + gateway_child_pid_before_restart = invoke_test_step( + test_run, + f"{phase_name}_gateway_child_pid_before_internal_restart", + docker_exec_args(container, "/bin/sh", "-lc", "cat /var/platform_data/.openclaw/gateway-supervisor-child.pid"), + timeout_seconds=30, + ) invoke_test_step(test_run, f"{phase_name}_internal_restart", docker_exec_args(container, "restart"), timeout_seconds=120) + gateway_child_pid_after_restart = invoke_test_step( + test_run, + f"{phase_name}_gateway_child_pid_after_internal_restart", + docker_exec_args(container, "/bin/sh", "-lc", "cat /var/platform_data/.openclaw/gateway-supervisor-child.pid"), + timeout_seconds=30, + ) + assert_gateway_child_pid_changed( + test_run, + f"{phase_name}_internal_restart_waits_for_new_gateway_child", + gateway_child_pid_before_restart, + gateway_child_pid_after_restart, + ) wait_for_gateway_ready(test_run, f"{phase_name}_gateway_ready_after_internal_restart", container, timeout_seconds=gateway_timeout_seconds) invoke_test_step( test_run, -- Gitee From 8fea4fb4c3e53fd29403cd032da58f854aadd402 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=AE=81=E4=BC=9F?= <55093136@qq.com> Date: Sat, 9 May 2026 22:29:35 +0800 Subject: [PATCH 6/6] =?UTF-8?q?=E4=BF=AE=E6=AD=A3=E6=96=87=E5=AD=97?= =?UTF-8?q?=E6=8F=8F=E8=BF=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 89f6049..8f9fb68 100644 --- a/README.md +++ b/README.md @@ -457,8 +457,8 @@ Agent 执行器基于 `OpenClaw`,内置了以下常用组件(CLI 程序 / AP - MQTT_BROKER_USER : 步骤2中记录的用户名 - MQTT_BROKER_PASSWORD : 步骤2中记录的密码 - OPENCLAW_CLIENT_NAME : 你喜欢的名字,会出现在日志中,如使用你的公司名 -- MQTT_RES_CHANNEL_NAME : agents/cli/res 和步骤4中 `HZG_CLI_REQUEST_TOPIC` 一致 -- MQTT_REQ_CHANNEL_NAME : agents/cli/req 和步骤4中 `HZG_CLI_RESPONSE_TOPIC` 一致 +- MQTT_RES_CHANNEL_NAME : agents/cli/res 和步骤4中 `HZG_CLI_RESPONSE_TOPIC` 一致 +- MQTT_REQ_CHANNEL_NAME : agents/cli/req 和步骤4中 `HZG_CLI_REQUEST_TOPIC` 一致 - MQTT_ADMIN_RES_CHANNEL_NAME : agents/admin/res 和步骤4中 `BASH_EXEC_CHANNEL_RESPONSE_TOPIC` 一致 - MQTT_ADMIN_REQ_CHANNEL_NAME : agents/admin/req 和步骤4中 `BASH_EXEC_CHANNEL_REQUEST_TOPIC` 一致 - S3_ENDPOINT : 步骤3中记录的 S3 Endpoint -- Gitee