diff --git a/docs/AGV_机械臂_技术说明文档.md b/docs/AGV_机械臂_技术说明文档.md index 5086aa6..440e43f 100644 --- a/docs/AGV_机械臂_技术说明文档.md +++ b/docs/AGV_机械臂_技术说明文档.md @@ -766,7 +766,7 @@ sudo systemctl start arm_server # === AGV 端 === # 2. 完整启动 ROS2 导航栈 + Flask cd ~/work/smart-inspection -./scripts/start_all.sh +./scripts/prod-backend.sh ``` ### 9.3 部署命令 @@ -779,8 +779,8 @@ uv sync --locked # 部署后验证远程文件 ssh elephant@192.168.60.80 "grep 'def _lookup_model' /home/elephant/work/smart-inspection/agv_app/utils/mission_executor.py" -# 重启 Flask -ssh elephant@192.168.60.80 'bash -s' < scripts/restart_flask.sh +# 重启生产后端 +ssh elephant@192.168.60.80 'cd ~/work/smart-inspection && ./scripts/prod-backend.sh' # 清空 Python 缓存(关键!修改后必须清) ssh elephant@192.168.60.80 "find /home/elephant/work/smart-inspection/agv_app -name '*.pyc' -delete; find /home/elephant/work/smart-inspection/agv_app -name '__pycache__' -type d -exec rm -rf {} +" @@ -864,7 +864,7 @@ agv_app/ ``` 启动脚本位于仓库顶层 `scripts/`。LiDAR 时间戳修复脚本部署在 AGV 的 -`/home/elephant/work/scan_fixer/`,由 `scripts/start_all.sh` 调用。 +`/home/elephant/work/smart-inspection/scan_fixer/`,由 `scripts/prod-backend.sh` 调用。 ## 附录 B:关键依赖 diff --git a/scripts/README.md b/scripts/README.md index 11801b9..eb06ed3 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -4,11 +4,11 @@ ``` scripts/ -├── start_all.sh ← 生产环境完整启动(ROS2 + Nav2 + Flask) +├── prod-backend.sh ← 生产环境完整启动(ROS2 + Nav2 + Flask) ├── stop_all.sh ← 生产环境完整停止 -├── start_flask.sh ← 仅重启 Flask(修改代码后快速部署) -├── restart_flask.sh ← 语法检查 + 清缓存 + 重启 Flask + 验证 -└── dev_start.sh ← 本地开发用(前台运行,不启动 ROS2) +├── dev-backend.sh ← 本地后端开发启动(Mock 硬件模式) +├── dev-frontend.sh ← 本地前端开发启动 +└── stop.sh ← 停止本地开发服务 ``` `scan_fixer/` 是生产启动链路的一部分:`clock_publisher.py` 发布 `/clock`, @@ -42,25 +42,25 @@ sudo apt install -y ffmpeg libzbar0 # 在 AGV 上执行 cd ~/work/smart-inspection ./scripts/stop_all.sh # 先彻底清理 -./scripts/start_all.sh # 完整启动 +./scripts/prod-backend.sh # 完整启动 ``` -### 2. 修改代码后快速部署 +### 2. 修改代码后重新启动生产后端 ```bash # 部署文件到 AGV 后 -ssh elephant@192.168.60.80 'bash -s' < scripts/restart_flask.sh +ssh elephant@192.168.60.80 'cd ~/work/smart-inspection && ./scripts/prod-backend.sh' ``` ### 3. 本地开发调试(不连硬件) ```bash -# 在本机执行,仅启动 Flask -./scripts/dev_start.sh +# 在本机执行,仅启动 Mock 后端 +./scripts/dev-backend.sh # 访问 http://127.0.0.1:5000 ``` -### 4. 远程轻量重启(ROS2 已运行) +### 4. 本地前端开发 ```bash -ssh elephant@192.168.60.80 'bash -s' < scripts/start_flask.sh +./scripts/dev-frontend.sh ``` ## 环境变量 @@ -72,8 +72,13 @@ ssh elephant@192.168.60.80 'bash -s' < scripts/start_flask.sh | `AGV_PROJECT_DIR` | `/home/elephant/work/smart-inspection` | 仓库根目录 | | `AGV_APP_DIR` | `$AGV_PROJECT_DIR/agv_app` | Flask 应用目录 | | `AGV_ROS2_DIR` | `/home/elephant/agv_pro_ros2` | ROS2 工作空间 | +| `ROS_SETUP` | `/opt/ros/humble/setup.bash` | ROS2 环境脚本 | +| `ROS_WORKSPACE_SETUP` | `$AGV_ROS2_DIR/install/setup.bash` | ROS2 工作空间环境脚本 | | `SCAN_FIXER_DIR` | `$AGV_PROJECT_DIR/scan_fixer` | 时间戳修正工具目录 | -| `FIXER_SCRIPT` | `fix_scan_timestamp_v6.py` | fixer 脚本名 | +| `LOG_DIR` | `/tmp` | 日志目录 | +| `FASTRTPS_SHM_DIR` | `/dev/shm` | FastRTPS 共享内存目录 | +| `AGV_CONTROLLER_DEVICE` | `/dev/agvpro_controller` | AGV 控制器设备 | +| `ROS_DOMAIN_ID` | `1` | ROS2 通信域 ID | ## 日志位置(AGV 上) diff --git a/scripts/prod-backend.sh b/scripts/prod-backend.sh index d98196c..3bb18d9 100755 --- a/scripts/prod-backend.sh +++ b/scripts/prod-backend.sh @@ -1,351 +1,233 @@ #!/bin/bash # ============================================================ -# Robot AGV 全量启动脚本 v4.0 +# Robot AGV 全量启动脚本 v5.0 # 修复: +# - v5.0: 使用公共库重构,减少代码重复 # - v4.0: 彻底杀死 ros2 daemon 进程 + 启动前进程数量检查 # - v3.0: 彻底清理 FastRTPS 共享内存文件(永久修复 DDS 通信问题) # - v2.7: 添加 ROS_DOMAIN_ID 环境变量传递 # - v2.6: 清理 scan_fixer lock 文件防残留 # ============================================================ -set -e +set -euo pipefail -# ---- 可配置项(环境变量覆盖默认值) ---- +# 加载公共库 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -PROJECT_DIR="$(dirname "$SCRIPT_DIR")" -AGV_PROJECT_DIR="${AGV_PROJECT_DIR:-$PROJECT_DIR}" -AGV_APP_DIR="${AGV_APP_DIR:-$AGV_PROJECT_DIR/agv_app}" -AGV_ROS2_DIR="${AGV_ROS2_DIR:-$HOME/agv_pro_ros2}" -ROS_DISTRO="${ROS_DISTRO:-humble}" -ROS_SETUP="${ROS_SETUP:-/opt/ros/$ROS_DISTRO/setup.bash}" -ROS_WORKSPACE_SETUP="${ROS_WORKSPACE_SETUP:-$AGV_ROS2_DIR/install/setup.bash}" -SCAN_FIXER_DIR="${SCAN_FIXER_DIR:-$AGV_PROJECT_DIR/scan_fixer}" -FIXER_SCRIPT="${FIXER_SCRIPT:-fix_scan_timestamp_v6.py}" -LOG_DIR="${LOG_DIR:-/tmp}" -LOCK_DIR="${LOCK_DIR:-/tmp}" -FASTRTPS_SHM_DIR="${FASTRTPS_SHM_DIR:-/dev/shm}" -AGV_CONTROLLER_DEVICE="${AGV_CONTROLLER_DEVICE:-/dev/agvpro_controller}" -ROS_DOMAIN_ID_VAL=1 +source "$SCRIPT_DIR/ros-common.sh" -BRINGUP_LOG="$LOG_DIR/ros2_bringup.log" -NAV2_LOG="$LOG_DIR/ros2_nav2.log" -CLOCK_LOG="$LOG_DIR/clock_publisher.log" -SCAN_FIXER_LOG="$LOG_DIR/scan_fixer.log" -FLASK_LOG="$LOG_DIR/agv_flask.log" +# ============================================================================ +# 主流程 +# ============================================================================ -mkdir -p "$LOG_DIR" +section "Robot AGV 全量启动 v5.0" -echo "==========================================" -echo " Robot AGV 全量启动 v4.0" -echo "==========================================" -echo "" +# ============================================================================ +# 1. 清理旧环境 +# ============================================================================ +step "1/8" "清理旧进程和共享内存" -# ---------- 1. 清理旧进程 + FastRTPS 共享内存 ---------- -echo "[1/8] 清理旧进程和共享内存..." +kill_all_soft +kill_all_hard +stop_ros2_daemon +cleanup_fastrtps -# 杀掉所有相关进程(先软杀,再硬杀确保干净) -pkill -f "ros2 launch agv_pro_bringup" 2>/dev/null || true -pkill -f "ros2 launch agv_pro_navigation2" 2>/dev/null || true -pkill -f "agv_pro_node" 2>/dev/null || true -pkill -f "lslidar_driver_node" 2>/dev/null || true -pkill -f "component_container" 2>/dev/null || true -pkill -f "robot_state_publisher" 2>/dev/null || true -pkill -f "fix_scan_timestamp" 2>/dev/null || true -pkill -f "clock_publisher" 2>/dev/null || true -pkill -f "python.*app.py" 2>/dev/null || true -pkill -f "uv run .*python app.py" 2>/dev/null || true -sleep 2 - -# 【关键】硬杀确保干净 -echo " 硬杀残留进程..." -pkill -9 -f "agv_pro_node" 2>/dev/null || true -pkill -9 -f "lslidar_driver_node" 2>/dev/null || true -pkill -9 -f "component_container" 2>/dev/null || true -pkill -9 -f "clock_publisher" 2>/dev/null || true -pkill -9 -f "fix_scan_timestamp" 2>/dev/null || true -pkill -9 -f "app.py" 2>/dev/null || true -sleep 1 - -# 【关键】杀死 ros2 daemon 进程本身(不是只 stop,而是杀进程) -echo " 重置 ros2 daemon..." -pkill -f "ros2-daemon" 2>/dev/null || true -pkill -9 -f "ros2-daemon" 2>/dev/null || true -sleep 2 - -# 【关键】清理 FastRTPS 共享内存文件(杀进程后立即清理) -echo " 清理 FastRTPS 共享内存文件..." -FASTRTPS_COUNT=$(ls "$FASTRTPS_SHM_DIR"/fastrtps_* 2>/dev/null | wc -l || echo 0) -if [ "$FASTRTPS_COUNT" -gt 0 ]; then - rm -rf "$FASTRTPS_SHM_DIR"/fastrtps_* - echo " 已清理 $FASTRTPS_COUNT 个 FastRTPS 文件" -else - echo " 无 FastRTPS 文件残留" -fi - -# 清理 scan_fixer 锁文件 -rm -f "$LOCK_DIR/scan_fixer.lock" - -# 【关键】验证进程已全部停止 +# 验证进程已停止 echo " 验证进程停止..." -PROC_COUNT=$(ps aux | grep -E 'agv_pro_node|lslidar_driver_node|component_container|fix_scan_timestamp|app.py' | grep -v grep | wc -l || echo 0) +PROC_COUNT=$(count_residual_processes) echo " 残留进程数: $PROC_COUNT" if [ "$PROC_COUNT" -gt 0 ]; then - echo " ⚠️ 仍有进程残留,强制终止..." - pkill -9 -f "agv_pro_node" 2>/dev/null || true - pkill -9 -f "lslidar_driver_node" 2>/dev/null || true - pkill -9 -f "component_container" 2>/dev/null || true - pkill -9 -f "fix_scan_timestamp" 2>/dev/null || true - pkill -9 -f "app.py" 2>/dev/null || true - sleep 2 - PROC_COUNT2=$(ps aux | grep -E 'agv_pro_node|lslidar_driver_node|component_container|fix_scan_timestamp|app.py' | grep -v grep | wc -l || echo 0) - echo " 清理后残留: $PROC_COUNT2" + echo " [WARN] 仍有进程残留,再次强制终止..." + kill_all_hard + PROC_COUNT=$(count_residual_processes) + echo " 清理后残留: $PROC_COUNT" fi -echo " ✅ 清理完成" +info "ok" "清理完成" -# ---------- 2. 启动 ros2 daemon ---------- -echo "[2/8] 启动 ros2 daemon..." +# ============================================================================ +# 2. 启动 ros2 daemon +# ============================================================================ +step "2/8" "启动 ros2 daemon" source "$ROS_SETUP" 2>/dev/null || true - -# 再次确保没有残留共享内存(启动 daemon 前) -rm -rf "$FASTRTPS_SHM_DIR"/fastrtps_* 2>/dev/null || true - -# 使用 bash -c 确保环境变量正确传递 -nohup bash -c "source \"$ROS_SETUP\" && export ROS_DOMAIN_ID=$ROS_DOMAIN_ID_VAL && ros2 daemon start" >/dev/null 2>&1 & -sleep 4 - -# 验证 daemon 是否就绪(用简单的 topic list 测试) -DAEMON_OK=0 -for i in $(seq 1 5); do - DAEMON_TOPICS=$(bash -c "source \"$ROS_SETUP\" && ROS_DOMAIN_ID=$ROS_DOMAIN_ID_VAL timeout 3 ros2 topic list 2>&1" | wc -l || echo 0) - if [ "$DAEMON_TOPICS" -gt 0 ]; then - DAEMON_OK=1 - echo " ✅ ros2 daemon 就绪" - break - fi - sleep 2 -done -if [ "$DAEMON_OK" -eq 0 ]; then - echo " ⚠️ ros2 daemon 可能有问题,继续尝试启动组件..." -fi - -# ---------- 3. 启动 bringup (含激光雷达) ---------- -echo "[3/8] 启动 AGV Bringup..." -source "$ROS_SETUP" 2>/dev/null || true - -# 【关键】启动前最后确认没有残留共享内存 rm -rf "$FASTRTPS_SHM_DIR"/fastrtps_* 2>/dev/null || true +start_ros2_daemon || true +# ============================================================================ +# 3. 启动 bringup (含激光雷达) +# ============================================================================ +step "3/8" "启动 AGV Bringup" cd "$AGV_ROS2_DIR" source "$ROS_WORKSPACE_SETUP" -nohup bash -c "export ROS_DOMAIN_ID=$ROS_DOMAIN_ID_VAL && ros2 launch agv_pro_bringup agv_pro_bringup.launch.py port_name:=$AGV_CONTROLLER_DEVICE" > "$BRINGUP_LOG" 2>&1 & +rm -rf "$FASTRTPS_SHM_DIR"/fastrtps_* 2>/dev/null || true + +nohup bash -c "export ROS_DOMAIN_ID=$ROS_DOMAIN_ID && \ + ros2 launch agv_pro_bringup agv_pro_bringup.launch.py port_name:=$AGV_CONTROLLER_DEVICE" \ + > "$BRINGUP_LOG" 2>&1 & BRINGUP_PID=$! echo " bringup PID: $BRINGUP_PID" -echo " 等待 bringup 就绪..." -BRINGUP_OK=0 -for i in $(seq 1 20); do - if ROS_DOMAIN_ID=$ROS_DOMAIN_ID_VAL ros2 topic list 2>/dev/null | grep -q '/odom'; then - echo " ✅ bringup 已就绪 (${i}x2秒)" - BRINGUP_OK=1 - break - fi - sleep 2 -done -if [ "$BRINGUP_OK" -eq 0 ]; then - echo " ⚠️ bringup 未检测到 /odom,继续启动后续组件..." - tail -5 "$BRINGUP_LOG" 2>/dev/null || true -fi +# 等待 /odom 话题 +wait_for_topic /odom 40 || show_log_tail "$BRINGUP_LOG" -# ---------- 3.5 启动系统时钟发布器 ---------- -echo "[3.5/8] 启动系统时钟发布器 (clock_publisher)..." +# ============================================================================ +# 4. 启动系统时钟发布器 +# ============================================================================ +step "4/8" "启动系统时钟发布器 (clock_publisher)" nohup bash -c "source \"$ROS_SETUP\" && \ - ROS_DOMAIN_ID=$ROS_DOMAIN_ID_VAL python3 \"$SCAN_FIXER_DIR/clock_publisher.py\"" \ + ROS_DOMAIN_ID=$ROS_DOMAIN_ID python3 \"$SCAN_FIXER_DIR/clock_publisher.py\"" \ > "$CLOCK_LOG" 2>&1 & CLOCK_PID=$! echo " clock_publisher PID: $CLOCK_PID" sleep 2 -# 验证 /clock 话题 -if ROS_DOMAIN_ID=$ROS_DOMAIN_ID_VAL ros2 topic list 2>/dev/null | grep -q '/clock'; then - echo " ✅ /clock 已上线" -else - echo " ⚠️ /clock 未上线,检查日志:" - tail -5 "$CLOCK_LOG" 2>/dev/null || true -fi +wait_for_topic /clock 10 || show_log_tail "$CLOCK_LOG" -# ---------- 4. 启动激光时间戳修正节点 ---------- -echo "[4/8] 启动激光时间戳修正节点..." +# ============================================================================ +# 5. 启动激光时间戳修正节点 +# ============================================================================ +step "5/8" "启动激光时间戳修正节点" -# 确保 /scan 存在 -SCAN_OK=0 -for i in $(seq 1 10); do - if ROS_DOMAIN_ID=$ROS_DOMAIN_ID_VAL ros2 topic list 2>/dev/null | grep -q '/scan'; then - echo " /scan 话题已上线" - SCAN_OK=1 - break - fi - sleep 2 -done -if [ "$SCAN_OK" -eq 0 ]; then - echo " ⚠️ /scan 未上线,检查 bringup 日志" +# 先等待 /scan 话题 +if ! wait_for_topic /scan 20; then + echo " [WARN] /scan 未上线,检查 bringup 日志" fi nohup bash -c "source \"$ROS_SETUP\" && \ - ROS_DOMAIN_ID=$ROS_DOMAIN_ID_VAL python3 \"$SCAN_FIXER_DIR/$FIXER_SCRIPT\"" \ + ROS_DOMAIN_ID=$ROS_DOMAIN_ID python3 \"$SCAN_FIXER_DIR/fix_scan_timestamp_v6.py\"" \ > "$SCAN_FIXER_LOG" 2>&1 & FIXER_PID=$! echo " fix_scan_timestamp PID: $FIXER_PID" sleep 5 -# 验证 fixer 进程和 scan_corrected -FIXER_COUNT=$(ps aux | grep -c "[f]ix_scan_timestamp" 2>/dev/null || echo 0) +# 检查是否有多个 fixer 进程 +FIXER_COUNT=$(count_matching_processes "fix_scan_timestamp") if [ "$FIXER_COUNT" -gt 1 ]; then - echo " ⚠️ 发现 $FIXER_COUNT 个 fixer 进程,杀掉多余的..." + echo " [WARN] 发现 $FIXER_COUNT 个 fixer 进程,重启..." pkill -f "fix_scan_timestamp" 2>/dev/null || true pkill -f "clock_publisher" 2>/dev/null || true sleep 2 - rm -f "$LOCK_DIR/scan_fixer.lock" + rm -f /tmp/scan_fixer.lock nohup bash -c "source \"$ROS_SETUP\" && \ - ROS_DOMAIN_ID=$ROS_DOMAIN_ID_VAL python3 \"$SCAN_FIXER_DIR/$FIXER_SCRIPT\"" \ + ROS_DOMAIN_ID=$ROS_DOMAIN_ID python3 \"$SCAN_FIXER_DIR/fix_scan_timestamp_v6.py\"" \ > "$SCAN_FIXER_LOG" 2>&1 & FIXER_PID=$! sleep 3 fi -if ROS_DOMAIN_ID=$ROS_DOMAIN_ID_VAL ros2 topic list 2>/dev/null | grep -q '/scan_corrected'; then - echo " ✅ /scan_corrected 已上线" -else - echo " ⚠️ /scan_corrected 未上线,检查日志:" - tail -5 "$SCAN_FIXER_LOG" 2>/dev/null || true -fi +wait_for_topic /scan_corrected 15 || show_log_tail "$SCAN_FIXER_LOG" -# ---------- 5. 启动 Nav2 ---------- -echo "[5/8] 启动 Nav2 导航..." -source "$ROS_SETUP" 2>/dev/null || true -cd "$AGV_ROS2_DIR" -source "$ROS_WORKSPACE_SETUP" +# ============================================================================ +# 6. 启动 Nav2 +# ============================================================================ +step "6/8" "启动 Nav2 导航" -nohup bash -c "source \"$ROS_SETUP\" && \ - source \"$ROS_WORKSPACE_SETUP\" && \ - export ROS_DOMAIN_ID=$ROS_DOMAIN_ID_VAL && \ - ros2 launch agv_pro_navigation2 navigation2_active.launch.py \ - autostart:=True" > "$NAV2_LOG" 2>&1 & +nohup bash -c "source \"$ROS_SETUP\" && source \"$ROS_WORKSPACE_SETUP\" && \ + export ROS_DOMAIN_ID=$ROS_DOMAIN_ID && \ + ros2 launch agv_pro_navigation2 navigation2_active.launch.py autostart:=True" \ + > "$NAV2_LOG" 2>&1 & NAV2_PID=$! echo " Nav2 PID: $NAV2_PID" sleep 12 echo " 等待 Nav2 节点就绪..." -NAV2_OK=0 -for i in $(seq 1 15); do - NODES=$(ROS_DOMAIN_ID=$ROS_DOMAIN_ID_VAL ros2 node list 2>/dev/null | \ - grep -cE 'lifecycle_manager_navigation|bt_navigator|controller_server' 2>/dev/null || echo 0) - NODES=$(echo "$NODES" | tr -d '\n' | awk '{print $1}') - if [ "$NODES" -ge 3 ] 2>/dev/null; then - echo " ✅ Nav2 节点已就绪 ($NODES 个)" - NAV2_OK=1 - break - fi - sleep 3 -done -if [ "$NAV2_OK" -eq 0 ]; then - echo " ⚠️ Nav2 节点未完全就绪,继续..." -fi +wait_for_nodes 'lifecycle_manager_navigation|bt_navigator|controller_server' 3 45 || true -# ---------- 6. 设置精度参数 ---------- -echo "[6/8] 设置导航精度参数 (xy_goal_tolerance=0.05m)..." -source "$ROS_SETUP" 2>/dev/null || true -cd "$AGV_ROS2_DIR" -source "$ROS_WORKSPACE_SETUP" +# ============================================================================ +# 7. 设置精度参数 +# ============================================================================ +step "7/8" "设置导航精度参数 (xy_goal_tolerance=0.05m)" for NODE in /controller_server /bt_navigator /planner_server; do - ROS_DOMAIN_ID=$ROS_DOMAIN_ID_VAL timeout 1 ros2 param set $NODE general_goal_checker.xy_goal_tolerance 0.05 2>/dev/null || true - ROS_DOMAIN_ID=$ROS_DOMAIN_ID_VAL timeout 1 ros2 param set $NODE general_goal_checker.yaw_goal_tolerance 0.05 2>/dev/null || true + ros2_exec timeout 1 ros2 param set $NODE general_goal_checker.xy_goal_tolerance 0.05 2>/dev/null || true + ros2_exec timeout 1 ros2 param set $NODE general_goal_checker.yaw_goal_tolerance 0.05 2>/dev/null || true done -ROS_DOMAIN_ID=$ROS_DOMAIN_ID_VAL timeout 1 ros2 param set /controller_server FollowPath.xy_goal_tolerance 0.05 2>/dev/null || true -ROS_DOMAIN_ID=$ROS_DOMAIN_ID_VAL timeout 1 ros2 param set /controller_server general_goal_checker.stateful True 2>/dev/null || true -ROS_DOMAIN_ID=$ROS_DOMAIN_ID_VAL timeout 1 ros2 param set /controller_server FollowPath.stateful True 2>/dev/null || true -echo " ✅ 精度参数已设置" +ros2_exec timeout 1 ros2 param set /controller_server FollowPath.xy_goal_tolerance 0.05 2>/dev/null || true +ros2_exec timeout 1 ros2 param set /controller_server general_goal_checker.stateful True 2>/dev/null || true +ros2_exec timeout 1 ros2 param set /controller_server FollowPath.stateful True 2>/dev/null || true + +info "ok" "精度参数已设置" + +# ============================================================================ +# 8. 启动 Flask API +# ============================================================================ +step "8/8" "启动 Flask API" -# ---------- 7. 启动 Flask ---------- -echo "[7/8] 启动 Flask API..." -export ROS_DOMAIN_ID=$ROS_DOMAIN_ID_VAL cd "$AGV_APP_DIR" nohup uv run --locked python app.py > "$FLASK_LOG" 2>&1 & FLASK_PID=$! echo " Flask PID: $FLASK_PID" sleep 4 -# ---------- 8. 最终全面验证 ---------- -echo "" -echo "==========================================" -echo " 系统全面验证" -echo "==========================================" +# ============================================================================ +# 9. 最终验证 +# ============================================================================ +section "系统全面验证" -# 8a. 验证 ros2 topic list(核心指标) +# 验证话题数量 echo "" echo "验证 ros2 topic list..." -TOPIC_COUNT=$(bash -c "source \"$ROS_SETUP\" && ROS_DOMAIN_ID=$ROS_DOMAIN_ID_VAL timeout 5 ros2 topic list 2>/dev/null" | wc -l || echo 0) +TOPIC_COUNT=$(ros2_topic_count 5) echo " 话题数量: $TOPIC_COUNT" if [ "$TOPIC_COUNT" -gt 10 ]; then - echo " ✅ ros2 daemon 正常 (${TOPIC_COUNT} 个话题)" + info "ok" "ros2 daemon 正常 (${TOPIC_COUNT} 个话题)" else - echo " ❌ ros2 topic list 异常 (${TOPIC_COUNT} 个话题,可能 DDS 有问题)" + info "err" "ros2 topic list 异常 (${TOPIC_COUNT} 个话题,可能 DDS 有问题)" echo " 手动执行: rm -rf \"$FASTRTPS_SHM_DIR\"/fastrtps_* && ros2 daemon stop && ros2 daemon start" fi -# 8b. 验证关键话题 +# 验证关键话题 echo "" echo "验证关键话题..." -for TOPIC in /odom /scan /cmd_vel /tf; do - if ROS_DOMAIN_ID=$ROS_DOMAIN_ID_VAL ros2 topic list 2>/dev/null | grep -q "$TOPIC"; then - echo " ✅ $TOPIC" +for TOPIC in /odom /scan /cmd_vel /tf /clock /scan_corrected; do + if topic_exists "$TOPIC"; then + info "ok" "$TOPIC" else - echo " ⚠️ $TOPIC 未找到" + info "warn" "$TOPIC 未找到" fi done -# 8c. 验证进程数量(确保没有重复启动) +# 验证进程数量 echo "" echo "验证进程数量..." -BRINGUP_PROCS=$(ps aux | grep -E 'agv_pro_node|lslidar_driver_node' | grep -v grep | wc -l || echo 0) +BRINGUP_PROCS=$(count_matching_processes 'agv_pro_node|lslidar_driver_node') echo " AGV 核心进程: $BRINGUP_PROCS (应为 2)" if [ "$BRINGUP_PROCS" -eq 2 ]; then - echo " ✅ 进程数量正常(无重复)" + info "ok" "进程数量正常(无重复)" elif [ "$BRINGUP_PROCS" -gt 2 ]; then - echo " ⚠️ 发现 $BRINGUP_PROCS 个核心进程(可能有残留),建议重启" + info "warn" "发现 $BRINGUP_PROCS 个核心进程(可能有残留),建议重启" else - echo " ⚠️ 进程数量异常" + info "warn" "进程数量异常" fi -# 8d. FastRTPS 共享内存状态 +# FastRTPS 共享内存状态 echo "" echo "FastRTPS 共享内存状态:" -FASTRTPS_NEW=$(ls "$FASTRTPS_SHM_DIR"/fastrtps_* 2>/dev/null | wc -l || echo 0) +FASTRTPS_NEW=$(count_fastrtps_files) echo " 当前文件数: $FASTRTPS_NEW (正常运行时会有一些)" -# 8e. Flask API 测试 +# Flask API 状态 echo "" echo "验证 Flask API..." -FLASK_RUNNING=$(pgrep -f "app.py" | wc -l || echo 0) -if [ "$FLASK_RUNNING" -gt 0 ]; then - echo " ✅ Flask 进程运行中" +if pgrep -f "app.py" >/dev/null 2>&1; then + info "ok" "Flask 进程运行中" else - echo " ❌ Flask 未运行" + info "err" "Flask 未运行" fi -# ---------- 完成 ---------- -echo "" -echo "==========================================" -echo " ✅ 启动完成" -echo "==========================================" +# ============================================================================ +# 完成 +# ============================================================================ +section "[OK] 启动完成" echo "" echo " 进程状态:" -for PROC in "bringup:$BRINGUP_PID" "Nav2:$NAV2_PID" "fixer:$FIXER_PID" "Flask:$FLASK_PID"; do - NAME="${PROC%%:*}" - PID="${PROC##*:}" - STATUS=$(ps aux | grep -w "$PID" | grep -v grep | awk '{print "运行中"}' || echo '已退出') - echo " $NAME : $STATUS" +for proc_info in "bringup:$BRINGUP_PID" "Nav2:$NAV2_PID" "fixer:$FIXER_PID" "Flask:$FLASK_PID"; do + name="${proc_info%%:*}" + pid="${proc_info##*:}" + if ps aux | grep -w "$pid" | grep -v grep >/dev/null 2>&1; then + echo " $name : 运行中 (PID: $pid)" + else + echo " $name : 已退出" + fi done echo "" echo " 日志文件:" @@ -357,4 +239,5 @@ echo "" echo " 如果仍有问题,请依次执行:" echo " 1. ./scripts/stop_all.sh" echo " 2. rm -rf \"$FASTRTPS_SHM_DIR\"/fastrtps_*" -echo " 3. ./scripts/start_all.sh" +echo " 3. ./scripts/prod-backend.sh" +echo "" diff --git a/scripts/ros-common.sh b/scripts/ros-common.sh new file mode 100755 index 0000000..d5819b5 --- /dev/null +++ b/scripts/ros-common.sh @@ -0,0 +1,328 @@ +#!/bin/bash +# ROS AGV 公共库 +# 提供生产脚本共享的配置、清理与验证函数 + +set -euo pipefail + +# ============================================================================ +# 配置(可通过环境变量覆盖) +# ============================================================================ +readonly AGV_PROJECT_DIR="${AGV_PROJECT_DIR:-$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)}" +readonly AGV_APP_DIR="${AGV_APP_DIR:-$AGV_PROJECT_DIR/agv_app}" +readonly AGV_ROS2_DIR="${AGV_ROS2_DIR:-$HOME/agv_pro_ros2}" +readonly ROS_SETUP="${ROS_SETUP:-/opt/ros/humble/setup.bash}" +readonly ROS_WORKSPACE_SETUP="${ROS_WORKSPACE_SETUP:-$AGV_ROS2_DIR/install/setup.bash}" +readonly SCAN_FIXER_DIR="${SCAN_FIXER_DIR:-$AGV_PROJECT_DIR/scan_fixer}" +readonly LOG_DIR="${LOG_DIR:-/tmp}" +readonly FASTRTPS_SHM_DIR="${FASTRTPS_SHM_DIR:-/dev/shm}" +readonly AGV_CONTROLLER_DEVICE="${AGV_CONTROLLER_DEVICE:-/dev/agvpro_controller}" +export ROS_DOMAIN_ID="${ROS_DOMAIN_ID:-1}" + +# 日志文件 +readonly BRINGUP_LOG="$LOG_DIR/ros2_bringup.log" +readonly NAV2_LOG="$LOG_DIR/ros2_nav2.log" +readonly CLOCK_LOG="$LOG_DIR/clock_publisher.log" +readonly SCAN_FIXER_LOG="$LOG_DIR/scan_fixer.log" +readonly FLASK_LOG="$LOG_DIR/agv_flask.log" + +# ============================================================================ +# 进程管理 +# ============================================================================ + +# ROS 相关进程列表(用于清理) +readonly ROS_PROCESSES=( + "ros2 launch agv_pro_bringup" + "ros2 launch agv_pro_navigation2" + "agv_pro_node" + "lslidar_driver_node" + "component_container" + "robot_state_publisher" + "fix_scan_timestamp" + "clock_publisher" + "python.*app.py" + "uv run .*python app.py" +) + +# 软杀所有进程 +kill_all_soft() { + echo " 软杀进程中..." + for proc in "${ROS_PROCESSES[@]}"; do + pkill -f "$proc" 2>/dev/null || true + done + sleep 2 +} + +# 硬杀所有进程 +kill_all_hard() { + echo " 强制终止中..." + for proc in "${ROS_PROCESSES[@]}"; do + pkill -9 -f "$proc" 2>/dev/null || true + done + sleep 1 +} + +# 统计匹配进程数 +count_matching_processes() { + local pattern=$1 + local current_pid=$$ + local shell_pid=$BASHPID + local parent_pid=$PPID + local count=0 + local pid + local args + + while read -r pid args; do + if [ -z "${pid:-}" ]; then + continue + fi + if [ "$pid" = "$current_pid" ] || [ "$pid" = "$shell_pid" ] || [ "$pid" = "$parent_pid" ]; then + continue + fi + if [[ "${args:-}" =~ $pattern ]]; then + count=$((count + 1)) + fi + done < <(ps -eo pid=,args=) + + echo "$count" +} + +# 统计残留进程数 +count_residual_processes() { + count_matching_processes "agv_pro_node|lslidar_driver_node|component_container|fix_scan_timestamp|clock_publisher|app.py|ros2-daemon" +} + +# ============================================================================ +# FastRTPS 清理 +# ============================================================================ + +# 清理 FastRTPS 共享内存 +cleanup_fastrtps() { + local count + count=$(count_fastrtps_files) + + if [ "$count" -gt 0 ]; then + rm -rf "$FASTRTPS_SHM_DIR"/fastrtps_* + echo " 已清理 $count 个 FastRTPS 文件" + else + echo " 无 FastRTPS 文件残留" + fi + + # 清理锁文件 + rm -f /tmp/scan_fixer.lock /tmp/clock_publisher.lock +} + +# 统计 FastRTPS 文件数 +count_fastrtps_files() ( + shopt -s nullglob + local files=("$FASTRTPS_SHM_DIR"/fastrtps_*) + echo "${#files[@]}" +) + +# 载入 ROS2 环境后执行命令 +ros2_exec() { + bash -c ' + source "$1" || exit 1 + if [ -f "$2" ]; then + source "$2" || exit 1 + fi + export ROS_DOMAIN_ID="$3" + shift 3 + "$@" + ' _ "$ROS_SETUP" "$ROS_WORKSPACE_SETUP" "$ROS_DOMAIN_ID" "$@" +} + +# ============================================================================ +# ROS2 环境操作 +# ============================================================================ + +ros2_topic_list() { + ros2_exec ros2 topic list 2>/dev/null || true +} + +ros2_topic_count() { + local topics + topics=$(ros2_exec timeout "${1:-5}" ros2 topic list 2>/dev/null || true) + if [ -z "$topics" ]; then + echo 0 + else + printf '%s\n' "$topics" | sed '/^$/d' | wc -l + fi +} + +topic_exists() { + ros2_topic_list | grep -Fxq "$1" +} + +# 启动 ROS2 daemon +start_ros2_daemon() { + echo " 启动 ros2 daemon..." + rm -rf "$FASTRTPS_SHM_DIR"/fastrtps_* 2>/dev/null || true + + nohup bash -c ' + source "$1" || exit 1 + export ROS_DOMAIN_ID="$2" + ros2 daemon start + ' _ "$ROS_SETUP" "$ROS_DOMAIN_ID" >/dev/null 2>&1 & + sleep 4 + + # 等待 daemon 就绪 + for _ in $(seq 1 5); do + if ros2_exec timeout 3 ros2 topic list &>/dev/null; then + echo " [OK] ros2 daemon 已就绪" + return 0 + fi + sleep 2 + done + echo " [WARN] ros2 daemon 可能有问题" + return 1 +} + +# 停止 ROS2 daemon +stop_ros2_daemon() { + echo " 重置 ros2 daemon..." + pkill -f "ros2-daemon" 2>/dev/null || true + pkill -9 -f "ros2-daemon" 2>/dev/null || true + sleep 2 + + source "$ROS_SETUP" 2>/dev/null || true + ros2 daemon stop 2>/dev/null || true + echo " [OK] ros2 daemon 已重置" +} + +# ============================================================================ +# 等待/验证函数 +# ============================================================================ + +# 等待话题出现 +# 用法: wait_for_topic <话题名> <最大等待秒数> +wait_for_topic() { + local topic=$1 + local max_wait=${2:-30} + local elapsed=0 + + while [ "$elapsed" -lt "$max_wait" ]; do + if topic_exists "$topic"; then + echo " [OK] $topic 已上线" + return 0 + fi + sleep 2 + elapsed=$((elapsed + 2)) + done + echo " [WARN] $topic 未在 $max_wait 秒内上线" + return 1 +} + +# 等待节点出现(匹配数量) +# 用法: wait_for_nodes <节点模式> <期望数量> <最大等待秒数> +wait_for_nodes() { + local pattern=$1 + local expected=$2 + local max_wait=${3:-30} + local elapsed=0 + local count=0 + + while [ "$elapsed" -lt "$max_wait" ]; do + local nodes + nodes=$(ros2_exec ros2 node list 2>/dev/null || true) + count=$(printf '%s\n' "$nodes" | grep -cE "$pattern" || true) + if [ "$count" -ge "$expected" ]; then + echo " [OK] 已检测到 $count 个节点" + return 0 + fi + sleep 2 + elapsed=$((elapsed + 2)) + done + echo " [WARN] 仅检测到 $count 个节点(期望 $expected 个)" + return 1 +} + +# ============================================================================ +# 日志/输出辅助 +# ============================================================================ + +# 打印分节标题 +section() { + echo "" + echo "==========================================" + echo " $1" + echo "==========================================" +} + +# 打印步骤 +step() { + echo "" + echo "[$1] $2" +} + +# 打印带状态的信息 +info() { + local status=$1 + local msg=$2 + if [ "$status" = "ok" ]; then + echo " [OK] $msg" + elif [ "$status" = "warn" ]; then + echo " [WARN] $msg" + elif [ "$status" = "err" ]; then + echo " [ERROR] $msg" + else + echo " $msg" + fi +} + +# 显示日志尾部 +show_log_tail() { + local log_file=$1 + local lines=${2:-5} + if [ -f "$log_file" ]; then + echo " --- 日志尾部 ($log_file) ---" + tail -"$lines" "$log_file" 2>/dev/null | sed 's/^/ /' || true + fi +} + +# ============================================================================ +# 完整清理流程 +# ============================================================================ + +# 执行完整清理(供 stop_all.sh 使用) +full_cleanup() { + section "Robot AGV 全量停止" + + step "1/5" "软杀所有相关进程" + kill_all_soft + + step "2/5" "强制终止残留进程" + kill_all_hard + + step "3/5" "重置 ros2 daemon" + stop_ros2_daemon + + step "4/5" "清理 FastRTPS 共享内存" + cleanup_fastrtps + + step "5/5" "验证清理结果" + local proc_count=$(count_residual_processes) + local fastrtps_left=$(count_fastrtps_files) + + echo " 残留进程数: $proc_count" + echo " FastRTPS 文件数: $fastrtps_left" + + if [ "$proc_count" -eq 0 ] && [ "$fastrtps_left" -eq 0 ]; then + section "[OK] 停止完成 - 系统已完全清理" + else + section "[WARN] 停止完成 - 部分残留可能需要手动清理" + echo "" + echo " 手动清理命令(如需要):" + echo " pkill -9 -f 'agv_pro_node|lslidar|component_container'" + echo " pkill -9 -f 'fix_scan_timestamp|app.py'" + echo " pkill -9 -f 'ros2-daemon'" + echo " rm -rf \"$FASTRTPS_SHM_DIR\"/fastrtps_*" + fi + echo "" + echo " 现在可以安全运行 ./scripts/prod-backend.sh" + echo "" +} + +# ============================================================================ +# 初始化(确保目录存在) +# ============================================================================ +mkdir -p "$LOG_DIR" diff --git a/scripts/stop_all.sh b/scripts/stop_all.sh index af31dda..cf4b5c4 100755 --- a/scripts/stop_all.sh +++ b/scripts/stop_all.sh @@ -1,97 +1,16 @@ #!/bin/bash # ============================================================ # stop_all.sh - 关闭 AGV 拍摄系统所有相关进程 -# 版本: v2.0 +# 版本: v3.0 # 修复: +# - v3.0: 使用公共库重构,减少代码重复 # - v2.0: 添加 FastRTPS 清理 + ros2 daemon 重置 # ============================================================ -set -e +set -euo pipefail -ROS_DISTRO="${ROS_DISTRO:-humble}" -ROS_SETUP="${ROS_SETUP:-/opt/ros/$ROS_DISTRO/setup.bash}" -LOCK_DIR="${LOCK_DIR:-/tmp}" -FASTRTPS_SHM_DIR="${FASTRTPS_SHM_DIR:-/dev/shm}" +# 加载公共库 +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +source "$SCRIPT_DIR/ros-common.sh" -echo "==========================================" -echo " Robot AGV 全量停止" -echo "==========================================" -echo "" - -# ---------- 1. 软杀所有相关进程 ---------- -echo "[1/5] 软杀所有相关进程..." -pkill -f "python.*app.py" 2>/dev/null || true -pkill -f "uv run .*python app.py" 2>/dev/null || true -pkill -f "agv_pro_bringup" 2>/dev/null || true -pkill -f "agv_pro_navigation2" 2>/dev/null || true -pkill -f "agv_pro_node" 2>/dev/null || true -pkill -f "lslidar_driver_node" 2>/dev/null || true -pkill -f "component_container" 2>/dev/null || true -pkill -f "fix_scan_timestamp" 2>/dev/null || true -pkill -f "clock_publisher" 2>/dev/null || true -pkill -f "robot_state_publisher" 2>/dev/null || true -pkill -f "start_all.sh" 2>/dev/null || true -sleep 2 - -# ---------- 2. 硬杀确保干净 ---------- -echo "[2/5] 硬杀残留进程..." -pkill -9 -f "app.py" 2>/dev/null || true -pkill -9 -f "agv_pro_node" 2>/dev/null || true -pkill -9 -f "lslidar_driver_node" 2>/dev/null || true -pkill -9 -f "component_container" 2>/dev/null || true -pkill -9 -f "fix_scan_timestamp" 2>/dev/null || true -pkill -9 -f "agv_pro_bringup" 2>/dev/null || true -pkill -9 -f "agv_pro_navigation2" 2>/dev/null || true -sleep 1 - -# ---------- 3. 【关键】清理 FastRTPS 共享内存 ---------- -echo "[3/5] 清理 FastRTPS 共享内存..." -FASTRTPS_COUNT=$(ls "$FASTRTPS_SHM_DIR"/fastrtps_* 2>/dev/null | wc -l || echo 0) -if [ "$FASTRTPS_COUNT" -gt 0 ]; then - rm -rf "$FASTRTPS_SHM_DIR"/fastrtps_* - echo " 已清理 $FASTRTPS_COUNT 个 FastRTPS 文件" -else - echo " 无 FastRTPS 文件残留" -fi - -# 清理 scan_fixer 锁文件 -rm -f "$LOCK_DIR/scan_fixer.lock" -rm -f "$LOCK_DIR/clock_publisher.lock" -echo " ✅ FastRTPS 清理完成" - -# ---------- 4. 【关键】重置 ros2 daemon ---------- -echo "[4/5] 重置 ros2 daemon..." -pkill -f "ros2-daemon" 2>/dev/null || true -pkill -9 -f "ros2-daemon" 2>/dev/null || true -sleep 2 -source "$ROS_SETUP" 2>/dev/null || true -ros2 daemon stop 2>/dev/null || true -echo " ✅ ros2 daemon 已重置" - -# ---------- 5. 验证清理结果 ---------- -echo "[5/5] 验证清理结果..." -PROC_COUNT=$(ps aux | grep -E 'agv_pro_node|lslidar_driver_node|component_container|fix_scan_timestamp|clock_publisher|app.py|ros2-daemon' | grep -v grep | wc -l || echo 0) -FASTRTPS_LEFT=$(ls "$FASTRTPS_SHM_DIR"/fastrtps_* 2>/dev/null | wc -l || echo 0) - -echo " 残留进程数: $PROC_COUNT" -echo " FastRTPS 文件数: $FASTRTPS_LEFT" - -if [ "$PROC_COUNT" -eq 0 ] && [ "$FASTRTPS_LEFT" -eq 0 ]; then - echo "" - echo "==========================================" - echo " ✅ 停止完成 - 系统已完全清理" - echo "==========================================" -else - echo "" - echo "==========================================" - echo " ⚠️ 停止完成 - 部分残留可能需要手动清理" - echo "==========================================" - echo "" - echo " 手动清理命令(如需要):" - echo " pkill -9 -f 'agv_pro_node|lslidar|component_container'" - echo " pkill -9 -f 'fix_scan_timestamp|app.py'" - echo " pkill -9 -f 'ros2-daemon'" - echo " rm -rf \"$FASTRTPS_SHM_DIR\"/fastrtps_*" -fi -echo "" -echo " 现在可以安全运行 ./start_all.sh" -echo "" +# 执行完整清理 +full_cleanup