Refactor ROS startup scripts

This commit is contained in:
2026-06-22 13:06:31 +08:00
parent 62cccfbcc6
commit 55f646053d
5 changed files with 480 additions and 345 deletions
+4 -4
View File
@@ -766,7 +766,7 @@ sudo systemctl start arm_server
# === AGV 端 ===
# 2. 完整启动 ROS2 导航栈 + Flask
cd ~/work/smart-inspection
./scripts/start_all.sh
./scripts/prod-backend.sh
```
### 9.3 部署命令
@@ -779,8 +779,8 @@ uv sync --locked
# 部署后验证远程文件
ssh elephant@192.168.60.80 "grep 'def _lookup_model' /home/elephant/work/smart-inspection/agv_app/utils/mission_executor.py"
# 重启 Flask
ssh elephant@192.168.60.80 'bash -s' < scripts/restart_flask.sh
# 重启生产后端
ssh elephant@192.168.60.80 'cd ~/work/smart-inspection && ./scripts/prod-backend.sh'
# 清空 Python 缓存(关键!修改后必须清)
ssh elephant@192.168.60.80 "find /home/elephant/work/smart-inspection/agv_app -name '*.pyc' -delete; find /home/elephant/work/smart-inspection/agv_app -name '__pycache__' -type d -exec rm -rf {} +"
@@ -864,7 +864,7 @@ agv_app/
```
启动脚本位于仓库顶层 `scripts/`。LiDAR 时间戳修复脚本部署在 AGV 的
`/home/elephant/work/scan_fixer/`,由 `scripts/start_all.sh` 调用。
`/home/elephant/work/smart-inspection/scan_fixer/`,由 `scripts/prod-backend.sh` 调用。
## 附录 B:关键依赖
+17 -12
View File
@@ -4,11 +4,11 @@
```
scripts/
├── start_all.sh ← 生产环境完整启动(ROS2 + Nav2 + Flask)
├── prod-backend.sh ← 生产环境完整启动(ROS2 + Nav2 + Flask)
├── stop_all.sh ← 生产环境完整停止
├── start_flask.sh ← 仅重启 Flask(修改代码后快速部署)
├── restart_flask.sh ← 语法检查 + 清缓存 + 重启 Flask + 验证
└── dev_start.sh ← 本地开发用(前台运行,不启动 ROS2)
├── dev-backend.sh ← 本地后端开发启动(Mock 硬件模式)
├── dev-frontend.sh ← 本地前端开发启动
└── stop.sh ← 停止本地开发服务
```
`scan_fixer/` 是生产启动链路的一部分:`clock_publisher.py` 发布 `/clock`
@@ -42,25 +42,25 @@ sudo apt install -y ffmpeg libzbar0
# 在 AGV 上执行
cd ~/work/smart-inspection
./scripts/stop_all.sh # 先彻底清理
./scripts/start_all.sh # 完整启动
./scripts/prod-backend.sh # 完整启动
```
### 2. 修改代码后快速部署
### 2. 修改代码后重新启动生产后端
```bash
# 部署文件到 AGV 后
ssh elephant@192.168.60.80 'bash -s' < scripts/restart_flask.sh
ssh elephant@192.168.60.80 'cd ~/work/smart-inspection && ./scripts/prod-backend.sh'
```
### 3. 本地开发调试(不连硬件)
```bash
# 在本机执行,仅启动 Flask
./scripts/dev_start.sh
# 在本机执行,仅启动 Mock 后端
./scripts/dev-backend.sh
# 访问 http://127.0.0.1:5000
```
### 4. 远程轻量重启(ROS2 已运行)
### 4. 本地前端开发
```bash
ssh elephant@192.168.60.80 'bash -s' < scripts/start_flask.sh
./scripts/dev-frontend.sh
```
## 环境变量
@@ -72,8 +72,13 @@ ssh elephant@192.168.60.80 'bash -s' < scripts/start_flask.sh
| `AGV_PROJECT_DIR` | `/home/elephant/work/smart-inspection` | 仓库根目录 |
| `AGV_APP_DIR` | `$AGV_PROJECT_DIR/agv_app` | Flask 应用目录 |
| `AGV_ROS2_DIR` | `/home/elephant/agv_pro_ros2` | ROS2 工作空间 |
| `ROS_SETUP` | `/opt/ros/humble/setup.bash` | ROS2 环境脚本 |
| `ROS_WORKSPACE_SETUP` | `$AGV_ROS2_DIR/install/setup.bash` | ROS2 工作空间环境脚本 |
| `SCAN_FIXER_DIR` | `$AGV_PROJECT_DIR/scan_fixer` | 时间戳修正工具目录 |
| `FIXER_SCRIPT` | `fix_scan_timestamp_v6.py` | fixer 脚本名 |
| `LOG_DIR` | `/tmp` | 日志目录 |
| `FASTRTPS_SHM_DIR` | `/dev/shm` | FastRTPS 共享内存目录 |
| `AGV_CONTROLLER_DEVICE` | `/dev/agvpro_controller` | AGV 控制器设备 |
| `ROS_DOMAIN_ID` | `1` | ROS2 通信域 ID |
## 日志位置(AGV 上)
+123 -240
View File
@@ -1,351 +1,233 @@
#!/bin/bash
# ============================================================
# Robot AGV 全量启动脚本 v4.0
# Robot AGV 全量启动脚本 v5.0
# 修复:
# - v5.0: 使用公共库重构,减少代码重复
# - v4.0: 彻底杀死 ros2 daemon 进程 + 启动前进程数量检查
# - v3.0: 彻底清理 FastRTPS 共享内存文件(永久修复 DDS 通信问题)
# - v2.7: 添加 ROS_DOMAIN_ID 环境变量传递
# - v2.6: 清理 scan_fixer lock 文件防残留
# ============================================================
set -e
set -euo pipefail
# ---- 可配置项(环境变量覆盖默认值) ----
# 加载公共库
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"
AGV_PROJECT_DIR="${AGV_PROJECT_DIR:-$PROJECT_DIR}"
AGV_APP_DIR="${AGV_APP_DIR:-$AGV_PROJECT_DIR/agv_app}"
AGV_ROS2_DIR="${AGV_ROS2_DIR:-$HOME/agv_pro_ros2}"
ROS_DISTRO="${ROS_DISTRO:-humble}"
ROS_SETUP="${ROS_SETUP:-/opt/ros/$ROS_DISTRO/setup.bash}"
ROS_WORKSPACE_SETUP="${ROS_WORKSPACE_SETUP:-$AGV_ROS2_DIR/install/setup.bash}"
SCAN_FIXER_DIR="${SCAN_FIXER_DIR:-$AGV_PROJECT_DIR/scan_fixer}"
FIXER_SCRIPT="${FIXER_SCRIPT:-fix_scan_timestamp_v6.py}"
LOG_DIR="${LOG_DIR:-/tmp}"
LOCK_DIR="${LOCK_DIR:-/tmp}"
FASTRTPS_SHM_DIR="${FASTRTPS_SHM_DIR:-/dev/shm}"
AGV_CONTROLLER_DEVICE="${AGV_CONTROLLER_DEVICE:-/dev/agvpro_controller}"
ROS_DOMAIN_ID_VAL=1
source "$SCRIPT_DIR/ros-common.sh"
BRINGUP_LOG="$LOG_DIR/ros2_bringup.log"
NAV2_LOG="$LOG_DIR/ros2_nav2.log"
CLOCK_LOG="$LOG_DIR/clock_publisher.log"
SCAN_FIXER_LOG="$LOG_DIR/scan_fixer.log"
FLASK_LOG="$LOG_DIR/agv_flask.log"
# ============================================================================
# 主流程
# ============================================================================
mkdir -p "$LOG_DIR"
section "Robot AGV 全量启动 v5.0"
echo "=========================================="
echo " Robot AGV 全量启动 v4.0"
echo "=========================================="
echo ""
# ============================================================================
# 1. 清理旧环境
# ============================================================================
step "1/8" "清理旧进程和共享内存"
# ---------- 1. 清理旧进程 + FastRTPS 共享内存 ----------
echo "[1/8] 清理旧进程和共享内存..."
kill_all_soft
kill_all_hard
stop_ros2_daemon
cleanup_fastrtps
# 杀掉所有相关进程(先软杀,再硬杀确保干净)
pkill -f "ros2 launch agv_pro_bringup" 2>/dev/null || true
pkill -f "ros2 launch agv_pro_navigation2" 2>/dev/null || true
pkill -f "agv_pro_node" 2>/dev/null || true
pkill -f "lslidar_driver_node" 2>/dev/null || true
pkill -f "component_container" 2>/dev/null || true
pkill -f "robot_state_publisher" 2>/dev/null || true
pkill -f "fix_scan_timestamp" 2>/dev/null || true
pkill -f "clock_publisher" 2>/dev/null || true
pkill -f "python.*app.py" 2>/dev/null || true
pkill -f "uv run .*python app.py" 2>/dev/null || true
sleep 2
# 【关键】硬杀确保干净
echo " 硬杀残留进程..."
pkill -9 -f "agv_pro_node" 2>/dev/null || true
pkill -9 -f "lslidar_driver_node" 2>/dev/null || true
pkill -9 -f "component_container" 2>/dev/null || true
pkill -9 -f "clock_publisher" 2>/dev/null || true
pkill -9 -f "fix_scan_timestamp" 2>/dev/null || true
pkill -9 -f "app.py" 2>/dev/null || true
sleep 1
# 【关键】杀死 ros2 daemon 进程本身(不是只 stop,而是杀进程)
echo " 重置 ros2 daemon..."
pkill -f "ros2-daemon" 2>/dev/null || true
pkill -9 -f "ros2-daemon" 2>/dev/null || true
sleep 2
# 【关键】清理 FastRTPS 共享内存文件(杀进程后立即清理)
echo " 清理 FastRTPS 共享内存文件..."
FASTRTPS_COUNT=$(ls "$FASTRTPS_SHM_DIR"/fastrtps_* 2>/dev/null | wc -l || echo 0)
if [ "$FASTRTPS_COUNT" -gt 0 ]; then
rm -rf "$FASTRTPS_SHM_DIR"/fastrtps_*
echo " 已清理 $FASTRTPS_COUNT 个 FastRTPS 文件"
else
echo " 无 FastRTPS 文件残留"
fi
# 清理 scan_fixer 锁文件
rm -f "$LOCK_DIR/scan_fixer.lock"
# 【关键】验证进程已全部停止
# 验证进程已停止
echo " 验证进程停止..."
PROC_COUNT=$(ps aux | grep -E 'agv_pro_node|lslidar_driver_node|component_container|fix_scan_timestamp|app.py' | grep -v grep | wc -l || echo 0)
PROC_COUNT=$(count_residual_processes)
echo " 残留进程数: $PROC_COUNT"
if [ "$PROC_COUNT" -gt 0 ]; then
echo " ⚠️ 仍有进程残留,强制终止..."
pkill -9 -f "agv_pro_node" 2>/dev/null || true
pkill -9 -f "lslidar_driver_node" 2>/dev/null || true
pkill -9 -f "component_container" 2>/dev/null || true
pkill -9 -f "fix_scan_timestamp" 2>/dev/null || true
pkill -9 -f "app.py" 2>/dev/null || true
sleep 2
PROC_COUNT2=$(ps aux | grep -E 'agv_pro_node|lslidar_driver_node|component_container|fix_scan_timestamp|app.py' | grep -v grep | wc -l || echo 0)
echo " 清理后残留: $PROC_COUNT2"
echo " [WARN] 仍有进程残留,再次强制终止..."
kill_all_hard
PROC_COUNT=$(count_residual_processes)
echo " 清理后残留: $PROC_COUNT"
fi
echo " ✅ 清理完成"
info "ok" "清理完成"
# ---------- 2. 启动 ros2 daemon ----------
echo "[2/8] 启动 ros2 daemon..."
# ============================================================================
# 2. 启动 ros2 daemon
# ============================================================================
step "2/8" "启动 ros2 daemon"
source "$ROS_SETUP" 2>/dev/null || true
# 再次确保没有残留共享内存(启动 daemon 前)
rm -rf "$FASTRTPS_SHM_DIR"/fastrtps_* 2>/dev/null || true
# 使用 bash -c 确保环境变量正确传递
nohup bash -c "source \"$ROS_SETUP\" && export ROS_DOMAIN_ID=$ROS_DOMAIN_ID_VAL && ros2 daemon start" >/dev/null 2>&1 &
sleep 4
# 验证 daemon 是否就绪(用简单的 topic list 测试)
DAEMON_OK=0
for i in $(seq 1 5); do
DAEMON_TOPICS=$(bash -c "source \"$ROS_SETUP\" && ROS_DOMAIN_ID=$ROS_DOMAIN_ID_VAL timeout 3 ros2 topic list 2>&1" | wc -l || echo 0)
if [ "$DAEMON_TOPICS" -gt 0 ]; then
DAEMON_OK=1
echo " ✅ ros2 daemon 就绪"
break
fi
sleep 2
done
if [ "$DAEMON_OK" -eq 0 ]; then
echo " ⚠️ ros2 daemon 可能有问题,继续尝试启动组件..."
fi
# ---------- 3. 启动 bringup (含激光雷达) ----------
echo "[3/8] 启动 AGV Bringup..."
source "$ROS_SETUP" 2>/dev/null || true
# 【关键】启动前最后确认没有残留共享内存
rm -rf "$FASTRTPS_SHM_DIR"/fastrtps_* 2>/dev/null || true
start_ros2_daemon || true
# ============================================================================
# 3. 启动 bringup (含激光雷达)
# ============================================================================
step "3/8" "启动 AGV Bringup"
cd "$AGV_ROS2_DIR"
source "$ROS_WORKSPACE_SETUP"
nohup bash -c "export ROS_DOMAIN_ID=$ROS_DOMAIN_ID_VAL && ros2 launch agv_pro_bringup agv_pro_bringup.launch.py port_name:=$AGV_CONTROLLER_DEVICE" > "$BRINGUP_LOG" 2>&1 &
rm -rf "$FASTRTPS_SHM_DIR"/fastrtps_* 2>/dev/null || true
nohup bash -c "export ROS_DOMAIN_ID=$ROS_DOMAIN_ID && \
ros2 launch agv_pro_bringup agv_pro_bringup.launch.py port_name:=$AGV_CONTROLLER_DEVICE" \
> "$BRINGUP_LOG" 2>&1 &
BRINGUP_PID=$!
echo " bringup PID: $BRINGUP_PID"
echo " 等待 bringup 就绪..."
BRINGUP_OK=0
for i in $(seq 1 20); do
if ROS_DOMAIN_ID=$ROS_DOMAIN_ID_VAL ros2 topic list 2>/dev/null | grep -q '/odom'; then
echo " ✅ bringup 已就绪 (${i}x2秒)"
BRINGUP_OK=1
break
fi
sleep 2
done
if [ "$BRINGUP_OK" -eq 0 ]; then
echo " ⚠️ bringup 未检测到 /odom,继续启动后续组件..."
tail -5 "$BRINGUP_LOG" 2>/dev/null || true
fi
# 等待 /odom 话题
wait_for_topic /odom 40 || show_log_tail "$BRINGUP_LOG"
# ---------- 3.5 启动系统时钟发布器 ----------
echo "[3.5/8] 启动系统时钟发布器 (clock_publisher)..."
# ============================================================================
# 4. 启动系统时钟发布器
# ============================================================================
step "4/8" "启动系统时钟发布器 (clock_publisher)"
nohup bash -c "source \"$ROS_SETUP\" && \
ROS_DOMAIN_ID=$ROS_DOMAIN_ID_VAL python3 \"$SCAN_FIXER_DIR/clock_publisher.py\"" \
ROS_DOMAIN_ID=$ROS_DOMAIN_ID python3 \"$SCAN_FIXER_DIR/clock_publisher.py\"" \
> "$CLOCK_LOG" 2>&1 &
CLOCK_PID=$!
echo " clock_publisher PID: $CLOCK_PID"
sleep 2
# 验证 /clock 话题
if ROS_DOMAIN_ID=$ROS_DOMAIN_ID_VAL ros2 topic list 2>/dev/null | grep -q '/clock'; then
echo " ✅ /clock 已上线"
else
echo " ⚠️ /clock 未上线,检查日志:"
tail -5 "$CLOCK_LOG" 2>/dev/null || true
fi
wait_for_topic /clock 10 || show_log_tail "$CLOCK_LOG"
# ---------- 4. 启动激光时间戳修正节点 ----------
echo "[4/8] 启动激光时间戳修正节点..."
# ============================================================================
# 5. 启动激光时间戳修正节点
# ============================================================================
step "5/8" "启动激光时间戳修正节点"
# 确保 /scan 存在
SCAN_OK=0
for i in $(seq 1 10); do
if ROS_DOMAIN_ID=$ROS_DOMAIN_ID_VAL ros2 topic list 2>/dev/null | grep -q '/scan'; then
echo " /scan 话题已上线"
SCAN_OK=1
break
fi
sleep 2
done
if [ "$SCAN_OK" -eq 0 ]; then
echo " ⚠️ /scan 未上线,检查 bringup 日志"
# 先等待 /scan 话题
if ! wait_for_topic /scan 20; then
echo " [WARN] /scan 未上线,检查 bringup 日志"
fi
nohup bash -c "source \"$ROS_SETUP\" && \
ROS_DOMAIN_ID=$ROS_DOMAIN_ID_VAL python3 \"$SCAN_FIXER_DIR/$FIXER_SCRIPT\"" \
ROS_DOMAIN_ID=$ROS_DOMAIN_ID python3 \"$SCAN_FIXER_DIR/fix_scan_timestamp_v6.py\"" \
> "$SCAN_FIXER_LOG" 2>&1 &
FIXER_PID=$!
echo " fix_scan_timestamp PID: $FIXER_PID"
sleep 5
# 验证 fixer 进程和 scan_corrected
FIXER_COUNT=$(ps aux | grep -c "[f]ix_scan_timestamp" 2>/dev/null || echo 0)
# 检查是否有多个 fixer 进程
FIXER_COUNT=$(count_matching_processes "fix_scan_timestamp")
if [ "$FIXER_COUNT" -gt 1 ]; then
echo " ⚠️ 发现 $FIXER_COUNT 个 fixer 进程,杀掉多余的..."
echo " [WARN] 发现 $FIXER_COUNT 个 fixer 进程,重启..."
pkill -f "fix_scan_timestamp" 2>/dev/null || true
pkill -f "clock_publisher" 2>/dev/null || true
sleep 2
rm -f "$LOCK_DIR/scan_fixer.lock"
rm -f /tmp/scan_fixer.lock
nohup bash -c "source \"$ROS_SETUP\" && \
ROS_DOMAIN_ID=$ROS_DOMAIN_ID_VAL python3 \"$SCAN_FIXER_DIR/$FIXER_SCRIPT\"" \
ROS_DOMAIN_ID=$ROS_DOMAIN_ID python3 \"$SCAN_FIXER_DIR/fix_scan_timestamp_v6.py\"" \
> "$SCAN_FIXER_LOG" 2>&1 &
FIXER_PID=$!
sleep 3
fi
if ROS_DOMAIN_ID=$ROS_DOMAIN_ID_VAL ros2 topic list 2>/dev/null | grep -q '/scan_corrected'; then
echo " ✅ /scan_corrected 已上线"
else
echo " ⚠️ /scan_corrected 未上线,检查日志:"
tail -5 "$SCAN_FIXER_LOG" 2>/dev/null || true
fi
wait_for_topic /scan_corrected 15 || show_log_tail "$SCAN_FIXER_LOG"
# ---------- 5. 启动 Nav2 ----------
echo "[5/8] 启动 Nav2 导航..."
source "$ROS_SETUP" 2>/dev/null || true
cd "$AGV_ROS2_DIR"
source "$ROS_WORKSPACE_SETUP"
# ============================================================================
# 6. 启动 Nav2
# ============================================================================
step "6/8" "启动 Nav2 导航"
nohup bash -c "source \"$ROS_SETUP\" && \
source \"$ROS_WORKSPACE_SETUP\" && \
export ROS_DOMAIN_ID=$ROS_DOMAIN_ID_VAL && \
ros2 launch agv_pro_navigation2 navigation2_active.launch.py \
autostart:=True" > "$NAV2_LOG" 2>&1 &
nohup bash -c "source \"$ROS_SETUP\" && source \"$ROS_WORKSPACE_SETUP\" && \
export ROS_DOMAIN_ID=$ROS_DOMAIN_ID && \
ros2 launch agv_pro_navigation2 navigation2_active.launch.py autostart:=True" \
> "$NAV2_LOG" 2>&1 &
NAV2_PID=$!
echo " Nav2 PID: $NAV2_PID"
sleep 12
echo " 等待 Nav2 节点就绪..."
NAV2_OK=0
for i in $(seq 1 15); do
NODES=$(ROS_DOMAIN_ID=$ROS_DOMAIN_ID_VAL ros2 node list 2>/dev/null | \
grep -cE 'lifecycle_manager_navigation|bt_navigator|controller_server' 2>/dev/null || echo 0)
NODES=$(echo "$NODES" | tr -d '\n' | awk '{print $1}')
if [ "$NODES" -ge 3 ] 2>/dev/null; then
echo " ✅ Nav2 节点已就绪 ($NODES 个)"
NAV2_OK=1
break
fi
sleep 3
done
if [ "$NAV2_OK" -eq 0 ]; then
echo " ⚠️ Nav2 节点未完全就绪,继续..."
fi
wait_for_nodes 'lifecycle_manager_navigation|bt_navigator|controller_server' 3 45 || true
# ---------- 6. 设置精度参数 ----------
echo "[6/8] 设置导航精度参数 (xy_goal_tolerance=0.05m)..."
source "$ROS_SETUP" 2>/dev/null || true
cd "$AGV_ROS2_DIR"
source "$ROS_WORKSPACE_SETUP"
# ============================================================================
# 7. 设置精度参数
# ============================================================================
step "7/8" "设置导航精度参数 (xy_goal_tolerance=0.05m)"
for NODE in /controller_server /bt_navigator /planner_server; do
ROS_DOMAIN_ID=$ROS_DOMAIN_ID_VAL timeout 1 ros2 param set $NODE general_goal_checker.xy_goal_tolerance 0.05 2>/dev/null || true
ROS_DOMAIN_ID=$ROS_DOMAIN_ID_VAL timeout 1 ros2 param set $NODE general_goal_checker.yaw_goal_tolerance 0.05 2>/dev/null || true
ros2_exec timeout 1 ros2 param set $NODE general_goal_checker.xy_goal_tolerance 0.05 2>/dev/null || true
ros2_exec timeout 1 ros2 param set $NODE general_goal_checker.yaw_goal_tolerance 0.05 2>/dev/null || true
done
ROS_DOMAIN_ID=$ROS_DOMAIN_ID_VAL timeout 1 ros2 param set /controller_server FollowPath.xy_goal_tolerance 0.05 2>/dev/null || true
ROS_DOMAIN_ID=$ROS_DOMAIN_ID_VAL timeout 1 ros2 param set /controller_server general_goal_checker.stateful True 2>/dev/null || true
ROS_DOMAIN_ID=$ROS_DOMAIN_ID_VAL timeout 1 ros2 param set /controller_server FollowPath.stateful True 2>/dev/null || true
echo " ✅ 精度参数已设置"
ros2_exec timeout 1 ros2 param set /controller_server FollowPath.xy_goal_tolerance 0.05 2>/dev/null || true
ros2_exec timeout 1 ros2 param set /controller_server general_goal_checker.stateful True 2>/dev/null || true
ros2_exec timeout 1 ros2 param set /controller_server FollowPath.stateful True 2>/dev/null || true
info "ok" "精度参数已设置"
# ============================================================================
# 8. 启动 Flask API
# ============================================================================
step "8/8" "启动 Flask API"
# ---------- 7. 启动 Flask ----------
echo "[7/8] 启动 Flask API..."
export ROS_DOMAIN_ID=$ROS_DOMAIN_ID_VAL
cd "$AGV_APP_DIR"
nohup uv run --locked python app.py > "$FLASK_LOG" 2>&1 &
FLASK_PID=$!
echo " Flask PID: $FLASK_PID"
sleep 4
# ---------- 8. 最终全面验证 ----------
echo ""
echo "=========================================="
echo " 系统全面验证"
echo "=========================================="
# ============================================================================
# 9. 最终验证
# ============================================================================
section "系统全面验证"
# 8a. 验证 ros2 topic list(核心指标)
# 验证话题数量
echo ""
echo "验证 ros2 topic list..."
TOPIC_COUNT=$(bash -c "source \"$ROS_SETUP\" && ROS_DOMAIN_ID=$ROS_DOMAIN_ID_VAL timeout 5 ros2 topic list 2>/dev/null" | wc -l || echo 0)
TOPIC_COUNT=$(ros2_topic_count 5)
echo " 话题数量: $TOPIC_COUNT"
if [ "$TOPIC_COUNT" -gt 10 ]; then
echo "ros2 daemon 正常 (${TOPIC_COUNT} 个话题)"
info "ok" "ros2 daemon 正常 (${TOPIC_COUNT} 个话题)"
else
echo "ros2 topic list 异常 (${TOPIC_COUNT} 个话题,可能 DDS 有问题)"
info "err" "ros2 topic list 异常 (${TOPIC_COUNT} 个话题,可能 DDS 有问题)"
echo " 手动执行: rm -rf \"$FASTRTPS_SHM_DIR\"/fastrtps_* && ros2 daemon stop && ros2 daemon start"
fi
# 8b. 验证关键话题
# 验证关键话题
echo ""
echo "验证关键话题..."
for TOPIC in /odom /scan /cmd_vel /tf; do
if ROS_DOMAIN_ID=$ROS_DOMAIN_ID_VAL ros2 topic list 2>/dev/null | grep -q "$TOPIC"; then
echo "$TOPIC"
for TOPIC in /odom /scan /cmd_vel /tf /clock /scan_corrected; do
if topic_exists "$TOPIC"; then
info "ok" "$TOPIC"
else
echo " ⚠️ $TOPIC 未找到"
info "warn" "$TOPIC 未找到"
fi
done
# 8c. 验证进程数量(确保没有重复启动)
# 验证进程数量
echo ""
echo "验证进程数量..."
BRINGUP_PROCS=$(ps aux | grep -E 'agv_pro_node|lslidar_driver_node' | grep -v grep | wc -l || echo 0)
BRINGUP_PROCS=$(count_matching_processes 'agv_pro_node|lslidar_driver_node')
echo " AGV 核心进程: $BRINGUP_PROCS (应为 2)"
if [ "$BRINGUP_PROCS" -eq 2 ]; then
echo " ✅ 进程数量正常(无重复)"
info "ok" "进程数量正常(无重复)"
elif [ "$BRINGUP_PROCS" -gt 2 ]; then
echo " ⚠️ 发现 $BRINGUP_PROCS 个核心进程(可能有残留),建议重启"
info "warn" "发现 $BRINGUP_PROCS 个核心进程(可能有残留),建议重启"
else
echo " ⚠️ 进程数量异常"
info "warn" "进程数量异常"
fi
# 8d. FastRTPS 共享内存状态
# FastRTPS 共享内存状态
echo ""
echo "FastRTPS 共享内存状态:"
FASTRTPS_NEW=$(ls "$FASTRTPS_SHM_DIR"/fastrtps_* 2>/dev/null | wc -l || echo 0)
FASTRTPS_NEW=$(count_fastrtps_files)
echo " 当前文件数: $FASTRTPS_NEW (正常运行时会有一些)"
# 8e. Flask API 测试
# Flask API 状态
echo ""
echo "验证 Flask API..."
FLASK_RUNNING=$(pgrep -f "app.py" | wc -l || echo 0)
if [ "$FLASK_RUNNING" -gt 0 ]; then
echo " ✅ Flask 进程运行中"
if pgrep -f "app.py" >/dev/null 2>&1; then
info "ok" "Flask 进程运行中"
else
echo " ❌ Flask 未运行"
info "err" "Flask 未运行"
fi
# ---------- 完成 ----------
echo ""
echo "=========================================="
echo " ✅ 启动完成"
echo "=========================================="
# ============================================================================
# 完成
# ============================================================================
section "[OK] 启动完成"
echo ""
echo " 进程状态:"
for PROC in "bringup:$BRINGUP_PID" "Nav2:$NAV2_PID" "fixer:$FIXER_PID" "Flask:$FLASK_PID"; do
NAME="${PROC%%:*}"
PID="${PROC##*:}"
STATUS=$(ps aux | grep -w "$PID" | grep -v grep | awk '{print "运行中"}' || echo '已退出')
echo " $NAME : $STATUS"
for proc_info in "bringup:$BRINGUP_PID" "Nav2:$NAV2_PID" "fixer:$FIXER_PID" "Flask:$FLASK_PID"; do
name="${proc_info%%:*}"
pid="${proc_info##*:}"
if ps aux | grep -w "$pid" | grep -v grep >/dev/null 2>&1; then
echo " $name : 运行中 (PID: $pid)"
else
echo " $name : 已退出"
fi
done
echo ""
echo " 日志文件:"
@@ -357,4 +239,5 @@ echo ""
echo " 如果仍有问题,请依次执行:"
echo " 1. ./scripts/stop_all.sh"
echo " 2. rm -rf \"$FASTRTPS_SHM_DIR\"/fastrtps_*"
echo " 3. ./scripts/start_all.sh"
echo " 3. ./scripts/prod-backend.sh"
echo ""
+328
View File
@@ -0,0 +1,328 @@
#!/bin/bash
# Shared library for the ROS AGV scripts.
# Provides the configuration, cleanup and verification helpers shared by the
# production scripts; it is loaded with `source` by those scripts.
# NOTE: the `set` line below runs in the sourcing shell, so errexit, nounset
# and pipefail stay enabled for the caller after this file has been sourced.
set -euo pipefail
# ============================================================================
# Configuration (every value can be overridden through the environment)
# ============================================================================
# Each setting keeps an already-set, non-empty environment value and otherwise
# falls back to the default shown; `readonly` then freezes it, so sourcing this
# file a second time in the same shell fails on re-assignment.
# AGV_PROJECT_DIR defaults to the parent of the directory holding this file.
readonly AGV_PROJECT_DIR="${AGV_PROJECT_DIR:-$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)}"
readonly AGV_APP_DIR="${AGV_APP_DIR:-$AGV_PROJECT_DIR/agv_app}"
readonly AGV_ROS2_DIR="${AGV_ROS2_DIR:-$HOME/agv_pro_ros2}"
readonly ROS_SETUP="${ROS_SETUP:-/opt/ros/humble/setup.bash}"
readonly ROS_WORKSPACE_SETUP="${ROS_WORKSPACE_SETUP:-$AGV_ROS2_DIR/install/setup.bash}"
readonly SCAN_FIXER_DIR="${SCAN_FIXER_DIR:-$AGV_PROJECT_DIR/scan_fixer}"
readonly LOG_DIR="${LOG_DIR:-/tmp}"
readonly FASTRTPS_SHM_DIR="${FASTRTPS_SHM_DIR:-/dev/shm}"
readonly AGV_CONTROLLER_DEVICE="${AGV_CONTROLLER_DEVICE:-/dev/agvpro_controller}"
# Exported (not readonly) so child processes inherit the domain ID; default 1.
export ROS_DOMAIN_ID="${ROS_DOMAIN_ID:-1}"
# Log files, all placed under LOG_DIR
readonly BRINGUP_LOG="$LOG_DIR/ros2_bringup.log"
readonly NAV2_LOG="$LOG_DIR/ros2_nav2.log"
readonly CLOCK_LOG="$LOG_DIR/clock_publisher.log"
readonly SCAN_FIXER_LOG="$LOG_DIR/scan_fixer.log"
readonly FLASK_LOG="$LOG_DIR/agv_flask.log"
# ============================================================================
# 进程管理
# ============================================================================
# Command-line patterns of the ROS-related processes to clean up.
# Each entry is handed to `pkill -f` by kill_all_soft / kill_all_hard, so it is
# matched as a regular expression against the full command line.
readonly ROS_PROCESSES=(
"ros2 launch agv_pro_bringup"
"ros2 launch agv_pro_navigation2"
"agv_pro_node"
"lslidar_driver_node"
"component_container"
"robot_state_publisher"
"fix_scan_timestamp"
"clock_publisher"
"python.*app.py"
"uv run .*python app.py"
)
# Ask every process matching ROS_PROCESSES to terminate (pkill default signal).
# Globals:   ROS_PROCESSES (read)
# Outputs:   one progress line on stdout
# Returns:   0 always; a pattern that matches no process is not an error
kill_all_soft() {
  # Keep the loop variable out of the sourcing script's global scope.
  local proc
  echo " 软杀进程中..."
  for proc in "${ROS_PROCESSES[@]}"; do
    # pkill exits non-zero when nothing matched; that is expected here.
    pkill -f "$proc" 2>/dev/null || true
  done
  # Give the signalled processes a moment to exit before the caller goes on.
  sleep 2
}
# Send SIGKILL to every process matching ROS_PROCESSES.
# Globals:   ROS_PROCESSES (read)
# Outputs:   one progress line on stdout
# Returns:   0 always; a pattern that matches no process is not an error
kill_all_hard() {
  # Keep the loop variable out of the sourcing script's global scope.
  local proc
  echo " 强制终止中..."
  for proc in "${ROS_PROCESSES[@]}"; do
    # pkill exits non-zero when nothing matched; that is expected here.
    pkill -9 -f "$proc" 2>/dev/null || true
  done
  # Short pause before the caller continues.
  sleep 1
}
# Count the running processes whose command line matches a regular expression.
# Arguments: $1 - extended regular expression tested against each command line
# Outputs:   the number of matching processes on stdout
# The script's own PID, the current (sub)shell PID and the parent PID are
# never counted.
count_matching_processes() {
  local regex=$1
  local matched=0
  local proc_id
  local cmdline
  while read -r proc_id cmdline; do
    # Skip blank lines.
    [ -n "${proc_id:-}" ] || continue
    case "$proc_id" in
      "$$" | "$BASHPID" | "$PPID")
        continue
        ;;
    esac
    if [[ "${cmdline:-}" =~ $regex ]]; then
      matched=$((matched + 1))
    fi
  done < <(ps -eo pid=,args=)
  echo "$matched"
}
# Count the AGV/ROS processes that are still running.
# Outputs: the count on stdout, as reported by count_matching_processes.
count_residual_processes() {
  local -r residual_pattern="agv_pro_node|lslidar_driver_node|component_container|fix_scan_timestamp|clock_publisher|app.py|ros2-daemon"
  count_matching_processes "$residual_pattern"
}
# ============================================================================
# FastRTPS 清理
# ============================================================================
# Remove FastRTPS shared-memory files and the scan_fixer / clock_publisher
# lock files.
# Globals:   FASTRTPS_SHM_DIR (read) - directory holding the fastrtps_* files
#            LOCK_DIR (read, optional) - directory holding the lock files;
#            defaults to /tmp when unset or empty
# Outputs:   one status line on stdout
cleanup_fastrtps() {
  local count
  local lock_dir=${LOCK_DIR:-/tmp}
  count=$(count_fastrtps_files)
  if [ "$count" -gt 0 ]; then
    # ":?" aborts instead of expanding to "/fastrtps_*" if the directory
    # variable is ever empty; "--" ends option parsing.
    rm -rf -- "${FASTRTPS_SHM_DIR:?}"/fastrtps_*
    echo " 已清理 $count 个 FastRTPS 文件"
  else
    echo " 无 FastRTPS 文件残留"
  fi
  # Remove the lock files.
  rm -f -- "$lock_dir/scan_fixer.lock" "$lock_dir/clock_publisher.lock"
}
# Print how many fastrtps_* entries exist in FASTRTPS_SHM_DIR.
# The body runs in a subshell, so enabling nullglob (an unmatched glob expands
# to nothing) does not affect the caller's shell options.
count_fastrtps_files() (
  shopt -s nullglob
  # Load the glob matches into the positional parameters and count them.
  set -- "$FASTRTPS_SHM_DIR"/fastrtps_*
  echo "$#"
)
# Run a command in a fresh bash process after loading the ROS2 environment.
# Arguments: "$@" - the command and its arguments
# Globals:   ROS_SETUP, ROS_WORKSPACE_SETUP, ROS_DOMAIN_ID (read)
# Returns:   1 if a setup file fails to source, otherwise the command's status
# The workspace setup file is sourced only when it exists. The values are
# passed as positional parameters, so they are never re-parsed as shell code.
ros2_exec() {
  local -r inner_script='
    source "$1" || exit 1
    if [ -f "$2" ]; then
      source "$2" || exit 1
    fi
    export ROS_DOMAIN_ID="$3"
    shift 3
    "$@"
  '
  bash -c "$inner_script" _ "$ROS_SETUP" "$ROS_WORKSPACE_SETUP" "$ROS_DOMAIN_ID" "$@"
}
# ============================================================================
# ROS2 环境操作
# ============================================================================
# Print the current ROS2 topic list, one topic per line.
# Arguments: $1 - optional upper bound in seconds for the CLI call (default 10)
# Outputs:   topic names on stdout; nothing if the call fails or times out
# Returns:   0 always (best effort)
# The call is wrapped in `timeout` so a hung `ros2 topic list` cannot stall
# callers that poll this function.
ros2_topic_list() {
  local limit=${1:-10}
  ros2_exec timeout "$limit" ros2 topic list 2>/dev/null || true
}
# Print the number of ROS2 topics currently listed.
# Arguments: $1 - optional timeout in seconds for the CLI call (default 5)
# Outputs:   the number of non-empty lines in the listing; 0 when the listing
#            is empty or the call failed
ros2_topic_count() {
  local listing
  listing=$(ros2_exec timeout "${1:-5}" ros2 topic list 2>/dev/null || true)
  if [ -n "$listing" ]; then
    # Keep only lines with at least one character, then count them.
    sed -n '/./p' <<< "$listing" | wc -l
    return
  fi
  echo 0
}
# Check whether a topic is present in the current ROS2 topic list.
# Arguments: $1 - exact topic name (compared as a whole line, literally)
# Returns:   0 if the topic is listed, 1 otherwise
topic_exists() {
  local wanted=$1
  local listed
  # The second condition also accepts a final line without a trailing newline.
  while IFS= read -r listed || [ -n "$listed" ]; do
    if [ "$listed" = "$wanted" ]; then
      return 0
    fi
  done < <(ros2_topic_list)
  return 1
}
# Start the ros2 daemon in the background and wait until it responds.
# Globals:   FASTRTPS_SHM_DIR, ROS_SETUP, ROS_DOMAIN_ID (read)
# Outputs:   progress lines on stdout
# Returns:   0 once `ros2 topic list` succeeds, 1 after five failed probes
start_ros2_daemon() {
  local attempt
  echo " 启动 ros2 daemon..."
  # Drop leftover shared-memory files before the daemon comes up.
  rm -rf "$FASTRTPS_SHM_DIR"/fastrtps_* 2>/dev/null || true
  nohup bash -c '
    source "$1" || exit 1
    export ROS_DOMAIN_ID="$2"
    ros2 daemon start
  ' _ "$ROS_SETUP" "$ROS_DOMAIN_ID" >/dev/null 2>&1 &
  sleep 4
  # Wait for the daemon: up to five probes, two seconds apart.
  for ((attempt = 1; attempt <= 5; attempt++)); do
    if ros2_exec timeout 3 ros2 topic list &>/dev/null; then
      echo " [OK] ros2 daemon 已就绪"
      return 0
    fi
    sleep 2
  done
  echo " [WARN] ros2 daemon 可能有问题"
  return 1
}
# Kill the ros2 daemon process, then ask the ros2 CLI to stop it.
# Globals:   ROS_SETUP (read); the setup file is sourced into the CURRENT shell
# Outputs:   progress lines on stdout
# Returns:   0 always; every step is best effort
stop_ros2_daemon() {
  local had_nounset=0
  echo " 重置 ros2 daemon..."
  # Kill the daemon process itself: default signal first, then SIGKILL.
  pkill -f "ros2-daemon" 2>/dev/null || true
  pkill -9 -f "ros2-daemon" 2>/dev/null || true
  sleep 2
  # This library enables `set -u`. Under nounset, an unset-variable reference
  # inside the sourced file would terminate the whole script, and `|| true`
  # cannot catch that. Suspend nounset for the source, then restore the
  # caller's previous setting.
  case $- in
    *u*) had_nounset=1 ;;
  esac
  set +u
  source "$ROS_SETUP" 2>/dev/null || true
  if [ "$had_nounset" -eq 1 ]; then
    set -u
  fi
  ros2 daemon stop 2>/dev/null || true
  echo " [OK] ros2 daemon 已重置"
}
# ============================================================================
# 等待/验证函数
# ============================================================================
# Poll until a topic shows up in the ROS2 topic list.
# Usage:     wait_for_topic <topic> [max_wait_seconds]   (default 30)
# Outputs:   an [OK] or [WARN] line on stdout
# Returns:   0 once the topic is listed, 1 when the wait budget is used up
# Only the 2-second sleeps between polls count towards the budget.
wait_for_topic() {
  local name=$1
  local budget=${2:-30}
  local waited=0
  while [ "$waited" -lt "$budget" ]; do
    if ! topic_exists "$name"; then
      sleep 2
      waited=$((waited + 2))
      continue
    fi
    echo " [OK] $name 已上线"
    return 0
  done
  echo " [WARN] $name 未在 $budget 秒内上线"
  return 1
}
# Poll until enough ROS2 nodes matching a pattern are listed.
# Usage:     wait_for_nodes <pattern> <expected_count> [max_wait_seconds]
#            pattern is an extended regular expression; max wait defaults to 30
# Outputs:   an [OK] or [WARN] line on stdout with the last observed count
# Returns:   0 once at least <expected_count> node lines match, 1 on timeout
# Only the 2-second sleeps between polls count towards the wait budget.
wait_for_nodes() {
  local regex=$1
  local wanted=$2
  local budget=${3:-30}
  local waited=0
  local found=0
  local listing
  while [ "$waited" -lt "$budget" ]; do
    listing=$(ros2_exec ros2 node list 2>/dev/null || true)
    # grep -c exits 1 on zero matches but still prints "0"; keep that value.
    found=$(grep -cE "$regex" <<< "$listing" || true)
    if [ "$found" -ge "$wanted" ]; then
      echo " [OK] 已检测到 $found 个节点"
      return 0
    fi
    sleep 2
    waited=$((waited + 2))
  done
  echo " [WARN] 仅检测到 $found 个节点(期望 $wanted 个)"
  return 1
}
# ============================================================================
# 日志/输出辅助
# ============================================================================
# Print a section title framed by two rules, preceded by a blank line.
# Arguments: $1 - title text
section() {
  local -r rule="=========================================="
  printf '\n%s\n' "$rule"
  printf ' %s\n' "$1"
  printf '%s\n' "$rule"
}
# Print a step header as "[<label>] <description>", preceded by a blank line.
# Arguments: $1 - step label (for example "1/8"), $2 - description
step() {
  printf '\n[%s] %s\n' "$1" "$2"
}
# Print a message prefixed with a status tag.
# Arguments: $1 - status: "ok", "warn" or "err"; any other value prints the
#                 message without a tag
#            $2 - message text
info() {
  local level=$1
  local text=$2
  case "$level" in
    ok) echo " [OK] $text" ;;
    warn) echo " [WARN] $text" ;;
    err) echo " [ERROR] $text" ;;
    *) echo " $text" ;;
  esac
}
# Print the last lines of a log file, each prefixed with a space.
# Arguments: $1 - log file path
#            $2 - number of lines to show (default 5)
# Outputs:   a header line followed by the tail; nothing if the path is not a
#            regular file
# Returns:   0 always
show_log_tail() {
  local path=$1
  local count=${2:-5}
  # No file, nothing to show.
  [ -f "$path" ] || return 0
  echo " --- 日志尾部 ($path) ---"
  tail -"$count" "$path" 2>/dev/null | sed 's/^/ /' || true
}
# ============================================================================
# 完整清理流程
# ============================================================================
# Run the complete shutdown sequence (used by stop_all.sh).
# Steps: soft kill, hard kill, ros2 daemon reset, FastRTPS cleanup, then a
# check for leftover processes and shared-memory files.
# Globals:   FASTRTPS_SHM_DIR (read, shown in the manual-cleanup hint)
# Outputs:   progress and a final summary on stdout
full_cleanup() {
  # Declared separately from the assignments so that a failing counter is not
  # hidden behind the exit status of `local`.
  local proc_count
  local fastrtps_left

  section "Robot AGV 全量停止"
  step "1/5" "软杀所有相关进程"
  kill_all_soft
  step "2/5" "强制终止残留进程"
  kill_all_hard
  step "3/5" "重置 ros2 daemon"
  stop_ros2_daemon
  step "4/5" "清理 FastRTPS 共享内存"
  cleanup_fastrtps
  step "5/5" "验证清理结果"
  proc_count=$(count_residual_processes)
  fastrtps_left=$(count_fastrtps_files)
  echo " 残留进程数: $proc_count"
  echo " FastRTPS 文件数: $fastrtps_left"
  if [ "$proc_count" -eq 0 ] && [ "$fastrtps_left" -eq 0 ]; then
    section "[OK] 停止完成 - 系统已完全清理"
  else
    section "[WARN] 停止完成 - 部分残留可能需要手动清理"
    echo ""
    echo " 手动清理命令(如需要):"
    echo " pkill -9 -f 'agv_pro_node|lslidar|component_container'"
    # clock_publisher is part of the residual-process check, so the manual
    # hint has to cover it as well.
    echo " pkill -9 -f 'fix_scan_timestamp|clock_publisher|app.py'"
    echo " pkill -9 -f 'ros2-daemon'"
    echo " rm -rf \"$FASTRTPS_SHM_DIR\"/fastrtps_*"
  fi
  echo ""
  echo " 现在可以安全运行 ./scripts/prod-backend.sh"
  echo ""
}
# ============================================================================
# Initialization (runs when this file is sourced: make sure LOG_DIR exists)
# ============================================================================
mkdir -p "$LOG_DIR"
+8 -89
View File
@@ -1,97 +1,16 @@
#!/bin/bash
# ============================================================
# stop_all.sh - 关闭 AGV 拍摄系统所有相关进程
# 版本: v2.0
# 版本: v3.0
# 修复:
# - v3.0: 使用公共库重构,减少代码重复
# - v2.0: 添加 FastRTPS 清理 + ros2 daemon 重置
# ============================================================
set -e
set -euo pipefail
ROS_DISTRO="${ROS_DISTRO:-humble}"
ROS_SETUP="${ROS_SETUP:-/opt/ros/$ROS_DISTRO/setup.bash}"
LOCK_DIR="${LOCK_DIR:-/tmp}"
FASTRTPS_SHM_DIR="${FASTRTPS_SHM_DIR:-/dev/shm}"
# 加载公共库
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "$SCRIPT_DIR/ros-common.sh"
echo "=========================================="
echo " Robot AGV 全量停止"
echo "=========================================="
echo ""
# ---------- 1. 软杀所有相关进程 ----------
echo "[1/5] 软杀所有相关进程..."
pkill -f "python.*app.py" 2>/dev/null || true
pkill -f "uv run .*python app.py" 2>/dev/null || true
pkill -f "agv_pro_bringup" 2>/dev/null || true
pkill -f "agv_pro_navigation2" 2>/dev/null || true
pkill -f "agv_pro_node" 2>/dev/null || true
pkill -f "lslidar_driver_node" 2>/dev/null || true
pkill -f "component_container" 2>/dev/null || true
pkill -f "fix_scan_timestamp" 2>/dev/null || true
pkill -f "clock_publisher" 2>/dev/null || true
pkill -f "robot_state_publisher" 2>/dev/null || true
pkill -f "start_all.sh" 2>/dev/null || true
sleep 2
# ---------- 2. 硬杀确保干净 ----------
echo "[2/5] 硬杀残留进程..."
pkill -9 -f "app.py" 2>/dev/null || true
pkill -9 -f "agv_pro_node" 2>/dev/null || true
pkill -9 -f "lslidar_driver_node" 2>/dev/null || true
pkill -9 -f "component_container" 2>/dev/null || true
pkill -9 -f "fix_scan_timestamp" 2>/dev/null || true
pkill -9 -f "agv_pro_bringup" 2>/dev/null || true
pkill -9 -f "agv_pro_navigation2" 2>/dev/null || true
sleep 1
# ---------- 3. 【关键】清理 FastRTPS 共享内存 ----------
echo "[3/5] 清理 FastRTPS 共享内存..."
FASTRTPS_COUNT=$(ls "$FASTRTPS_SHM_DIR"/fastrtps_* 2>/dev/null | wc -l || echo 0)
if [ "$FASTRTPS_COUNT" -gt 0 ]; then
rm -rf "$FASTRTPS_SHM_DIR"/fastrtps_*
echo " 已清理 $FASTRTPS_COUNT 个 FastRTPS 文件"
else
echo " 无 FastRTPS 文件残留"
fi
# 清理 scan_fixer 锁文件
rm -f "$LOCK_DIR/scan_fixer.lock"
rm -f "$LOCK_DIR/clock_publisher.lock"
echo " ✅ FastRTPS 清理完成"
# ---------- 4. 【关键】重置 ros2 daemon ----------
echo "[4/5] 重置 ros2 daemon..."
pkill -f "ros2-daemon" 2>/dev/null || true
pkill -9 -f "ros2-daemon" 2>/dev/null || true
sleep 2
source "$ROS_SETUP" 2>/dev/null || true
ros2 daemon stop 2>/dev/null || true
echo " ✅ ros2 daemon 已重置"
# ---------- 5. 验证清理结果 ----------
echo "[5/5] 验证清理结果..."
PROC_COUNT=$(ps aux | grep -E 'agv_pro_node|lslidar_driver_node|component_container|fix_scan_timestamp|clock_publisher|app.py|ros2-daemon' | grep -v grep | wc -l || echo 0)
FASTRTPS_LEFT=$(ls "$FASTRTPS_SHM_DIR"/fastrtps_* 2>/dev/null | wc -l || echo 0)
echo " 残留进程数: $PROC_COUNT"
echo " FastRTPS 文件数: $FASTRTPS_LEFT"
if [ "$PROC_COUNT" -eq 0 ] && [ "$FASTRTPS_LEFT" -eq 0 ]; then
echo ""
echo "=========================================="
echo " ✅ 停止完成 - 系统已完全清理"
echo "=========================================="
else
echo ""
echo "=========================================="
echo " ⚠️ 停止完成 - 部分残留可能需要手动清理"
echo "=========================================="
echo ""
echo " 手动清理命令(如需要):"
echo " pkill -9 -f 'agv_pro_node|lslidar|component_container'"
echo " pkill -9 -f 'fix_scan_timestamp|app.py'"
echo " pkill -9 -f 'ros2-daemon'"
echo " rm -rf \"$FASTRTPS_SHM_DIR\"/fastrtps_*"
fi
echo ""
echo " 现在可以安全运行 ./start_all.sh"
echo ""
# 执行完整清理
full_cleanup