#!/bin/bash
# ============================================================
# Robot AGV full-stack startup script v4.0
# Changelog:
#   - v4.0: kill the ros2 daemon process outright + check process counts before starting
#   - v3.0: purge FastRTPS shared-memory files (permanent fix for the DDS communication issue)
#   - v2.7: pass the ROS_DOMAIN_ID environment variable through to child processes
#   - v2.6: remove the scan_fixer lock file so no stale lock is left behind
# ============================================================
set -e

# ---- Configurable settings (environment variables override the defaults) ----
AGV_PROJECT_DIR="${AGV_PROJECT_DIR:-/home/elephant/work/smart-inspection}"
AGV_APP_DIR="${AGV_APP_DIR:-$AGV_PROJECT_DIR/agv_app}"
AGV_ROS2_DIR="${AGV_ROS2_DIR:-/home/elephant/agv_pro_ros2}"
SCAN_FIXER_DIR="${SCAN_FIXER_DIR:-/home/elephant/work/scan_fixer}"
FIXER_SCRIPT="${FIXER_SCRIPT:-fix_scan_timestamp_v6.py}"
ROS_DOMAIN_ID_VAL=1

echo "=========================================="
echo " Robot AGV 全量启动 v4.0"
echo "=========================================="
echo ""

# ---------- 1. Clean up old processes + FastRTPS shared memory ----------
echo "[1/8] 清理旧进程和共享内存..."

# Command-line patterns (matched with `pkill -f`) that get a polite SIGTERM first.
graceful_patterns=(
  "ros2 launch agv_pro_bringup"
  "ros2 launch agv_pro_navigation2"
  "agv_pro_node"
  "lslidar_driver_node"
  "component_container"
  "robot_state_publisher"
  "fix_scan_timestamp"
  "clock_publisher"
  "python.*app.py"
  "uv run .*python app.py"
)

# Patterns that are force-killed (SIGKILL) if they survived the SIGTERM pass.
forced_patterns=(
  "agv_pro_node"
  "lslidar_driver_node"
  "component_container"
  "clock_publisher"
  "fix_scan_timestamp"
  "app.py"
)

# Soft kill. pkill exits non-zero when nothing matches, which must not trip `set -e`.
for pattern in "${graceful_patterns[@]}"; do
  pkill -f -- "$pattern" 2>/dev/null || true
done
sleep 2

# Hard kill whatever ignored SIGTERM so the restart begins from a clean slate.
echo " 硬杀残留进程..."
for pattern in "${forced_patterns[@]}"; do
  pkill -9 -f -- "$pattern" 2>/dev/null || true
done
sleep 1

# The ros2 daemon process itself is killed next (not merely `ros2 daemon stop`).
echo " 重置 ros2 daemon..."
# Kill the ros2 daemon process itself: SIGTERM, then SIGKILL for anything left.
pkill -f "ros2-daemon" 2>/dev/null || true
pkill -9 -f "ros2-daemon" 2>/dev/null || true
sleep 2

# Purge FastRTPS shared-memory files right after the processes are gone.
echo " 清理 FastRTPS 共享内存文件..."
# Expand the glob into an array (nullglob => empty array when nothing matches)
# instead of parsing `ls` output.
shopt -s nullglob
stale_shm=(/dev/shm/fastrtps_*)
shopt -u nullglob
FASTRTPS_COUNT=${#stale_shm[@]}
if [ "$FASTRTPS_COUNT" -gt 0 ]; then
  rm -rf -- "${stale_shm[@]}"
  echo " 已清理 $FASTRTPS_COUNT 个 FastRTPS 文件"
else
  echo " 无 FastRTPS 文件残留"
fi

# Remove the scan_fixer lock file.
rm -f /tmp/scan_fixer.lock

# Verify that every stack process has really stopped.
echo " 验证进程停止..."
leftover_regex='agv_pro_node|lslidar_driver_node|component_container|fix_scan_timestamp|app.py'
# `pgrep -c` prints the count but exits 1 when it is zero; `|| true` keeps `set -e` quiet.
PROC_COUNT=$(pgrep -fc -- "$leftover_regex" || true)
echo " 残留进程数: $PROC_COUNT"
if [ "$PROC_COUNT" -gt 0 ]; then
  echo " ⚠️ 仍有进程残留,强制终止..."
  for pattern in \
    "agv_pro_node" \
    "lslidar_driver_node" \
    "component_container" \
    "fix_scan_timestamp" \
    "app.py"; do
    pkill -9 -f -- "$pattern" 2>/dev/null || true
  done
  sleep 2
  PROC_COUNT2=$(pgrep -fc -- "$leftover_regex" || true)
  echo " 清理后残留: $PROC_COUNT2"
fi
echo " ✅ 清理完成"

# ---------- 2. Start the ros2 daemon ----------
echo "[2/8] 启动 ros2 daemon..."
source /opt/ros/humble/setup.bash 2>/dev/null || true

# Make sure no shared-memory leftovers exist right before the daemon starts.
rm -rf /dev/shm/fastrtps_* 2>/dev/null || true

# Start the daemon in a fresh bash so the ROS environment and ROS_DOMAIN_ID are
# guaranteed to be set for it.
ROS_DOMAIN_ID="$ROS_DOMAIN_ID_VAL" nohup bash -c \
  'source /opt/ros/humble/setup.bash && ros2 daemon start' >/dev/null 2>&1 &
sleep 4

# Probe the daemon with a plain `ros2 topic list`.
# stderr is discarded so that an error message is never counted as a topic.
DAEMON_OK=0
for attempt in 1 2 3 4 5; do
  DAEMON_TOPICS=$(
    source /opt/ros/humble/setup.bash 2>/dev/null || true
    ROS_DOMAIN_ID="$ROS_DOMAIN_ID_VAL" timeout 3 ros2 topic list 2>/dev/null | wc -l
  )
  if [ "$DAEMON_TOPICS" -gt 0 ]; then
    DAEMON_OK=1
    echo " ✅ ros2 daemon 就绪"
    break
  fi
  sleep 2
done
if [ "$DAEMON_OK" -eq 0 ]; then
  echo " ⚠️ ros2 daemon 可能有问题,继续尝试启动组件..."
fi

# ---------- 3. Start bringup (includes the lidar) ----------
echo "[3/8] 启动 AGV Bringup..."
source /opt/ros/humble/setup.bash 2>/dev/null || true

# Final check before bringup: no stale FastRTPS shared memory may remain.
rm -rf /dev/shm/fastrtps_* 2>/dev/null || true

cd "$AGV_ROS2_DIR"
source install/setup.bash

#######################################
# Check whether a topic is currently advertised.
# Globals:   ROS_DOMAIN_ID_VAL (read)
# Arguments: $1 - full topic name, e.g. /odom
# Returns:   0 when `ros2 topic list` contains a line equal to $1, else non-zero.
#            Whole-line matching keeps /scan_corrected from satisfying /scan.
#######################################
topic_online() {
  local topics
  topics=$(ROS_DOMAIN_ID="$ROS_DOMAIN_ID_VAL" ros2 topic list 2>/dev/null) || true
  grep -qxF -- "$1" <<<"$topics"
}

#######################################
# Start a Python ROS node in the background, detached with nohup.
# The script path is passed as a positional argument (safe for paths with
# spaces) and python3 is exec'ed, so $! is the node itself and no bash wrapper
# lingers with the script name in its command line.
# Globals:   ROS_DOMAIN_ID_VAL (read)
# Arguments: $1 - path of the Python script, $2 - log file
#######################################
start_python_node() {
  ROS_DOMAIN_ID="$ROS_DOMAIN_ID_VAL" nohup bash -c \
    'source /opt/ros/humble/setup.bash && exec python3 "$1"' \
    _ "$1" > "$2" 2>&1 &
}

# The workspace is already sourced in this shell, so ros2 launch is started directly.
ROS_DOMAIN_ID="$ROS_DOMAIN_ID_VAL" nohup ros2 launch agv_pro_bringup agv_pro_bringup.launch.py \
  port_name:=/dev/agvpro_controller > /tmp/ros2_bringup.log 2>&1 &
BRINGUP_PID=$!
echo " bringup PID: $BRINGUP_PID"

echo " 等待 bringup 就绪..."
BRINGUP_OK=0
for ((attempt = 1; attempt <= 20; attempt++)); do
  if topic_online /odom; then
    echo " ✅ bringup 已就绪 (${attempt}x2秒)"
    BRINGUP_OK=1
    break
  fi
  sleep 2
done
if [ "$BRINGUP_OK" -eq 0 ]; then
  echo " ⚠️ bringup 未检测到 /odom,继续启动后续组件..."
  tail -5 /tmp/ros2_bringup.log 2>/dev/null || true
fi

# ---------- 3.5 Start the system clock publisher ----------
echo "[3.5/8] 启动系统时钟发布器 (clock_publisher)..."
start_python_node "$SCAN_FIXER_DIR/clock_publisher.py" /tmp/clock_publisher.log
CLOCK_PID=$!
echo " clock_publisher PID: $CLOCK_PID"
sleep 2

# Verify the /clock topic.
if topic_online /clock; then
  echo " ✅ /clock 已上线"
else
  echo " ⚠️ /clock 未上线,检查日志:"
  tail -5 /tmp/clock_publisher.log 2>/dev/null || true
fi

# ---------- 4. Start the laser timestamp fixer node ----------
echo "[4/8] 启动激光时间戳修正节点..."

# Wait until /scan exists.
SCAN_OK=0
for ((attempt = 1; attempt <= 10; attempt++)); do
  if topic_online /scan; then
    echo " /scan 话题已上线"
    SCAN_OK=1
    break
  fi
  sleep 2
done
if [ "$SCAN_OK" -eq 0 ]; then
  echo " ⚠️ /scan 未上线,检查 bringup 日志"
fi

start_python_node "$SCAN_FIXER_DIR/$FIXER_SCRIPT" /tmp/scan_fixer.log
FIXER_PID=$!
echo " fix_scan_timestamp PID: $FIXER_PID"
sleep 5

# Verify the fixer process count and the /scan_corrected topic.
# `pgrep -c` prints the count but exits 1 on zero, hence `|| true`.
FIXER_COUNT=$(pgrep -fc "fix_scan_timestamp" || true)
if [ "$FIXER_COUNT" -gt 1 ]; then
  echo " ⚠️ 发现 $FIXER_COUNT 个 fixer 进程,杀掉多余的..."
  # Only the fixer is restarted. clock_publisher is deliberately left running:
  # killing it here without a restart would take /clock offline for the rest
  # of the stack.
  pkill -f "fix_scan_timestamp" 2>/dev/null || true
  sleep 2
  rm -f /tmp/scan_fixer.lock
  start_python_node "$SCAN_FIXER_DIR/$FIXER_SCRIPT" /tmp/scan_fixer.log
  FIXER_PID=$!
  sleep 3
fi

if topic_online /scan_corrected; then
  echo " ✅ /scan_corrected 已上线"
else
  echo " ⚠️ /scan_corrected 未上线,检查日志:"
  tail -5 /tmp/scan_fixer.log 2>/dev/null || true
fi

# ---------- 5. Start Nav2 ----------
echo "[5/8] 启动 Nav2 导航..."
source /opt/ros/humble/setup.bash 2>/dev/null || true
cd "$AGV_ROS2_DIR"
source install/setup.bash

# The workspace path comes from $AGV_ROS2_DIR (passed as $1) so that overriding
# the variable also affects the Nav2 launch environment.
ROS_DOMAIN_ID="$ROS_DOMAIN_ID_VAL" nohup bash -c \
  'source /opt/ros/humble/setup.bash \
    && source "$1/install/setup.bash" \
    && exec ros2 launch agv_pro_navigation2 navigation2_active.launch.py autostart:=True' \
  _ "$AGV_ROS2_DIR" > /tmp/ros2_nav2.log 2>&1 &
NAV2_PID=$!
echo " Nav2 PID: $NAV2_PID"
sleep 12

echo " 等待 Nav2 节点就绪..."
NAV2_OK=0
for ((attempt = 1; attempt <= 15; attempt++)); do
  # Count node-list lines naming one of the three key Nav2 nodes.
  # `grep -c` prints 0 and exits 1 when nothing matches, hence `|| true`.
  NODES=$(ROS_DOMAIN_ID="$ROS_DOMAIN_ID_VAL" ros2 node list 2>/dev/null \
    | grep -cE 'lifecycle_manager_navigation|bt_navigator|controller_server' || true)
  if [ "${NODES:-0}" -ge 3 ]; then
    echo " ✅ Nav2 节点已就绪 ($NODES 个)"
    NAV2_OK=1
    break
  fi
  sleep 3
done
if [ "$NAV2_OK" -eq 0 ]; then
  echo " ⚠️ Nav2 节点未完全就绪,继续..."
fi

# ---------- 6. Set precision parameters ----------
echo "[6/8] 设置导航精度参数 (xy_goal_tolerance=0.05m)..."
source /opt/ros/humble/setup.bash 2>/dev/null || true
cd "$AGV_ROS2_DIR"
source install/setup.bash

#######################################
# Best-effort `ros2 param set`: failures and timeouts are ignored on purpose
# so that a node lacking the parameter does not abort the startup.
# Globals:   ROS_DOMAIN_ID_VAL (read)
# Arguments: $1 - node name, $2 - parameter name, $3 - value
# NOTE(review): the 1 s timeout is tight for the ros2 CLI — confirm the
# parameters actually get applied on the target machine.
#######################################
set_nav_param() {
  ROS_DOMAIN_ID="$ROS_DOMAIN_ID_VAL" timeout 1 ros2 param set "$1" "$2" "$3" 2>/dev/null || true
}

for NODE in /controller_server /bt_navigator /planner_server; do
  set_nav_param "$NODE" general_goal_checker.xy_goal_tolerance 0.05
  set_nav_param "$NODE" general_goal_checker.yaw_goal_tolerance 0.05
done
set_nav_param /controller_server FollowPath.xy_goal_tolerance 0.05
set_nav_param /controller_server general_goal_checker.stateful True
set_nav_param /controller_server FollowPath.stateful True
echo " ✅ 精度参数已设置"

# ---------- 7. Start Flask ----------
echo "[7/8] 启动 Flask API..."
# Use the same domain id as every other component instead of a hard-coded 1.
export ROS_DOMAIN_ID="$ROS_DOMAIN_ID_VAL"
cd "$AGV_APP_DIR"
nohup uv run --locked python app.py > /tmp/agv_flask.log 2>&1 &
FLASK_PID=$!
echo " Flask PID: $FLASK_PID"
sleep 4

# ---------- 8. Final full verification ----------
echo ""
echo "=========================================="
echo " 系统全面验证"
echo "=========================================="

# 8a. Verify ros2 topic list (the core health indicator).
echo ""
echo "验证 ros2 topic list..."
# Query the topic list once and reuse the snapshot for 8a and 8b.
ALL_TOPICS=$(ROS_DOMAIN_ID="$ROS_DOMAIN_ID_VAL" timeout 5 ros2 topic list 2>/dev/null || true)
# Count non-empty lines; `grep -c` exits 1 on a zero count, hence `|| true`.
TOPIC_COUNT=$(grep -c . <<<"$ALL_TOPICS" || true)
echo " 话题数量: $TOPIC_COUNT"
if [ "$TOPIC_COUNT" -gt 10 ]; then
  echo " ✅ ros2 daemon 正常 (${TOPIC_COUNT} 个话题)"
else
  echo " ❌ ros2 topic list 异常 (${TOPIC_COUNT} 个话题,可能 DDS 有问题)"
  echo " 手动执行: rm -rf /dev/shm/fastrtps_* && ros2 daemon stop && ros2 daemon start"
fi

# 8b. Verify the key topics.
echo ""
echo "验证关键话题..."
for TOPIC in /odom /scan /cmd_vel /tf; do
  # Whole-line, fixed-string match: /tf must not be satisfied by /tf_static,
  # nor /scan by /scan_corrected.
  if grep -qxF -- "$TOPIC" <<<"$ALL_TOPICS"; then
    echo " ✅ $TOPIC"
  else
    echo " ⚠️ $TOPIC 未找到"
  fi
done

# 8c. Verify process counts (make sure nothing was started twice).
echo ""
echo "验证进程数量..."
# Count the AGV core processes by command line.
# `pgrep -c` prints the count but exits 1 when it is zero, hence `|| true`.
BRINGUP_PROCS=$(pgrep -fc 'agv_pro_node|lslidar_driver_node' || true)
echo " AGV 核心进程: $BRINGUP_PROCS (应为 2)"
if [ "$BRINGUP_PROCS" -eq 2 ]; then
  echo " ✅ 进程数量正常(无重复)"
elif [ "$BRINGUP_PROCS" -gt 2 ]; then
  echo " ⚠️ 发现 $BRINGUP_PROCS 个核心进程(可能有残留),建议重启"
else
  echo " ⚠️ 进程数量异常"
fi

# 8d. FastRTPS shared-memory status.
echo ""
echo "FastRTPS 共享内存状态:"
# Count via a nullglob array rather than parsing `ls`.
shopt -s nullglob
live_shm=(/dev/shm/fastrtps_*)
shopt -u nullglob
FASTRTPS_NEW=${#live_shm[@]}
echo " 当前文件数: $FASTRTPS_NEW (正常运行时会有一些)"

# 8e. Flask API check.
echo ""
echo "验证 Flask API..."
FLASK_RUNNING=$(pgrep -fc "app.py" || true)
if [ "$FLASK_RUNNING" -gt 0 ]; then
  echo " ✅ Flask 进程运行中"
else
  echo " ❌ Flask 未运行"
fi

# ---------- Done ----------
echo ""
echo "=========================================="
echo " ✅ 启动完成"
echo "=========================================="
echo ""
echo " 进程状态:"
for PROC in "bringup:$BRINGUP_PID" "Nav2:$NAV2_PID" "fixer:$FIXER_PID" "Flask:$FLASK_PID"; do
  NAME="${PROC%%:*}"
  PID="${PROC##*:}"
  # Ask ps about this exact PID. Grepping `ps aux` for the number could match
  # unrelated columns and never produced the "exited" text.
  if [ -n "$PID" ] && ps -p "$PID" > /dev/null 2>&1; then
    STATUS='运行中'
  else
    STATUS='已退出'
  fi
  echo " $NAME : $STATUS"
done
echo ""
echo " 日志文件:"
echo " bringup : /tmp/ros2_bringup.log"
echo " Nav2 : /tmp/ros2_nav2.log"
echo " fixer : /tmp/scan_fixer.log"
echo " Flask : /tmp/agv_flask.log"
echo ""
echo " 如果仍有问题,请依次执行:"
echo " 1. ./stop_all.sh"
echo " 2. rm -rf /dev/shm/fastrtps_*"
echo " 3. ./start_all.sh"