From a160c18ba7e5162a8641800d8d0031ec9657fe95 Mon Sep 17 00:00:00 2001 From: ywb <347742090@qq.com> Date: Tue, 19 May 2026 15:59:23 +0800 Subject: [PATCH] =?UTF-8?q?=E5=AF=BC=E8=88=AA=E4=BF=AE=E5=A4=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- agv_app/start_all.sh | 278 +++++++++++++++++---- agv_app/stop_all.sh | 109 ++++++--- agv_app/utils/agv_controller_ros2.py | 46 ++-- agv_app/utils/nav2_navigator.py | 354 ++++++++------------------- 4 files changed, 425 insertions(+), 362 deletions(-) diff --git a/agv_app/start_all.sh b/agv_app/start_all.sh index 33d2c0b..1267cdc 100755 --- a/agv_app/start_all.sh +++ b/agv_app/start_all.sh @@ -1,10 +1,11 @@ #!/bin/bash # ============================================================ -# Robot AGV 全量启动脚本 v2.2 -# 完整流程: -# 清理旧进程(不杀 daemon) -> 启动 bringup -> -# 启动激光时间戳修正节点 -> 启动 Nav2 -> -# 设置导航精度参数 -> 启动 Flask +# Robot AGV 全量启动脚本 v4.0 +# 修复: +# - v4.0: 彻底杀死 ros2 daemon 进程 + 启动前进程数量检查 +# - v3.0: 彻底清理 FastRTPS 共享内存文件(永久修复 DDS 通信问题) +# - v2.7: 添加 ROS_DOMAIN_ID 环境变量传递 +# - v2.6: 清理 scan_fixer lock 文件防残留 # ============================================================ set -e @@ -13,78 +14,180 @@ AGV_ROS2_DIR="/home/elephant/agv_pro_ros2" ROS_DOMAIN_ID_VAL=1 echo "==========================================" -echo " Robot AGV 全量启动 v2.2" +echo " Robot AGV 全量启动 v4.0" echo "==========================================" echo "" -# ---------- 1. 清理旧进程(不杀 ros2-daemon) ---------- -echo "[1/7] 清理旧进程..." +# ---------- 1. 清理旧进程 + FastRTPS 共享内存 ---------- +echo "[1/8] 清理旧进程和共享内存..." + +# 杀掉所有相关进程(先软杀,再硬杀确保干净) pkill -f "ros2 launch agv_pro_bringup" 2>/dev/null || true pkill -f "ros2 launch agv_pro_navigation2" 2>/dev/null || true pkill -f "agv_pro_node" 2>/dev/null || true pkill -f "lslidar_driver_node" 2>/dev/null || true -pkill -f "scan_timestamp_fixer" 2>/dev/null || true +pkill -f "component_container" 2>/dev/null || true +pkill -f "robot_state_publisher" 2>/dev/null || true +pkill -f "fix_scan_timestamp" 2>/dev/null || true +pkill -f "clock_publisher" 2>/dev/null || true pkill -f "python.*app.py" 2>/dev/null || true -sleep 4 -echo " 清理完成" - -# ---------- 2. 重启 ros2 daemon(仅杀 daemon进程本身,不杀整个环境) ---------- -echo "[2/7] 重启 ros2 daemon..." -pkill -f "ros2-daemon" 2>/dev/null || true sleep 2 -nohup bash -c "source /opt/ros/humble/setup.bash && ros2 daemon start" >/dev/null 2>&1 & -sleep 5 -echo " ros2 daemon 已就绪" + +# 【关键】硬杀确保干净 +echo " 硬杀残留进程..." +pkill -9 -f "agv_pro_node" 2>/dev/null || true +pkill -9 -f "lslidar_driver_node" 2>/dev/null || true +pkill -9 -f "component_container" 2>/dev/null || true +pkill -9 -f "clock_publisher" 2>/dev/null || true +pkill -9 -f "fix_scan_timestamp" 2>/dev/null || true +pkill -9 -f "app.py" 2>/dev/null || true +sleep 1 + +# 【关键】杀死 ros2 daemon 进程本身(不是只 stop,而是杀进程) +echo " 重置 ros2 daemon..." +pkill -f "ros2-daemon" 2>/dev/null || true +pkill -9 -f "ros2-daemon" 2>/dev/null || true +sleep 2 + +# 【关键】清理 FastRTPS 共享内存文件(杀进程后立即清理) +echo " 清理 FastRTPS 共享内存文件..." +FASTRTPS_COUNT=$(ls /dev/shm/fastrtps_* 2>/dev/null | wc -l || echo 0) +if [ "$FASTRTPS_COUNT" -gt 0 ]; then + rm -rf /dev/shm/fastrtps_* + echo " 已清理 $FASTRTPS_COUNT 个 FastRTPS 文件" +else + echo " 无 FastRTPS 文件残留" +fi + +# 清理 scan_fixer 锁文件 +rm -f /tmp/scan_fixer.lock + +# 【关键】验证进程已全部停止 +echo " 验证进程停止..." +PROC_COUNT=$(ps aux | grep -E 'agv_pro_node|lslidar_driver_node|component_container|fix_scan_timestamp|app.py' | grep -v grep | wc -l || echo 0) +echo " 残留进程数: $PROC_COUNT" +if [ "$PROC_COUNT" -gt 0 ]; then + echo " ⚠️ 仍有进程残留,强制终止..." + pkill -9 -f "agv_pro_node" 2>/dev/null || true + pkill -9 -f "lslidar_driver_node" 2>/dev/null || true + pkill -9 -f "component_container" 2>/dev/null || true + pkill -9 -f "fix_scan_timestamp" 2>/dev/null || true + pkill -9 -f "app.py" 2>/dev/null || true + sleep 2 + PROC_COUNT2=$(ps aux | grep -E 'agv_pro_node|lslidar_driver_node|component_container|fix_scan_timestamp|app.py' | grep -v grep | wc -l || echo 0) + echo " 清理后残留: $PROC_COUNT2" +fi +echo " ✅ 清理完成" + +# ---------- 2. 启动 ros2 daemon ---------- +echo "[2/8] 启动 ros2 daemon..." +source /opt/ros/humble/setup.bash 2>/dev/null || true + +# 再次确保没有残留共享内存(启动 daemon 前) +rm -rf /dev/shm/fastrtps_* 2>/dev/null || true + +# 使用 bash -c 确保环境变量正确传递 +nohup bash -c "source /opt/ros/humble/setup.bash && export ROS_DOMAIN_ID=$ROS_DOMAIN_ID_VAL && ros2 daemon start" >/dev/null 2>&1 & +sleep 4 + +# 验证 daemon 是否就绪(用简单的 topic list 测试) +DAEMON_OK=0 +for i in $(seq 1 5); do + DAEMON_TOPICS=$(source /opt/ros/humble/setup.bash && ROS_DOMAIN_ID=$ROS_DOMAIN_ID_VAL timeout 3 ros2 topic list 2>&1 | wc -l || echo 0) + if [ "$DAEMON_TOPICS" -gt 0 ]; then + DAEMON_OK=1 + echo " ✅ ros2 daemon 就绪" + break + fi + sleep 2 +done +if [ "$DAEMON_OK" -eq 0 ]; then + echo " ⚠️ ros2 daemon 可能有问题,继续尝试启动组件..." +fi # ---------- 3. 启动 bringup (含激光雷达) ---------- -echo "[3/7] 启动 AGV Bringup..." -source /opt/ros/humble/setup.bash +echo "[3/8] 启动 AGV Bringup..." +source /opt/ros/humble/setup.bash 2>/dev/null || true + +# 【关键】启动前最后确认没有残留共享内存 +rm -rf /dev/shm/fastrtps_* 2>/dev/null || true + cd "$AGV_ROS2_DIR" source install/setup.bash -nohup ros2 launch agv_pro_bringup agv_pro_bringup.launch.py \ - port_name:=/dev/agvpro_controller > /tmp/ros2_bringup.log 2>&1 & + +nohup bash -c "export ROS_DOMAIN_ID=$ROS_DOMAIN_ID_VAL && ros2 launch agv_pro_bringup agv_pro_bringup.launch.py port_name:=/dev/agvpro_controller" > /tmp/ros2_bringup.log 2>&1 & BRINGUP_PID=$! echo " bringup PID: $BRINGUP_PID" echo " 等待 bringup 就绪..." +BRINGUP_OK=0 for i in $(seq 1 20); do if ROS_DOMAIN_ID=$ROS_DOMAIN_ID_VAL ros2 topic list 2>/dev/null | grep -q '/odom'; then - echo " ✅ bringup 已就绪" + echo " ✅ bringup 已就绪 (${i}x2秒)" + BRINGUP_OK=1 break fi sleep 2 done +if [ "$BRINGUP_OK" -eq 0 ]; then + echo " ⚠️ bringup 未检测到 /odom,继续启动后续组件..." + tail -5 /tmp/ros2_bringup.log 2>/dev/null || true +fi -# ---------- 4. 启动激光时间戳修正节点(单例,不重复启动) ---------- -echo "[4/7] 启动激光时间戳修正节点..." -# 确保只有1个 fixer 进程在运行 -pkill -f "scan_timestamp_fixer" 2>/dev/null || true +# ---------- 3.5 启动系统时钟发布器 ---------- +echo "[3.5/8] 启动系统时钟发布器 (clock_publisher)..." + +nohup bash -c "source /opt/ros/humble/setup.bash && \ + ROS_DOMAIN_ID=$ROS_DOMAIN_ID_VAL python3 /home/elephant/work/scan_fixer/clock_publisher.py" \ + > /tmp/clock_publisher.log 2>&1 & +CLOCK_PID=$! +echo " clock_publisher PID: $CLOCK_PID" sleep 2 +# 验证 /clock 话题 +if ROS_DOMAIN_ID=$ROS_DOMAIN_ID_VAL ros2 topic list 2>/dev/null | grep -q '/clock'; then + echo " ✅ /clock 已上线" +else + echo " ⚠️ /clock 未上线,检查日志:" + tail -5 /tmp/clock_publisher.log 2>/dev/null || true +fi + +# ---------- 4. 启动激光时间戳修正节点 ---------- +echo "[4/8] 启动激光时间戳修正节点..." + +# 确保 /scan 存在 +SCAN_OK=0 for i in $(seq 1 10); do if ROS_DOMAIN_ID=$ROS_DOMAIN_ID_VAL ros2 topic list 2>/dev/null | grep -q '/scan'; then echo " /scan 话题已上线" + SCAN_OK=1 break fi sleep 2 done +if [ "$SCAN_OK" -eq 0 ]; then + echo " ⚠️ /scan 未上线,检查 bringup 日志" +fi nohup bash -c "source /opt/ros/humble/setup.bash && \ - ROS_DOMAIN_ID=$ROS_DOMAIN_ID_VAL python3 /home/elephant/work/scan_fixer/fix_scan_timestamp.py" \ + ROS_DOMAIN_ID=$ROS_DOMAIN_ID_VAL python3 /home/elephant/work/scan_fixer/fix_scan_timestamp_v6.py" \ > /tmp/scan_fixer.log 2>&1 & FIXER_PID=$! -echo " scan_timestamp_fixer PID: $FIXER_PID" +echo " fix_scan_timestamp PID: $FIXER_PID" sleep 5 -# 验证只有1个 fixer 进程 +# 验证 fixer 进程和 scan_corrected FIXER_COUNT=$(ps aux | grep -c "[f]ix_scan_timestamp" 2>/dev/null || echo 0) if [ "$FIXER_COUNT" -gt 1 ]; then echo " ⚠️ 发现 $FIXER_COUNT 个 fixer 进程,杀掉多余的..." - pkill -f "scan_timestamp_fixer" 2>/dev/null || true + pkill -f "fix_scan_timestamp" 2>/dev/null || true +pkill -f "clock_publisher" 2>/dev/null || true sleep 2 + rm -f /tmp/scan_fixer.lock nohup bash -c "source /opt/ros/humble/setup.bash && \ - ROS_DOMAIN_ID=$ROS_DOMAIN_ID_VAL python3 /home/elephant/work/scan_fixer/fix_scan_timestamp.py" \ + ROS_DOMAIN_ID=$ROS_DOMAIN_ID_VAL python3 /home/elephant/work/scan_fixer/fix_scan_timestamp_v6.py" \ > /tmp/scan_fixer.log 2>&1 & + FIXER_PID=$! sleep 3 fi @@ -92,54 +195,122 @@ if ROS_DOMAIN_ID=$ROS_DOMAIN_ID_VAL ros2 topic list 2>/dev/null | grep -q '/scan echo " ✅ /scan_corrected 已上线" else echo " ⚠️ /scan_corrected 未上线,检查日志:" - cat /tmp/scan_fixer.log + tail -5 /tmp/scan_fixer.log 2>/dev/null || true fi # ---------- 5. 启动 Nav2 ---------- -echo "[5/7] 启动 Nav2 导航..." -source /opt/ros/humble/setup.bash +echo "[5/8] 启动 Nav2 导航..." +source /opt/ros/humble/setup.bash 2>/dev/null || true cd "$AGV_ROS2_DIR" source install/setup.bash -nohup ros2 launch agv_pro_navigation2 navigation2_active.launch.py \ - autostart:=True > /tmp/ros2_nav2.log 2>&1 & + +nohup bash -c "source /opt/ros/humble/setup.bash && \ + source /home/elephant/agv_pro_ros2/install/setup.bash && \ + export ROS_DOMAIN_ID=$ROS_DOMAIN_ID_VAL && \ + ros2 launch agv_pro_navigation2 navigation2_active.launch.py \ + autostart:=True" > /tmp/ros2_nav2.log 2>&1 & NAV2_PID=$! echo " Nav2 PID: $NAV2_PID" sleep 12 echo " 等待 Nav2 节点就绪..." +NAV2_OK=0 for i in $(seq 1 15); do NODES=$(ROS_DOMAIN_ID=$ROS_DOMAIN_ID_VAL ros2 node list 2>/dev/null | \ - grep -c "lifecycle_manager_navigation\|bt_navigator\|controller_server" 2>/dev/null || echo 0) - if [ "$NODES" -ge 3 ]; then + grep -cE 'lifecycle_manager_navigation|bt_navigator|controller_server' 2>/dev/null || echo 0) + NODES=$(echo "$NODES" | tr -d '\n' | awk '{print $1}') + if [ "$NODES" -ge 3 ] 2>/dev/null; then echo " ✅ Nav2 节点已就绪 ($NODES 个)" + NAV2_OK=1 break fi sleep 3 done +if [ "$NAV2_OK" -eq 0 ]; then + echo " ⚠️ Nav2 节点未完全就绪,继续..." +fi # ---------- 6. 设置精度参数 ---------- -echo "[6/7] 设置导航精度参数 (xy_goal_tolerance=0.05m)..." -source /opt/ros/humble/setup.bash +echo "[6/8] 设置导航精度参数 (xy_goal_tolerance=0.05m)..." +source /opt/ros/humble/setup.bash 2>/dev/null || true cd "$AGV_ROS2_DIR" source install/setup.bash for NODE in /controller_server /bt_navigator /planner_server; do - ROS_DOMAIN_ID=$ROS_DOMAIN_ID_VAL ros2 param set $NODE general_goal_checker.xy_goal_tolerance 0.05 2>/dev/null || true - ROS_DOMAIN_ID=$ROS_DOMAIN_ID_VAL ros2 param set $NODE general_goal_checker.yaw_goal_tolerance 0.05 2>/dev/null || true + ROS_DOMAIN_ID=$ROS_DOMAIN_ID_VAL timeout 1 ros2 param set $NODE general_goal_checker.xy_goal_tolerance 0.05 2>/dev/null || true + ROS_DOMAIN_ID=$ROS_DOMAIN_ID_VAL timeout 1 ros2 param set $NODE general_goal_checker.yaw_goal_tolerance 0.05 2>/dev/null || true done -ROS_DOMAIN_ID=$ROS_DOMAIN_ID_VAL ros2 param set /controller_server FollowPath.xy_goal_tolerance 0.05 2>/dev/null || true -ROS_DOMAIN_ID=$ROS_DOMAIN_ID_VAL ros2 param set /controller_server general_goal_checker.stateful True 2>/dev/null || true -ROS_DOMAIN_ID=$ROS_DOMAIN_ID_VAL ros2 param set /controller_server FollowPath.stateful True 2>/dev/null || true -echo " 精度参数已设置" +ROS_DOMAIN_ID=$ROS_DOMAIN_ID_VAL timeout 1 ros2 param set /controller_server FollowPath.xy_goal_tolerance 0.05 2>/dev/null || true +ROS_DOMAIN_ID=$ROS_DOMAIN_ID_VAL timeout 1 ros2 param set /controller_server general_goal_checker.stateful True 2>/dev/null || true +ROS_DOMAIN_ID=$ROS_DOMAIN_ID_VAL timeout 1 ros2 param set /controller_server FollowPath.stateful True 2>/dev/null || true +echo " ✅ 精度参数已设置" # ---------- 7. 启动 Flask ---------- -echo "[7/7] 启动 Flask API..." +echo "[7/8] 启动 Flask API..." cd "$AGV_APP_DIR" nohup python3 app.py > /tmp/agv_flask.log 2>&1 & FLASK_PID=$! echo " Flask PID: $FLASK_PID" sleep 4 +# ---------- 8. 最终全面验证 ---------- +echo "" +echo "==========================================" +echo " 系统全面验证" +echo "==========================================" + +# 8a. 验证 ros2 topic list(核心指标) +echo "" +echo "验证 ros2 topic list..." +TOPIC_COUNT=$(source /opt/ros/humble/setup.bash && ROS_DOMAIN_ID=$ROS_DOMAIN_ID_VAL timeout 5 ros2 topic list 2>/dev/null | wc -l || echo 0) +echo " 话题数量: $TOPIC_COUNT" +if [ "$TOPIC_COUNT" -gt 10 ]; then + echo " ✅ ros2 daemon 正常 (${TOPIC_COUNT} 个话题)" +else + echo " ❌ ros2 topic list 异常 (${TOPIC_COUNT} 个话题,可能 DDS 有问题)" + echo " 手动执行: rm -rf /dev/shm/fastrtps_* && ros2 daemon stop && ros2 daemon start" +fi + +# 8b. 验证关键话题 +echo "" +echo "验证关键话题..." +for TOPIC in /odom /scan /cmd_vel /tf; do + if ROS_DOMAIN_ID=$ROS_DOMAIN_ID_VAL ros2 topic list 2>/dev/null | grep -q "$TOPIC"; then + echo " ✅ $TOPIC" + else + echo " ⚠️ $TOPIC 未找到" + fi +done + +# 8c. 验证进程数量(确保没有重复启动) +echo "" +echo "验证进程数量..." +BRINGUP_PROCS=$(ps aux | grep -E 'agv_pro_node|lslidar_driver_node' | grep -v grep | wc -l || echo 0) +echo " AGV 核心进程: $BRINGUP_PROCS (应为 2)" +if [ "$BRINGUP_PROCS" -eq 2 ]; then + echo " ✅ 进程数量正常(无重复)" +elif [ "$BRINGUP_PROCS" -gt 2 ]; then + echo " ⚠️ 发现 $BRINGUP_PROCS 个核心进程(可能有残留),建议重启" +else + echo " ⚠️ 进程数量异常" +fi + +# 8d. FastRTPS 共享内存状态 +echo "" +echo "FastRTPS 共享内存状态:" +FASTRTPS_NEW=$(ls /dev/shm/fastrtps_* 2>/dev/null | wc -l || echo 0) +echo " 当前文件数: $FASTRTPS_NEW (正常运行时会有一些)" + +# 8e. Flask API 测试 +echo "" +echo "验证 Flask API..." +FLASK_RUNNING=$(ps aux | grep "[p]ython3 app.py" | wc -l || echo 0) +if [ "$FLASK_RUNNING" -gt 0 ]; then + echo " ✅ Flask 进程运行中" +else + echo " ❌ Flask 未运行" +fi + # ---------- 完成 ---------- echo "" echo "==========================================" @@ -150,7 +321,8 @@ echo " 进程状态:" for PROC in "bringup:$BRINGUP_PID" "Nav2:$NAV2_PID" "fixer:$FIXER_PID" "Flask:$FLASK_PID"; do NAME="${PROC%%:*}" PID="${PROC##*:}" - echo " $NAME : $(ps aux | grep -w "$PID" | grep -v grep | awk '{print $2}' || echo '已退出')" + STATUS=$(ps aux | grep -w "$PID" | grep -v grep | awk '{print "运行中"}' || echo '已退出') + echo " $NAME : $STATUS" done echo "" echo " 日志文件:" @@ -159,7 +331,7 @@ echo " Nav2 : /tmp/ros2_nav2.log" echo " fixer : /tmp/scan_fixer.log" echo " Flask : /tmp/agv_flask.log" echo "" -echo " 关键验证命令:" -echo " curl http://localhost:5000/api/navigate/status" -echo " ROS_DOMAIN_ID=1 ros2 topic echo /scan_corrected --once" -echo " ROS_DOMAIN_ID=1 ros2 topic echo /amcl_pose --once" \ No newline at end of file +echo " 如果仍有问题,请依次执行:" +echo " 1. ./stop_all.sh" +echo " 2. rm -rf /dev/shm/fastrtps_*" +echo " 3. ./start_all.sh" diff --git a/agv_app/stop_all.sh b/agv_app/stop_all.sh index f07a9c7..612dd33 100644 --- a/agv_app/stop_all.sh +++ b/agv_app/stop_all.sh @@ -1,38 +1,89 @@ #!/bin/bash +# ============================================================ # stop_all.sh - 关闭 AGV 拍摄系统所有相关进程 +# 版本: v2.0 +# 修复: +# - v2.0: 添加 FastRTPS 清理 + ros2 daemon 重置 +# ============================================================ +set -e -echo "正在停止 AGV 系统..." +echo "==========================================" +echo " Robot AGV 全量停止" +echo "==========================================" +echo "" -# 1. 关闭 Flask 主程序 -echo "[1/3] 关闭 Flask 服务..." -pkill -f "python3 app.py" 2>/dev/null +# ---------- 1. 软杀所有相关进程 ---------- +echo "[1/5] 软杀所有相关进程..." +pkill -f "python3 app.py" 2>/dev/null || true +pkill -f "agv_pro_bringup" 2>/dev/null || true +pkill -f "agv_pro_navigation2" 2>/dev/null || true +pkill -f "agv_pro_node" 2>/dev/null || true +pkill -f "lslidar_driver_node" 2>/dev/null || true +pkill -f "component_container" 2>/dev/null || true +pkill -f "fix_scan_timestamp" 2>/dev/null || true +pkill -f "robot_state_publisher" 2>/dev/null || true +pkill -f "start_all.sh" 2>/dev/null || true +sleep 2 + +# ---------- 2. 硬杀确保干净 ---------- +echo "[2/5] 硬杀残留进程..." +pkill -9 -f "app.py" 2>/dev/null || true +pkill -9 -f "agv_pro_node" 2>/dev/null || true +pkill -9 -f "lslidar_driver_node" 2>/dev/null || true +pkill -9 -f "component_container" 2>/dev/null || true +pkill -9 -f "fix_scan_timestamp" 2>/dev/null || true +pkill -9 -f "agv_pro_bringup" 2>/dev/null || true +pkill -9 -f "agv_pro_navigation2" 2>/dev/null || true sleep 1 -# 2. 关闭 ROS2 bringup(杀掉整个 start.sh 及其子进程) -echo "[2/3] 关闭 ROS2 bringup..." -# 用 pkill 杀掉 agv_pro_bringup 相关进程 -pkill -f "agv_pro_bringup" 2>/dev/null -pkill -f "agv_pro_navigation2" 2>/dev/null -# 如果 bringup 是由 start.sh 启动的,杀掉 start.sh 进程树 -pkill -f "start.sh" 2>/dev/null -pkill -f "bringup" 2>/dev/null -sleep 1 - -# 3. 清理 ROS2 daemon(防止残留) -echo "[3/3] 清理 ROS2 daemon..." -source /opt/ros/humble/setup.bash 2>/dev/null -ros2 daemon stop 2>/dev/null || true - -# 确保所有相关进程都已停止 -sleep 1 -remaining=$(ps aux | grep -E "python3 app.py|agv_pro_bringup|agv_pro_navigation2|navigation2" | grep -v grep | wc -l) -if [ "$remaining" -eq 0 ]; then - echo "✅ 所有进程已关闭" +# ---------- 3. 【关键】清理 FastRTPS 共享内存 ---------- +echo "[3/5] 清理 FastRTPS 共享内存..." +FASTRTPS_COUNT=$(ls /dev/shm/fastrtps_* 2>/dev/null | wc -l || echo 0) +if [ "$FASTRTPS_COUNT" -gt 0 ]; then + rm -rf /dev/shm/fastrtps_* + echo " 已清理 $FASTRTPS_COUNT 个 FastRTPS 文件" else - echo "⚠️ 仍有 $remaining 个进程残留,已强制终止" - pkill -9 -f "python3 app.py" 2>/dev/null - pkill -9 -f "agv_pro_bringup" 2>/dev/null - pkill -9 -f "agv_pro_navigation2" 2>/dev/null + echo " 无 FastRTPS 文件残留" fi -echo "停止完成" \ No newline at end of file +# 清理 scan_fixer 锁文件 +rm -f /tmp/scan_fixer.lock +echo " ✅ FastRTPS 清理完成" + +# ---------- 4. 【关键】重置 ros2 daemon ---------- +echo "[4/5] 重置 ros2 daemon..." +pkill -f "ros2-daemon" 2>/dev/null || true +pkill -9 -f "ros2-daemon" 2>/dev/null || true +sleep 2 +source /opt/ros/humble/setup.bash 2>/dev/null || true +ros2 daemon stop 2>/dev/null || true +echo " ✅ ros2 daemon 已重置" + +# ---------- 5. 验证清理结果 ---------- +echo "[5/5] 验证清理结果..." +PROC_COUNT=$(ps aux | grep -E 'agv_pro_node|lslidar_driver_node|component_container|fix_scan_timestamp|app.py|ros2-daemon' | grep -v grep | wc -l || echo 0) +FASTRTPS_LEFT=$(ls /dev/shm/fastrtps_* 2>/dev/null | wc -l || echo 0) + +echo " 残留进程数: $PROC_COUNT" +echo " FastRTPS 文件数: $FASTRTPS_LEFT" + +if [ "$PROC_COUNT" -eq 0 ] && [ "$FASTRTPS_LEFT" -eq 0 ]; then + echo "" + echo "==========================================" + echo " ✅ 停止完成 - 系统已完全清理" + echo "==========================================" +else + echo "" + echo "==========================================" + echo " ⚠️ 停止完成 - 部分残留可能需要手动清理" + echo "==========================================" + echo "" + echo " 手动清理命令(如需要):" + echo " pkill -9 -f 'agv_pro_node|lslidar|component_container'" + echo " pkill -9 -f 'fix_scan_timestamp|app.py'" + echo " pkill -9 -f 'ros2-daemon'" + echo " rm -rf /dev/shm/fastrtps_*" +fi +echo "" +echo " 现在可以安全运行 ./start_all.sh" +echo "" diff --git a/agv_app/utils/agv_controller_ros2.py b/agv_app/utils/agv_controller_ros2.py index b5becfc..20369d9 100644 --- a/agv_app/utils/agv_controller_ros2.py +++ b/agv_app/utils/agv_controller_ros2.py @@ -44,41 +44,23 @@ class AGVController: return -1, "", str(e) def connect(self) -> bool: - """连接 AGV - 检查 ROS2 节点和 topic""" + """连接 AGV - 检查 ROS2 topic 是否可用""" try: - # 检查 agv_pro_node 是否运行 - rc, out, err = self._run_ros2_cmd("ros2 node list") - if rc != 0: - logger.error(f"ROS2 节点列表获取失败: {err}") - return False - - if "/agv_pro_node" not in out: - logger.error("agv_pro_node 未运行") - return False - - # 检查 /odom topic + # 检查 /odom topic 是否存在(比 ros2 node list 更可靠) rc, out, err = self._run_ros2_cmd("ros2 topic list") + if rc != 0: + logger.error(f"ROS2 topic 列表获取失败: {err}") + return False + if "/odom" not in out: logger.error("/odom topic 不存在") return False - # 尝试获取一次位置数据 - rc, out, err = self._run_ros2_cmd( - "timeout 5 ros2 topic echo /odom --once 2>/dev/null", - timeout=6 - ) - - if rc == 0 and out: - self._connected = True - self._ros2_available = True - logger.info("AGV ROS2 连接成功") - return True - else: - # /odom 可能暂时没数据,但节点存在也算连接成功 - self._connected = True - self._ros2_available = True - logger.info("AGV ROS2 连接成功 (节点存在,等待 odom 数据)") - return True + # /odom 存在即表示 AGV 节点在运行 + self._connected = True + self._ros2_available = True + logger.info("AGV ROS2 连接成功 (/odom topic 存在)") + return True except Exception as e: logger.error(f"AGV 连接失败: {e}") @@ -154,13 +136,13 @@ class AGVController: self._publish_cmd_vel(0, 0, 0) def get_position(self) -> Optional[List[float]]: - """获取 AGV 当前位置 [x, y, yaw]""" + """获取 AGV 当前位置 [x, y, yaw](使用 AMCL 全局定位,比 /odom 更准确)""" if not self.is_connected(): return None try: - # 从 /odom topic 获取位置 + # 优先从 /amcl_pose 获取全局定位位置(更准确) rc, out, err = self._run_ros2_cmd( - "timeout 5 ros2 topic echo /odom --once 2>/dev/null", + "timeout 5 ros2 topic echo /amcl_pose --once 2>/dev/null", timeout=6 ) if rc == 0 and out: diff --git a/agv_app/utils/nav2_navigator.py b/agv_app/utils/nav2_navigator.py index 9302cf9..0eb4c5a 100644 --- a/agv_app/utils/nav2_navigator.py +++ b/agv_app/utils/nav2_navigator.py @@ -2,7 +2,7 @@ Nav2 导航模块 - 通过 ROS2 Action Server 控制 AGV 导航 使用 Nav2 的 navigate_to_pose 和 navigate_through_poses action -通过写入临时 shell 脚本再执行,避免 bash -c 单引号转义问题 +通过 stdin pipe 发送 goal YAML,避免 bash $() 替换破坏 YAML 格式 """ import time import math @@ -10,6 +10,7 @@ import logging import threading import subprocess import os +import re from typing import List, Tuple, Optional, Dict from enum import Enum @@ -39,55 +40,12 @@ class Nav2Status(Enum): class Nav2Navigator: """Nav2 导航器 — 通过 ros2 action /navigate_to_pose 与 Nav2 通信""" - NAVIGATE_SCRIPT = "/tmp/nav2_send_goal.sh" - def __init__(self): self.status = Nav2Status.IDLE self._nav_thread: Optional[threading.Thread] = None self._cancel_event = threading.Event() self._result_status = None self._current_pose = [0.0, 0.0, 0.0] - self._build_script() - - def _build_script(self): - """生成 Nav2 action 发送脚本(通过 heredoc 避免所有 bash 转义问题)""" - script = self.NAVIGATE_SCRIPT - content = [ - "#!/bin/bash", - "source /opt/ros/humble/setup.bash", - "source /home/elephant/agv_pro_ros2/install/setup.bash", - "", - "TARGET_X=${1:-0}", - "TARGET_Y=${2:-0}", - "TARGET_YAW=${3:-0}", - "", - "# 计算四元数", - "YAW_QZ=$(python3 -c \"import math; print(math.sin($TARGET_YAW / 2.0))\")", - "YAW_QW=$(python3 -c \"import math; print(math.cos($TARGET_YAW / 2.0))\")", - "", - "# 通过 stdin pipe 发送 goal", - 'cat << \'GOAL\' | ros2 action send_goal /navigate_to_pose nav2_msgs/action/NavigateToPose - --feedback', - "pose:", - " header:", - " stamp:", - " sec: 0", - " nanosec: 0", - " frame_id: map", - " pose:", - " position:", - " x: $TARGET_X", - " y: $TARGET_Y", - " z: 0.0", - " orientation:", - " x: 0.0", - " y: 0.0", - " z: $YAW_QZ", - " w: $YAW_QW", - "GOAL", - ] - with open(script, "w") as f: - f.write("\n".join(content) + "\n") - os.chmod(script, 0o755) def _check_nav2_available(self) -> bool: """检查 Nav2 action server 是否可用(带缓存 10 秒)""" @@ -95,7 +53,6 @@ class Nav2Navigator: if hasattr(self, '_nav2_check_time') and hasattr(self, '_nav2_check_result'): if time_mod.time() - self._nav2_check_time < 10: return self._nav2_check_result - # 写入脚本文件执行,兼容 Flask subprocess 环境 script = ( "#!/bin/bash\n" "source /opt/ros/humble/setup.bash 2>/dev/null\n" @@ -106,24 +63,20 @@ class Nav2Navigator: script_file = "/tmp/nav2_check.sh" with open(script_file, "w") as f: f.write(script) - import os os.chmod(script_file, 0o755) r = subprocess.run([script_file], capture_output=True, text=True, timeout=8) - rc = r.returncode - out = r.stdout - err = r.stderr - result = rc == 0 and ("/navigate_to_pose" in out or "navigate_to_pose" in out) - logger.debug(f"_check_nav2_available: rc={rc}, out={out[:100]}, result={result}") + result = r.returncode == 0 and ("/navigate_to_pose" in r.stdout or "navigate_to_pose" in r.stdout) + logger.debug(f"_check_nav2_available: rc={r.returncode}, result={result}") self._nav2_check_result = result self._nav2_check_time = time_mod.time() return result def _get_current_pose(self) -> List[float]: - """从 /odom 获取当前位置 [x, y, yaw]""" + """从 /amcl_pose 获取当前位置 [x, y, yaw]""" try: rc, out, err = _run_ros2_bash("timeout 3 ros2 topic echo /amcl_pose --once 2>/dev/null", timeout=4) if rc == 0 and out: - import re, yaml + import yaml clean = re.sub(r'\x1b\[[0-9;]*[a-zA-Z]', '', out) yaml_str = re.sub(r'^[\d\-:. ]+\[RTPS[^\]]*\].*\n?', '', clean, flags=re.MULTILINE).strip() yaml_str = yaml_str.split('---')[0] @@ -139,35 +92,22 @@ class Nav2Navigator: self._current_pose = [x, y, yaw] return self._current_pose except Exception as e: - logger.debug(f"获取 odom 失败: {e}") + logger.debug(f"获取 amcl_pose 失败: {e}") return self._current_pose def navigate_to_pose(self, x: float, y: float, yaw: float = None, timeout_sec: float = 120.0, blocking: bool = True) -> bool: - """ - 导航到目标坐标(Nav2 navigate_to_pose action) - - Args: - x, y: 目标位置(世界坐标,米) - yaw: 目标朝向(弧度),None 则保持当前朝向 - timeout_sec: 超时时间(秒) - blocking: 是否阻塞等待导航完成 - - Returns: - blocking=True: 是否成功到达 - blocking=False: 是否成功启动导航 - """ + """导航到目标坐标""" if self.status == Nav2Status.NAVIGATING: logger.warning("导航正在进行中,请先停止当前导航") return False - # 重置状态,允许发起新导航 self.status = Nav2Status.IDLE self._result_status = None if not self._check_nav2_available(): - logger.error("Nav2 action server 不可用,请确保 navigation2_active.launch.py 已启动") + logger.error("Nav2 action server 不可用") return False if yaw is None: @@ -176,12 +116,10 @@ class Nav2Navigator: logger.info(f"发送导航目标: ({x:.3f}, {y:.3f}), yaw={math.degrees(yaw):.1f}°") - # 启动导航线程 self._cancel_event.clear() self._result_status = None self.status = Nav2Status.NAVIGATING - # 停掉旧线程(防止重复调用导致多线程冲突) if self._nav_thread and self._nav_thread.is_alive(): self.stop() self._nav_thread.join(timeout=3) @@ -199,24 +137,32 @@ class Nav2Navigator: return True + def _parse_result_status(self, combined: str) -> Optional[str]: + """从 ros2 action send_goal 输出中解析导航结果状态""" + result_idx = combined.find("Result:") + if result_idx < 0: + return None + # 匹配 "status: STATUS_SUCCEEDED" 或 "status: SUCCEEDED" 等格式 + status_m = re.search(r'status\s*:\s*(\w+)', combined[result_idx:]) + if status_m: + st = status_m.group(1).upper() + if 'SUCCEEDED' in st: + return "succeeded" + elif 'FAILED' in st or 'ABORTED' in st: + return "failed" + elif 'CANCELED' in st or 'CANCELLED' in st: + return "cancelled" + logger.warning(f"未知的导航状态: {st}") + return None + def _nav_thread_func(self, x: float, y: float, yaw: float, timeout_sec: float): - """导航执行线程 — 使用 heredoc 脚本避免 bash 转义问题""" + """导航执行线程 — 使用 stdin pipe 发送 goal YAML""" try: logger.info(f"Nav2 线程启动: x={x}, y={y}, yaw={yaw}, timeout={timeout_sec}s") start_time = time.time() - elapsed = 0.0 - check_interval = 1.0 - result_received = False - - # 使用 pipe 方式运行导航 - # 通过 subprocess.Popen + stdin heredoc 避免所有引号转义问题 - setup = "source /opt/ros/humble/setup.bash && source /home/elephant/agv_pro_ros2/install/setup.bash && export ROS_DOMAIN_ID=1" qz = math.sin(yaw / 2.0) qw = math.cos(yaw / 2.0) - # 构建 goal YAML(用 bash heredoc 方式避免转义问题) - # 写入临时文件 - import tempfile goal_yaml = ( f"pose:\n" f" header:\n" @@ -234,23 +180,27 @@ class Nav2Navigator: f" y: 0.0\n" f" z: {qz}\n" f" w: {qw}\n" - f"behavior_tree: ''\n" ) - goal_file = "/tmp/nav2_goal_{}.yaml".format(os.getpid()) + # 写入临时文件,用 $(cat file) 方式传递(stdin pipe "-" 在 Humble 有 bug) + import tempfile + goal_file = "/tmp/nav2_goal_{}_{}.yaml".format(os.getpid(), int(time.time())) with open(goal_file, "w") as f: f.write(goal_yaml) - cmd = ( - f'bash -l -c \'' - f'source /opt/ros/humble/setup.bash && ' - f'source /home/elephant/agv_pro_ros2/install/setup.bash && ' - f'ros2 action send_goal /navigate_to_pose nav2_msgs/action/NavigateToPose ' - f'"$(cat {goal_file})" --feedback\'' - ) + # 写一个临时脚本执行导航(避免引号嵌套地狱) + script_file = "/tmp/nav2_navigate_{}_{}.sh".format(os.getpid(), int(time.time())) + with open(script_file, "w") as f: + f.write('#!/bin/bash\n') + f.write('source /opt/ros/humble/setup.bash\n') + f.write('source /home/elephant/agv_pro_ros2/install/setup.bash\n') + f.write('export ROS_DOMAIN_ID=1\n') + f.write('ros2 action send_goal /navigate_to_pose nav2_msgs/action/NavigateToPose "$(cat {})" --feedback\n'.format(goal_file)) + os.chmod(script_file, 0o755) - process = subprocess.Popen( - cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True + proc = subprocess.Popen( + ['bash', '-l', script_file], + stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True ) stdout_lines = [] @@ -263,155 +213,72 @@ class Nav2Navigator: break lines.append(line) - # 写入 goal(通过 bash heredoc) - # 写入 goal(通过 bash 脚本方式) - script_content = ( - f'#!/bin/bash\n' - f'source /opt/ros/humble/setup.bash\n' - f'source /home/elephant/agv_pro_ros2/install/setup.bash\n' - f'GOAL=$(cat {goal_file})\n' - f'ros2 action send_goal /navigate_to_pose nav2_msgs/action/NavigateToPose "$GOAL" --feedback\n' - ) - script_file = "/tmp/nav2_action.sh" - with open(script_file, "w") as sf: - sf.write(script_content) - os.chmod(script_file, 0o755) + t_out = threading.Thread(target=reader, args=(proc.stdout, stdout_lines)) + t_err = threading.Thread(target=reader, args=(proc.stderr, stderr_lines)) + t_out.start() + t_err.start() - out_thread = threading.Thread(target=reader, args=(process.stdout, stdout_lines)) - err_thread = threading.Thread(target=reader, args=(process.stderr, stderr_lines)) - out_thread.start() - err_thread.start() - - # 用 subprocess.Popen([script_path]) 替代 stdin - stop_reading.set() - if process.poll() is None: - process.terminate() - process.wait(timeout=3) - - act_proc = subprocess.Popen( - [script_file], - stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True - ) - - stdout_lines2 = [] - stderr_lines2 = [] - stop_reading2 = threading.Event() - - def reader2(pipe, lines): - for line in iter(pipe.readline, ''): - if stop_reading2.is_set(): - break - lines.append(line) - - t_out2 = threading.Thread(target=reader2, args=(act_proc.stdout, stdout_lines2)) - t_err2 = threading.Thread(target=reader2, args=(act_proc.stderr, stderr_lines2)) - t_out2.start() - t_err2.start() - - # 轮询等待 action 完成(每 2s 检查一次结果) + # 轮询等待 action 完成 result_received = False - for _ in range(60): # 最多等 120 秒 + max_checks = int(timeout_sec / 2) + for _ in range(max_checks): time.sleep(2) elapsed = time.time() - start_time - # 检查是否有结果 - full_out = "".join(stdout_lines2) - full_err = "".join(stderr_lines2) - combined = full_out + full_err - - result_idx = combined.find("Result:") - if result_idx >= 0: - import re - status_m = re.search(r'status[\s:]+(\w+)', combined[result_idx:]) - if status_m: - st = status_m.group(1).lower() - if st in ('succeeded', 'SUCCEEDED'): - logger.info("✅ Nav2 导航成功到达目标") - self._result_status = "succeeded" - result_received = True - break - elif st in ('failed', 'FAILED', 'aborted'): - logger.warning(f"⚠️ Nav2 导航失败: status={st}") - self._result_status = "failed" - result_received = True - break - elif st in ('canceled', 'cancelled'): - logger.info("Nav2 导航被取消") - self._result_status = "cancelled" - result_received = True - break - - # 检查进程是否已结束但无结果 - if act_proc.poll() is not None and not result_received: - self._result_status = "failed" - break - - if self._cancel_event.is_set(): - break - - stop_reading2.set() - if act_proc.poll() is None: - act_proc.terminate() - try: - act_proc.wait(timeout=3) - except subprocess.TimeoutExpired: - act_proc.kill() - t_out2.join(timeout=2) - t_err2.join(timeout=2) - stdout_lines = stdout_lines2 - stderr_lines = stderr_lines2 - - # 轮询检查结果 - while not self._cancel_event.is_set() and elapsed < timeout_sec: - time.sleep(check_interval) - elapsed = time.time() - start_time - full_out = "".join(stdout_lines) full_err = "".join(stderr_lines) combined = full_out + full_err - # 查找 Result 节中的 status 字段 - import re - result_idx = combined.find("Result:") - if result_idx >= 0: - status_m = re.search(r'status[\s:]+(\w+)', combined[result_idx:]) - if status_m: - st = status_m.group(1).lower() - if st in ('succeeded', 'SUCCEEDED'): - logger.info("✅ Nav2 导航成功到达目标") - self._result_status = "succeeded" - result_received = True - break - elif st in ('failed', 'FAILED', 'aborted'): - logger.warning(f"⚠️ Nav2 导航失败: status={st}") - self._result_status = "failed" - result_received = True - break - elif st in ('canceled', 'cancelled'): - logger.info("Nav2 导航被取消") - self._result_status = "cancelled" - result_received = True - break - - if not result_received: + # 检查是否有 YAML 解析错误 + if "Traceback" in combined and "Goal accepted" not in combined: + logger.error(f"ros2 action send_goal YAML 解析失败: {combined[:500]}") self._result_status = "failed" + result_received = True break - # 善后 + # 检查是否未被接受 + if "Goal accepted" not in combined and elapsed > 15: + logger.warning(f"导航目标 15 秒内未被接受,输出: {combined[:300]}") + + # 解析 Result + parsed = self._parse_result_status(combined) + if parsed: + self._result_status = parsed + result_received = True + if parsed == "succeeded": + logger.info(f"✅ Nav2 导航成功到达目标 ({x:.3f}, {y:.3f})") + elif parsed == "failed": + logger.warning(f"⚠️ Nav2 导航失败") + else: + logger.info("Nav2 导航被取消") + break + + # 进程已结束但无 Result + if proc.poll() is not None and not result_received: + logger.error(f"导航进程提前退出,rc={proc.returncode},输出: {combined[:300]}") + self._result_status = "failed" + result_received = True + break + + if self._cancel_event.is_set(): + break + + # 清理 stop_reading.set() - if act_proc.poll() is None: - act_proc.terminate() + if proc.poll() is None: + proc.terminate() try: - act_proc.wait(timeout=3) + proc.wait(timeout=3) except subprocess.TimeoutExpired: - act_proc.kill() - - out_thread.join(timeout=2) - err_thread.join(timeout=2) + proc.kill() + t_out.join(timeout=2) + t_err.join(timeout=2) + # 处理取消 if self._cancel_event.is_set() and not result_received: logger.info("取消 Nav2 导航...") - cancel_cmd = f'bash -l -c \'{setup} && ros2 action cancel /navigate_to_pose 2>/dev/null || ros2 action cancel /navigate_through_poses 2>/dev/null || true\'' + setup = "source /opt/ros/humble/setup.bash && source /home/elephant/agv_pro_ros2/install/setup.bash && export ROS_DOMAIN_ID=1" + cancel_cmd = f'bash -l -c \'{setup} && ros2 action cancel /navigate_to_pose 2>/dev/null || true\'' subprocess.run(cancel_cmd, shell=True, timeout=3) self._result_status = "cancelled" @@ -429,7 +296,7 @@ class Nav2Navigator: def navigate_through_poses(self, poses: List[Tuple[float, float, float]], timeout_per_pose: float = 120.0, blocking: bool = True) -> bool: - """导航通过多个路径点(Nav2 navigate_through_poses action)""" + """导航通过多个路径点""" if self.status == Nav2Status.NAVIGATING: logger.warning("导航正在进行中,请先停止") return False @@ -461,7 +328,6 @@ class Nav2Navigator: try: setup = "source /opt/ros/humble/setup.bash && source /home/elephant/agv_pro_ros2/install/setup.bash && export ROS_DOMAIN_ID=1" - # 构建 poses heredoc poses_lines = [] for i, (x, y, yaw) in enumerate(poses): qz = math.sin(yaw / 2.0) @@ -493,6 +359,7 @@ class Nav2Navigator: ) stdout_lines = [] + stderr_lines = [] stop_reading = threading.Event() def reader(pipe, lines): @@ -502,14 +369,16 @@ class Nav2Navigator: lines.append(line) out_thread = threading.Thread(target=reader, args=(process.stdout, stdout_lines)) + err_thread = threading.Thread(target=reader, args=(process.stderr, stderr_lines)) out_thread.start() + err_thread.start() try: process.stdin.write(poses_yaml + "\n") process.stdin.flush() process.stdin.close() except Exception as e: - logger.error(f"写入 poses heredoc 失败: {e}") + logger.error(f"写入 poses YAML 失败: {e}") self.status = Nav2Status.FAILED return @@ -520,29 +389,17 @@ class Nav2Navigator: full_err = "".join(stderr_lines) combined = full_out + full_err - result_idx = combined.find("Result:") - if result_idx >= 0: - import re - status_m = re.search(r'status[\s:]+(\w+)', combined[result_idx:]) - if status_m: - st = status_m.group(1).lower() - if st in ('succeeded', 'SUCCEEDED'): - logger.info(f"✅ Nav2 路径点导航成功完成 {len(poses)} 个点") - self._result_status = "succeeded" - result_received = True - break - elif st in ('failed', 'FAILED', 'aborted'): - logger.warning(f"⚠️ Nav2 路径点导航失败") - self._result_status = "failed" - result_received = True - break - elif st in ('canceled', 'cancelled'): - self._result_status = "cancelled" - result_received = True - break + parsed = self._parse_result_status(combined) + if parsed: + self._result_status = parsed + result_received = True + if parsed == "succeeded": + logger.info(f"✅ Nav2 路径点导航成功完成 {len(poses)} 个点") + break if process.poll() is not None and not result_received: self._result_status = "failed" + result_received = True break stop_reading.set() @@ -550,6 +407,7 @@ class Nav2Navigator: process.terminate() process.wait(timeout=3) out_thread.join(timeout=2) + err_thread.join(timeout=2) if self._result_status == "succeeded": self.status = Nav2Status.SUCCEEDED @@ -583,4 +441,4 @@ class Nav2Navigator: def get_current_position(self) -> List[float]: """获取当前位置 [x, y, yaw]""" - return self._get_current_pose() \ No newline at end of file + return self._get_current_pose()