RobotecAI · jmatejcz · Mar 27, 2025 · Mar 28, 2025 · Mar 28, 2025 · Mar 30, 2025
diff --git a/src/rai_bench/rai_bench/examples/tool_calling_agent_bench_navigation_tasks.py b/src/rai_bench/rai_bench/examples/tool_calling_agent_bench_navigation_tasks.py
@@ -0,0 +1,28 @@
+# Copyright (C) 2025 Robotec.AI
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Sequence
+
+from rai_bench.tool_calling_agent_bench.agent_tasks_interfaces import (
+    ToolCallingAgentTask,
+)
+
+# from rai_bench.tool_calling_agent_bench.ros2_agent_tasks import (
+#     NavigateToPointTask,
+# )
+
+# tasks: Sequence[ToolCallingAgentTask] = [
+#     NavigateToPointTask(),
+#     #  SpinAroundTask()
+# ]
diff --git a/src/rai_bench/rai_bench/examples/tool_calling_agent_bench_tasks.py b/src/rai_bench/rai_bench/examples/tool_calling_agent_bench_tasks.py
@@ -18,85 +18,126 @@
     ToolCallingAgentTask,
 )
 from rai_bench.tool_calling_agent_bench.ros2_agent_tasks import (
-    GetAllROS2RGBCamerasTask,
-    GetObjectPositionsTask,
-    GetROS2DepthCameraTask,
-    GetROS2MessageTask,
-    GetROS2RGBCameraTask,
-    GetROS2TopicsTask,
-    GetROS2TopicsTask2,
-    GrabExistingObjectTask,
-    GrabNotExistingObjectTask,
-    MoveExistingObjectFrontTask,
-    MoveExistingObjectLeftTask,
-    MoveToPointTask,
-    SwapObjectsTask,
+    PublishROS2HRIMessageTask3ExtraCalls,
+    PublishROS2HRIMessageTask0ExtraCalls,
+    PublishROS2HRIMessageTask1ExtraCall,
+    PublishROS2AudioMessageTask0ExtraCalls,
+    PublishROS2AudioMessageTask3ExtraCalls,
+    PublishROS2AudioMessageTask1ExtraCall,
+    PublishROS2DetectionArrayTask3ExtraCalls,
+    PublishROS2DetectionArrayTask1ExtraCall,
+    PublishROS2DetectionArrayTask0ExtraCalls,
+    CallROS2ManipulatorMoveToServiceTask3ExtraCalls,
+    CallROS2ManipulatorMoveToServiceTask1ExtraCall,
+    CallROS2ManipulatorMoveToServiceTask0ExtraCalls,
+    CallGroundedSAMSegmentTask3ExtraCalls,
+    CallGroundedSAMSegmentTask1ExtraCall,
+    CallGroundedSAMSegmentTask0ExtraCalls,
+    CallGroundingDinoClassifyTask3ExtraCalls,
+    CallGroundingDinoClassifyTask1ExtraCall,
+    CallGroundingDinoClassifyTask0ExtraCalls,
+    CallGetLogDigestTask3ExtraCalls,
+    CallGetLogDigestTask1ExtraCall,
+    CallGetLogDigestTask0ExtraCalls,
+    CallVectorStoreRetrievalTask3ExtraCalls,
+    CallVectorStoreRetrievalTask1ExtraCall,
+    CallVectorStoreRetrievalTask0ExtraCalls,
+    CallWhatISeeTask3ExtraCalls,
+    CallWhatISeeTask1ExtraCall,
+    CallWhatISeeTask0ExtraCalls,
 )
 
 tasks: Sequence[ToolCallingAgentTask] = [
-    GetROS2RGBCameraTask(),
-    GetROS2TopicsTask(),
-    GetROS2DepthCameraTask(),
-    GetAllROS2RGBCamerasTask(),
-    GetROS2TopicsTask2(),
-    GetROS2MessageTask(),
-    MoveToPointTask(args={"x": 1.0, "y": 2.0, "z": 3.0, "task": "grab"}),
-    MoveToPointTask(args={"x": 1.2, "y": 2.3, "z": 3.4, "task": "drop"}),
-    GetObjectPositionsTask(
-        objects={
-            "carrot": [{"x": 1.0, "y": 2.0, "z": 3.0}],
-            "apple": [{"x": 4.0, "y": 5.0, "z": 6.0}],
-            "banana": [
-                {"x": 7.0, "y": 8.0, "z": 9.0},
-                {"x": 10.0, "y": 11.0, "z": 12.0},
-            ],
-        },
-    ),
-    GrabExistingObjectTask(
-        object_to_grab="banana",
-        objects={
-            "banana": [{"x": 7.0, "y": 8.0, "z": 9.0}],
-            "apple": [
-                {"x": 4.0, "y": 5.0, "z": 6.0},
-                {"x": 10.0, "y": 11.0, "z": 12.0},
-            ],
-        },
-    ),
-    GrabNotExistingObjectTask(
-        object_to_grab="apple",
-        objects={
-            "banana": [{"x": 7.0, "y": 8.0, "z": 9.0}],
-            "cube": [
-                {"x": 4.0, "y": 5.0, "z": 6.0},
-                {"x": 10.0, "y": 11.0, "z": 12.0},
-            ],
-        },
-    ),
-    MoveExistingObjectLeftTask(
-        object_to_grab="banana",
-        objects={
-            "banana": [{"x": 7.0, "y": 8.0, "z": 9.0}],
-            "apple": [
-                {"x": 4.0, "y": 5.0, "z": 6.0},
-                {"x": 10.0, "y": 11.0, "z": 12.0},
-            ],
-        },
-    ),
-    MoveExistingObjectFrontTask(
-        object_to_grab="banana",
-        objects={
-            "banana": [{"x": 7.0, "y": 8.0, "z": 9.0}],
-            "apple": [
-                {"x": 4.0, "y": 5.0, "z": 6.0},
-                {"x": 10.0, "y": 11.0, "z": 12.0},
-            ],
-        },
-    ),
-    SwapObjectsTask(
-        objects={
-            "banana": [{"x": 1.0, "y": 2.0, "z": 3.0}],
-            "apple": [{"x": 4.0, "y": 5.0, "z": 6.0}],
-        },
-        objects_to_swap=["banana", "apple"],
-    ),
+    PublishROS2HRIMessageTask3ExtraCalls(),
+    PublishROS2HRIMessageTask1ExtraCall(),
+    PublishROS2HRIMessageTask0ExtraCalls(),
+    PublishROS2AudioMessageTask3ExtraCalls(),
+    PublishROS2AudioMessageTask1ExtraCall(),
+    PublishROS2AudioMessageTask0ExtraCalls(),
+    PublishROS2DetectionArrayTask3ExtraCalls(),
+    PublishROS2DetectionArrayTask1ExtraCall(),
+    PublishROS2DetectionArrayTask0ExtraCalls(),
+    CallROS2ManipulatorMoveToServiceTask3ExtraCalls(),
+    CallROS2ManipulatorMoveToServiceTask1ExtraCall(),
+    CallROS2ManipulatorMoveToServiceTask0ExtraCalls(),
+    CallGroundedSAMSegmentTask3ExtraCalls(),
+    CallGroundedSAMSegmentTask1ExtraCall(),
+    CallGroundedSAMSegmentTask0ExtraCalls(),
+    CallGroundingDinoClassifyTask3ExtraCalls(),
+    CallGroundingDinoClassifyTask1ExtraCall(),
+    CallGroundingDinoClassifyTask0ExtraCalls(),
+    CallGetLogDigestTask3ExtraCalls(),
+    CallGetLogDigestTask1ExtraCall(),
+    CallGetLogDigestTask0ExtraCalls(),
+    CallVectorStoreRetrievalTask3ExtraCalls(),
+    CallVectorStoreRetrievalTask1ExtraCall(),
+    CallVectorStoreRetrievalTask0ExtraCalls(),
+    CallWhatISeeTask3ExtraCalls(),
+    CallWhatISeeTask1ExtraCall(),
+    CallWhatISeeTask0ExtraCalls(),
+    # GetROS2RGBCameraTask(),
+    # GetROS2TopicsTask(),
+    # GetROS2DepthCameraTask(),
+    # GetAllROS2RGBCamerasTask(),
+    # GetROS2TopicsTask2(),
+    # GetROS2MessageTask(),
+    # MoveToPointTask(args={"x": 1.0, "y": 2.0, "z": 3.0, "task": "grab"}),
+    # MoveToPointTask(args={"x": 1.2, "y": 2.3, "z": 3.4, "task": "drop"}),
+    # GetObjectPositionsTask(
+    #     objects={
+    #         "carrot": [{"x": 1.0, "y": 2.0, "z": 3.0}],
+    #         "apple": [{"x": 4.0, "y": 5.0, "z": 6.0}],
+    #         "banana": [
+    #             {"x": 7.0, "y": 8.0, "z": 9.0},
+    #             {"x": 10.0, "y": 11.0, "z": 12.0},
+    #         ],
+    #     },
+    # ),
+    # GrabExistingObjectTask(
+    #     object_to_grab="banana",
+    #     objects={
+    #         "banana": [{"x": 7.0, "y": 8.0, "z": 9.0}],
+    #         "apple": [
+    #             {"x": 4.0, "y": 5.0, "z": 6.0},
+    #             {"x": 10.0, "y": 11.0, "z": 12.0},
+    #         ],
+    #     },
+    # ),
+    # GrabNotExistingObjectTask(
+    #     object_to_grab="apple",
+    #     objects={
+    #         "banana": [{"x": 7.0, "y": 8.0, "z": 9.0}],
+    #         "cube": [
+    #             {"x": 4.0, "y": 5.0, "z": 6.0},
+    #             {"x": 10.0, "y": 11.0, "z": 12.0},
+    #         ],
+    #     },
+    # ),
+    # MoveExistingObjectLeftTask(
+    #     object_to_grab="banana",
+    #     objects={
+    #         "banana": [{"x": 7.0, "y": 8.0, "z": 9.0}],
+    #         "apple": [
+    #             {"x": 4.0, "y": 5.0, "z": 6.0},
+    #             {"x": 10.0, "y": 11.0, "z": 12.0},
+    #         ],
+    #     },
+    # ),
+    # MoveExistingObjectFrontTask(
+    #     object_to_grab="banana",
+    #     objects={
+    #         "banana": [{"x": 7.0, "y": 8.0, "z": 9.0}],
+    #         "apple": [
+    #             {"x": 4.0, "y": 5.0, "z": 6.0},
+    #             {"x": 10.0, "y": 11.0, "z": 12.0},
+    #         ],
+    #     },
+    # ),
+    # SwapObjectsTask(
+    #     objects={
+    #         "banana": [{"x": 1.0, "y": 2.0, "z": 3.0}],
+    #         "apple": [{"x": 4.0, "y": 5.0, "z": 6.0}],
+    #     },
+    #     objects_to_swap=["banana", "apple"],
+    # ),
 ]
diff --git a/src/rai_bench/rai_bench/examples/tool_calling_agent_test_bench.py b/src/rai_bench/rai_bench/examples/tool_calling_agent_test_bench.py
@@ -23,6 +23,8 @@
 )
 
 from rai_bench.examples.tool_calling_agent_bench_tasks import tasks
+
+# from rai_bench.examples.tool_calling_agent_bench_navigation_tasks import tasks
 from rai_bench.tool_calling_agent_bench.agent_bench import ToolCallingAgentBenchmark
 
 if __name__ == "__main__":
@@ -60,7 +62,7 @@
         tasks=tasks, logger=bench_logger, results_filename=results_filename
     )
 
-    model_type = "simple_model"
+    model_type = "complex_model"
     model_config = get_llm_model_config_and_vendor(model_type=model_type)[0]
     model_name = getattr(model_config, model_type)
 

diff --git a/src/rai_bench/rai_bench/tool_calling_agent_bench/actions/__init__.py b/src/rai_bench/rai_bench/tool_calling_agent_bench/actions/__init__.py
@@ -0,0 +1,23 @@
+# Copyright (C) 2025 Robotec.AI
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .action_base_model import ActionBaseModel
+from .navigate_to_pose import NavigateToPoseAction
+from .spin import SpinAction
+
+__all__ = [  # public names of this package; each one is imported just above
+    "ActionBaseModel",  # from .action_base_model
+    "NavigateToPoseAction",  # from .navigate_to_pose
+    "SpinAction",  # from .spin
+]
diff --git a/src/rai_bench/rai_bench/tool_calling_agent_bench/actions/action_base_model.py b/src/rai_bench/rai_bench/tool_calling_agent_bench/actions/action_base_model.py
@@ -0,0 +1,25 @@
+# Copyright (C) 2025 Robotec.AI
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Any
+
+from pydantic import BaseModel
+
+
+class ActionBaseModel(BaseModel):  # pydantic base; NavigateToPoseAction subclasses it
+    action_name: str  # required here; NavigateToPoseAction defaults it to "/navigate_to_pose"
+    action_type: str  # required here; subclass sets e.g. "nav2_msgs/action/NavigateToPose"
+    goal: Any  # untyped at this level; NavigateToPoseAction narrows it to its Goal model
+    result: Any  # untyped at this level; NavigateToPoseAction narrows it to its Result model
+    feedback: Any  # untyped at this level; NavigateToPoseAction narrows it to its Feedback model
diff --git a/src/rai_bench/rai_bench/tool_calling_agent_bench/actions/navigate_to_pose.py b/src/rai_bench/rai_bench/tool_calling_agent_bench/actions/navigate_to_pose.py
@@ -0,0 +1,80 @@
+# Copyright (C) 2025 Robotec.AI
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Optional
+
+from pydantic import BaseModel
+
+from rai_bench.tool_calling_agent_bench.actions.action_base_model import ActionBaseModel
+
+
+class Time(BaseModel):  # sec/nanosec pair; used by Header.stamp and the Feedback time fields
+    sec: Optional[int] = 0  # defaults to 0; None is also accepted by the annotation
+    nanosec: Optional[int] = 0  # defaults to 0; None is also accepted by the annotation
+
+
+class Header(BaseModel):  # stamp + frame_id; embedded as PoseStamped.header
+    stamp: Optional[Time] = Time()  # defaults to a Time with sec=0 and nanosec=0
+    frame_id: str  # required, no default
+
+
+class Position(BaseModel):  # three required floats; embedded as Pose.position
+    x: float  # required, no default
+    y: float  # required, no default
+    z: float  # required, no default
+
+
+class Orientation(BaseModel):  # NOTE(review): x/y/z/w looks like a quaternion - confirm
+    x: Optional[float] = 0.0  # defaults to 0.0
+    y: Optional[float] = 0.0  # defaults to 0.0
+    z: Optional[float] = 0.0  # defaults to 0.0
+    w: Optional[float] = 1.0  # the only component with a non-zero default
+
+
+class Pose(BaseModel):  # position plus optional orientation; embedded as PoseStamped.pose
+    position: Position  # required, no default
+    orientation: Optional[Orientation] = Orientation()  # defaults to x=y=z=0.0, w=1.0
+
+
+class PoseStamped(BaseModel):  # Header + Pose; used by Goal.pose and Feedback.current_pose
+    header: Header  # required; Header itself requires frame_id
+    pose: Pose  # required; Pose itself requires position
+
+
+class Goal(BaseModel):  # goal payload type of NavigateToPoseAction
+    pose: PoseStamped  # required, no default
+    behavior_tree: Optional[str] = ""  # defaults to the empty string
+
+
+class Result(BaseModel):  # result payload type of NavigateToPoseAction
+    result: dict  # unparameterized dict; its key/value schema is not declared here
+
+
+class Feedback(BaseModel):  # feedback payload type of NavigateToPoseAction; all fields required
+    current_pose: PoseStamped  # required, no default
+    navigation_time: Time  # required; Time's own fields default to 0
+    estimated_time_remaining: Time  # required; Time's own fields default to 0
+    number_of_recoveries: int  # required, no default
+    distance_remaining: float  # required; units not stated in this file - TODO confirm
+
+
+class NavigateToPoseAction(ActionBaseModel):  # ActionBaseModel with typed goal/result/feedback
+    action_name: str = "/navigate_to_pose"  # default supplied for the base-class field
+    action_type: str = "nav2_msgs/action/NavigateToPose"  # default supplied for the base field
+    goal: Goal  # narrows the base-class Any to the Goal model in this module
+    result: Result  # narrows the base-class Any to the Result model in this module
+    feedback: Feedback  # narrows the base-class Any to the Feedback model in this module
+
+
+# TODO (mkotynia): create init for actions