BAAI
/

tanhuajie2001's picture
Upload folder using huggingface_hub
f5409c6 verified
[
{
"id": 1982,
"image": "images/1982_testset_0000394.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"throw the soccer_ball\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[161, 256, 183, 281]"
}
]
},
{
"id": 1091,
"image": "images/1091_motorcycle_000923.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"sit_on the motorcycle\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[456, 231, 510, 277]"
}
]
},
{
"id": 6498,
"image": "images/6498_thu_cut_fruit_RGB_cut_fruit_cwx_2_22.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"cut_with the knife\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[512, 196, 560, 252]"
}
]
},
{
"id": 24,
"image": "images/24_6.png",
"conversations": [
{
"from": "human",
"value": "<image>\nYou are a robot using the joint control. The task is \"take the basket\". Please predict a possible affordance area of the end effector?"
},
{
"from": "gpt",
"value": "[817, 270, 1095, 436]"
}
]
},
{
"id": 2420,
"image": "images/2420_ego4d_7aca7552-56d2-4ac0-be30-aed9c5dfc8ea_9000.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"lie_on the couch\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[813, 442, 930, 558]"
}
]
},
{
"id": 1045,
"image": "images/1045_bench_002635.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"sit_on the bench\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[321, 169, 508, 224]"
}
]
},
{
"id": 6738,
"image": "images/6738_cup_002554.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"pour the cup\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[224, 293, 277, 356]"
}
]
},
{
"id": 5909,
"image": "images/5909_bicycle_001556.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"sit_on the bicycle\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[117, 113, 227, 192]"
}
]
},
{
"id": 7259,
"image": "images/7259_golf_clubs_001094.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"swing the golf_clubs\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[358, 117, 424, 229]"
}
]
},
{
"id": 6523,
"image": "images/6523_soccer_ball_002817.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"kick the soccer_ball\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[231, 117, 378, 275]"
}
]
},
{
"id": 3428,
"image": "images/3428_testset_0001985.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"ride the bicycle\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[68, 36, 100, 62]"
}
]
},
{
"id": 7249,
"image": "images/7249_banana_000253.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"peel the banana\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[216, 119, 387, 396]"
}
]
},
{
"id": 6568,
"image": "images/6568_testset_0000092.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"kick the soccer_ball\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[120, 189, 148, 217]"
}
]
},
{
"id": 5999,
"image": "images/5999_ego4d_6b889d80-1c10-413c-9532-fbd629305a43_36300.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"type_on the laptop\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[1507, 115, 1918, 315]"
}
]
},
{
"id": 2950,
"image": "images/2950_bicycle_000661.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"push the bicycle\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[483, 0, 533, 24]"
}
]
},
{
"id": 2050,
"image": "images/2050_baseball_002670.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"throw the baseball\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[307, 166, 364, 222]"
}
]
},
{
"id": 3501,
"image": "images/3501_testset_0000898.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"ride the bicycle\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[409, 261, 459, 319]"
}
]
},
{
"id": 5111,
"image": "images/5111_youtube_Making a T-Bone Steak POV First Person Cooking-znuEJcLMSjw-1080p-1644527668544_003600.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"wash the bowl\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[817, 585, 1186, 1005]"
}
]
},
{
"id": 4416,
"image": "images/4416_testset_0000636.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"hold the skateboard\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[325, 370, 466, 444]"
}
]
},
{
"id": 5277,
"image": "images/5277_epic_P23_02_frame_0000071911.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"stir the bowl\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[222, 96, 293, 163]"
}
]
},
{
"id": 5348,
"image": "images/5348_ego4d_8af2ed7d-4435-40ce-b3cc-685f28bfcb2d_37500.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"take_photo the cell_phone\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[1182, 355, 1372, 505]"
}
]
},
{
"id": 2722,
"image": "images/2722_testset_0001597.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"lie_on the surfboard\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[34, 222, 82, 342]"
}
]
},
{
"id": 4035,
"image": "images/4035_testset_0001460.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"hold the suitcase\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[504, 370, 576, 417]"
}
]
},
{
"id": 2999,
"image": "images/2999_thu_cut_fruit_RGB_cut_fruit_tys_3_99.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"cut_with the knife\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[355, 60, 408, 113]"
}
]
},
{
"id": 1217,
"image": "images/1217_testset_0001818.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"sit_on the chair\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[582, 487, 647, 537]"
}
]
},
{
"id": 6395,
"image": "images/6395_knife_001124.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"cut_with the knife\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[412, 62, 577, 189]"
}
]
},
{
"id": 4496,
"image": "images/4496_frisbee_002270.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"hold the frisbee\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[58, 73, 925, 931]"
}
]
},
{
"id": 3348,
"image": "images/3348_motorcycle_002093.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"ride the motorcycle\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[286, 134, 337, 184]"
}
]
},
{
"id": 5290,
"image": "images/5290_ego4d_de8ea8a7-6bdb-4679-9b17-9dc6a79e45e7_4800.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"take_photo the cell_phone\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[889, 424, 1018, 560]"
}
]
},
{
"id": 6667,
"image": "images/6667_bicycle_002350.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"ride the bicycle\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[389, 72, 455, 120]"
}
]
},
{
"id": 7248,
"image": "images/7248_banana_000480.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"peel the banana\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[289, 112, 359, 296]"
}
]
},
{
"id": 4655,
"image": "images/4655_youtube_how to cook PASTA RISOTTATA UNDER 30 MINUTE PASTA MADE LIKE RISOTTO JONAH IN THE KITCHEN-QHjXmwMg3MY-1080p-1644528336886 (1)_012420.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"open the bottle\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[760, 284, 885, 409]"
}
]
},
{
"id": 4731,
"image": "images/4731_thu_read_book_RGB_read_book_cwx_3_86.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"open the book\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[275, 255, 526, 348]"
}
]
},
{
"id": 3375,
"image": "images/3375_testset_0001667.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"ride the motorbike\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[493, 181, 548, 227]"
}
]
},
{
"id": 3059,
"image": "images/3059_knife_000029.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"cut_with the knife\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[407, 149, 456, 204]"
}
]
},
{
"id": 5885,
"image": "images/5885_testset_0001139.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"sit_on the bicycle\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[437, 235, 481, 275]"
}
]
},
{
"id": 1000,
"image": "images/1000_bench_001222.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"sit_on the bench\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[316, 286, 394, 330]"
}
]
},
{
"id": 1705,
"image": "images/1705_skateboard_002490.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"carry the skateboard\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[279, 191, 350, 248]"
}
]
},
{
"id": 7081,
"image": "images/7081_thu_stir_RGB_stir_lhh_3_60.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"sip the cup\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[289, 132, 378, 211]"
}
]
},
{
"id": 647,
"image": "images/647_thu_stir_RGB_stir_lhh_3_60.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"drink_with the cup\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[289, 132, 378, 211]"
}
]
},
{
"id": 5360,
"image": "images/5360_ego4d_f5b87541-3879-444f-be65-2b17f1d95735_24450.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"take_photo the cell_phone\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[1547, 200, 1701, 413]"
}
]
},
{
"id": 5,
"image": "images/5_17.png",
"conversations": [
{
"from": "human",
"value": "<image>\nYou are a robot using the joint control. The task is \"take the basket\". Please predict a possible affordance area of the end effector?"
},
{
"from": "gpt",
"value": "[441, 960, 679, 1055]"
}
]
},
{
"id": 3682,
"image": "images/3682_ego4d_9b8e8dc6-4069-4ef4-b8b2-edeb76894d53_17250.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"pour the bottle\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[992, 0, 1157, 158]"
}
]
},
{
"id": 2483,
"image": "images/2483_testset_0000557.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"lie_on the bench\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[267, 276, 509, 322]"
}
]
},
{
"id": 5378,
"image": "images/5378_camera_000542.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"take_photo the camera\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[252, 284, 492, 425]"
}
]
},
{
"id": 4480,
"image": "images/4480_axe_003531.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"hold the axe\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[341, 136, 803, 508]"
}
]
},
{
"id": 432,
"image": "images/432_testset_0000327.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"catch the soccer_ball\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[164, 98, 198, 129]"
}
]
},
{
"id": 5440,
"image": "images/5440_testset_0000972.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"pick_up the suitcase\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[116, 68, 159, 94]"
}
]
},
{
"id": 0,
"image": "images/0_8.png",
"conversations": [
{
"from": "human",
"value": "<image>\nYou are a robot using the joint control. The task is \"take the basket\". Please predict a possible affordance area of the end effector?"
},
{
"from": "gpt",
"value": "[1158, 344, 1382, 452]"
}
]
},
{
"id": 200,
"image": "images/200_punching_bag_002699.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"boxing the punching_bag\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[141, 173, 284, 425]"
}
]
},
{
"id": 6410,
"image": "images/6410_knife_003240.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"cut_with the knife\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[334, 0, 428, 116]"
}
]
},
{
"id": 5567,
"image": "images/5567_banana_002174.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"peel the banana\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[49, 67, 209, 245]"
}
]
},
{
"id": 4535,
"image": "images/4535_testset_0002282.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"hold the frisbee\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[267, 45, 366, 107]"
}
]
},
{
"id": 3242,
"image": "images/3242_testset_0001182.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"kick the soccer_ball\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[250, 171, 277, 200]"
}
]
},
{
"id": 6210,
"image": "images/6210_bed_002956.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"lie_on the bed\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[234, 207, 496, 252]"
}
]
},
{
"id": 4004,
"image": "images/4004_ego4d_8a280483-bbfd-4f24-9654-bf6a008e406d_25050.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"hold the scissors\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[1188, 262, 1457, 527]"
}
]
},
{
"id": 4217,
"image": "images/4217_epic_P02_122_frame_0000010441.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"hold the bowl\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[373, 79, 409, 119]"
}
]
},
{
"id": 2205,
"image": "images/2205_testset_0001880.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"throw the frisbee\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[152, 86, 230, 136]"
}
]
},
{
"id": 4052,
"image": "images/4052_testset_0001986.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"hold the suitcase\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[153, 24, 243, 76]"
}
]
},
{
"id": 6386,
"image": "images/6386_youtube_Making Japanese Katsudon POV First Person Cooking-YYz5a6wS2oE-1080p-1644527980851_006060.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"cut_with the knife\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[917, 481, 1075, 608]"
}
]
},
{
"id": 6127,
"image": "images/6127_youtube_Good_012720.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"stick the knife\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[418, 0, 541, 141]"
}
]
},
{
"id": 4507,
"image": "images/4507_frisbee_003223.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"hold the frisbee\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[33, 0, 839, 839]"
}
]
},
{
"id": 6794,
"image": "images/6794_ego4d_6e0a6558-c212-4cab-b374-007671edb59c_14100.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"hold the cup\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[1087, 441, 1206, 549]"
}
]
},
{
"id": 1420,
"image": "images/1420_testset_0001955.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"sit_on the skateboard\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[416, 145, 590, 240]"
}
]
},
{
"id": 5110,
"image": "images/5110_bowl_001170.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"wash the bowl\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[0, 171, 213, 228]"
}
]
},
{
"id": 3092,
"image": "images/3092_thu_cut_fruit_RGB_cut_fruit_wyy_1_82.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"cut_with the knife\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[293, 227, 369, 279]"
}
]
},
{
"id": 2760,
"image": "images/2760_testset_0001353.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"lie_on the surfboard\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[122, 38, 151, 127]"
}
]
},
{
"id": 3216,
"image": "images/3216_testset_0002265.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"kick the soccer_ball\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[172, 299, 384, 543]"
}
]
},
{
"id": 6724,
"image": "images/6724_broccoli_003228.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"eat the broccoli\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[97, 128, 241, 251]"
}
]
},
{
"id": 4638,
"image": "images/4638_ego4d_11f51f72-c0bd-4631-aabb-6ecbe17d95c9_10050.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"hit the axe\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[858, 154, 1091, 263]"
}
]
},
{
"id": 2269,
"image": "images/2269_banana_000977.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"cut the banana\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[162, 203, 307, 429]"
}
]
},
{
"id": 1867,
"image": "images/1867_keyboard_000219.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"type_on the keyboard\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[289, 114, 554, 219]"
}
]
},
{
"id": 382,
"image": "images/382_ego4d_4ac59908-3f13-4085-9479-7348b13803ee_6450.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"write the pen\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[1557, 221, 1918, 432]"
}
]
},
{
"id": 2939,
"image": "images/2939_testset_0002111.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"push the bicycle\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[213, 56, 239, 76]"
}
]
},
{
"id": 6040,
"image": "images/6040_ego4d_20d62117-8b5f-44b3-b86f-a936e6cdd2c2_4950.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"type_on the laptop\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[2291, 558, 2560, 771]"
}
]
},
{
"id": 3326,
"image": "images/3326_motorcycle_002068.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"ride the motorcycle\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[362, 291, 417, 339]"
}
]
},
{
"id": 790,
"image": "images/790_testset_0002285.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"sit_on the couch\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[344, 242, 444, 308]"
}
]
},
{
"id": 5435,
"image": "images/5435_testset_0001206.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"pick_up the suitcase\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[250, 24, 274, 46]"
}
]
},
{
"id": 3696,
"image": "images/3696_wine_glass_002765.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"pour the wine_glass\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[184, 273, 256, 506]"
}
]
},
{
"id": 3721,
"image": "images/3721_baseball_bat_003712.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"hold the baseball_bat\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[105, 192, 289, 258]"
}
]
},
{
"id": 1628,
"image": "images/1628_testset_0001409.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"carry the surfboard\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[132, 184, 183, 220]"
}
]
},
{
"id": 7126,
"image": "images/7126_thu_use_chopstick_RGB_use_chopstick_cwx_3_54.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"wash the cup\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[339, 151, 442, 252]"
}
]
},
{
"id": 2982,
"image": "images/2982_youtube_First Person Cooking ITALIAN SAUSAGE & PEPPERS WITH POLENTA JONAH IN THE KITCHEN POV COOKING-2YPjWvmOb5o-1080p-1644528307790_002580.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"cut_with the knife\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[900, 142, 1084, 276]"
}
]
},
{
"id": 4162,
"image": "images/4162_testset_0001746.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"hold the surfboard\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[72, 122, 190, 172]"
}
]
},
{
"id": 4174,
"image": "images/4174_snowboard_001840.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"hold the snowboard\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[679, 54, 805, 260]"
}
]
},
{
"id": 7266,
"image": "images/7266_golf_clubs_000638.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"swing the golf_clubs\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[26, 80, 165, 218]"
}
]
},
{
"id": 3592,
"image": "images/3592_banana_002623.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"eat the banana\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[49, 254, 169, 449]"
}
]
},
{
"id": 4754,
"image": "images/4754_microwave_001049.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"open the microwave\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[112, 108, 621, 171]"
}
]
},
{
"id": 3571,
"image": "images/3571_apple_000735.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"eat the apple\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[174, 76, 535, 307]"
}
]
},
{
"id": 2979,
"image": "images/2979_knife_000695.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"cut_with the knife\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[138, 174, 241, 224]"
}
]
},
{
"id": 5354,
"image": "images/5354_ego4d_9a0a2d01-c4e0-4e30-8a69-642070d59ea9_6450.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"take_photo the cell_phone\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[1537, 374, 1701, 548]"
}
]
},
{
"id": 1444,
"image": "images/1444_bicycle_001404.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"sit_on the bicycle\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[298, 80, 348, 126]"
}
]
},
{
"id": 5447,
"image": "images/5447_testset_0001906.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"pick_up the suitcase\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[285, 0, 333, 60]"
}
]
},
{
"id": 6518,
"image": "images/6518_testset_0000299.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"kick the soccer_ball\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[277, 240, 300, 263]"
}
]
},
{
"id": 789,
"image": "images/789_testset_0000920.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"sit_on the couch\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[214, 258, 456, 348]"
}
]
},
{
"id": 878,
"image": "images/878_testset_0000949.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"sit_on the bench\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[274, 201, 400, 250]"
}
]
},
{
"id": 4620,
"image": "images/4620_tennis_racket_001468.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"hit the tennis_racket\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[200, 1105, 273, 1407]"
}
]
},
{
"id": 6169,
"image": "images/6169_thu_bounce_ball_RGB_bounce_ball_cwx_2_97.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"throw the basketball\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[218, 173, 336, 288]"
}
]
},
{
"id": 483,
"image": "images/483_testset_0000152.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"catch the soccer_ball\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[222, 119, 249, 144]"
}
]
},
{
"id": 2095,
"image": "images/2095_rugby_ball_002080.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"throw the rugby_ball\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[81, 93, 207, 174]"
}
]
},
{
"id": 5299,
"image": "images/5299_ego4d_5cae3bb3-1e1e-4b9f-82e8-87dbaad8f73d_9900.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"take_photo the cell_phone\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[744, 386, 882, 666]"
}
]
},
{
"id": 5542,
"image": "images/5542_epic_P33_102_frame_0000008911.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"peel the carrot\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[26, 25, 82, 89]"
}
]
},
{
"id": 1766,
"image": "images/1766_ego4d_70e81c43-3bf3-4a3c-81df-16f5108a6918_24150.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"type_on the laptop\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[588, 645, 884, 764]"
}
]
},
{
"id": 6663,
"image": "images/6663_testset_0001042.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"ride the bicycle\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[136, 69, 162, 102]"
}
]
},
{
"id": 3264,
"image": "images/3264_testset_0000353.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"kick the punching_bag\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[249, 124, 345, 468]"
}
]
},
{
"id": 6274,
"image": "images/6274_testset_0001830.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"push the bicycle\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[252, 123, 278, 150]"
}
]
},
{
"id": 6209,
"image": "images/6209_testset_0001089.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"lie_on the bed\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[219, 306, 464, 413]"
}
]
},
{
"id": 2328,
"image": "images/2328_testset_0000188.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"drag the suitcase\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[186, 112, 216, 137]"
}
]
},
{
"id": 1535,
"image": "images/1535_testset_0000898.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"sit_on the bicycle\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[412, 274, 463, 322]"
}
]
},
{
"id": 2945,
"image": "images/2945_testset_0002150.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"push the bicycle\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[204, 120, 291, 210]"
}
]
},
{
"id": 6767,
"image": "images/6767_escape_S05_short_cut_570.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"pour the cup\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[938, 372, 1061, 494]"
}
]
},
{
"id": 7211,
"image": "images/7211_testset_0001668.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"pick_up the skis\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[344, 236, 408, 299]"
}
]
},
{
"id": 1393,
"image": "images/1393_testset_0000436.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"sit_on the motorbike\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[524, 223, 603, 283]"
}
]
},
{
"id": 77,
"image": "images/77_testset_0000454.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"jump the surfboard\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[244, 372, 320, 427]"
}
]
},
{
"id": 5328,
"image": "images/5328_youtube_First Person Cooking POPEYES CHICKEN SANDWICH COPYCAT RECIPE JONAH IN THE KITCHEN POV COOKING-lNAPNvF0kGg-1080p-1644528296768_001740.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"take_photo the cell_phone\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[295, 573, 437, 780]"
}
]
},
{
"id": 7178,
"image": "images/7178_youtube_First Person Cooking Red Pesto Pasta 30 Minute Pasta JONAH IN THE KITCHEN-FeAspQ44YFk-1080p-1644528516587_005160.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"wash the knife\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[691, 446, 1100, 872]"
}
]
},
{
"id": 587,
"image": "images/587_testset_0000198.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"catch the frisbee\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[0, 167, 14, 234]"
}
]
},
{
"id": 1545,
"image": "images/1545_testset_0001366.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"sit_on the bicycle\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[171, 254, 216, 299]"
}
]
},
{
"id": 2547,
"image": "images/2547_testset_0000277.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"lie_on the bench\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[94, 469, 227, 556]"
}
]
},
{
"id": 39,
"image": "images/39_testset_0000686.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"jump the surfboard\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[92, 59, 130, 90]"
}
]
},
{
"id": 6559,
"image": "images/6559_testset_0000893.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"kick the soccer_ball\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[164, 122, 208, 160]"
}
]
},
{
"id": 5156,
"image": "images/5156_epic_P01_101_frame_0000035461.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"stir the bowl\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[197, 54, 291, 160]"
}
]
},
{
"id": 3021,
"image": "images/3021_knife_000942.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"cut_with the knife\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[288, 236, 394, 286]"
}
]
},
{
"id": 1392,
"image": "images/1392_testset_0001231.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"sit_on the motorbike\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[300, 295, 383, 343]"
}
]
},
{
"id": 6450,
"image": "images/6450_youtube_Really_001140.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"cut_with the knife\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[833, 546, 979, 746]"
}
]
},
{
"id": 5683,
"image": "images/5683_testset_0000166.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"catch the soccer_ball\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[83, 288, 128, 330]"
}
]
},
{
"id": 3417,
"image": "images/3417_bicycle_001546.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"ride the bicycle\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[234, 80, 289, 126]"
}
]
},
{
"id": 284,
"image": "images/284_ego4d_8f847cfc-ae36-461c-a96b-8fc58766fb2d_8700.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"write the pen\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[717, 735, 1085, 909]"
}
]
},
{
"id": 4984,
"image": "images/4984_ego4d_5c3be03c-07a5-4427-a2fe-449f16e8017b_3900.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"text_on the cell_phone\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[327, 650, 522, 856]"
}
]
},
{
"id": 1514,
"image": "images/1514_testset_0001042.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"sit_on the bicycle\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[134, 89, 160, 113]"
}
]
},
{
"id": 7066,
"image": "images/7066_cup_002410.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"sip the cup\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[129, 130, 462, 234]"
}
]
},
{
"id": 474,
"image": "images/474_testset_0000092.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"catch the soccer_ball\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[120, 189, 148, 217]"
}
]
},
{
"id": 2393,
"image": "images/2393_testset_0002092.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"lie_on the couch\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[353, 275, 619, 324]"
}
]
},
{
"id": 1916,
"image": "images/1916_thu_cut_fruit_RGB_cut_fruit_lzy_2_49.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"stick the knife\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[387, 69, 435, 117]"
}
]
},
{
"id": 2417,
"image": "images/2417_testset_0001465.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"lie_on the couch\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[209, 321, 369, 366]"
}
]
},
{
"id": 12,
"image": "images/12_20.png",
"conversations": [
{
"from": "human",
"value": "<image>\nYou are a robot using the joint control. The task is \"take the basket\". Please predict a possible affordance area of the end effector?"
},
{
"from": "gpt",
"value": "[544, 210, 1053, 378]"
}
]
},
{
"id": 2667,
"image": "images/2667_testset_0001397.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"lie_on the bench\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[188, 208, 356, 250]"
}
]
},
{
"id": 5140,
"image": "images/5140_toothbrush_000575.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"wash the toothbrush\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[130, 43, 319, 196]"
}
]
},
{
"id": 4777,
"image": "images/4777_youtube_First Person Cooking CHICKEN QUESADILLAS JONAH IN THE KITCHEN POV COOKING-zma2SgJg6l0-1080p-1644528321483 (1)_002520.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"open the oven\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[908, 824, 1261, 959]"
}
]
},
{
"id": 6997,
"image": "images/6997_ego4d_61f0a3d8-01f9-4149-bd0c-c31ad5d319ed_12750.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"hold the axe\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[1328, 819, 1618, 1022]"
}
]
},
{
"id": 2067,
"image": "images/2067_basketball_003534.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"throw the basketball\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[458, 244, 586, 373]"
}
]
},
{
"id": 1668,
"image": "images/1668_testset_0001463.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"carry the snowboard\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[132, 198, 165, 231]"
}
]
},
{
"id": 6716,
"image": "images/6716_banana_002934.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"eat the banana\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[83, 390, 213, 431]"
}
]
},
{
"id": 1431,
"image": "images/1431_testset_0001432.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"sit_on the bicycle\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[203, 175, 256, 223]"
}
]
},
{
"id": 4379,
"image": "images/4379_wine_glass_003375.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"hold the wine_glass\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[288, 152, 439, 295]"
}
]
},
{
"id": 2492,
"image": "images/2492_testset_0001207.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"lie_on the bench\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[185, 273, 356, 318]"
}
]
},
{
"id": 1511,
"image": "images/1511_bicycle_002266.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"sit_on the bicycle\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[145, 106, 209, 159]"
}
]
},
{
"id": 5865,
"image": "images/5865_bicycle_001224.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"sit_on the bicycle\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[325, 181, 426, 232]"
}
]
},
{
"id": 1780,
"image": "images/1780_ego4d_6a85ad1a-746c-45ac-b372-b677cb6e0a3d_5850.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"type_on the laptop\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[1989, 1315, 2237, 1564]"
}
]
},
{
"id": 3070,
"image": "images/3070_youtube_Really_011880.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"cut_with the knife\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[831, 587, 988, 785]"
}
]
},
{
"id": 3693,
"image": "images/3693_wine_glass_002922.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"pour the wine_glass\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[300, 331, 362, 425]"
}
]
},
{
"id": 3730,
"image": "images/3730_baseball_bat_000452.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"hold the baseball_bat\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[1084, 91, 1299, 280]"
}
]
},
{
"id": 165,
"image": "images/165_testset_0000353.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"boxing the punching_bag\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[249, 124, 345, 468]"
}
]
},
{
"id": 3170,
"image": "images/3170_scissors_002226.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"cut_with the scissors\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[254, 250, 330, 362]"
}
]
},
{
"id": 5718,
"image": "images/5718_soccer_ball_002743.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"catch the soccer_ball\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[151, 86, 222, 154]"
}
]
},
{
"id": 26,
"image": "images/26_3.png",
"conversations": [
{
"from": "human",
"value": "<image>\nYou are a robot using the joint control. The task is \"take the basket\". Please predict a possible affordance area of the end effector?"
},
{
"from": "gpt",
"value": "[1182, 373, 1398, 522]"
}
]
},
{
"id": 4852,
"image": "images/4852_bottle_001569.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"sip the bottle\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[168, 83, 218, 125]"
}
]
},
{
"id": 4569,
"image": "images/4569_testset_0001135.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"hold the frisbee\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[162, 105, 186, 128]"
}
]
},
{
"id": 197,
"image": "images/197_testset_0000909.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"boxing the punching_bag\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[192, 183, 265, 490]"
}
]
},
{
"id": 3282,
"image": "images/3282_punching_bag_000047.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"kick the punching_bag\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[216, 257, 271, 383]"
}
]
},
{
"id": 477,
"image": "images/477_testset_0000911.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"catch the soccer_ball\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[158, 106, 193, 142]"
}
]
},
{
"id": 1397,
"image": "images/1397_testset_0000611.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"sit_on the motorbike\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[148, 199, 246, 263]"
}
]
},
{
"id": 2244,
"image": "images/2244_carrot_000953.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"cut the carrot\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[341, 99, 463, 263]"
}
]
},
{
"id": 684,
"image": "images/684_bottle_002780.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"drink_with the bottle\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[444, 270, 517, 339]"
}
]
},
{
"id": 2135,
"image": "images/2135_testset_0000991.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"throw the frisbee\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[494, 188, 670, 332]"
}
]
},
{
"id": 2680,
"image": "images/2680_testset_0001407.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"lie_on the bench\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[184, 513, 403, 572]"
}
]
},
{
"id": 4619,
"image": "images/4619_tennis_racket_000964.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"hit the tennis_racket\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[191, 1159, 265, 1420]"
}
]
},
{
"id": 1939,
"image": "images/1939_ego4d_c2a3d4ac-8d13-4daa-a9a9-a867999e956f_29400.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"stick the knife\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[884, 356, 989, 462]"
}
]
},
{
"id": 16,
"image": "images/16_26.png",
"conversations": [
{
"from": "human",
"value": "<image>\nYou are a robot using the joint control. The task is \"take the knife\". Please predict a possible affordance area of the end effector?"
},
{
"from": "gpt",
"value": "[976, 517, 1215, 807]"
}
]
},
{
"id": 1378,
"image": "images/1378_testset_0001495.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"sit_on the motorbike\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[428, 256, 495, 307]"
}
]
},
{
"id": 4053,
"image": "images/4053_testset_0001112.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"hold the suitcase\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[94, 0, 139, 17]"
}
]
},
{
"id": 6238,
"image": "images/6238_testset_0002145.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"lie_on the bed\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[131, 164, 260, 212]"
}
]
},
{
"id": 4834,
"image": "images/4834_cup_002062.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"sip the cup\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[346, 120, 542, 221]"
}
]
},
{
"id": 21,
"image": "images/21_4.png",
"conversations": [
{
"from": "human",
"value": "<image>\nYou are a robot using the joint control. The task is \"take the knife\". Please predict a possible affordance area of the end effector?"
},
{
"from": "gpt",
"value": "[1391, 905, 1728, 1076]"
}
]
},
{
"id": 3691,
"image": "images/3691_thu_use_chopstick_RGB_use_chopstick_gs_2_176.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"pour the wine_glass\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[273, 346, 327, 406]"
}
]
},
{
"id": 1160,
"image": "images/1160_testset_0001004.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"sit_on the chair\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[291, 393, 366, 463]"
}
]
},
{
"id": 160,
"image": "images/160_punching_bag_000345.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"boxing the punching_bag\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[90, 442, 276, 1093]"
}
]
},
{
"id": 3035,
"image": "images/3035_thu_cut_fruit_RGB_cut_fruit_tys_1_79.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"cut_with the knife\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[398, 68, 449, 126]"
}
]
},
{
"id": 979,
"image": "images/979_testset_0001849.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"sit_on the bench\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[365, 253, 491, 320]"
}
]
},
{
"id": 4495,
"image": "images/4495_testset_0001819.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"hold the frisbee\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[182, 78, 216, 107]"
}
]
},
{
"id": 6421,
"image": "images/6421_ego4d_c2a3d4ac-8d13-4daa-a9a9-a867999e956f_15000.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"cut_with the knife\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[927, 122, 1035, 226]"
}
]
},
{
"id": 22,
"image": "images/22_5.png",
"conversations": [
{
"from": "human",
"value": "<image>\nYou are a robot using the joint control. The task is \"take the basket\". Please predict a possible affordance area of the end effector?"
},
{
"from": "gpt",
"value": "[683, 213, 916, 351]"
}
]
},
{
"id": 6463,
"image": "images/6463_ego4d_002d2729-df71-438d-8396-5895b349e8fd_67200.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"cut_with the knife\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[779, 482, 938, 705]"
}
]
},
{
"id": 2002,
"image": "images/2002_testset_0000962.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"throw the soccer_ball\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[290, 170, 323, 200]"
}
]
},
{
"id": 2335,
"image": "images/2335_testset_0001233.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"drag the suitcase\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[143, 128, 170, 150]"
}
]
},
{
"id": 4395,
"image": "images/4395_wine_glass_002282.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"hold the wine_glass\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[126, 236, 195, 299]"
}
]
},
{
"id": 3638,
"image": "images/3638_ego4d_6be77236-230f-4d9b-98d5-1cbf3be34023_5700.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"pour the cup\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[1524, 264, 1681, 446]"
}
]
},
{
"id": 1031,
"image": "images/1031_testset_0000902.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"sit_on the bench\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[280, 355, 468, 402]"
}
]
},
{
"id": 727,
"image": "images/727_wine_glass_001599.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"drink_with the wine_glass\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[130, 186, 223, 243]"
}
]
},
{
"id": 3089,
"image": "images/3089_youtube_Good_006000.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"cut_with the knife\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[606, 312, 794, 476]"
}
]
},
{
"id": 1344,
"image": "images/1344_testset_0002270.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"sit_on the surfboard\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[124, 93, 322, 156]"
}
]
},
{
"id": 7262,
"image": "images/7262_golf_clubs_003141.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"swing the golf_clubs\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[818, 213, 948, 425]"
}
]
},
{
"id": 6087,
"image": "images/6087_knife_001875.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"stick the knife\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[160, 284, 225, 339]"
}
]
},
{
"id": 3650,
"image": "images/3650_cup_000508.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"pour the cup\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[433, 250, 483, 313]"
}
]
},
{
"id": 3108,
"image": "images/3108_youtube_Why_013980.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"cut_with the knife\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[1294, 641, 1428, 779]"
}
]
},
{
"id": 3816,
"image": "images/3816_bottle_001227.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"hold the bottle\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[87, 181, 220, 317]"
}
]
},
{
"id": 6135,
"image": "images/6135_youtube_POV 'From-Scratch' Meatballs & Spaghetti for My Wife First Person Cooking-RN4LfG5ZHE4-1080p-1644527646933_003840.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"stick the knife\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[364, 171, 491, 295]"
}
]
},
{
"id": 6688,
"image": "images/6688_bicycle_001334.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"ride the bicycle\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[190, 163, 245, 210]"
}
]
},
{
"id": 6355,
"image": "images/6355_knife_001749.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"cut_with the knife\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[90, 181, 279, 224]"
}
]
},
{
"id": 6391,
"image": "images/6391_youtube_First Person Cooking CHICKEN QUESADILLAS JONAH IN THE KITCHEN POV COOKING-zma2SgJg6l0-1080p-1644528321483 (1)_007380.jpg",
"conversations": [
{
"value": "<image>\nYou are a robot using the joint control. The task is \"cut_with the knife\". Please predict a possible affordance area of the end effector?",
"from": "human"
},
{
"from": "gpt",
"value": "[869, 0, 1031, 218]"
}
]
}
]