
[Bug] How to test 3D Visual Grounding task using img feature? #110


Description

@christian6022

Prerequisite

Task

I'm using the official example scripts/configs for the officially supported tasks/models/datasets.

Branch

main branch https://github.com/open-mmlab/mmdetection3d

Environment

I want to evaluate a 3D Visual Grounding model. However, when I run
test.py for 3D Visual Grounding evaluation, it fails with an error
about the annotations.

What should I do to run evaluation using image features?

Thank you,
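From the traceback below, the failure appears to be the test pipeline not
writing point-cloud augmentation keys (e.g. pcd_rotation) into img_meta,
since no rotation/scale augmentation runs at test time. A minimal
diagnostic sketch to confirm which keys are absent; the key names other
than pcd_rotation are assumptions based on standard mmdetection3d
metainfo, and the loop assumes access to batch_img_metas inside
extract_feat:

    # Hypothetical check, dropped into extract_feat before the fusion
    # loop: list which augmentation keys each img_meta lacks.
    expected_keys = [
        'pcd_rotation', 'pcd_scale_factor', 'pcd_trans',
        'pcd_horizontal_flip', 'pcd_vertical_flip',
        'transformation_3d_flow',
    ]
    for idx, img_meta in enumerate(batch_img_metas):
        missing = [k for k in expected_keys if k not in img_meta]
        print(f'sample {idx}: missing keys: {missing}')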

Reproduces the problem - code sample

        for idx in range(len(batch_img_metas)):
            img_meta = batch_img_metas[idx]
            img_scale_factor = (
                img.new_tensor(img_meta["scale_factor"][:2])
                if "scale_factor" in img_meta.keys()
                else 1
            )
            img_flip = img_meta["flip"] if "flip" in img_meta.keys() else False
            img_crop_offset = (
                img.new_tensor(img_meta["img_crop_offset"])
                if "img_crop_offset" in img_meta.keys()
                else 0
            )
            proj_mat = get_proj_mat_by_coord_type(img_meta, self.coord_type)
            # Multi-View Sparse Fusion
            if isinstance(proj_mat, dict):
                assert "extrinsic" in proj_mat.keys()
                assert "intrinsic" in proj_mat.keys()
                projection = []
                # Support different intrinsic matrices for different images
                # if the original intrinsic is only a matrix
                # we will simply copy it to construct the intrinsic matrix list
                # in MultiViewPipeline
                assert isinstance(proj_mat["intrinsic"], list)
                for proj_idx in range(len(proj_mat["extrinsic"])):
                    intrinsic = img.new_tensor(proj_mat["intrinsic"][proj_idx])
                    extrinsic = img.new_tensor(proj_mat["extrinsic"][proj_idx])
                    projection.append(intrinsic @ extrinsic)
                proj_mat = torch.stack(projection)
                points_imgfeats = []
                for level_idx in range(num_levels):
                    point = x[level_idx].decomposed_coordinates[idx] * self.voxel_size
                    points_imgfeat = batch_point_sample(
                        img_meta,
                        img_features=img_features[level_idx][idx],
                        points=point,
                        proj_mat=proj_mat,
                        coord_type=self.coord_type,
                        img_scale_factor=img_scale_factor,
                        img_crop_offset=img_crop_offset,
                        img_flip=img_flip,
                        img_pad_shape=img.shape[-2:],
                        img_shape=img_meta["img_shape"][:2],
                        aligned=False,
                    )
                    points_imgfeats.append(points_imgfeat)  # one sample, all levels
            else:
                feature = img_features[idx]
                proj_mat = points.new_tensor(proj_mat)
                points_imgfeats = []
                for level_idx in range(num_levels):
                    point = x[level_idx].decomposed_coordinates[idx] * self.voxel_size
                    points_imgfeat = point_sample(
                        img_meta,
                        img_features=feature[None, ...],
                        points=point,
                        proj_mat=point.new_tensor(proj_mat),
                        coord_type="CAMERA",
                        img_scale_factor=img_scale_factor,
                        img_crop_offset=img_crop_offset,
                        img_flip=img_flip,
                        img_pad_shape=img.shape[-2:],
                        img_shape=img_meta["img_shape"][:2],
                        aligned=False,
                    )
                    points_imgfeats.append(points_imgfeat)  # one sample, all levels
            all_points_imgfeats.append(points_imgfeats)  # all samples, all levels

        # append img features
        for level_idx in range(num_levels):
            mlvl_feats = torch.cat([
                all_points_imgfeats[sample_idx][level_idx]
                for sample_idx in range(num_samples)
            ])
            img_x = ME.SparseTensor(
                features=mlvl_feats,
                coordinate_map_key=x[level_idx].coordinate_map_key,
                coordinate_manager=x[level_idx].coordinate_manager,
            )
            x[level_idx] = ME.cat(x[level_idx], img_x)

        # channel mapper feature of different level to the fixed number
        feats, scores, coords = self.neck_3d(x, batch_size)
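For context, the intrinsic @ extrinsic product built in the multi-view
branch above maps a 3D point into pixel coordinates. A standalone sketch
of that projection, with stand-in identity matrices and a 4x4 homogeneous
convention assumed:

    import torch

    intrinsic = torch.eye(4)  # stand-in 4x4 camera intrinsic
    extrinsic = torch.eye(4)  # stand-in 4x4 world-to-camera extrinsic
    point = torch.tensor([1.0, 2.0, 3.0])

    # Project in homogeneous coordinates, then divide by depth to get
    # the pixel location of the point in this view.
    point_h = torch.cat([point, point.new_ones(1)])
    uvd = intrinsic @ extrinsic @ point_h
    u, v = uvd[0] / uvd[2], uvd[1] / uvd[2]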

Reproduces the problem - command or script

python tools/test.py configs/grounding/mv-grounding_8xb12_embodiedscan-vg-9dof.py work_dirs/mv-3dvg/epoch_12.pth

Reproduces the problem - error message

Traceback (most recent call last):
  File "tools/test.py", line 167, in <module>
    main()
  File "tools/test.py", line 163, in main
    runner.test()
  File "/opt/conda/envs/embodiedscan-env/lib/python3.8/site-packages/mmengine/runner/runner.py", line 1823, in test
    metrics = self.test_loop.run()  # type: ignore
  File "/opt/conda/envs/embodiedscan-env/lib/python3.8/site-packages/mmengine/runner/loops.py", line 463, in run
    self.run_iter(idx, data_batch)
  File "/opt/conda/envs/embodiedscan-env/lib/python3.8/site-packages/torch/autograd/grad_mode.py", line 27, in decorate_context
    return func(*args, **kwargs)
  File "/opt/conda/envs/embodiedscan-env/lib/python3.8/site-packages/mmengine/runner/loops.py", line 487, in run_iter
    outputs = self.runner.model.test_step(data_batch)
  File "/opt/conda/envs/embodiedscan-env/lib/python3.8/site-packages/mmengine/model/base_model/base_model.py", line 145, in test_step
    return self._run_forward(data, mode='predict')  # type: ignore
  File "/opt/conda/envs/embodiedscan-env/lib/python3.8/site-packages/mmengine/model/base_model/base_model.py", line 361, in _run_forward
    results = self(**data, mode=mode)
  File "/opt/conda/envs/embodiedscan-env/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1110, in _call_impl
    return forward_call(*input, **kwargs)
  File "/embodiedscan_dev/embodiedscan/models/detectors/sparse_featfusion_captioner.py", line 360, in forward
    return self.predict(
  File "/embodiedscan_dev/embodiedscan/models/detectors/sparse_featfusion_captioner.py", line 476, in predict
    point_feats, logit_scores, point_xyz = self.extract_feat(
  File "/embodiedscan_dev/embodiedscan/models/detectors/sparse_featfusion_captioner.py", line 211, in extract_feat
    points_imgfeat = batch_point_sample(
  File "/embodiedscan_dev/embodiedscan/models/layers/fusion_layers/point_fusion.py", line 275, in batch_point_sample
    points = apply_3d_transformation(points, coord_type, img_meta, reverse=True)
  File "/embodiedscan_dev/embodiedscan/models/layers/fusion_layers/point_fusion.py", line 52, in apply_3d_transformation
    if isinstance(img_meta["pcd_rotation"], torch.Tensor)
KeyError: 'pcd_rotation'
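The KeyError suggests apply_3d_transformation reads the augmentation
metadata unconditionally, while the test pipeline never sets it. A
possible workaround (a sketch, not the official fix) is to backfill
identity values before sampling; the helper name and the default key set
are assumptions based on standard mmdetection3d metainfo:

    import torch

    def backfill_aug_meta(img_meta, ref_tensor):
        """Hypothetical helper: insert identity augmentation values
        for keys the test pipeline does not produce."""
        defaults = {
            'pcd_rotation': torch.eye(
                3, dtype=ref_tensor.dtype, device=ref_tensor.device),
            'pcd_scale_factor': 1.0,
            'pcd_trans': ref_tensor.new_zeros(3),
            'pcd_horizontal_flip': False,
            'pcd_vertical_flip': False,
            'transformation_3d_flow': [],
        }
        for key, value in defaults.items():
            img_meta.setdefault(key, value)
        return img_meta

    # e.g. in extract_feat, before calling batch_point_sample:
    # img_meta = backfill_aug_meta(img_meta, point)

Alternatively, guarding the lookups inside apply_3d_transformation with
identity defaults (as upstream mmdetection3d's point_fusion does) would
avoid touching the detector code.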

Additional information

No response
