# Source code for mmselfsup.models.algorithms.pixmim

# Copyright (c) OpenMMLab. All rights reserved.
from typing import Dict, List

import torch

from mmselfsup.registry import MODELS
from mmselfsup.structures import SelfSupDataSample
from .mae import MAE


@MODELS.register_module()
class PixMIM(MAE):
    """The official implementation of PixMIM.

    Implementation of `PixMIM: Rethinking Pixel Reconstruction in
    Masked Image Modeling <https://arxiv.org/pdf/2303.02416.pdf>`_.

    This class only overrides :meth:`loss`; everything else is inherited
    from ``MAE``. Please refer to MAE for the initialization arguments.
    """

    def loss(self, inputs: List[torch.Tensor],
             data_samples: List[SelfSupDataSample],
             **kwargs) -> Dict[str, torch.Tensor]:
        """The forward function in training.

        Args:
            inputs (List[torch.Tensor]): The input images. Only
                ``inputs[0]`` is read by this method.
            data_samples (List[SelfSupDataSample]): All elements required
                during the forward function. Not read by this method.
            **kwargs: Accepted for interface compatibility; not read by
                this method.

        Returns:
            Dict[str, torch.Tensor]: A dictionary of loss components. It
            holds a single entry, ``loss``, as returned by ``self.head``.
        """
        images = inputs[0]

        # The reconstruction target is produced from the same images that
        # are fed to the backbone, rather than using the images directly.
        # NOTE(review): ``target_generator`` is not defined in this class;
        # presumably it is set up by the inherited constructor - confirm.
        low_freq_targets = self.target_generator(images)

        # ids_restore: the same as that in original repo, which is used
        # to recover the original order of tokens in decoder.
        latent, mask, ids_restore = self.backbone(images)
        pred = self.neck(latent, ids_restore)

        loss = self.head(pred, low_freq_targets, mask)
        return dict(loss=loss)