# Source code for mmselfsup.models.algorithms.milan
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Dict, List
import torch
from mmselfsup.registry import MODELS
from mmselfsup.structures import SelfSupDataSample
from .base import BaseModel
@MODELS.register_module()
class MILAN(BaseModel):
    """MILAN.

    Implementation of `MILAN: Masked Image Pretraining on Language Assisted
    Representation <https://arxiv.org/abs/2208.06049>`_.
    """

    def loss(self, inputs: List[torch.Tensor],
             data_samples: List[SelfSupDataSample],
             **kwargs) -> Dict[str, torch.Tensor]:
        """The forward function in training.

        Runs the target generator, backbone, neck and head in sequence on
        the first element of ``inputs`` and wraps the head's output in a
        dictionary.

        Args:
            inputs (List[torch.Tensor]): The input images. Only the first
                element, ``inputs[0]``, is consumed by this method.
            data_samples (List[SelfSupDataSample]): All elements required
                during the forward function. Not read by this method.
            **kwargs: Extra keyword arguments; not used by this method.

        Returns:
            Dict[str, torch.Tensor]: A dictionary of loss components. It
            holds a single entry, the head's output, under the key
            ``loss``.
        """
        images = inputs[0]

        # The target generator returns two values for the image batch: the
        # feature that the head later compares the prediction against, and
        # an importance tensor that is handed to the backbone.
        clip_feature, importance = self.target_generator(images)

        # Keep index 0 along dim 1 and drop the first entry of the last dim.
        # NOTE(review): presumably this selects the class token's attention
        # over the patch tokens -- confirm against the target generator.
        importance = importance[:, 0, 1:]

        # ids_restore: the same as that in original repo, which is used
        # to recover the original order of tokens in decoder.
        latent, ids_restore, ids_keep, ids_dump = self.backbone(
            images, importance)
        pred = self.neck(latent, ids_restore, ids_keep, ids_dump)

        return dict(loss=self.head(pred, clip_feature))