# Source code for alr.modules.dropout

r"""
Modify dropout :class:`torch.nn.Module` subclasses so that they are *always* active, in both training and inference.
The classes in this module are taken from `PyTorch <https://github.com/pytorch/pytorch/tree/master/torch>`_ *as-is*.
The main function you should be concerned with is :func:`replace_dropout`.
"""
import torch
import torch.nn.functional as F
import copy
import sys
import inspect
import re
import warnings

from torch.nn.modules.dropout import _DropoutNd
from typing import Optional

# The Dropout classes below are taken as-is from torch


class PersistentDropout(_DropoutNd):
    r"""Element-wise dropout that is applied in training *and* evaluation mode.

    Adapted from :class:`torch.nn.Dropout`. On every forward call the elements
    of the input tensor are zeroed with probability :attr:`p` using samples
    from a Bernoulli distribution, and the outputs are scaled by a factor of
    :math:`\frac{1}{1-p}`.
    This has proven to be an effective technique for regularization and
    preventing the co-adaptation of neurons as described in the paper
    `Improving neural networks by preventing co-adaptation of feature
    detectors`_ .

    Unlike :class:`torch.nn.Dropout`, the ``training`` flag passed to
    :func:`torch.nn.functional.dropout` is hard-coded to ``True``, so this
    module does **not** turn into an identity function after ``model.eval()``.

    Args:
        p: probability of an element to be zeroed. Default: 0.5
        inplace: If set to ``True``, will do this operation in-place. Default: ``False``

    Shape:
        - Input: :math:`(*)`. Input can be of any shape
        - Output: :math:`(*)`. Output is of the same shape as input

    Examples::
        >>> m = PersistentDropout(p=0.2).eval()
        >>> input = torch.randn(20, 16)
        >>> output = m(input)  # dropout is still applied in eval mode

    .. _Improving neural networks by preventing co-adaptation of feature
        detectors: https://arxiv.org/abs/1207.0580
    """

    def forward(self, input):
        """Apply dropout to ``input`` regardless of ``self.training``."""
        # training=True is deliberate: it keeps dropout active in eval mode.
        return F.dropout(input, p=self.p, training=True, inplace=self.inplace)


class PersistentDropout2d(_DropoutNd):
    r"""Channel-wise 2D dropout that is applied in training *and* evaluation mode.

    Adapted from :class:`torch.nn.Dropout2d`. Randomly zeroes out entire
    channels (a channel is a 2D feature map, e.g., the :math:`j`-th channel of
    the :math:`i`-th sample in the batched input is a 2D tensor
    :math:`\text{input}[i, j]`).
    Each channel will be zeroed out independently on every forward call with
    probability :attr:`p` using samples from a Bernoulli distribution.
    Usually the input comes from :class:`nn.Conv2d` modules.

    As described in the paper
    `Efficient Object Localization Using Convolutional Networks`_ ,
    if adjacent pixels within feature maps are strongly correlated
    (as is normally the case in early convolution layers) then i.i.d. dropout
    will not regularize the activations and will otherwise just result
    in an effective learning rate decrease. Dropping whole feature maps helps
    promote independence between them.

    Unlike :class:`torch.nn.Dropout2d`, the ``training`` flag passed to
    :func:`torch.nn.functional.dropout2d` is hard-coded to ``True``, so
    channels keep being dropped after ``model.eval()``.

    Args:
        p (float, optional): probability of an element to be zeroed.
        inplace (bool, optional): If set to ``True``, will do this operation in-place

    Shape:
        - Input: :math:`(N, C, H, W)`
        - Output: :math:`(N, C, H, W)` (same shape as input)

    Examples::
        >>> m = PersistentDropout2d(p=0.2).eval()
        >>> input = torch.randn(20, 16, 32, 32)
        >>> output = m(input)  # channels are still dropped in eval mode

    .. _Efficient Object Localization Using Convolutional Networks:
       http://arxiv.org/abs/1411.4280
    """

    def forward(self, input):
        """Apply channel-wise dropout to ``input`` regardless of ``self.training``."""
        # training=True is deliberate: it keeps dropout active in eval mode.
        return F.dropout2d(input, p=self.p, training=True, inplace=self.inplace)


class PersistentDropout3d(_DropoutNd):
    r"""Channel-wise 3D dropout that is applied in training *and* evaluation mode.

    Adapted from :class:`torch.nn.Dropout3d`. Randomly zeroes out entire
    channels (a channel is a 3D feature map, e.g., the :math:`j`-th channel of
    the :math:`i`-th sample in the batched input is a 3D tensor
    :math:`\text{input}[i, j]`).
    Each channel will be zeroed out independently on every forward call with
    probability :attr:`p` using samples from a Bernoulli distribution.
    Usually the input comes from :class:`nn.Conv3d` modules.

    As described in the paper
    `Efficient Object Localization Using Convolutional Networks`_ ,
    if adjacent pixels within feature maps are strongly correlated
    (as is normally the case in early convolution layers) then i.i.d. dropout
    will not regularize the activations and will otherwise just result
    in an effective learning rate decrease. Dropping whole feature maps helps
    promote independence between them.

    Unlike :class:`torch.nn.Dropout3d`, the ``training`` flag passed to
    :func:`torch.nn.functional.dropout3d` is hard-coded to ``True``, so
    channels keep being dropped after ``model.eval()``.

    Args:
        p (float, optional): probability of an element to be zeroed.
        inplace (bool, optional): If set to ``True``, will do this operation in-place

    Shape:
        - Input: :math:`(N, C, D, H, W)`
        - Output: :math:`(N, C, D, H, W)` (same shape as input)

    Examples::
        >>> m = PersistentDropout3d(p=0.2).eval()
        >>> input = torch.randn(20, 16, 4, 32, 32)
        >>> output = m(input)  # channels are still dropped in eval mode

    .. _Efficient Object Localization Using Convolutional Networks:
       http://arxiv.org/abs/1411.4280
    """

    def forward(self, input):
        """Apply channel-wise dropout to ``input`` regardless of ``self.training``."""
        # training=True is deliberate: it keeps dropout active in eval mode.
        return F.dropout3d(input, p=self.p, training=True, inplace=self.inplace)


class PersistentAlphaDropout(_DropoutNd):
    r"""Alpha Dropout that is applied in training *and* evaluation mode.

    Adapted from :class:`torch.nn.AlphaDropout`. Alpha Dropout is a type of
    Dropout that maintains the self-normalizing property: for an input with
    zero mean and unit standard deviation, the output of Alpha Dropout
    maintains the original mean and standard deviation of the input.
    Alpha Dropout goes hand-in-hand with the SELU activation function, which
    ensures that the outputs have zero mean and unit standard deviation.

    On every forward call it randomly masks some of the elements of the input
    tensor with probability *p* using samples from a Bernoulli distribution.
    The elements to be masked are randomized on every forward call, and scaled
    and shifted to maintain zero mean and unit standard deviation.
    More details can be found in the paper `Self-Normalizing Neural Networks`_ .

    Unlike :class:`torch.nn.AlphaDropout`, the ``training`` flag passed to
    :func:`torch.nn.functional.alpha_dropout` is hard-coded to ``True``, so
    this module does **not** turn into an identity function after
    ``model.eval()``.

    Args:
        p (float): probability of an element to be dropped. Default: 0.5
        inplace (bool, optional): stored on the module, but not forwarded to
            :func:`torch.nn.functional.alpha_dropout` by :meth:`forward`.

    Shape:
        - Input: :math:`(*)`. Input can be of any shape
        - Output: :math:`(*)`. Output is of the same shape as input

    Examples::
        >>> m = PersistentAlphaDropout(p=0.2).eval()
        >>> input = torch.randn(20, 16)
        >>> output = m(input)  # alpha dropout is still applied in eval mode

    .. _Self-Normalizing Neural Networks: https://arxiv.org/abs/1706.02515
    """

    def forward(self, input):
        """Apply alpha dropout to ``input`` regardless of ``self.training``."""
        # training=True is deliberate: it keeps dropout active in eval mode.
        # self.inplace is intentionally left out, as in the original call.
        return F.alpha_dropout(input, p=self.p, training=True)


class PersistentFeatureAlphaDropout(_DropoutNd):
    r"""Feature Alpha Dropout that is applied in training *and* evaluation mode.

    Thin wrapper around :func:`torch.nn.functional.feature_alpha_dropout` whose
    ``training`` flag is hard-coded to ``True``, so the layer keeps dropping
    after ``model.eval()``.

    Args:
        p (float, optional): dropout probability handed to
            :func:`torch.nn.functional.feature_alpha_dropout`.
        inplace (bool, optional): stored on the module, but not forwarded to
            the functional call by :meth:`forward`.
    """

    def forward(self, input):
        """Apply feature alpha dropout to ``input`` regardless of ``self.training``."""
        # training=True is deliberate: it keeps dropout active in eval mode.
        return F.feature_alpha_dropout(input, p=self.p, training=True)


def _replace_dropout(parent, prefix):
    """Recursively swap dropout layers below ``parent`` for ``prefix``-named variants.

    Every child that is an instance of ``_DropoutNd`` is replaced, under the
    same child name, by an instance of the class called
    ``prefix + type(child).__name__`` looked up in this module (for example
    ``"Persistent" + "Dropout2d"``). Children that are not dropout layers are
    traversed recursively.

    Layers that are already instances of a ``prefix``-named class from this
    module are left untouched, so running the replacement twice on the same
    model is a no-op rather than an error.

    Args:
        parent (torch.nn.Module): module whose children are modified in place.
        prefix (str): class-name prefix of the replacement family. ``inplace``
            is only forwarded to the replacement when the prefix is
            ``"persistent"`` (compared case-insensitively).

    Raises:
        NotImplementedError: if this module defines no class named
            ``prefix + type(child).__name__`` for some dropout child.
    """
    namespace = sys.modules[__name__]
    for name, mod in parent.named_children():
        if not isinstance(mod, _DropoutNd):
            _replace_dropout(mod, prefix)
            continue

        cls_name = type(mod).__name__
        # Already converted by an earlier pass: keep the layer as it is.
        if cls_name.startswith(prefix) and getattr(namespace, cls_name, None) is type(mod):
            continue

        # Resolve the class first so that only a missing class is reported as
        # "not implemented", not an AttributeError raised by its constructor.
        replacement_cls = getattr(namespace, prefix + cls_name, None)
        if replacement_cls is None:
            raise NotImplementedError(f"{cls_name} hasn't been implemented yet.")

        kwargs = dict(p=mod.p)
        if prefix.lower() == "persistent":
            kwargs["inplace"] = mod.inplace
        # replace dropout module with one that always does dropout regardless of the model's mode
        parent.add_module(name, replacement_cls(**kwargs))


def replace_dropout(
    module: torch.nn.Module, inplace: Optional[bool] = True
) -> torch.nn.Module:
    r"""
    Recursively replaces dropout modules in `module` such that dropout is performed
    regardless of the model's mode. That is, dropout is performed during training
    (`module.train()`) and inference (`module.eval()`) modes.

    After the replacement, the source of `module.forward` is scanned and a
    `UserWarning` is emitted if it appears to call a functional dropout, since
    such calls are not affected by the replacement.

    Args:
        module (`torch.nn.Module`): PyTorch module object
        inplace (bool, optional): If `True` (default), `module` is modified *in-place*.
            If falsy, `module` is left untouched and a deep copy of it is modified instead.

    Returns:
        `torch.nn.Module`: The same `module` instance if `inplace` is `True`,
        else a modified deep copy.

    Raises:
        NotImplementedError: if a dropout layer has no ``Persistent*`` counterpart in this module.
    """
    if not inplace:
        # Work on a clone so that the caller's model keeps its original layers.
        module = copy.deepcopy(module)
    _replace_dropout(module, prefix="Persistent")
    _inspect_forward(module)
    return module


def _inspect_forward(module: torch.nn.Module):
    """Warn if ``module.forward`` appears to call a functional dropout.

    The source of ``module.forward`` is fetched with :func:`inspect.getsource`,
    stripped of all whitespace and searched for a call that looks like
    ``dropout(...)`` or ``dropout<N>d(...)``. The match is purely textual, so
    it cannot tell a functional call from e.g. ``self.dropout(x)``; it is a
    hint, not a guarantee. Only the ``forward`` of ``module`` itself is
    inspected, not those of its children.

    The check is best-effort: when the source cannot be retrieved (for example
    code defined through ``exec`` or a callable without Python source), it is
    skipped silently instead of raising.

    Args:
        module (torch.nn.Module): module whose ``forward`` is inspected.

    Warns:
        UserWarning: if a dropout-like call is found in the source.
    """
    try:
        src = inspect.getsource(module.forward)
    except (OSError, TypeError):
        # No retrievable source: nothing to inspect, and a diagnostic helper
        # must not break the replacement that has already been performed.
        return
    # Drop all whitespace so that calls split across lines still match.
    src = re.sub(r"\s", "", src)
    if re.search(r"dropout(\dd)?\(.*\)", src):
        warnings.warn(
            "Found usage of non-module dropout in module's forward function."
            " Please make sure that the training flag is set to True during eval mode too.",
            UserWarning,
        )


# Both consistent dropout implementations were taken from ElementAI's baal repository with minor modifications.
# see their licence here: https://github.com/ElementAI/baal/blob/master/LICENSE
class ConsistentDropout(_DropoutNd):
    """
    Dropout whose evaluation-time mask is sampled once and then reused.

    In training mode this behaves like regular element-wise dropout. In
    evaluation mode a mask with the same shape as the input is sampled on the
    first forward call and cached; subsequent calls with an input of the same
    shape reuse it. The masks are therefore the same between batches during
    inference, while the items inside a batch each get a different mask.
    The cached mask is discarded whenever the module is switched to evaluation
    mode (``eval()`` / ``train(False)``) or :meth:`reset` is called, and it is
    re-sampled when the input shape or device changes.

    ConsistentDropout is useful when doing research.
    This is slower than using regular Dropout, but it is useful
    when you want to use the same set of weights for each sample used in inference.
    From BatchBALD (Kirsch et al, 2019), this is necessary to use BatchBALD and remove noise
    from the prediction.

    Args:
        p (float): probability of an element to be zeroed. Default: 0.5

    Notes:
        The notes below are inherited from the baal implementation this class
        was adapted from; ``ModelWrapper`` refers to baal, not to this module.

        For optimal results, you should use a batch size of one
        during inference time.
        Furthermore, to guarantee that each sample uses the same
        set of weights,
        you must use `replicate_in_memory=True` in ModelWrapper,
        which is the default.
    """

    def __init__(self, p=0.5):
        super().__init__(p=p, inplace=False)
        self.reset()

    def forward(self, x):
        """Apply fresh dropout in training mode, the cached mask otherwise."""
        if self.training:
            return F.dropout(x, self.p, training=True, inplace=False)
        mask = self._mask
        # Re-sample when there is no usable mask: none cached yet, a different
        # input shape, or an input that lives on another device than the mask.
        if mask is None or mask.shape != x.shape or mask.device != x.device:
            self._mask = self._make_mask(x)
        return torch.mul(x, self._mask)

    def _make_mask(self, x):
        """Sample a dropout mask (already scaled by dropout) shaped like ``x``."""
        return F.dropout(torch.ones_like(x, device=x.device), self.p, training=True)

    def reset(self):
        """Discard the cached mask; the next evaluation forward samples a new one."""
        self._mask = None

    def eval(self):
        """Switch to evaluation mode, discard the cached mask and return ``self``."""
        self.reset()
        return super().eval()

    def train(self, mode=True):
        """Set the training mode and return ``self``.

        The cached mask is discarded when ``mode`` is falsy. Returning ``self``
        matches :meth:`torch.nn.Module.train` and is what makes :meth:`eval`
        (which delegates to ``train(False)``) return the module as well.
        """
        super().train(mode)
        if not mode:
            self.reset()
        return self


class ConsistentDropout2d(_DropoutNd):
    """
    Channel-wise (2D) dropout whose evaluation-time mask is sampled once and then reused.

    In training mode this behaves like regular ``dropout2d``. In evaluation
    mode a mask with the same shape as the input is sampled with ``dropout2d``
    on the first forward call and cached; subsequent calls with an input of the
    same shape reuse it. The masks are therefore the same between batches,
    while the items inside a batch each get a different mask.
    The cached mask is discarded whenever the module is switched to evaluation
    mode (``eval()`` / ``train(False)``) or :meth:`reset` is called, and it is
    re-sampled when the input shape or device changes.

    ConsistentDropout is useful when doing research.
    This is slower than using regular Dropout, but it is useful
    when you want to use the same set of weights for each unlabelled sample.

    Args:
        p (float): probability of an element to be zeroed. Default: 0.5

    Notes:
        The notes below are inherited from the baal implementation this class
        was adapted from; ``ModelWrapper`` refers to baal, not to this module.

        For optimal results, you should use a batch size of one
        during inference time.
        Furthermore, to guarantee that each sample uses the same
        set of weights,
        you must use `replicate_in_memory=True` in ModelWrapper,
        which is the default.
    """

    def __init__(self, p=0.5):
        super().__init__(p=p, inplace=False)
        self.reset()

    def forward(self, x):
        """Apply fresh channel dropout in training mode, the cached mask otherwise."""
        if self.training:
            return F.dropout2d(x, self.p, training=True, inplace=False)
        mask = self._mask
        # Re-sample when there is no usable mask: none cached yet, a different
        # input shape, or an input that lives on another device than the mask.
        if mask is None or mask.shape != x.shape or mask.device != x.device:
            self._mask = self._make_mask(x)
        return torch.mul(x, self._mask)

    def _make_mask(self, x):
        """Sample a ``dropout2d`` mask (already scaled by dropout) shaped like ``x``."""
        return F.dropout2d(torch.ones_like(x, device=x.device), self.p, training=True)

    def reset(self):
        """Discard the cached mask; the next evaluation forward samples a new one."""
        self._mask = None

    def eval(self):
        """Switch to evaluation mode, discard the cached mask and return ``self``."""
        self.reset()
        return super().eval()

    def train(self, mode=True):
        """Set the training mode and return ``self``.

        The cached mask is discarded when ``mode`` is falsy. Returning ``self``
        matches :meth:`torch.nn.Module.train` and is what makes :meth:`eval`
        (which delegates to ``train(False)``) return the module as well.
        """
        super().train(mode)
        if not mode:
            self.reset()
        return self


def replace_consistent_dropout(
    module: torch.nn.Module, inplace: Optional[bool] = True
) -> torch.nn.Module:
    r"""
    Recursively replaces dropout modules in `module` with their ``Consistent*``
    counterparts, such that dropout is performed regardless of the model's mode
    *and the same mask is used across batches during inference*.
    The mask is refreshed each time `module.eval()` is invoked but the mask is guaranteed
    to be consistent across all batches (different masks for each item *within* the batch).

    After the replacement, the source of `module.forward` is scanned and a
    `UserWarning` is emitted if it appears to call a functional dropout, since
    such calls are not affected by the replacement.

    Args:
        module (`torch.nn.Module`): PyTorch module object
        inplace (bool, optional): If `True` (default), `module` is modified *in-place*.
            If falsy, `module` is left untouched and a deep copy of it is modified instead.

    Returns:
        `torch.nn.Module`: The same `module` instance if `inplace` is `True`,
        else a modified deep copy.

    Raises:
        NotImplementedError: if a dropout layer has no ``Consistent*`` counterpart in this module.
    """
    if not inplace:
        # Work on a clone so that the caller's model keeps its original layers.
        module = copy.deepcopy(module)
    _replace_dropout(module, prefix="Consistent")
    _inspect_forward(module)
    return module