Shortcuts

Source code for torch.nn.modules.normalization

# mypy: allow-untyped-defs
import numbers
from typing import Optional, Union

import torch
from torch import Size, Tensor
from torch.nn import functional as F, init
from torch.nn.parameter import Parameter

from ._functions import CrossMapLRN2d as _cross_map_lrn2d
from .module import Module


__all__ = ["LocalResponseNorm", "CrossMapLRN2d", "LayerNorm", "GroupNorm", "RMSNorm"]


[docs]class LocalResponseNorm(Module): r"""Applies local response normalization over an input signal. The input signal is composed of several input planes, where channels occupy the second dimension. Applies normalization across channels. .. math:: b_{c} = a_{c}\left(k + \frac{\alpha}{n} \sum_{c'=\max(0, c-n/2)}^{\min(N-1,c+n/2)}a_{c'}^2\right)^{-\beta} Args: size: amount of neighbouring channels used for normalization alpha: multiplicative factor. Default: 0.0001 beta: exponent. Default: 0.75 k: additive factor. Default: 1 Shape: - Input: :math:`(N, C, *)` - Output: :math:`(N, C, *)` (same shape as input) Examples:: >>> lrn = nn.LocalResponseNorm(2) >>> signal_2d = torch.randn(32, 5, 24, 24) >>> signal_4d = torch.randn(16, 5, 7, 7, 7, 7) >>> output_2d = lrn(signal_2d) >>> output_4d = lrn(signal_4d) """ __constants__ = ["size", "alpha", "beta", "k"] size: int alpha: float beta: float k: float def __init__( self, size: int, alpha: float = 1e-4, beta: float = 0.75, k: float = 1.0 ) -> None: super().__init__() self.size = size self.alpha = alpha self.beta = beta self.k = k def forward(self, input: Tensor) -> Tensor: return F.local_response_norm(input, self.size, self.alpha, self.beta, self.k) def extra_repr(self): return "{size}, alpha={alpha}, beta={beta}, k={k}".format(**self.__dict__)
class CrossMapLRN2d(Module): size: int alpha: float beta: float k: float def __init__( self, size: int, alpha: float = 1e-4, beta: float = 0.75, k: float = 1 ) -> None: super().__init__() self.size = size self.alpha = alpha self.beta = beta self.k = k def forward(self, input: Tensor) -> Tensor: return _cross_map_lrn2d.apply(input, self.size, self.alpha, self.beta, self.k) def extra_repr(self) -> str: return "{size}, alpha={alpha}, beta={beta}, k={k}".format(**self.__dict__) _shape_t = Union[int, list[int], Size]
[docs]class LayerNorm(Module): r"""Applies Layer Normalization over a mini-batch of inputs. This layer implements the operation as described in the paper `Layer Normalization <https://arxiv.org/abs/1607.06450>`__ .. math:: y = \frac{x - \mathrm{E}[x]}{ \sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta The mean and standard-deviation are calculated over the last `D` dimensions, where `D` is the dimension of :attr:`normalized_shape`. For example, if :attr:`normalized_shape` is ``(3, 5)`` (a 2-dimensional shape), the mean and standard-deviation are computed over the last 2 dimensions of the input (i.e. ``input.mean((-2, -1))``). :math:`\gamma` and :math:`\beta` are learnable affine transform parameters of :attr:`normalized_shape` if :attr:`elementwise_affine` is ``True``. The variance is calculated via the biased estimator, equivalent to `torch.var(input, unbiased=False)`. .. note:: Unlike Batch Normalization and Instance Normalization, which applies scalar scale and bias for each entire channel/plane with the :attr:`affine` option, Layer Normalization applies per-element scale and bias with :attr:`elementwise_affine`. This layer uses statistics computed from input data in both training and evaluation modes. Args: normalized_shape (int or list or torch.Size): input shape from an expected input of size .. math:: [* \times \text{normalized\_shape}[0] \times \text{normalized\_shape}[1] \times \ldots \times \text{normalized\_shape}[-1]] If a single integer is used, it is treated as a singleton list, and this module will normalize over the last dimension which is expected to be of that specific size. eps: a value added to the denominator for numerical stability. Default: 1e-5 elementwise_affine: a boolean value that when set to ``True``, this module has learnable per-element affine parameters initialized to ones (for weights) and zeros (for biases). Default: ``True``. bias: If set to ``False``, the layer will not learn an additive bias (only relevant if :attr:`elementwise_affine` is ``True``). Default: ``True``. Attributes: weight: the learnable weights of the module of shape :math:`\text{normalized\_shape}` when :attr:`elementwise_affine` is set to ``True``. The values are initialized to 1. bias: the learnable bias of the module of shape :math:`\text{normalized\_shape}` when :attr:`elementwise_affine` is set to ``True``. The values are initialized to 0. Shape: - Input: :math:`(N, *)` - Output: :math:`(N, *)` (same shape as input) Examples:: >>> # NLP Example >>> batch, sentence_length, embedding_dim = 20, 5, 10 >>> embedding = torch.randn(batch, sentence_length, embedding_dim) >>> layer_norm = nn.LayerNorm(embedding_dim) >>> # Activate module >>> layer_norm(embedding) >>> >>> # Image Example >>> N, C, H, W = 20, 5, 10, 10 >>> input = torch.randn(N, C, H, W) >>> # Normalize over the last three dimensions (i.e. the channel and spatial dimensions) >>> # as shown in the image below >>> layer_norm = nn.LayerNorm([C, H, W]) >>> output = layer_norm(input) .. image:: ../_static/img/nn/layer_norm.jpg :scale: 50 % """ __constants__ = ["normalized_shape", "eps", "elementwise_affine"] normalized_shape: tuple[int, ...] eps: float elementwise_affine: bool def __init__( self, normalized_shape: _shape_t, eps: float = 1e-5, elementwise_affine: bool = True, bias: bool = True, device=None, dtype=None, ) -> None: factory_kwargs = {"device": device, "dtype": dtype} super().__init__() if isinstance(normalized_shape, numbers.Integral): # mypy error: incompatible types in assignment normalized_shape = (normalized_shape,) # type: ignore[assignment] self.normalized_shape = tuple(normalized_shape) # type: ignore[arg-type] self.eps = eps self.elementwise_affine = elementwise_affine if self.elementwise_affine: self.weight = Parameter( torch.empty(self.normalized_shape, **factory_kwargs) ) if bias: self.bias = Parameter( torch.empty(self.normalized_shape, **factory_kwargs) ) else: self.register_parameter("bias", None) else: self.register_parameter("weight", None) self.register_parameter("bias", None) self.reset_parameters() def reset_parameters(self) -> None: if self.elementwise_affine: init.ones_(self.weight) if self.bias is not None: init.zeros_(self.bias) def forward(self, input: Tensor) -> Tensor: return F.layer_norm( input, self.normalized_shape, self.weight, self.bias, self.eps ) def extra_repr(self) -> str: return ( "{normalized_shape}, eps={eps}, " "elementwise_affine={elementwise_affine}".format(**self.__dict__) )
[docs]class GroupNorm(Module): r"""Applies Group Normalization over a mini-batch of inputs. This layer implements the operation as described in the paper `Group Normalization <https://arxiv.org/abs/1803.08494>`__ .. math:: y = \frac{x - \mathrm{E}[x]}{ \sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta The input channels are separated into :attr:`num_groups` groups, each containing ``num_channels / num_groups`` channels. :attr:`num_channels` must be divisible by :attr:`num_groups`. The mean and standard-deviation are calculated separately over the each group. :math:`\gamma` and :math:`\beta` are learnable per-channel affine transform parameter vectors of size :attr:`num_channels` if :attr:`affine` is ``True``. The variance is calculated via the biased estimator, equivalent to `torch.var(input, unbiased=False)`. This layer uses statistics computed from input data in both training and evaluation modes. Args: num_groups (int): number of groups to separate the channels into num_channels (int): number of channels expected in input eps: a value added to the denominator for numerical stability. Default: 1e-5 affine: a boolean value that when set to ``True``, this module has learnable per-channel affine parameters initialized to ones (for weights) and zeros (for biases). Default: ``True``. Shape: - Input: :math:`(N, C, *)` where :math:`C=\text{num\_channels}` - Output: :math:`(N, C, *)` (same shape as input) Examples:: >>> input = torch.randn(20, 6, 10, 10) >>> # Separate 6 channels into 3 groups >>> m = nn.GroupNorm(3, 6) >>> # Separate 6 channels into 6 groups (equivalent with InstanceNorm) >>> m = nn.GroupNorm(6, 6) >>> # Put all 6 channels into a single group (equivalent with LayerNorm) >>> m = nn.GroupNorm(1, 6) >>> # Activating the module >>> output = m(input) """ __constants__ = ["num_groups", "num_channels", "eps", "affine"] num_groups: int num_channels: int eps: float affine: bool def __init__( self, num_groups: int, num_channels: int, eps: float = 1e-5, affine: bool = True, device=None, dtype=None, ) -> None: factory_kwargs = {"device": device, "dtype": dtype} super().__init__() if num_channels % num_groups != 0: raise ValueError("num_channels must be divisible by num_groups") self.num_groups = num_groups self.num_channels = num_channels self.eps = eps self.affine = affine if self.affine: self.weight = Parameter(torch.empty(num_channels, **factory_kwargs)) self.bias = Parameter(torch.empty(num_channels, **factory_kwargs)) else: self.register_parameter("weight", None) self.register_parameter("bias", None) self.reset_parameters() def reset_parameters(self) -> None: if self.affine: init.ones_(self.weight) init.zeros_(self.bias) def forward(self, input: Tensor) -> Tensor: return F.group_norm(input, self.num_groups, self.weight, self.bias, self.eps) def extra_repr(self) -> str: return "{num_groups}, {num_channels}, eps={eps}, affine={affine}".format( **self.__dict__ )
[docs]class RMSNorm(Module): r"""Applies Root Mean Square Layer Normalization over a mini-batch of inputs. This layer implements the operation as described in the paper `Root Mean Square Layer Normalization <https://arxiv.org/pdf/1910.07467.pdf>`__ .. math:: y_i = \frac{x_i}{\mathrm{RMS}(x)} * \gamma_i, \quad \text{where} \quad \text{RMS}(x) = \sqrt{\epsilon + \frac{1}{n} \sum_{i=1}^{n} x_i^2} The RMS is taken over the last ``D`` dimensions, where ``D`` is the dimension of :attr:`normalized_shape`. For example, if :attr:`normalized_shape` is ``(3, 5)`` (a 2-dimensional shape), the RMS is computed over the last 2 dimensions of the input. Args: normalized_shape (int or list or torch.Size): input shape from an expected input of size .. math:: [* \times \text{normalized\_shape}[0] \times \text{normalized\_shape}[1] \times \ldots \times \text{normalized\_shape}[-1]] If a single integer is used, it is treated as a singleton list, and this module will normalize over the last dimension which is expected to be of that specific size. eps: a value added to the denominator for numerical stability. Default: :func:`torch.finfo(x.dtype).eps` elementwise_affine: a boolean value that when set to ``True``, this module has learnable per-element affine parameters initialized to ones (for weights). Default: ``True``. Shape: - Input: :math:`(N, *)` - Output: :math:`(N, *)` (same shape as input) Examples:: >>> rms_norm = nn.RMSNorm([2, 3]) >>> input = torch.randn(2, 2, 3) >>> rms_norm(input) """ __constants__ = ["normalized_shape", "eps", "elementwise_affine"] normalized_shape: tuple[int, ...] eps: Optional[float] elementwise_affine: bool def __init__( self, normalized_shape: _shape_t, eps: Optional[float] = None, elementwise_affine: bool = True, device=None, dtype=None, ) -> None: factory_kwargs = {"device": device, "dtype": dtype} super().__init__() if isinstance(normalized_shape, numbers.Integral): # mypy error: incompatible types in assignment normalized_shape = (normalized_shape,) # type: ignore[assignment] self.normalized_shape = tuple(normalized_shape) # type: ignore[arg-type] self.eps = eps self.elementwise_affine = elementwise_affine if self.elementwise_affine: self.weight = Parameter( torch.empty(self.normalized_shape, **factory_kwargs) ) else: self.register_parameter("weight", None) self.reset_parameters()
[docs] def reset_parameters(self) -> None: """ Resets parameters based on their initialization used in __init__. """ if self.elementwise_affine: init.ones_(self.weight)
[docs] def forward(self, x: torch.Tensor) -> torch.Tensor: """ Runs forward pass. """ return F.rms_norm(x, self.normalized_shape, self.weight, self.eps)
[docs] def extra_repr(self) -> str: """ Extra information about the module. """ return ( "{normalized_shape}, eps={eps}, " "elementwise_affine={elementwise_affine}".format(**self.__dict__) )
# TODO: ContrastiveNorm2d # TODO: DivisiveNorm2d # TODO: SubtractiveNorm2d

Docs

Access comprehensive developer documentation for PyTorch

View Docs

Tutorials

Get in-depth tutorials for beginners and advanced developers

View Tutorials

Resources

Find development resources and get your questions answered

View Resources
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy