
    $hCU                        d Z ddlZddlZddlmZ ddlZddlmZ d/dZd/dZ	d/dZ
d Zd	 Zd/d
Z	 	 	 d0dedededeej                 def
dZ	 	 	 d0dedededeej                 def
dZ	 	 	 	 	 d1dedededededeej                 defdZdededefdZdedefdZdedefdZd Zd2dZd  Z	 d3ded!edeej                 defd"Z	 	 d3ded!edeej                 defd#Zd$ Z	 	 	 	 d4deded'ed(edeej                 f
d)Z	 	 	 	 d4deded'ed(edeej                 f
d*Z	 	 d5deej                 fd+Z	 	 d6deej                 fd-Z d. Z! e!e          Z" e!e          Z# e!e          Z$ e!e          Z% e!e          Z& e!e          Z' e!e          Z( e!e          Z) e!e          Z* e!e          Z+ e!e           Z,dS )7zHThis file contains utilities for initializing neural network parameters.    N)Tensor)Optionalc                     t          j                    5  |                     |||          cd d d            S # 1 swxY w Y   d S N	generator)torchno_graduniform_tensorabr   s       M/var/www/html/auto_sub_bot/venv/lib/python3.11/site-packages/torch/nn/init.py_no_grad_uniform_r      s    	 : :q!y99: : : : : : : : : : : : : : : : : :   9= =c                     t          j                    5  |                     |||          cd d d            S # 1 swxY w Y   d S r   )r	   r
   normal_r   meanstdr   s       r   _no_grad_normal_r      s    	 > >~~dC9~==> > > > > > > > > > > > > > > > > >r   c                 "   d }||d|z  z
  k     s||d|z  z   k    rt          j        dd           t          j                    5   |||z
  |z            } |||z
  |z            }|                     d|z  dz
  d|z  dz
  |           |                                  |                     |t          j        d          z             | 	                    |           | 
                    ||           | cd d d            S # 1 swxY w Y   d S )	Nc                 `    dt          j        | t          j        d          z            z   dz  S )N      ?       @)matherfsqrt)xs    r   norm_cdfz(_no_grad_trunc_normal_.<locals>.norm_cdf   s)    TXa$)B--/000B66       zjmean is more than 2 std from [a, b] in nn.init.trunc_normal_. The distribution of values may be incorrect.
stacklevel   r   r   )minmax)warningswarnr	   r
   r   erfinv_mul_r   r   add_clamp_)	r   r   r   r   r   r   r!   lus	            r   _no_grad_trunc_normal_r1      s   7 7 7 	q1s7{q1s7{ 2 2 E!"	$ 	$ 	$ 	$ 
   Ha$h#%&&Ha$h#%&& 	A	1q519	BBB 	 	C$)B--'(((D 	!###+                 s   B2DDDc                     t          j                    5  |                     |          cd d d            S # 1 swxY w Y   d S N)r	   r
   fill_r   vals     r   _no_grad_fill_r7   :   s    	 ! !||C  ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! !s   6::c                     t          j                    5  |                                 cd d d            S # 1 swxY w Y   d S r3   )r	   r
   zero_r   s    r   _no_grad_zero_r;   ?   s}    	  ||~~                 s   599c                    g d}| |v s| dk    rdS | dk    rdS | dk    rt          j        d          S | dk    rw|d
}nUt          |t                    st          |t                    st          |t
                    r|}nt          d| d          t          j        dd|dz  z   z            S | dk    rdS t          d|            )a  Return the recommended gain value for the given nonlinearity function.

    The values are as follows:

    ================= ====================================================
    nonlinearity      gain
    ================= ====================================================
    Linear / Identity :math:`1`
    Conv{1,2,3}D      :math:`1`
    Sigmoid           :math:`1`
    Tanh              :math:`\frac{5}{3}`
    ReLU              :math:`\sqrt{2}`
    Leaky Relu        :math:`\sqrt{\frac{2}{1 + \text{negative\_slope}^2}}`
    SELU              :math:`\frac{3}{4}`
    ================= ====================================================

    .. warning::
        In order to implement `Self-Normalizing Neural Networks`_ ,
        you should use ``nonlinearity='linear'`` instead of ``nonlinearity='selu'``.
        This gives the initial weights a variance of ``1 / N``,
        which is necessary to induce a stable fixed point in the forward pass.
        In contrast, the default gain for ``SELU`` sacrifices the normalization
        effect for more stable gradient flow in rectangular layers.

    Args:
        nonlinearity: the non-linear function (`nn.functional` name)
        param: optional parameter for the non-linear function

    Examples:
        >>> gain = nn.init.calculate_gain('leaky_relu', 0.2)  # leaky_relu with negative_slope=0.2

    .. _Self-Normalizing Neural Networks: https://papers.nips.cc/paper/2017/hash/5d44ee6f2c3f71b73125876103c8f6c4-Abstract.html
    )linearconv1dconv2dconv3dconv_transpose1dconv_transpose2dconv_transpose3dsigmoidr&   tanhg?relur   
leaky_reluN{Gz?znegative_slope z not a valid numberr#   selug      ?zUnsupported nonlinearity )r   r   
isinstanceboolintfloat
ValueError)nonlinearityparam
linear_fnsnegative_slopes       r   calculate_gainrS   D   s   D vuuJz!!\Y%>%>q			w			y~~		%	%=!NNE4(( 	KZs-C-C 	KzRWY^G_G_ 	K"NNIuIIIJJJyNa$7 78999			wC\CCDDDr"           r   r   r   r   r   returnc                     t           j                            |           r+t           j                            t          | f| |||          S t          | |||          S )a  Fill the input Tensor with values drawn from the uniform distribution.

    :math:`\mathcal{U}(a, b)`.

    Args:
        tensor: an n-dimensional `torch.Tensor`
        a: the lower bound of the uniform distribution
        b: the upper bound of the uniform distribution
        generator: the torch Generator to sample from (default: None)

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.uniform_(w)
    r   )r	   	overrideshas_torch_function_variadichandle_torch_functionr   r   r   s       r   r   r   |   s`    ( 226:: 
44vi!qI 5 
 
 	
 VQ9555r"   r   r   c                     t           j                            |           r+t           j                            t          | f| |||          S t          | |||          S )a  Fill the input Tensor with values drawn from the normal distribution.

    :math:`\mathcal{N}(\text{mean}, \text{std}^2)`.

    Args:
        tensor: an n-dimensional `torch.Tensor`
        mean: the mean of the normal distribution
        std: the standard deviation of the normal distribution
        generator: the torch Generator to sample from (default: None)

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.normal_(w)
    r   )r	   rW   rX   rY   r   r   r   s       r   r   r      s`    ( 226:: 
44fYvDcY 5 
 
 	
 FD#y999r"          r   c                 ,    t          | |||||          S )a  Fill the input Tensor with values drawn from a truncated normal distribution.

    The values are effectively drawn from the
    normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)`
    with values outside :math:`[a, b]` redrawn until they are within
    the bounds. The method used for generating the random values works
    best when :math:`a \leq \text{mean} \leq b`.

    Args:
        tensor: an n-dimensional `torch.Tensor`
        mean: the mean of the normal distribution
        std: the standard deviation of the normal distribution
        a: the minimum cutoff value
        b: the maximum cutoff value
        generator: the torch Generator to sample from (default: None)

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.trunc_normal_(w)
    r   )r1   )r   r   r   r   r   r   s         r   trunc_normal_r]      s    8 "&$QYOOOOr"   r6   c                     t           j                            |           r)t           j                            t          | f| |          S t          | |          S )zFill the input Tensor with the value :math:`\text{val}`.

    Args:
        tensor: an n-dimensional `torch.Tensor`
        val: the value to fill the tensor with

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.constant_(w, 0.3)
    r5   )r	   rW   rX   rY   	constant_r7   r5   s     r   r_   r_      sP     226:: c44Y	RX^a4bbb&#&&&r"   c                 "    t          | d          S )zFill the input Tensor with the scalar value `1`.

    Args:
        tensor: an n-dimensional `torch.Tensor`

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.ones_(w)
    r   )r7   r:   s    r   ones_ra      s     &"%%%r"   c                      t          |           S )zFill the input Tensor with the scalar value `0`.

    Args:
        tensor: an n-dimensional `torch.Tensor`

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.zeros_(w)
    )r;   r:   s    r   zeros_rc      s     &!!!r"   c                     |                                  dk    rt          d          t          j                    5  t          j        | j        | | j        d ddd           n# 1 swxY w Y   | S )a=  Fill the 2-dimensional input `Tensor` with the identity matrix.

    Preserves the identity of the inputs in `Linear` layers, where as
    many inputs are preserved as possible.

    Args:
        tensor: a 2-dimensional `torch.Tensor`

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.eye_(w)
    r#   ,Only tensors with 2 dimensions are supported)outrequires_gradN)
ndimensionrN   r	   r
   eyeshaperg   r:   s    r   eye_rk      s     aGHHH	 Q Q	6<V6;OPPPPQ Q Q Q Q Q Q Q Q Q Q Q Q Q QMs   A$$A(+A(r&   c                 @   |                                  }|dvrt          d          |                                 }|d         |z  dk    rt          d          |d         |z  }t          ||d                   }t	          j                    5  |                                  t          |          D ]}t          |          D ]}|dk    r%d| ||z  |z   ||                     d          dz  f<   -|dk    r<d| ||z  |z   ||                     d          dz  |                     d          dz  f<   od| ||z  |z   ||                     d          dz  |                     d          dz  |                     d          dz  f<   	 d	d	d	           n# 1 swxY w Y   | S )
aF  Fill the {3, 4, 5}-dimensional input `Tensor` with the Dirac delta function.

    Preserves the identity of the inputs in `Convolutional`
    layers, where as many input channels are preserved as possible. In case
    of groups>1, each group of channels preserves identity

    Args:
        tensor: a {3, 4, 5}-dimensional `torch.Tensor`
        groups (int, optional): number of groups in the conv layer (default: 1)
    Examples:
        >>> w = torch.empty(3, 16, 5, 5)
        >>> nn.init.dirac_(w)
        >>> w = torch.empty(3, 24, 5, 5)
        >>> nn.init.dirac_(w, 3)
    )         z5Only tensors with 3, 4, or 5 dimensions are supportedr   z!dim 0 must be divisible by groupsr&   rm   r#   rn   N)rh   rN   sizer'   r	   r
   r9   range)r   groups
dimensionssizesout_chans_per_grpmin_dimgds           r   dirac_ry     s,     ""$$J""PQQQKKMMEQx&A<===aF*#U1X..G	 I Iv 		I 		IA7^^ I I??PQF10014aQ19LLMM1__23 10014aQ19L!;;q>>Q./ 0 0 HI 10014aQ19L!;;q>>Q.A!0CD E EI		II I I I I I I I I I I I I I I Ms   C8FFFc                 &   |                                  }|dk     rt          d          |                     d          }|                     d          }d}|                                  dk    r| j        dd          D ]}||z  }||z  }||z  }||fS )Nr#   zNFan in and fan out can not be computed for tensor with fewer than 2 dimensionsr&   r   )dimrN   rp   rj   )r   rs   num_input_fmapsnum_output_fmapsreceptive_field_sizesfan_infan_outs           r   _calculate_fan_in_and_fan_outr   ;  s    JA~~ijjjkk!nnO{{1~~zz||a abb! 	& 	&A A%  33F!55G7?r"   gainc                     t          |           \  }}|t          j        dt          ||z             z            z  }t          j        d          |z  }t	          | | ||          S )a  Fill the input `Tensor` with values using a Xavier uniform distribution.

    The method is described in `Understanding the difficulty of training
    deep feedforward neural networks` - Glorot, X. & Bengio, Y. (2010).
    The resulting tensor will have values sampled from
    :math:`\mathcal{U}(-a, a)` where

    .. math::
        a = \text{gain} \times \sqrt{\frac{6}{\text{fan\_in} + \text{fan\_out}}}

    Also known as Glorot initialization.

    Args:
        tensor: an n-dimensional `torch.Tensor`
        gain: an optional scaling factor
        generator: the torch Generator to sample from (default: None)

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.xavier_uniform_(w, gain=nn.init.calculate_gain('relu'))
    r         @)r   r   r   rM   r   )r   r   r   r   r   r   r   s          r   xavier_uniform_r   N  sc    0 4F;;OFG
3v'7!8!8899
9C	#AVaRI666r"   c                     t          |           \  }}|t          j        dt          ||z             z            z  }t	          | d||          S )a  Fill the input `Tensor` with values using a Xavier normal distribution.

    The method is described in `Understanding the difficulty of training deep feedforward
    neural networks` - Glorot, X. & Bengio, Y. (2010). The resulting tensor
    will have values sampled from :math:`\mathcal{N}(0, \text{std}^2)` where

    .. math::
        \text{std} = \text{gain} \times \sqrt{\frac{2}{\text{fan\_in} + \text{fan\_out}}}

    Also known as Glorot initialization.

    Args:
        tensor: an n-dimensional `torch.Tensor`
        gain: an optional scaling factor
        generator: the torch Generator to sample from (default: None)

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.xavier_normal_(w)
    r   rT   )r   r   r   rM   r   )r   r   r   r   r   r   s         r   xavier_normal_r   m  sO    2 4F;;OFG
3v'7!8!8899
9CFBY777r"   c                     |                                 }ddg}||vrt          d| d|           t          |           \  }}|dk    r|n|S )Nr   r   zMode z" not supported, please use one of )lowerrN   r   )r   modevalid_modesr   r   s        r   _calculate_correct_fanr     sh    ::<<DY'K;VVVVVWWW3F;;OFGX%%6672r"   r   rG   r   rO   c           	         t           j                            |           r,t           j                            t          | f| ||||          S d| j        v rt          j        d           | S t          | |          }t          ||          }|t          j        |          z  }t          j        d          |z  }t          j                    5  |                     | ||          cddd           S # 1 swxY w Y   dS )a  Fill the input `Tensor` with values using a Kaiming uniform distribution.

    The method is described in `Delving deep into rectifiers: Surpassing
    human-level performance on ImageNet classification` - He, K. et al. (2015).
    The resulting tensor will have values sampled from
    :math:`\mathcal{U}(-\text{bound}, \text{bound})` where

    .. math::
        \text{bound} = \text{gain} \times \sqrt{\frac{3}{\text{fan\_mode}}}

    Also known as He initialization.

    Args:
        tensor: an n-dimensional `torch.Tensor`
        a: the negative slope of the rectifier used after this layer (only
            used with ``'leaky_relu'``)
        mode: either ``'fan_in'`` (default) or ``'fan_out'``. Choosing ``'fan_in'``
            preserves the magnitude of the variance of the weights in the
            forward pass. Choosing ``'fan_out'`` preserves the magnitudes in the
            backwards pass.
        nonlinearity: the non-linear function (`nn.functional` name),
            recommended to use only with ``'relu'`` or ``'leaky_relu'`` (default).
        generator: the torch Generator to sample from (default: None)

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.kaiming_uniform_(w, mode='fan_in', nonlinearity='relu')
    )r   r   r   rO   r   r   ,Initializing zero-element tensors is a no-opr   r   N)r	   rW   rX   rY   kaiming_uniform_rj   r)   r*   r   rS   r   r   r
   r   )	r   r   r   rO   r   fanr   r   bounds	            r   r   r     sU   F 226:: !44I% 5 ! ! 	! 	FLDEEE
 
.
.C,**D
3
CIcNNS E	 C Cvu	BBC C C C C C C C C C C C C C C C C Cs   C22C69C6c                 8   d| j         v rt          j        d           | S t          | |          }t	          ||          }|t          j        |          z  }t          j                    5  | 	                    d||          cddd           S # 1 swxY w Y   dS )a  Fill the input `Tensor` with values using a Kaiming normal distribution.

    The method is described in `Delving deep into rectifiers: Surpassing
    human-level performance on ImageNet classification` - He, K. et al. (2015).
    The resulting tensor will have values sampled from
    :math:`\mathcal{N}(0, \text{std}^2)` where

    .. math::
        \text{std} = \frac{\text{gain}}{\sqrt{\text{fan\_mode}}}

    Also known as He initialization.

    Args:
        tensor: an n-dimensional `torch.Tensor`
        a: the negative slope of the rectifier used after this layer (only
            used with ``'leaky_relu'``)
        mode: either ``'fan_in'`` (default) or ``'fan_out'``. Choosing ``'fan_in'``
            preserves the magnitude of the variance of the weights in the
            forward pass. Choosing ``'fan_out'`` preserves the magnitudes in the
            backwards pass.
        nonlinearity: the non-linear function (`nn.functional` name),
            recommended to use only with ``'relu'`` or ``'leaky_relu'`` (default).
        generator: the torch Generator to sample from (default: None)

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.kaiming_normal_(w, mode='fan_out', nonlinearity='relu')
    r   r   r   N)
rj   r)   r*   r   rS   r   r   r	   r
   r   )r   r   r   rO   r   r   r   r   s           r   kaiming_normal_r     s    F 	FLDEEE
 
.
.C,**D
3
C	 ; ;~~a	~::; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ;s   *BBBc                    |                                  dk     rt          d          |                                 dk    r| S |                     d          }|                                 |z  }|                     ||                              dd|          }||k     r|                                 t          j        	                    |          \  }}t          j
        |d          }|                                }	||	z  }||k     r|                                 t          j                    5  |                     |                              |           |                     |           ddd           n# 1 swxY w Y   | S )a  Fill the input `Tensor` with a (semi) orthogonal matrix.

    Described in `Exact solutions to the nonlinear dynamics of learning in deep
    linear neural networks` - Saxe, A. et al. (2013). The input tensor must have
    at least 2 dimensions, and for tensors with more than 2 dimensions the
    trailing dimensions are flattened.

    Args:
        tensor: an n-dimensional `torch.Tensor`, where :math:`n \geq 2`
        gain: optional scaling factor
        generator: the torch Generator to sample from (default: None)

    Examples:
        >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_LAPACK)
        >>> w = torch.empty(3, 5)
        >>> nn.init.orthogonal_(w)
    r#   z4Only tensors with 2 or more dimensions are supportedr   r&   r   N)rh   rN   numelrp   newr   t_r	   linalgqrdiagsignr
   view_ascopy_r,   )
r   r   r   rowscols	flattenedqrrx   phs
             r   orthogonal_r     s   , QOPPP||~~;;q>>D<<>>T!D

4&&..q!y.IIId{{ <??9%%DAq
1aA	
BGAd{{		  q"""D               Ms   1>E;;E?E?rH   c                    |                                  dk    rt          d          | j        \  }}t          t	          j        ||z                      }t          j                    5  |                     d||           t          |          D ]'}t          j
        |          }|d|         }	d| |	|f<   (	 ddd           n# 1 swxY w Y   | S )a  Fill the 2D input `Tensor` as a sparse matrix.

    The non-zero elements will be drawn from the normal distribution
    :math:`\mathcal{N}(0, 0.01)`, as described in `Deep learning via
    Hessian-free optimization` - Martens, J. (2010).

    Args:
        tensor: an n-dimensional `torch.Tensor`
        sparsity: The fraction of elements in each column to be set to zero
        std: the standard deviation of the normal distribution used to generate
            the non-zero values
        generator: the torch Generator to sample from (default: None)

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.sparse_(w, sparsity=0.1)
    r#   re   r   r   N)rh   rN   rj   rL   r   ceilr	   r
   r   rq   randperm)
r   sparsityr   r   r   r   	num_zeroscol_idxrow_indiceszero_indicess
             r   sparse_r   .  s   . aGHHHJD$DIho..//I	 . .q#333T{{ 	. 	.G...K&z	z2L,-F<())	.. . . . . . . . . . . . . . . Ms   )ACC
C
c                 l      j         d d          fd}d d d d|_        |_         |S )Nc                  N    t          j        d d dd            | i |S )Nznn.init.z' is now deprecated in favor of nn.init..r#   r$   )r)   r*   )argskwargsmethnew_nameold_names     r   deprecated_initz(_make_deprecate.<locals>.deprecated_initY  sC    ]]]RZ]]]jklllltT$V$$$r"   z
    z_(...)

    .. warning::
        This method is now deprecated in favor of :func:`torch.nn.init.z"`.

    See :func:`~torch.nn.init.z` for details.)__name____doc__)r   r   r   r   s   ` @@r   _make_deprecater   U  s    }H}H% % % % % % %:: : IQ	: :  (: : :O  (Or"   r3   )rT   r   N)rT   r   r[   r   N)r&   )r   N)r   r   rG   N)r&   N)rH   N)-r   r   r)   r	   r   typingr   	_Optionalr   r   r1   r7   r;   rS   rM   	Generatorr   r   r]   r_   ra   rc   rk   ry   r   r   r   r   strr   r   r   r   r   uniformnormalconstantri   diracxavier_uniformxavier_normalkaiming_uniformkaiming_normal
orthogonalsparse r"   r   <module>r      s   N N          ( ( ( ( ( (: : : :
> > > >
       F! ! !
  
5E 5E 5E 5Et ,0	6 666 6 )	6
 6 6 6 6: ,0	: ::
: 
: )	:
 : : : :8 ,0P PP
P 
P 	P
 P )P P P P P>'f '5 'V ' ' ' ' 
&& 
&V 
& 
& 
& 
&
"6 
"f 
" 
" 
" 
"  *) ) ) )X  ( PT7 7772;EO2L77 7 7 7B ,08 88
8 )8 	8 8 8 8>3 3 3 $,05C 5C5C5C 5C 	5C
 )5C 5C 5C 5Ct $,0*; *;*;*; *; 	*;
 )*; *; *; *;^ 
,00 0 )0 0 0 0l 	,0	# # )	# # # #N  & /(
#
#		!	!?9%%od 11//!/"233 11_[))
		!	!r"   