
import torch
from torch import Tensor

from .optimizer import (
    Optimizer,
    _use_grad_for_differentiable,
    _default_to_fused_or_foreach,
    _differentiable_doc,
    _foreach_doc,
    _maximize_doc,
    _view_as_real,
)
from typing import List, Optional

__all__ = ["Adadelta", "adadelta"]


class Adadelta(Optimizer):
    def __init__(
        self,
        params,
        lr=1.0,
        rho=0.9,
        eps=1e-6,
        weight_decay=0,
        foreach: Optional[bool] = None,
        *,
        maximize: bool = False,
        differentiable: bool = False,
    ):
        if not 0.0 <= lr:
            raise ValueError(f"Invalid learning rate: {lr}")
        if not 0.0 <= rho <= 1.0:
            raise ValueError(f"Invalid rho value: {rho}")
        if not 0.0 <= eps:
            raise ValueError(f"Invalid epsilon value: {eps}")
        if not 0.0 <= weight_decay:
            raise ValueError(f"Invalid weight_decay value: {weight_decay}")

        defaults = dict(
            lr=lr,
            rho=rho,
            eps=eps,
            weight_decay=weight_decay,
            maximize=maximize,
            foreach=foreach,
            differentiable=differentiable,
        )
        super().__init__(params, defaults)

    def __setstate__(self, state):
        super().__setstate__(state)
        for group in self.param_groups:
            group.setdefault("foreach", None)
            group.setdefault("maximize", False)
            group.setdefault("differentiable", False)

    def _init_group(self, group, params_with_grad, grads, square_avgs, acc_deltas):
        has_complex = False
        for p in group["params"]:
            if p.grad is None:
                continue
            has_complex |= torch.is_complex(p)
            params_with_grad.append(p)
            if p.grad.is_sparse:
                raise RuntimeError("Adadelta does not support sparse gradients")
            grads.append(p.grad)

            state = self.state[p]

            # Lazy state initialization: create the running averages on first use.
            if len(state) == 0:
                state["step"] = 0
                state["square_avg"] = torch.zeros_like(
                    p, memory_format=torch.preserve_format
                )
                state["acc_delta"] = torch.zeros_like(
                    p, memory_format=torch.preserve_format
                )

            square_avgs.append(state["square_avg"])
            acc_deltas.append(state["acc_delta"])

            state["step"] += 1
        return has_complex

    @_use_grad_for_differentiable
    def step(self, closure=None):
        """Perform a single optimization step.

        Args:
            closure (Callable, optional): A closure that reevaluates the model
                and returns the loss.
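
        Example:
            A minimal closure sketch; ``model``, ``inputs``, ``targets``, and
            ``loss_fn`` are placeholders assumed to exist in the caller's
            scope::

                def closure():
                    optimizer.zero_grad()
                    loss = loss_fn(model(inputs), targets)
                    loss.backward()
                    return loss

                optimizer.step(closure)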
        """
        loss = None
        if closure is not None:
            with torch.enable_grad():
                loss = closure()

        for group in self.param_groups:
            params_with_grad = []
            grads = []
            square_avgs = []
            acc_deltas = []
            lr, rho, eps, weight_decay, foreach, maximize, differentiable = (
                group["lr"],
                group["rho"],
                group["eps"],
                group["weight_decay"],
                group["foreach"],
                group["maximize"],
                group["differentiable"],
            )

            has_complex = self._init_group(
                group, params_with_grad, grads, square_avgs, acc_deltas
            )

            adadelta(
                params_with_grad,
                grads,
                square_avgs,
                acc_deltas,
                lr=lr,
                rho=rho,
                eps=eps,
                weight_decay=weight_decay,
                foreach=foreach,
                maximize=maximize,
                differentiable=differentiable,
                has_complex=has_complex,
            )

        return loss


Adadelta.__doc__ = r"""Implements Adadelta algorithm.

    .. math::
       \begin{aligned}
            &\rule{110mm}{0.4pt}                                                                 \\
            &\textbf{input}      : \gamma \text{ (lr)}, \: \theta_0 \text{ (params)},
                \: f(\theta) \text{ (objective)}, \: \rho \text{ (decay)},
                \: \lambda \text{ (weight decay)}                                                \\
            &\textbf{initialize} :  v_0  \leftarrow 0 \: \text{ (square avg)},
                \: u_0 \leftarrow 0 \: \text{ (accumulate variables)}                     \\[-1.ex]
            &\rule{110mm}{0.4pt}                                                                 \\
            &\textbf{for} \: t=1 \: \textbf{to} \: \ldots \: \textbf{do}                         \\
            &\hspace{5mm}g_t           \leftarrow   \nabla_{\theta} f_t (\theta_{t-1})           \\
            &\hspace{5mm}if \: \lambda \neq 0                                                    \\
            &\hspace{10mm} g_t \leftarrow g_t + \lambda  \theta_{t-1}                            \\
            &\hspace{5mm} v_t      \leftarrow v_{t-1} \rho + g^2_t (1 - \rho)                    \\
            &\hspace{5mm}\Delta x_t    \leftarrow   \frac{\sqrt{u_{t-1} +
                \epsilon }}{ \sqrt{v_t + \epsilon}  }g_t \hspace{21mm}                           \\
            &\hspace{5mm} u_t  \leftarrow   u_{t-1}  \rho +
                 \Delta x^2_t  (1 - \rho)                                                        \\
            &\hspace{5mm}\theta_t      \leftarrow   \theta_{t-1} - \gamma  \Delta x_t            \\
            &\rule{110mm}{0.4pt}                                                          \\[-1.ex]
            &\bf{return} \:  \theta_t                                                     \\[-1.ex]
            &\rule{110mm}{0.4pt}                                                          \\[-1.ex]
       \end{aligned}

    For further details regarding the algorithm we refer to `ADADELTA: An Adaptive Learning Rate Method`_.
    """ + fr"""
    Args:
        params (iterable): iterable of parameters to optimize or dicts defining
            parameter groups
        rho (float, optional): coefficient used for computing a running average
            of squared gradients (default: 0.9). A higher value of ``rho``
            makes the running average decay more slowly, which can help damp
            oscillations during training.
        eps (float, optional): term added to the denominator to improve
            numerical stability (default: 1e-6).
        lr (float, optional): coefficient that scales the delta before it is applied
            to the parameters (default: 1.0)
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
        {_foreach_doc}
        {_maximize_doc}
        {_differentiable_doc}
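
    Example:
        A minimal usage sketch; ``model``, ``loss_fn``, ``input``, and
        ``target`` are placeholders assumed to exist in the caller's scope::

            optimizer = torch.optim.Adadelta(model.parameters(), lr=1.0, rho=0.9)
            optimizer.zero_grad()
            loss_fn(model(input), target).backward()
            optimizer.step()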

    .. _ADADELTA\: An Adaptive Learning Rate Method:
        https://arxiv.org/abs/1212.5701

    """


def adadelta(
    params: List[Tensor],
    grads: List[Tensor],
    square_avgs: List[Tensor],
    acc_deltas: List[Tensor],
    # kwonly args with defaults are not supported by functions compiled with
    # torchscript, so these stay positional-with-default for now.
    foreach: Optional[bool] = None,
    differentiable: bool = False,
    has_complex: bool = False,
    *,
    lr: float,
    rho: float,
    eps: float,
    weight_decay: float,
    maximize: bool,
):
    r"""Functional API that performs Adadelta algorithm computation.

    See :class:`~torch.optim.Adadelta` for details.
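
    Example:
        A hand-rolled sketch of the functional form; the state tensors
        (``square_avg``, ``acc_delta``) are created and managed by the caller
        here, which :class:`Adadelta` normally does for you::

            import torch
            param = torch.zeros(3)
            grad = torch.ones(3)
            square_avg = torch.zeros(3)
            acc_delta = torch.zeros(3)
            adadelta([param], [grad], [square_avg], [acc_delta],
                     lr=1.0, rho=0.9, eps=1e-6, weight_decay=0.0,
                     maximize=False)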
    """
    # We still respect when the user inputs False for foreach.
    if foreach is None:
        _, foreach = _default_to_fused_or_foreach(
            params, differentiable, use_fused=False
        )

    if foreach and torch.jit.is_scripting():
        raise RuntimeError("torch.jit.script not supported with foreach optimizers")

    if foreach and not torch.jit.is_scripting():
        func = _multi_tensor_adadelta
    else:
        func = _single_tensor_adadelta

    func(
        params,
        grads,
        square_avgs,
        acc_deltas,
        lr=lr,
        rho=rho,
        eps=eps,
        weight_decay=weight_decay,
        maximize=maximize,
        differentiable=differentiable,
        has_complex=has_complex,
    )


def _single_tensor_adadelta(
    params: List[Tensor],
    grads: List[Tensor],
    square_avgs: List[Tensor],
    acc_deltas: List[Tensor],
    *,
    lr: float,
    rho: float,
    eps: float,
    weight_decay: float,
    maximize: bool,
    differentiable: bool,
    has_complex: bool,
):
    for param, grad, square_avg, acc_delta in zip(
        params, grads, square_avgs, acc_deltas
    ):
        grad = grad if not maximize else -grad

        if weight_decay != 0:
            grad = grad.add(param, alpha=weight_decay)

        if torch.is_complex(param):
            square_avg = torch.view_as_real(square_avg)
            acc_delta = torch.view_as_real(acc_delta)
            grad = torch.view_as_real(grad)

        # v_t = rho * v_{t-1} + (1 - rho) * g_t^2
        square_avg.mul_(rho).addcmul_(grad, grad, value=1 - rho)
        std = square_avg.add(eps).sqrt_()
        delta = acc_delta.add(eps).sqrt_()
        if differentiable:
            delta = delta.clone()
        # delta_t = sqrt(u_{t-1} + eps) / sqrt(v_t + eps) * g_t
        delta.div_(std).mul_(grad)
        # u_t = rho * u_{t-1} + (1 - rho) * delta_t^2
        acc_delta.mul_(rho).addcmul_(delta, delta, value=1 - rho)

        if torch.is_complex(param):
            delta = torch.view_as_complex(delta)
        param.add_(delta, alpha=-lr)


def _multi_tensor_adadelta(
    params: List[Tensor],
    grads: List[Tensor],
    square_avgs: List[Tensor],
    acc_deltas: List[Tensor],
    *,
    lr: float,
    rho: float,
    eps: float,
    weight_decay: float,
    maximize: bool,
    differentiable: bool,
    has_complex: bool,
):
    assert not differentiable, "_foreach ops don't support autograd"

    if len(params) == 0:
        return

    grouped_tensors = Optimizer._group_tensors_by_device_and_dtype(
        [params, grads, square_avgs, acc_deltas]
    )
    for (
        device_params,
        device_grads,
        device_square_avgs,
        device_acc_deltas,
    ), _ in grouped_tensors.values():
        if maximize:
            device_grads = torch._foreach_neg(device_grads)

        if has_complex:
            _view_as_real(
                device_params, device_grads, device_square_avgs, device_acc_deltas
            )

        if weight_decay != 0:
            # Re-use the intermediate memory (device_grads) already allocated for maximize
            if maximize:
                torch._foreach_add_(device_grads, device_params, alpha=weight_decay)
            else:
                device_grads = torch._foreach_add(
                    device_grads, device_params, alpha=weight_decay
                )

        torch._foreach_mul_(device_square_avgs, rho)
        torch._foreach_addcmul_(
            device_square_avgs, device_grads, device_grads, value=1 - rho
        )

        std = torch._foreach_add(device_square_avgs, eps)
        torch._foreach_sqrt_(std)

        deltas = torch._foreach_add(device_acc_deltas, eps)
        torch._foreach_sqrt_(deltas)
        torch._foreach_div_(deltas, std)
        torch._foreach_mul_(deltas, device_grads)

        torch._foreach_add_(device_params, deltas, alpha=-lr)

        torch._foreach_mul_(device_acc_deltas, rho)
        torch._foreach_addcmul_(device_acc_deltas, deltas, deltas, value=1 - rho)