
import torch
from torch import Tensor

from .optimizer import (
    Optimizer, _use_grad_for_differentiable, _get_value, _stack_if_compiling,
    _default_to_fused_or_foreach, _differentiable_doc, _maximize_doc, _foreach_doc,
    _view_as_real,
)
from typing import List, Optional

__all__ = ["Adamax", "adamax"]


class Adamax(Optimizer):
    def __init__(self, params, lr=2e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0,
                 foreach: Optional[bool] = None, *, maximize: bool = False,
                 differentiable: bool = False):
        if not 0.0 <= lr:
            raise ValueError(f"Invalid learning rate: {lr}")
        if not 0.0 <= eps:
            raise ValueError(f"Invalid epsilon value: {eps}")
        if not 0.0 <= betas[0] < 1.0:
            raise ValueError(f"Invalid beta parameter at index 0: {betas[0]}")
        if not 0.0 <= betas[1] < 1.0:
            raise ValueError(f"Invalid beta parameter at index 1: {betas[1]}")
        if not 0.0 <= weight_decay:
            raise ValueError(f"Invalid weight_decay value: {weight_decay}")

        defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay,
                        foreach=foreach, maximize=maximize, differentiable=differentiable)
        super().__init__(params, defaults)

    def __setstate__(self, state):
        super().__setstate__(state)
        for group in self.param_groups:
            group.setdefault("foreach", None)
            group.setdefault("maximize", False)
            group.setdefault("differentiable", False)
        state_values = list(self.state.values())
        step_is_tensor = (len(state_values) != 0) and torch.is_tensor(state_values[0]["step"])
        if not step_is_tensor:
            for s in state_values:
                s["step"] = torch.tensor(float(s["step"]), dtype=torch.float32)

    def _init_group(self, group, params_with_grad, grads, exp_avgs, exp_infs, state_steps):
        has_complex = False
        for p in group["params"]:
            if p.grad is None:
                continue
            has_complex |= torch.is_complex(p)
            params_with_grad.append(p)
            if p.grad.is_sparse:
                raise RuntimeError("Adamax does not support sparse gradients")
            grads.append(p.grad)

            state = self.state[p]

            # State initialization
            if len(state) == 0:
                state["step"] = torch.tensor(0.0, dtype=torch.float32)
                state["exp_avg"] = torch.zeros_like(p, memory_format=torch.preserve_format)
                state["exp_inf"] = torch.zeros_like(p, memory_format=torch.preserve_format)

            exp_avgs.append(state["exp_avg"])
            exp_infs.append(state["exp_inf"])
            state_steps.append(state["step"])
        return has_complex

    @_use_grad_for_differentiable
    def step(self, closure=None):
        """Perform a single optimization step.

        Args:
            closure (Callable, optional): A closure that reevaluates the model
                and returns the loss.
        """
        loss = None
        if closure is not None:
            with torch.enable_grad():
                loss = closure()

        for group in self.param_groups:
            params_with_grad = []
            grads = []
            exp_avgs = []
            exp_infs = []
            state_steps = []

            beta1, beta2 = group["betas"]
            eps = group["eps"]
            lr = group["lr"]
            weight_decay = group["weight_decay"]
            foreach = group["foreach"]
            maximize = group["maximize"]
            differentiable = group["differentiable"]

            has_complex = self._init_group(
                group, params_with_grad, grads, exp_avgs, exp_infs, state_steps
            )

            adamax(
                params_with_grad, grads, exp_avgs, exp_infs, state_steps,
                eps=eps, beta1=beta1, beta2=beta2, lr=lr, weight_decay=weight_decay,
                foreach=foreach, maximize=maximize, differentiable=differentiable,
                has_complex=has_complex,
            )

        return loss


Adamax.__doc__ = r"""Implements Adamax algorithm (a variant of Adam based on infinity norm).

    .. math::
       \begin{aligned}
            &\rule{110mm}{0.4pt}                                                                 \\
            &\textbf{input}      : \gamma \text{ (lr)}, \beta_1, \beta_2
                \text{ (betas)},\theta_0 \text{ (params)},f(\theta) \text{ (objective)},
                \: \lambda \text{ (weight decay)},                                                \\
            &\hspace{13mm}    \epsilon \text{ (epsilon)}                                          \\
            &\textbf{initialize} :  m_0 \leftarrow 0 \text{ ( first moment)},
                u_0 \leftarrow 0 \text{ ( infinity norm)}                                 \\[-1.ex]
            &\rule{110mm}{0.4pt}                                                                 \\
            &\textbf{for} \: t=1 \: \textbf{to} \: \ldots \: \textbf{do}                         \\
            &\hspace{5mm}g_t           \leftarrow   \nabla_{\theta} f_t (\theta_{t-1})           \\
            &\hspace{5mm}if \: \lambda \neq 0                                                    \\
            &\hspace{10mm} g_t \leftarrow g_t + \lambda  \theta_{t-1}                            \\
            &\hspace{5mm}m_t      \leftarrow   \beta_1 m_{t-1} + (1 - \beta_1) g_t               \\
            &\hspace{5mm}u_t      \leftarrow   \mathrm{max}(\beta_2 u_{t-1}, |g_{t}|+\epsilon)   \\
            &\hspace{5mm}\theta_t \leftarrow \theta_{t-1} - \frac{\gamma m_t}{(1-\beta^t_1) u_t} \\
            &\rule{110mm}{0.4pt}                                                          \\[-1.ex]
            &\bf{return} \:  \theta_t                                                     \\[-1.ex]
            &\rule{110mm}{0.4pt}                                                          \\[-1.ex]
       \end{aligned}

    For further details regarding the algorithm we refer to `Adam: A Method for Stochastic Optimization`_.
    """ + fr"""
    Args:
        params (iterable): iterable of parameters to optimize or dicts defining
            parameter groups
        lr (float, optional): learning rate (default: 2e-3)
        betas (Tuple[float, float], optional): coefficients used for computing
            running averages of gradient and its square
        eps (float, optional): term added to the denominator to improve
            numerical stability (default: 1e-8)
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
        {_foreach_doc}
        {_maximize_doc}
        {_differentiable_doc}

    .. _Adam\: A Method for Stochastic Optimization:
        https://arxiv.org/abs/1412.6980

    """

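# A minimal usage sketch, assuming an ordinary training loop; `model`, `loss_fn`,
# and `loader` are hypothetical stand-ins for user-side objects:
#
#     optimizer = torch.optim.Adamax(model.parameters(), lr=2e-3, betas=(0.9, 0.999))
#     for inputs, targets in loader:
#         optimizer.zero_grad()
#         loss = loss_fn(model(inputs), targets)
#         loss.backward()
#         optimizer.step()
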
||||||           dS )zrFunctional API that performs adamax algorithm computation.

    See :class:`~torch.optim.Adamax` for details.
    c              3   J   K   | ]}t          |t          j                  V  d S rW   )
isinstancer5   r   ).0ts     r(   	<genexpr>zadamax.<locals>.<genexpr>   s.      @@qz!U\**@@@@@@r)   zPAPI has changed, `state_steps` argument must contain a list of singleton tensorsNF)	use_fusedz6torch.jit.script not supported with foreach optimizers)r   rR   rS   r   r   r   r   rN   )allrF   r	   r5   jitis_scripting_multi_tensor_adamax_single_tensor_adamax)r%   rJ   rK   rL   rM   r   r   r   rN   r   rR   rS   r   r   _funcs                   r(   r   r      s    . @@K@@@@@ 
^
 
 	
 1&.TYZZZ
7 U59))++ USTTT %uy--// %#$D!%     r)   c                   t          |           D ]\  }}||         }|
s|n| }||         }||         }||         }|dz  }|	dk    r|                    ||	          }t          j        |          rPt          j        |          }t          j        |          }t          j        |          }t          j        |          }|                    |d|z
             t          j        |                    |                              d          |	                                
                    |                              d          gd          }|st          j        |dd|           n*|                    t          j        |dd                     d|t          |          z  z
  }||z  }|                    |||            d S )Nr   r   alphaFkeepdimout)ro   )value)	enumerateaddr5   rC   view_as_reallerp_catmul_	unsqueezeabsadd_
unsqueeze_amaxcopy_r   addcdiv_)r%   rJ   rK   rL   rM   r   rR   rS   r   r   r   r   rN   iparamrB   r@   rA   step_tnorm_bufbias_correctionclrs                         r(   rh   rh      s   " f%% !5 !55Qx#.tt$1+1+Q!188E866DE"" 	2&u--E%d++D(11G(11G 	dAI&&&9\\%  **1--txxzzs/C/C/N/Nq/Q/QRTU
 
  	BJxEw?????MM%*Xq%@@@AAAez&'9'999?"wt4444C!5 !5r)   c          

def _multi_tensor_adamax(
    params: List[Tensor],
    grads: List[Tensor],
    exp_avgs: List[Tensor],
    exp_infs: List[Tensor],
    state_steps: List[Tensor],
    *,
    eps: float,
    beta1: float,
    beta2: float,
    lr: float,
    weight_decay: float,
    maximize: bool,
    differentiable: bool,
    has_complex: bool,
):
    assert not differentiable, "_foreach ops don't support autograd"

    if len(params) == 0:
        return

    grouped_tensors = Optimizer._group_tensors_by_device_and_dtype(
        [params, grads, exp_avgs, exp_infs, state_steps]
    )
    for (
        grouped_params, grouped_grads, grouped_exp_avgs, grouped_exp_infs, grouped_state_steps
    ), _ in grouped_tensors.values():
        if maximize:
            grouped_grads = torch._foreach_neg(grouped_grads)

        if has_complex:
            _view_as_real(grouped_params, grouped_grads, grouped_exp_avgs, grouped_exp_infs)

        # Update steps. If the steps are on CPU, foreach falls back to a slow for-loop that
        # would wrap the python scalar 1 into a Tensor on every call, so wrap it once here.
        # The alpha kwarg is required to pick the right overload.
        if grouped_state_steps[0].is_cpu:
            torch._foreach_add_(grouped_state_steps, torch.tensor(1.0, device="cpu"), alpha=1.0)
        else:
            torch._foreach_add_(grouped_state_steps, 1)

        if weight_decay != 0:
            if maximize:
                # Re-use the intermediate memory (grouped_grads) already allocated for maximize
                torch._foreach_add_(grouped_grads, grouped_params, alpha=weight_decay)
            else:
                grouped_grads = torch._foreach_add(grouped_grads, grouped_params, alpha=weight_decay)

        # Update biased first moment estimate.
        torch._foreach_lerp_(grouped_exp_avgs, grouped_grads, 1 - beta1)

        # Update the exponentially weighted infinity norm.
        torch._foreach_mul_(grouped_exp_infs, beta2)

        for exp_inf, grad in zip(grouped_exp_infs, grouped_grads):
            norm_buf = torch.cat(
                [exp_inf.unsqueeze(0), grad.abs().add_(eps).unsqueeze_(0)], 0
            )
            torch.max(norm_buf, 0, keepdim=False, out=(exp_inf, exp_inf.new().long()))

        bias_corrections = [1 - beta1 ** _get_value(step) for step in grouped_state_steps]
        clr = _stack_if_compiling([-1 * (lr / bc) for bc in bias_corrections])

        torch._foreach_addcdiv_(grouped_params, grouped_exp_avgs, grouped_exp_infs, clr)
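
# The `foreach` flag only selects between the two implementations above; a sketch of
# forcing either path explicitly (`params` is a hypothetical iterable of parameters):
#
#     opt_loop    = torch.optim.Adamax(params, foreach=False)  # uses _single_tensor_adamax
#     opt_foreach = torch.optim.Adamax(params, foreach=True)   # uses _multi_tensor_adamax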