
    $hW              #       r   d dl Z d dl mZ ddlmZmZmZmZmZmZm	Z	m
Z
mZmZ d dlmZmZ ddgZ G d de          Zd	d
e
 de de	 dz   e_        	 	 	 	 	 d"dee         dee         dee         dee         dee         dee         dedee         dededededededededef"dZdee         dee         dee         dee         dee         dee         dedededededededededef d Zdee         dee         dee         dee         dee         dee         dedededededededededef d!ZdS )#    N)Tensor   )
	Optimizer_use_grad_for_differentiable
_get_value_dispatch_sqrt_stack_if_compiling_capturable_doc_differentiable_doc_foreach_doc_default_to_fused_or_foreach_view_as_real)ListOptionalNAdamnadamc            
       t     e Zd Z	 	 dddddd	ed
ee         dedef fdZ fdZd Zedd            Z	 xZ
S )r   Mb`?g?g+?:0yE>r   Mbp?FN)foreach
capturabledifferentiabledecoupled_weight_decayr   r   r   c                   d|k    st          d|           d|k    st          d|           d|d         cxk    rdk     sn t          d|d                    d|d         cxk    rdk     sn t          d|d                    d|k    st          d	|           d|k    st          d
|           t          ||||||||	|
	  	        }t                                          ||           d S )N        zInvalid learning rate: zInvalid epsilon value: r         ?z#Invalid beta parameter at index 0: r   z#Invalid beta parameter at index 1: zInvalid weight_decay value: zInvalid momentum_decay value: )	lrbetasepsweight_decaymomentum_decayr   r   r   r   )
ValueErrordictsuper__init__)selfparamsr   r    r!   r"   r#   r   r   r   r   defaults	__class__s               Q/var/www/html/auto_sub_bot/venv/lib/python3.11/site-packages/torch/optim/nadam.pyr'   zNAdam.__init__
   sS    byy;r;;<<<czz<s<<===eAh$$$$$$$$M58MMNNNeAh$$$$$$$$M58MMNNNl""JLJJKKKn$$NnNNOOO2U%1./E 'J~_ _ _ 	*****    c                    t                                          |           | j        D ]Z}|                    dd            |                    dd           |                    dd           |                    dd           [t	          | j                                                  }t          |          dk    ot          j	        |d         d                   }|s;|D ]8}t          j
        t          |d                   t          j                  |d<   9t          |          dk    ot          j	        |d         d	                   }|s.|D ]-}t          j
        |d	         t          j                  |d	<   ,d S d S )
Nr   r   Fr   r   r   stepdtype
mu_product)r&   __setstate__param_groups
setdefaultliststatevalueslentorch	is_tensortensorfloatfloat32)r(   r7   groupstate_valuesstep_is_tensorsmu_product_is_tensorr+   s          r,   r3   zNAdam.__setstate__    s   U###& 	> 	>EY---\5111-u5555u====DJ--//00l++q0^eolSToV\F]6^6^ 	P! P P!Lqy)9)9OOO&		 #L 1 1Q 6jEOLYZO\hLi<j<j# 	U! U U"',qem"T"T"T,	U 	UU Ur-   c                    d}|d         D ]}	|	j         |t          j        |	          z  }|                    |	           |	j         j        rt          d          |                    |	j                    | j        |	         }
t          |
          dk    r|d         r&t          j        dt          j	        |	j
                  nt          j        dt          j	        	          |
d
<   |d         r&t          j        dt          j	        |	j
                  nt          j        dt          j	        	          |
d<   t          j        |	t          j                  |
d<   t          j        |	t          j                  |
d<   |                    |
d                    |                    |
d                    |                    |
d                    |                    |
d
                    |S )NFr)   z'NAdam does not support sparse gradientsr   r    )r1   devicer   r0   r/   r   r2   )memory_formatexp_avg
exp_avg_sq)gradr:   
is_complexappend	is_sparseRuntimeErrorr7   r9   zerosr>   rF   r<   ones
zeros_likepreserve_format)r(   r?   params_with_gradgradsexp_avgsexp_avg_sqsmu_productsstate_stepshas_complexpr7   s              r,   _init_groupzNAdam._init_group1   s   x 	2 	2Av!u/222 ''***6# R&'PQQQQV$$$
1u::?? !.[BemAHMMMM49LEM4Z4Z4Z &M !.[
2U]18LLLL49LEM4Z4Z4Z ,'
 (-'7I^'_'_'_E)$*/*:1ELa*b*b*bE,'i 0111""5#6777""5#6777""5=111r-   c                    |                                   d}|5t          j                    5   |            }ddd           n# 1 swxY w Y   | j        D ]}g }g }g }g }g }g }	|d         \  }
}|                     |||||||	          }t          ||||||	|
||d         |d         |d         |d         |d         |d         |d	         |d
         |           |S )zPerforms a single optimization step.

        Args:
            closure (Callable, optional): A closure that reevaluates the model
                and returns the loss.
        Nr    r   r"   r#   r!   r   r   r   r   )beta1beta2r   r"   r#   r!   r   r   r   r   rY   ) _cuda_graph_capture_health_checkr:   enable_gradr4   r[   r   )r(   closurelossr?   rS   rT   rU   rV   rW   rX   r]   r^   rY   s                r,   r/   z
NAdam.stepT   sr    	--///"$$ ! !wyy! ! ! ! ! ! ! ! ! ! ! ! ! ! ! & 	+ 	+E!EHKKK >LE5**52BE8U`bmoz{{K"4[$^4!&'7!8El)./G)H	*"<0!&'7!8)!+ + + + +$ s   AA
A)r   r   r   r   r   FN)__name__
__module____qualname__boolr   r'   r3   r[   r   r/   __classcell__)r+   s   @r,   r   r   	   s        @DUZ+.2u(-+ + +NR+%d^+@D+ "&+ + + + + +,U U U U U"! ! !F "+ + + "!+ + + + +r-   a  Implements NAdam algorithm.

    .. math::
       \begin{aligned}
            &\rule{110mm}{0.4pt}                                                                 \\
            &\textbf{input}      : \gamma_t \text{ (lr)}, \: \beta_1,\beta_2 \text{ (betas)},
                \: \theta_0 \text{ (params)}, \: f(\theta) \text{ (objective)}                   \\
            &\hspace{13mm} \: \lambda \text{ (weight decay)}, \:\psi \text{ (momentum decay)}    \\
            &\hspace{13mm} \: \textit{decoupled\_weight\_decay}                                  \\
            &\textbf{initialize} :  m_0 \leftarrow 0 \text{ ( first moment)},
                v_0 \leftarrow 0 \text{ ( second moment)}                                 \\[-1.ex]
            &\rule{110mm}{0.4pt}                                                                 \\
            &\textbf{for} \: t=1 \: \textbf{to} \: \ldots \: \textbf{do}                         \\
            &\hspace{5mm}g_t           \leftarrow   \nabla_{\theta} f_t (\theta_{t-1})           \\
            &\hspace{5mm} \theta_t \leftarrow \theta_{t-1}                                       \\
            &\hspace{5mm} \textbf{if} \: \lambda \neq 0                                          \\
            &\hspace{10mm}\textbf{if} \: \textit{decoupled\_weight\_decay}                       \\
            &\hspace{15mm} \theta_t \leftarrow \theta_{t-1} - \gamma \lambda \theta_{t-1}                    \\
            &\hspace{10mm}\textbf{else}                                                          \\
            &\hspace{15mm} g_t \leftarrow g_t + \lambda \theta_{t-1}                             \\
            &\hspace{5mm} \mu_t \leftarrow \beta_1 \big(1 - \frac{1}{2}  0.96^{t \psi} \big)     \\
            &\hspace{5mm} \mu_{t+1} \leftarrow \beta_1 \big(1 - \frac{1}{2} 0.96^{(t+1)\psi}\big)\\
            &\hspace{5mm}m_t           \leftarrow   \beta_1 m_{t-1} + (1 - \beta_1) g_t          \\
            &\hspace{5mm}v_t           \leftarrow   \beta_2 v_{t-1} + (1-\beta_2) g^2_t          \\
            &\hspace{5mm}\widehat{m_t} \leftarrow \mu_{t+1} m_t/(1-\prod_{i=1}^{t+1}\mu_i)\\[-1.ex]
            & \hspace{11mm} + (1-\mu_t) g_t /(1-\prod_{i=1}^{t} \mu_{i})                         \\
            &\hspace{5mm}\widehat{v_t} \leftarrow   v_t/\big(1-\beta_2^t \big)                   \\
            &\hspace{5mm}\theta_t \leftarrow \theta_t - \gamma \widehat{m_t}/
                \big(\sqrt{\widehat{v_t}} + \epsilon \big)                                       \\
            &\rule{110mm}{0.4pt}                                                          \\[-1.ex]
            &\bf{return} \:  \theta_t                                                     \\[-1.ex]
            &\rule{110mm}{0.4pt}                                                          \\[-1.ex]
       \end{aligned}

    For further details regarding the algorithm we refer to `Incorporating Nesterov Momentum into Adam`_.
    a  
    Args:
        params (iterable): iterable of parameters to optimize or dicts defining
            parameter groups
        lr (float, optional): learning rate (default: 2e-3)
        betas (Tuple[float, float], optional): coefficients used for computing
            running averages of gradient and its square (default: (0.9, 0.999))
        eps (float, optional): term added to the denominator to improve
            numerical stability (default: 1e-8)
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
        momentum_decay (float, optional): momentum momentum_decay (default: 4e-3)
        decoupled_weight_decay (bool, optional): whether to use decoupled weight
            decay as in AdamW to obtain NAdamW (default: False)
        z	
        z

    .. _Incorporating Nesterov Momentum into Adam:
        https://openreview.net/forum?id=OM0jvwB8jIp57ZJjtNEZ
    .. _Decoupled Weight Decay Regularization:
        https://arxiv.org/abs/1711.05101

    Fr)   rT   rU   rV   rW   rX   r   r   r   r   rY   r]   r^   r   r"   r#   r!   c                   t          d |D                       st          d          t          d |D                       st          d          |t          | |	d          \  }}|r-t          j                                        rt          d          |r&t          j                                        st          }nt          } || ||||||||||||||	|
	           dS )
zpFunctional API that performs NAdam algorithm computation.

    See :class:`~torch.optim.NAdam` for details.
    c              3   J   K   | ]}t          |t          j                  V  d S rc   
isinstancer:   r   .0ts     r,   	<genexpr>znadam.<locals>.<genexpr>   .      @@qz!U\**@@@@@@r-   zPAPI has changed, `state_steps` argument must contain a list of singleton tensorsc              3   J   K   | ]}t          |t          j                  V  d S rc   rk   rm   s     r,   rp   znadam.<locals>.<genexpr>   rq   r-   zPAPI has changed, `mu_products` argument must contain a list of singleton tensorsNF)	use_fusedz6torch.jit.script not supported with foreach optimizers)
r]   r^   r   r"   r#   r   r!   r   r   rY   )allrN   r   r:   jitis_scripting_multi_tensor_nadam_single_tensor_nadam)r)   rT   rU   rV   rW   rX   r   r   r   r   rY   r]   r^   r   r"   r#   r!   _funcs                      r,   r   r      s)   4 @@K@@@@@ omnnn@@K@@@@@ omnnn1&.TYZZZ
7 U59))++ USTTT $uy--// $"#D					"& 6& " " " " " "r-   c       
   
         t          |           D ]\  }}||         }||         }||         }||         }||         }t          j        |          rPt          j        |          }t          j        |          }t          j        |          }t          j        |          }t          j                                        s6|r4|j        r|j        r|j        s|j        r|j        r|j        s
J d            |dz  }|r|}nt          |          }d||z  z
  }|	dk    r5|r|	                    d||	z  z
             n|
                    ||	          }|ddd||
z  z  z  z
  z  }|ddd|dz   |
z  z  z  z
  z  }||z  }|                    |d|z
             |	                    |                              ||d|z
             |                    |                                          }|s|ri|
                    |          }||z  }|| d|z
  z  d|z
  z  z  }|| |z  d|z
  z  z  }|                    ||           |                    ||           Gt          |          |z  }|                    |           |                    ||| d|z
  z  dt          |          z
  z             |                    ||| |z  d|z
  z             d S )	NzUIf capturable=True, params, mu_products, and state_steps must be CUDA or XLA tensors.r   r   alphar         ?Q?)value)	enumerater:   rK   view_as_real_utilsis_compilingis_cudais_xlar   mul_addlerp_addcmul_divsqrtaddcdiv_add_)r)   rT   rU   rV   rW   rX   r]   r^   r   r"   r#   r!   r   r   r   rY   iparamrJ   rH   rI   r2   step_tr/   bias_correction2mumu_nextdenommu_product_nexts                                r,   rx   rx      sp   $ f%% >[ >[5Qx1+ ^
 ^
QE"" 	8&u--E%d++D(11G+J77J |((** 	gz 	gg#-#5g:@.gNSlg_i_pgu{  vCg gfg g D 	! 	&DDf%%Du},1% ;

1rL001111xx\x:: b3$4.+@"ABBC2t^0K'L MMN 	b
 	dAI&&&''d!e)'DDD/005577 	[Z 	[IIcNNE )72OB3"r'?b:o>?D"w"2F!GHGNN4'''NN7E****(44w>OJJsOOONN4sb2g"zR\G]G]B]/^N```NN7E2#-BDX1YNZZZZ}>[ >[r-   c       
         	  
 t          |           dk    rd S |r
J d            t          j                                        s4|r2t	          d t          | ||          D                       s
J d            t          j        | |||||g          }|                                D ]#\  \  }}}}}}}|rt          ||||           |d         j
        r,t          j        |t          j        dd          d           nt          j        |d	           |	dk    r5|rt          j        |d	|	z  z
             nt          j        |||	          }t          j        ||d	z
             t          j        |           t          j        |||d	z
             t          j        |          }|r&t          j        |
          }t          j        d
|          }t          j        |d           t          j        |d           t          j        |           t          j        |
           t          j        d
|          }t          j        |d           t          j        |d           t          j        |           ~t          j        |          }t          j        |d           t          j        |           t          j        |           n,fd|D             }
fd|D             }
fd|D             }t          j        ||           t          j        ||           t          j        ||           ~|rt          j        |d           t          j        |           t          j        |d          }t          j        |           t          j        ||           |}~t          j        ||          }t          j        |           t          j        |d           t          j        ||           |}~t          j        ||          } t          j        | ||           t          j        || |           t5          fdt          ||          D                       }t5          fdt          ||          D                       }t          j        ||||           t          j        ||||           %d S )Nr   z#_foreach ops don't support autogradc              3   H   K   | ]\  }}}|j         o|j         o|j         V  d S rc   )r   )rn   rZ   mpr/   s       r,   rp   z&_multi_tensor_nadam.<locals>.<genexpr>g  sT       M M"q"d 9<< M M M M M Mr-   zNIf capturable=True, params, mu_products, and state_steps must be CUDA tensors.r   cpu)rF   r|   r   r   g      c           	      T    g | ]$}t          d t          |          z  z
            %S )r   )r   r   )rn   r/   r^   s     r,   
<listcomp>z'_multi_tensor_nadam.<locals>.<listcomp>  s4    #r#r#rVZN1u
4@P@P7P3P$Q$Q#r#r#rr-   c           	      L    g | ] }d ddt          |          z  z  z  z
  z  !S )r   r~   r   r   rn   r/   r]   r#   s     r,   r   z'_multi_tensor_nadam.<locals>.<listcomp>  s;    vvvZ^5BD1A1AN1R(S!TTUvvvr-   c           	      R    g | ]#}d ddt          |          dz   z  z  z  z
  z  $S )r   r~   r   r   r   r   s     r,   r   z'_multi_tensor_nadam.<locals>.<listcomp>  sO     9 9 9  cTz$7G7G!7K~6]-^&_!_` 9 9 9r-   c                 R    g | ]#\  }}d |z
  z  d t          |          z
  z  dz  $S r   r   )rn   r2   r   r   s      r,   r   z'_multi_tensor_nadam.<locals>.<listcomp>  sT     3h 3h 3h7Ez2 57"r'Nb:V`KaKaFa4bfh3h 3h 3h 3hr-   c                 R    g | ]#\  }}|z  d t          |          |z  z
  z  dz  $S r   r   )rn   r2   r   r   s      r,   r   z'_multi_tensor_nadam.<locals>.<listcomp>  sV     4s 4s 4s8K
G 68'\R*U_J`J`cjJjEj5koq4q 4s 4s 4sr-   )r9   r:   r   r   rt   zipr   "_group_tensors_by_device_and_dtyper8   r   is_cpu_foreach_add_r<   _foreach_mul__foreach_add_foreach_lerp__foreach_addcmul__foreach_sqrt_foreach_mul_foreach_pow_foreach_sub__foreach_neg__foreach_sqrt__foreach_div__foreach_sub_foreach_addcdiv_r	   )!r)   rT   rU   rV   rW   rX   r]   r^   r   r"   r#   r!   r   r   r   rY   grouped_tensorsgrouped_paramsgrouped_gradsgrouped_exp_avgsgrouped_exp_avg_sqsgrouped_mu_productsgrouped_state_stepsry   exp_avg_sq_sqrtexponentmusmu_nextsbias_correction_sqrtr   step_size_gradsstep_size_expavg	numerators!         ``` `                      r,   rw   rw   N  s   $ 6{{aDDDDD <$$&& ]: ] M M&)&+{&K&KM M M M M 	] 	]\	] 	] M
  BFES[]hju  xC  DD  E  EOO^OeOeOgOgli li 	L 
H.-)9	13F  	`.-9IK^___ q!( 	8 3U\#e5T5T5T\_````` 3Q7771% f#NA\8I4IJJJJ % 2=.Xd e e e 	-}a%iHHH/777 3]MSTW\S\]]]-.ABB 	9)*=~NNH$T844CT***S)))U+++ .999)$99H$///#...%000 #(#5e=P#Q#Q  4c::: 4555 !56666#r#r#r#r^q#r#r#r vvvvvbuvvvC9 9 9 9 9$79 9 9H 	/555O-ABBBOS111 ! '	iS)))R(((&':C@@E&&&U+++!O &':HEEE"--- s+++%000' *?MJJI#I/?AQRRR #NIOOOO1 3h 3h 3h 3hILM`beIfIf3h 3h 3h i iO2 4s 4s 4s 4sORSfhpOqOq4s 4s 4s  t  t #NM?Tcddd#N4DoWghhhhYli lir-   )FNFFF)r:   r   	optimizerr   r   r   r   r	   r
   r   r   r   r   typingr   r   __all__r   __doc__rg   r=   r   rx   rw   rE   r-   r,   <module>r      s          y y y y y y y y y y y y y y y y y y y y y y y y ! ! ! ! ! ! ! !G
w w w w wI w w wr#F 
  
  
  G9H */$("!&#:" :"$v, :"f:"L:" F|:" F|	:"
 F|:" #':" D>:" :" :" :" :" :"  !:"" #:"$  %:"& ':" :" :" :"zP[f P[ $VP[#'<P[ '+6lP[ '+6l	P[
 '+6lP[ !&P[ !&P[ #P[ (-P[ */P[ $P[ 26P[ &*P[ *.P[  '+!P[ P[ P[ P[fKiV Ki#F|Ki"&v,Ki &*&\Ki &*&\	Ki
 &*&\Ki  %Ki  %Ki "Ki ',Ki ).Ki #Ki 15Ki %)Ki )-Ki  &*!Ki Ki Ki Ki Ki Kir-   