
    Vhe1                        d Z ddlZddlZddlZddlZddlmZ ddlm	Z	 ddl
mZ ddlmZ ddlmZ ddlmZ ddlZd	d
lmZ d	dlmZmZ d	dlmZmZ d dej        eej        f         defdZ G d de          Z  G d de	          Z! G d de	          Z" G d d          Z#dddddZ$ G d d          Z%d Z& G d de          Z'dS )!z
Support for PyTorch lightning. You should just replace the call
to `Trainer(...)` with `get_trainer(...)`.
For using `dora.log.LogProgress` as a progress bar with PL, see `PLLogProgress`.
    N)LightningModule)Callback)ProgressBarBase)ClusterEnvironment)Trainer)from_argparse_args   )distrib)get_xpis_xp)boldLogProgressTmetricsepochc                    i }|                                  D ]\  }}|r|                    d          r|s|                    d          r5|                    d          s|                    d          r|                    dd          d         }t          |t          j                  r,|                                dk    r|                                }|||<   |S )a  Filters metrics before formatting, in particular to remove the `_step` or `_epoch`
    suffix. This will also convert torch tensors to float.
    Args:
        metrics: dict given by PL.
        epoch: if True, keep only epoch level metrics, otherwise, keep only step level metrics.
    _step_epoch_r	   r   )itemsendswithrsplit
isinstancetorchTensornumelitem)r   r   outkeyvalues        T/var/www/html/movieo_spanner_bot/venv/lib/python3.11/site-packages/dora/lightning.py_filter_metricsr!      s     Cmmoo 	 	
U 	S\\'** 	 	h// 	<<   	(CLL$:$: 	(**S!$$Q'CeU\** 	!u{{}}/A/AJJLLECJ    c                        e Zd Z fdZdefdZedefd            ZdefdZ	deddfdZ
defd	Zd
eddfdZdefdZdefdZedefd            Zedefd            Zedefd            Z xZS )DoraEnvironmentc                     t                                                       t          j                    | _        t          j                     d S N)super__init__r
   get_distrib_specspecset_distrib_envself	__class__s    r    r(   zDoraEnvironment.__init__4   s>    ,..	!!!!!r"   returnc                     dS NT r-   s    r    creates_childrenz DoraEnvironment.creates_children9   s    tr"   c                     dS r1   r2   r3   s    r    creates_processes_externallyz,DoraEnvironment.creates_processes_externally<   s    tr"   c                     | j         j        S r&   )r*   
world_sizer3   s    r    r8   zDoraEnvironment.world_size@       y##r"   sizeNc                     d S r&   r2   )r-   r:   s     r    set_world_sizezDoraEnvironment.set_world_sizeC       r"   c                     | j         j        S r&   )r*   rankr3   s    r    global_rankzDoraEnvironment.global_rankF   s    y~r"   r?   c                     d S r&   r2   )r-   r?   s     r    set_global_rankzDoraEnvironment.set_global_rankI   r=   r"   c                     | j         j        S r&   )r*   
local_rankr3   s    r    rD   zDoraEnvironment.local_rankL   r9   r"   c                     | j         j        S r&   )r*   	node_rankr3   s    r    rF   zDoraEnvironment.node_rankO   s    y""r"   c                      dS NFr2   r2   r"   r    detectzDoraEnvironment.detectR   s    ur"   c                 &    t           j        d         S )N	MAIN_ADDR)osenvironr3   s    r    main_addresszDoraEnvironment.main_addressV   s    z+&&r"   c                 @    t          t          j        d                   S )N	MAIN_PORT)intrL   rM   r3   s    r    	main_portzDoraEnvironment.main_portZ   s    2:k*+++r"   )__name__
__module____qualname__r(   boolr4   propertyr6   rQ   r8   r<   r@   rB   rD   rF   staticmethodrI   strrN   rR   __classcell__r.   s   @r    r$   r$   3   s       " " " " "
$     d    X$C $ $ $ $3 4    S    C D    $C $ $ $ $#3 # # # # D    \ 'c ' ' ' X' ,3 , , , X, , , , ,r"   r$   c                   $    e Zd ZdZd Zd Zd ZdS )DoraCheckpointSyncz>Make sure Dora history, and checkpoint state are in sync.
    c                 ,    t                      | _        d S r&   )r   xpr3   s    r    r(   zDoraCheckpointSync.__init__b   s    ((r"   c                 T    |d         }| j         j                            |           d S )Ndora_link_history)r_   linkupdate_history)r-   trainer	pl_module
checkpointhistorys        r    on_load_checkpointz%DoraCheckpointSync.on_load_checkpointe   s+    01##G,,,,,r"   c                 j    | j         j        j        |d<   | j         j        |d<   | j         j        |d<   |S )Nra   dora_sigdora_cfg)r_   rb   rg   sigcfgr-   rd   re   rf   s       r    on_save_checkpointz%DoraCheckpointSync.on_save_checkpointi   s6    *.',*>
&'!%
:!%
:r"   N)rS   rT   rU   __doc__r(   rh   ro   r2   r"   r    r]   r]   _   sK           - - -    r"   r]   c                   4     e Zd ZdZ fdZd Zd Zd Z xZS )DoraHistoryLoggerz,Save metrics to Dora using the XP link.
    c                 x    t                                                       t                      j        | _        d S r&   )r'   r(   r   rb   r,   s    r    r(   zDoraHistoryLogger.__init__s   s*    HHM			r"   c                     d| _         d S r1   _first_validr-   rd   re   s      r    on_fit_startzDoraHistoryLogger.on_fit_startw   s     r"   c                     d| _         d S rH   ru   rw   s      r    on_train_epoch_startz&DoraHistoryLogger.on_train_epoch_startz   s    !r"   c                 |    | j         rd S |j        }t          |d          }| j                            |           d S )NTr   )rv   logged_metricsr!   rb   push_metrics)r-   rd   re   r   s       r    on_epoch_endzDoraHistoryLogger.on_epoch_end}   sH     	F(!'666	w'''''r"   )	rS   rT   rU   rp   r(   rx   rz   r   rZ   r[   s   @r    rr   rr   p   so         " " " " "! ! !" " "( ( ( ( ( ( (r"   rr   c                       e Zd Zd ZdS )_DummySLURMConnectorc                     d S r&   r2   r3   s    r    register_slurm_signal_handlersz3_DummySLURMConnector.register_slurm_signal_handlers   r=   r"   N)rS   rT   rU   r   r2   r"   r    r   r      s#            r"   r   )auto_resumeadd_dora_loggerno_unfinished_epochsc                 ~   t                      st          d          t          j        }t	          |d          r|j        }t	          |d          t          j        |dgt          |          z   fi |}|d= |	                    d          pg }t                      }t          t          j                                        |                                          }|                                dk    r||dgz  }||d<   |	                    dd          pg }	|	                    t#                                 |	|d<   |d	         t          d
          |d         dk    rt          d          ||d	<   |j        j        |d<   t)                      j        |d<   |r'|d                             t-                                 |                    d          }
| rC|
At)                      j        dz  }|                                rt3          |          }nd}||d<   t          di |}|rt5                      |_        |S )a  Return a PL trainer, adding the necessary glue code to make everything works.
    The arguments are exactly the same as for `pytorch_lightning.trainer.Trainer`,
    with a few extras documented after.

    ..note:: You should not pass `gpus=` or `num_nodes=` arguments as those will be filled by Dora.

    Args:
        auto_resume (bool): if True, automatically resume previous checkpoints.
            You are still responsible for creating the `ModelCheckpoint` callback,
            this only handles the `resume_from_checkpoint` part.
        add_dora_logger (bool): if True, adds a Dora Logger to automatically
            forward the metrics (those logged with per_epoch=True), otherwise
            pushing metrics will be up to you.
        no_unfinished_epochs (bool): if True, deactivates SLURM signal handling
            by PL, which can result in half finished epoch with each interruption.
            It is recommended to instead dump a checkpoint every epoch and resume
            from that one so that training is reliable.

    z.This can only be called from inside a Dora XP.__wrapped__Nr-   pluginsr	   ddp	callbacksgpuszCYou cannot specify the number of GPUs, as this is provided by Dora.	num_nodeszDYou cannot specify the number of nodes, as this is provided by Dora.default_root_dirresume_from_checkpointz	last.ckptr2   )r   RuntimeErrorr   r(   hasattrr   inspectgetcallargslistpopr$   minr   cudadevice_countr8   appendr]   r*   r   r   folderrr   getis_filerY   r   slurm_connector)r   r   r   argskwargsinitr   envr   r   r   lastresumerd   s                 r    get_trainerr      sM   * 77 MKLLL D
$
&
&   $
&
&   vT

':EEfEEFvjj##)rG


Cuz&&((#..*:*:;;D
~~!C<F9

;--3I'))***#F;f~!`aaakaabbbF6N(,F;!'F 8{""#4#6#6777#ZZ(@AA 2-5xx,<<>> 	YYFFF+1'(G 9"6"8"8Nr"   c                   J    e Zd Z ej        ej                  d             ZdS )
_Interceptc                 "    || _         || _        d S r&   )r   r   )r-   r   r   s      r    r(   z_Intercept.__init__   s    	r"   N)rS   rT   rU   	functoolswrapsr   r(   r2   r"   r    r   r      s=        Y_W%&&  '&  r"   r   c                 T    t          t          | fi |}t          |j        i |j        S r&   )r   r   r   r   r   )r   r   	intercepts      r    trainer_from_argparse_argsr      s1    ":t>>v>>I	;)*:;;;r"   c                        e Zd ZdZd fdZdeddf fdZ fdZede	fd            Z
	 dd
ej        eej        f         dedefdZed             Zd Z fdZ fdZd Z fdZ fdZd Zd Z fdZ fdZ fdZd Zd Zd Z xZS )PLLogProgressz<`dora.log.LogProgress` support for Pytorch-Lightning.


    r/   Nc                 r    t                                                       || _        || _        d | _        d S r&   )r'   r(   loggerr   
_pl_module)r-   r   r   r.   s      r    r(   zPLLogProgress.__init__   s2    8<r"   stagec                 j    t                                          |||           || _        g | _        d S r&   )r'   setupr   _replay_history)r-   rd   re   r   r.   s       r    r   zPLLogProgress.setup   s2    gy%000#02r"   c                 h    t                                          ||           d| _        d| _        d S )NFT)r'   rx   	_in_trainrv   r-   rd   re   r.   s      r    rx   zPLLogProgress.on_fit_start   s2    Wi000 r"   c                 "    | j         J | j         S r&   )r   r3   s    r    re   zPLLogProgress.pl_module   s    ***r"   Fr   r   c                     i }|                                 D ]-\  }}t          |t                    rt          |d          ||<   .|S )ag  Default method to format metrics for displaying in the progress bar.
        To customize, you can define a `format_metrics()` method on your
        Lightning module.

        Args:
            metrics: dict of metrics given by PL.
            stage: "train" or "valid".
            epoch: if True, provided metrics are for the end of epoch summary.
        z.5f)r   r   floatformat)r-   r   r   r   r   r   r   s          r    format_metricszPLLogProgress.format_metrics   sO     !--// 	0 	0JC%'' 0!%//C
r"   c                 8    t          | j        d| j                  S )Nr   )getattrre   r   r3   s    r    _format_metricszPLLogProgress._format_metrics  s    t~'79LMMMr"   c                    | j                             d           | j                             |dk    rdnd           |                                d| j        j        dz    z   }|dk    rt          | j                  }n-|dk    rt          | j                  }nt          d|           t          |          }t          | j         |f||d	| j        | _        t          | j                   d S )
NzF----------------------------------------------------------------------trainzTraining...zValidating...z	 | Epoch r	   validzInvalid stage )totalname)r   info
capitalizerd   current_epochrQ   total_train_batchestotal_val_batchesr   ranger   r   logprogiter)r-   r   r   r   loaders        r    _on_epoch_startzPLLogProgress._on_epoch_start  s    """%7*:*:PPP!!$P0JQ0N$P$PPG011EEg.//EE777888u"4;^e$^^RVR]^^T\r"   c                     |                      d           d| _        d| _        t                                          ||          S )Nr   TF)r   r   rv   r'   rz   r   s      r    rz   z"PLLogProgress.on_train_epoch_start  s@    W%%%!ww++GY???r"   c                 r    |                      d           t                                          ||          S Nr   )r   r'   on_validation_epoch_startr   s      r    r   z'PLLogProgress.on_validation_epoch_start$  s1    W%%%ww00)DDDr"   c                     |                      | j        | j                  }t          |d          }|                     ||d          } | j        j        di | t          | j                   d S )NFr|   r2   )get_metricsrd   re   r!   r   r   updatenext)r-   r   r   	formatteds       r    _on_batch_endzPLLogProgress._on_batch_end(  sx    ""4<@@!'777((%u(EE	((i(((T\r"   c                 d     t                      j        |i | |                     d           d S )Nr   )r'   on_train_batch_endr   r-   r   r   r.   s      r    r   z PLLogProgress.on_train_batch_end/  s9    ""D3F3337#####r"   c                 d     t                      j        |i | |                     d           d S r   )r'   on_validation_batch_endr   r   s      r    r   z%PLLogProgress.on_validation_batch_end3  s9    ''88887#####r"   c                 >   |dk    r0| j         j        j        j                            d          d         }n4| j         j        j        j        j                            d          d         }t          |d          }|                     || j         j        |           d S )Nr   Flogr|   )	rd   fit_loop
epoch_loop_resultsr   val_loopr!   _show_epoch_summaryr   )r-   r   r   s      r    _on_stage_endzPLLogProgress._on_stage_end7  s    Gl+6?GGNNuUGGl+6?HPPQVWWX]^G!'777  (BGLLLLLr"   c           	      ^   | j                             |||f           |                     ||d          }|                                }d                    d |                                D                       }| j                            t          | d|dz    d|                      d S )NTr|   z | c              3   N   K   | ] \  }}|                                  d | V  !dS )=N)r   ).0r   vals      r    	<genexpr>z4PLLogProgress._show_epoch_summary.<locals>.<genexpr>D  sO       
 
,4Cs~~''#''
 
 
 
 
 
r"   z Summary | End of Epoch r	   )	r   r   r   r   joinr   r   r   r   )r-   r   r   r   r   r   summarys          r    r   z!PLLogProgress._show_epoch_summary@  s    ##UE7$;<<<((%t(DD	!!** 
 
8A8I8I
 
 
 
 
 	VVuqyVVWVVWWXXXXXr"   c                     t                                          ||           | j        s	| j        sJ | j        s|                     d           d| _        d S d S Nr   F)r'   on_train_endr   rv   r   r   s      r    on_validation_startz!PLLogProgress.on_validation_startI  sg    Wi000~2!2222  	#w'''"DNNN	# 	#r"   c                     t                                          ||           | j        r|                     d           d| _        d S r   )r'   r   r   r   r   s      r    r   zPLLogProgress.on_epoch_endP  sF    Wi000> 	(w'''r"   c                 v    t                                          ||           |                     d           d S r   )r'   on_validation_endr   r   s      r    r   zPLLogProgress.on_validation_endV  s7    !!'95557#####r"   c                     d S r&   r2   r3   s    r    disablezPLLogProgress.disableZ  s	     	r"   c                     |                     dg           }|r| j                            d           |D ]} | j        |  d S )Ndora_replay_historyzReplaying past metrics...)r   r   r   r   )r-   rd   re   rf   replay_historysteps         r    rh   z PLLogProgress.on_load_checkpoint_  sa    #(=rBB 	:K8999" 	, 	,D$D$d+++	, 	,r"   c                     | j         |d<   |S )Nr   )r   rn   s       r    ro   z PLLogProgress.on_save_checkpointf  s    ,0,@
()r"   )r/   N)F) rS   rT   rU   rp   r(   rY   r   rx   rW   r   re   tpDictAnyrV   r   r   r   rz   r   r   r   r   r   r   r   r   r   r   rh   ro   rZ   r[   s   @r    r   r      s;        
= = = = = =3s 3t 3 3 3 3 3 3
! ! ! ! !
 ?    X
 27 bgc26k&: !*.   " N N XN  @ @ @ @ @E E E E E  $ $ $ $ $$ $ $ $ $M M MY Y Y# # # # #    $ $ $ $ $  
, , ,      r"   r   )T)(rp   r   r   rL   typingr   pytorch_lightningr   pytorch_lightning.callbacksr   $pytorch_lightning.callbacks.progressr   &pytorch_lightning.plugins.environmentsr   pytorch_lightning.trainerr   $pytorch_lightning.utilities.argparser   r    r
   r_   r   r   r   r   r   r   rY   r   rV   r!   r$   r]   rr   r   r   r   r   r   r2   r"   r    <module>r     sl   
      				     - - - - - - 0 0 0 0 0 0 @ @ @ @ @ @ E E E E E E - - - - - - C C C C C C                " " " " " " " " RWS"&[1 $    *), ), ), ), ),( ), ), ),X       "( ( ( ( ( ( ( (,        $(TX C C C C CL       < < <
I I I I IO I I I I Ir"   