
    #Sh;                        d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Zd dlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ  G d	 d
          Z G d de          Z	 	 d$dej        ej        ej                          dej        ej        eef                  dedej        ej        ej                          fdZd Z  G d d          Z!dededej"        e#ej$        e         f         fdZ%dde%fdej        ej        ej                          dede&dej'        ddf
dZ(ej)        d%d ej        e#         dej        d         fd!            Z* G d" d#          Z+dS )&    N)Path   )core)JobEnvironment)CommandFunctionDelayedSubmission)environment_variablesc                   J     e Zd ZdZ fdZdej        dej        defdZ xZ	S )Checkpointablea#  Derived callable classes are requeued after timeout with their current
    state dumped at checkpoint.

    __call__ method must be implemented to make your class a callable.

    Note
    ----
    The following implementation of the checkpoint method resubmits the full current
    state of the callable (self) with the initial argument. You may want to replace the method to
    curate the state (dump a neural network to a standard format and remove it from
    the state so that not to pickle it) and change/remove the initial parameters.
    c                     t                                          |           }t          |          sJ d| j         d            |S )NzClass z^ is marked as Checkpointable but doesn't have a __call__ method. Please add a __call__ method.)super__new__callable__name__)clsargskwargsinstance	__class__s       V/var/www/html/movieo_spanner_bot/venv/lib/python3.11/site-packages/submitit/helpers.pyr   zCheckpointable.__new__,   sh    77??3''
 
 	A 	A ACL  A  A  A	A 	A 
     r   r   returnc                 "    t          | g|R i |S )z3Resubmits the same callable with the same argumentsr   )selfr   r   s      r   
checkpointzCheckpointable.checkpoint3   s"     !7777777r   )
r   
__module____qualname____doc__r   tpAnyr	   r   __classcell__)r   s   @r   r   r      sp             8 8"& 8=N 8 8 8 8 8 8 8 8r   r   c                       e Zd ZdZddeddfdZdej        dej        f         d	ej        d
ej        ddfdZ	de
fdZdej        e         fdZdej        ej                 fdZdS )FunctionSequencea  This is for gathering several estimations into one function, which
    will return the sequence of outputs.
    Also this "function" is stateful, hence it can be stopped, and recovered,
    which is useful when job can be preempted.

    Usage
    -----
    func = FunctionSequence()
    func.add(my_function1, arg1, kwarg1=value_kwarg1)
    func.add(my_function2, arg1, arg2)
    result1, result2 = func()

    Note
    ----
    This function is checkpointable because:
    - it derives from Checkpointable
    - it keeps DelayedSubmission objects as attribute, which in turn store the
      results of the computation in memory once they are computed. So at checkpoint
      time, those results will be saved, and only the non-computed results
      will be computed once the job restarts.
    Fverboser   Nc                 "    || _         g | _        d S N)r%   delayed_functions)r   r%   s     r   __init__zFunctionSequence.__init__Q   s    =?r   func.r   r   c                 V    | j                             t          |g|R i |           d S r'   )r(   appendr	   )r   r*   r   r   s       r   addzFunctionSequence.addU   s8    %%&7&Nt&N&N&Nv&N&NOOOOOr   c                 *    t          | j                  S r'   )lenr(   r   s    r   __len__zFunctionSequence.__len__X   s    4)***r   c                 *    t          | j                  S r'   )iterr(   r0   s    r   __iter__zFunctionSequence.__iter__[   s    D*+++r   c                     | j         rBt          d | D                       }t          d| dt          | j                   d           d | j        D             S )Nc              3   >   K   | ]}|                                 V  d S r'   )done.0fs     r   	<genexpr>z,FunctionSequence.__call__.<locals>.<genexpr>`   s*      ..Aqvvxx......r   zStarting from /Tflushc                 6    g | ]}|                                 S  )resultr8   s     r   
<listcomp>z-FunctionSequence.__call__.<locals>.<listcomp>b   s-     
 
 
AHHJJ
 
 
r   )r%   sumprintr/   r(   )r   r7   s     r   __call__zFunctionSequence.__call__^   s|    < 	U.......DG4GG#d.D*E*EGGtTTTT
 
 $ 6
 
 
 	
r   F)r   r   r   r   boolr)   r    Callabler!   r-   intr1   Iteratorr	   r4   ListrE   r@   r   r   r$   r$   :   s         ,@ @ @ @ @ @ @PCK0 P P26 PVZ P P P P+ + + + +,"+&78 , , , ,
"'"&/ 
 
 
 
 
 
r   r$   
   jobstimeoutpoll_frequencyr   c              #     K   t          j                     }t                      }	 |!t          j                     |z
  |k    rt          t          |           D ]7\  }}||v r
|                                r|                    |           |V  8t          |          t          |           k    rdS t          j        |           )u8  
    Yields jobs as they complete (finished, failed or were cancelled).
    Raises a TimeoutError if the result isn’t available after timeout seconds.
    timeout can be an int or float. If timeout is not specified or None, there is no
    limit to the wait time.

    Parameters
    ----------
    jobs: list
        Jobs instances

    timeout: int/float
        Maximum time (in sec) to wait for jobs completion

    poll_frequency: float
        Frequency in second at which we check job status.

    Yields
    ------
    Job
        The next completed job
    TN)timesetTimeoutError	enumerater7   r-   r/   sleep)rM   rN   rO   start	jobs_doneijobs          r   as_completedrZ   g   s      6 IKKE UUI#49;;#6#@#@oo 	 	FAsI~~xxzz a   			y>>SYY&&E
>"""#r   c                 p    t          j        | fi |                    d                                          S )Nzutf-8)
subprocesscheck_outputdecodestrip)str_argsr   s     r   run_cmdra      s5    "866v66==gFFLLNNNr   c                       e Zd ZdZ	 	 	 	 ddedej        e         dedej        e	         d	ej        e	         f
d
Z
eddedefd            ZddZd ZdS )RsyncSnapshota  Takes a snapshot of the git repository that the script lives in.

    This ensures that remote jobs always use the code from when they are scheduled
    and not the code from when they are launched / re-started.


    Parameters
    ----------
    snapshot_dir: Path
        A path to where the snapshot should be created
    with_submodules: bool
        Whether or not submodules should be included in the snapshot
    exclude: Sequence[str]
        An optional list of patterns to exclude from the snapshot
    include: Sequence[str]
        A list of relative file names to include from the snapshot.
        Useful for .so or other build artifacts that are genarally not tracked by git.

    Note
    ----
    - Only files that are checked in to the repository are included in the snapshot.
        If you have experimental code that you would like to include in the snapshot,
        you'll need to `git add` the file first for it to be included, or use `include` arg.
    NFr@   snapshot_dirroot_dirwith_submodulesexcludeincludec                     |                      d           t          |          | _        |pt          g d          | _        t          j                    | _        || _        || _        || _	        d S )NTthrow)gitz	rev-parsez--show-toplevel)
	availabler   rd   ra   re   cwdoriginal_dirrf   rg   rh   )r   rd   re   rf   rg   rh   s         r   r)   zRsyncSnapshot.__init__   sk     	T""" .. TG,S,S,S$T$T HJJ.r   rk   r   c                 T    t          j        d          s| rt          d          dS dS )Nrsyncz-RsyncSnapshot requires rsync to be installed.FT)shutilwhichRuntimeErrorrj   s    r   rm   zRsyncSnapshot.available   s6    |G$$ 	 T"#RSSS5tr   c                 p   t          j                    | _        t          | j                  }| j        rdnd}| j                                        sO| j        j        	                    dd           t          j        dddd| t          | j                  g           t          j                    5 }t          d	| d
|j         |d           t!          t"          j                            d | j        D                                 }t+          |j        dd          5 }| j        D ]}t/          ||           	 d d d            n# 1 swxY w Y   t          ddd|j        |t          | j                  g|z              d d d            n# 1 swxY w Y   t1          j        | j                   d S )Nz--recurse-submodulesz-sT)parentsexist_okrl   clonez	--depth=2zfile://zgit ls-files z | grep -v ^16 | cut -f2- > )rn   shellc              3      K   | ]}d |fV  	dS )z	--excludeNr@   )r9   pats     r   r;   z*RsyncSnapshot.__enter__.<locals>.<genexpr>   s(      8d8dPS+s9K8d8d8d8d8d8dr   autf8)encoding)filerq   z-az--files-from)r   rn   ro   strre   rf   rd   existsparentmkdirr\   
check_calltempfileNamedTemporaryFilera   namelist	itertoolschainfrom_iterablerg   openrh   rD   oschdir)r   re   subtfilerg   oincs          r   	__enter__zRsyncSnapshot.__enter__   sH    HJJt}%%(,(<F$$$ '')) 	o$**4$*GGG!5';@T(@T@TVYZ^ZkVlVl"mnnn (** 	meQCQQUZQQW_gkllll9?888d8dW[Wc8d8d8dddeeGej#777 '1< ' 'C#A&&&&&'' ' ' ' ' ' ' ' ' ' ' ' ' ' ' WdNEJ#dN_J`J`adkklll	m 	m 	m 	m 	m 	m 	m 	m 	m 	m 	m 	m 	m 	m 	m 	"#####s7   3A0F#E FE	FE	2FFFc                 8    t          j        | j                   d S r'   )r   r   ro   )r   r   s     r   __exit__zRsyncSnapshot.__exit__   s    
"#####r   )NFr@   r@   rF   r   N)r   r   r   r   r   r    OptionalrG   Sequencer   r)   staticmethodrm   r   r   r@   r   r   rc   rc      s         8 '+ %$&$&  +d# 	
 S! S!       $    \$ $ $ $,$ $ $ $ $r   rc   monitoring_start_timen_jobs
state_jobsc           	      Z   t          j                     | z
  }t          j                                                            d          }t	          |d                   }t          t          |                    }t          d| dt          |dz             dt          |d                   | d| d	t          |          | d| d
t          |d                   t          |          z
  | d| dd           t          |          dk    rt          d| d| d           d S d S )Nz%Y-%m-%d %H:%M:%SFAILED[z] Launched <   z minutes ago,RUNNINGr<   z jobs running,z jobs failed,DONEz
 jobs doneTr=   r   z] Failed jobs, indices )	rQ   datetimenowstrftimesortedr/   r   rD   rI   )r   r   r   run_time	date_timefailed_job_indicesn_charss          r   _default_custom_loggingr      s^   y{{22H!%%''001DEEI
8 455#f++G	CICC#hm"4"4CCCz)$%%wHHH&HHH!""GDDDvDDDz&!""S);%<%<<g[[[[[[    ""H)HH4FHHPTUUUUUU #"r      F	test_modecustom_loggingc                    |s|dk    s
J d            t          |           }|dk    rt          d           dS d                    t          t	          d | D                                           }t          d| d	| d
           t          j                    }	 |s| d                             d           t          j        t                    }t          |           D ]f\  }}	||	j
                                                                     |           |	                                r|d                             |           gt          |d                   }
t          |d                   t          |           k    rt          d|
 dd           n# ||||           t          j        |           !t          dt          t          j                    |z
  dz             d           dS )ay  Continuously monitors given jobs until they are all done or failed.

    Parameters
    ----------
    jobs: List[Jobs]
        A list of jobs to monitor
    poll_frequency: int
        The time (in seconds) between two refreshes of the monitoring.
        Can't be inferior to 30s.
    test_mode: bool
        If in test mode, we do not check the length of poll_frequency
    r   z@You can't refresh too often (>= 30s) to avoid overloading squeuer   zThere are no jobs to monitorNz, c              3   r   K   | ]2}t          |j                                      d d          d         V  3dS )_r   r   N)r   job_idsplit)r9   rY   s     r   r;   zmonitor_jobs.<locals>.<genexpr>  s?      %W%W3c#*oo&;&;C&C&CA&F%W%W%W%W%W%Wr   zMonitoring z jobs from job arrays z 
Tforce)moder   r   z%All jobs finished, jobs with indices z failedr=   z Whole process is finished, took r   z minutes)r/   rD   joinr   rR   rQ   get_infocollectionsdefaultdictrT   stateupperr-   r7   rU   rI   )rM   rO   r   r   r   
job_arraysr   r   rX   rY   r   s              r   monitor_jobsr      s	   &  h###%g###YYF{{,---6#%W%WRV%W%W%W"W"WXXYYJ	
E
E
Ej
E
E
EFFF IKK# 	+G'*** ,S11
oo 	* 	*FAssy(()--a000xxzz *6"&&q)))#Jx$899z&!""c$ii//U:LUUU]abbbb,fjAAA
>"""#" 

fS$)++@U2UY[1[-\-\
f
f
fgggggr   r@   extra_namesc              #       K   d fdt           j        D             }	 dV  t           j                            |           dS # t           j                            |           w xY w)a  Removes slurm and submitit related environment variables so as to avoid interferences
    when submiting a new job from a job.

    Parameters
    ----------
    extra_names: Sequence[str]
        Additional environment variables to hide inside the context,
        e.g. TRITON_CACHE_DIR and TORCHINDUCTOR_CACHE_DIR when using torch.compile.

    Note
    ----
    A slurm job submitted from within a slurm job inherits some of its attributes, which may
    be confusing a cause weird gres errors (or pytorch distributed).
    Submitting within this context should prevent this.

    Usage
    -----
    with submitit.helpers.clean_env():
        executor.submit(...)
    MASTER_ADDRMASTER_PORTRANK
WORLD_SIZE
LOCAL_RANKLOCAL_WORLD_SIZEc                     i | ]?}|                     d           s|v s|v |t          j                            |          @S ))SLURM_SLURMD_SRUN_SBATCH_	SUBMITIT_)
startswithr   environpop)r9   xdistrib_namesr   s     r   
<dictcomp>zclean_env.<locals>.<dictcomp>>  sb       LLOPP	
 M!!K 	
2:>>!
  r   N)r   r   update)r   cluster_envr   s   ` @r   	clean_envr   '  s      , kM      K'

+&&&&&
+&&&&s   A !A'c                   <    e Zd Zd
dZdefdZ	 	 ddededd fd	ZdS )TorchDistributedEnvironmentr   Nc                 .   t                      | _        | j        j        d         | _        |                                 | _        | j        j        | _        | j        j        | _	        | j        j
        | _
        | j        j        | j        j        z  | _        dS )a  Construct a class holding the parameters required to properly setup
        PyTorch distributed (with the default env:// initialization method).

        Examples
        --------
        >>> dist_env = TorchDistributedEnvironment().export()
        >>> torch.distributed.init_process_group(backend="nccl")
        >>> print(f"master: {dist_env.master_addr}:{dist_env.master_port}")
        r   N)r   _job_env	hostnamesmaster_addr_get_master_portmaster_portglobal_rankrank	num_tasks
world_size
local_rank	num_nodeslocal_world_sizer0   s    r   r)   z$TorchDistributedEnvironment.__init__N  sx     '((=2150022M-	-1-2 $ 74=;R Rr   c                     d\  }}t           j                            d          }|4t          j        | j        j                  }|                    ||          S t          |          }|S )N)i N  i`  r   )	r   r   getrandomRandomr   r   randintrI   )r   MIN_MASTER_PORTMAX_MASTER_PORTmaster_port_strrngr   s         r   r   z,TorchDistributedEnvironment._get_master_port`  s`    +9(*..77"- 455C;;@@@/**r   TFset_cuda_visible_devices	overwritec                    | j         t          | j                  t          | j                  t          | j                  t          | j                  t          | j                  d}|s&|D ]#}|t          j        v rt          d| d          $|rt          | j                  |d<   t          j        
                    |           | S )a  Export all the environment variables required to properly setup
        PyTorch distributed (with the default env:// initialization method) i.e.
        MASTER_ADDR, MASTER_PORT, RANK, WORLD_SIZE (to which LOCAL_RANK and
        LOCAL_WORLD_SIZE are added).

        Parameter
        ----------
        set_cuda_visible_device: bool
            if True, updates CUDA_VISIBLE_DEVICES to use only the device
            matching the local rank.
        overwrite: bool
            if True, overwrites the environment variables if they exist;
            this can be useful when launching a job from another job.

        Returns
        --------
        TorchDistributedEnvironment
            the current instance
        r   z'Cannot export environment variables as z is already setCUDA_VISIBLE_DEVICES)r   r   r   r   r   r   r   r   r   rt   r   )r   r   r   env_varskeys        r   exportz"TorchDistributedEnvironment.exportm  s    8  +t/00	NNdo..do.. #D$9 : :
 
  	g g g"*$$&'eQT'e'e'efff % $ 	D/24?/C/CH+,

(###r   r   )TF)r   r   r   r)   rI   r   rG   r   r@   r   r   r   r   M  s        S S S S$#     *.+ +"&+ + 
'	+ + + + + +r   r   )NrL   )r@   ),r   
contextlibr   r   r   r   rr   r\   r   rQ   typingr    pathlibr   r   core.job_environmentr   
core.utilsr   r	   r
   r   r$   r   JobRr   UnionrI   floatrJ   rZ   ra   rc   Dictr   Setr   rG   rH   r   contextmanagerr   r   r@   r   r   <module>r     s/                				                         0 0 0 0 0 0 : : : : : : > > > > > > F F F F F F8 8 8 8 8 8 8 8:*
 *
 *
 *
 *
~ *
 *
 *
^ 26(# (#
+dhtv&
'(#[#u*-.(# (# [$&!"	(# (# (# (#VO O OI$ I$ I$ I$ I$ I$ I$ I$XV5 V# VSUSZ[^`b`fgj`k[kSl V V V V( "9	0h 0h
+dhtv&
'0h0h 0h K	0h
 
0h 0h 0h 0hf "' "'2;s+ "'R[5F "' "' "' "'JK K K K K K K K K Kr   