
    Wh                     
   d dl mZ d dlmZ d dlZd dlZd dlm	Z	 d dlm
Z
mZ d dlmZ d dlmZ d dlmZ d d	lmZmZmZmZmZmZmZmZmZ d d
lmZ d dlmZ  ej        e	j                                        ej                                        ej                                        ej                                        ej                                        ej                                        ej                                        ej                                        ej                                        ej                                        ej                                        ej                                        ej                                                  D ]#Z  e!e e
j"                  r ej#        e            $ ej$        ej%                  dej&        dej'        dej(        fd            Z)dej&        de*e+df         dej(        dz  fdZ,dddej&        de*e+df         de-dej(        fdZ.  ej$        ej/                  d              ej$        ej0                  d            dej&        de*e+df         dej(        fdZ1  ej$        ej2                  e1             ej$        ej3                  e1            ej$        ej4                  dej&        de*e+df         dej(        fd            Z5 ej$        ej6                  dej&        de*e+df         dej(        fd            Z7 ej$        ej8                  dej&        de*e+df         de*e*e9e9f         df         dej(        fd            Z:dS )    )defaultdict)replaceN)ad_util)coreutil)ops)prng)random)	annconvolutionfftlaxlinalgparallelslicingspecialwindowed_reductions)roofline)	shard_mapctxdimension_numbersreturnc                   d | j         D             \  }t          j                            | j        d                   }|\  \  }}\  }}dj        z  |j        z  t          j        fd|D                       z  t          j        fd|D                       z  }	d}
| j        s|
j	        z  }
|
|j	        z  }
| j
        s
|
|j	        z  }
t          j        t          |	          |
          S )Nc              3   T   K   | ]#}t           j                            |          V  $d S Nr   RooflineShape	from_aval.0avals     i/var/www/html/movieo_spanner_bot/venv/lib/python3.11/site-packages/jax/experimental/roofline/rooflines.py	<genexpr>z(_dot_general_roofline.<locals>.<genexpr>A   s3      NNh$..t44NNNNNN    r      c                 *    g | ]}j         |         S  shaper    ilhss     r"   
<listcomp>z)_dot_general_roofline.<locals>.<listcomp>I   s    222sy|222r$   c                 *    g | ]}j         |         S r'   r(   r*   s     r"   r-   z)_dot_general_roofline.<locals>.<listcomp>J   s    ///sy|///r$   )flops	hbm_bytes)avals_inr   r   r   	avals_outsizenpprodpin_lhs_in_vmembytespin_rhs_in_vmemRooflineResultint)r   r   argskwrhsoutlhs_contract_	lhs_batchr/   r0   r,   s              @r"   _dot_general_rooflinerB   :   s    ONNNN(#s((q)9::#&7#<^i 	h	h 	g2222\222334 	g////Y///00	1  )		 II		 I		 s5zzY	G	G	GGr$   axes.c                      t          j         fd|D                       }|dk    rd S t          j        d |D             d |D                       S )Nc                 4    g | ]}j         j        |         S r'   )meshr)   )r    axisr   s     r"   r-   z3_return_zeros_if_one_sized_axis.<locals>.<listcomp>Z   s"    ===sx~d+===r$      c                     i | ]}|d S r   r'   r    rG   s     r"   
<dictcomp>z3_return_zeros_if_one_sized_axis.<locals>.<dictcomp>^   s    (((4tQ(((r$   c                     i | ]}|d S rJ   r'   rK   s     r"   rL   z3_return_zeros_if_one_sized_axis.<locals>.<dictcomp>_   s    ***Tq***r$   	ici_bytesici_latency)r4   r5   r   r9   )r   rC   	axes_sizes   `  r"   _return_zeros_if_one_sized_axisrR   W   ss     g=======>>)]]4		 ((4(((**T***
 
 
 r$   T)	is_reducerS   c                   t          | |          x}r|S | j        j        t          j                            | j                  }|r#|t          j        fd|D                       z  }t          |fdd          }t          |          }d||z  }|D ]}	|	         }
||
dz
  z  z  ||
z  }|d                  |z  t          j        fd|D             fd|D             	          S )
Nc                      g | ]
}|         S r'   r'   r    rG   rF   s     r"   r-   z-_ring_collective_roofline.<locals>.<listcomp>p   s    "?"?"?$4:"?"?"?r$   c                     |          S r   r'   xrF   s    r"   <lambda>z+_ring_collective_roofline.<locals>.<lambda>s   s    47 r$   T)keyreverser   rH   c                 0    i | ]}|t                    S r'   r:   r    rG   rO   s     r"   rL   z-_ring_collective_roofline.<locals>.<dictcomp>   s!    <<<tS^^<<<r$   c                 0    i | ]}|t                    S r'   r^   r    rG   rP   s     r"   rL   z-_ring_collective_roofline.<locals>.<dictcomp>   s#    @@@Ds;''@@@r$   rN   )rR   rF   r)   r   r   total_bytesr1   r4   r5   sortedlenr9   )r   rC   rS   r;   r<   zeros_resultcurrent_shard_sizesorted_axesnum_axesrG   	axis_sizerO   rP   rF   s              @@@r"   _ring_collective_rooflinerj   c   sM    5S$???\ 	$-99#,GG A"'"?"?"?"?$"?"?"?@@@ t!2!2!2!2DAAA+()! $ $dT
I#y1}55I)# [^$x/+		 <<<<<<<@@@@K@@@
 
 
 r$   c                     t          |d| i|S )NrC   rj   	axis_namer;   r<   s      r"   rZ   rZ      s    !:D!Wy!WTV!W!W r$   c                      t          || dd|S )NFrC   rS   rl   rm   s      r"   rZ   rZ      s%    !:		U" ".0" " r$   c                    d | j         D             }t          | d |D                       } t          | g|R |dd|S )Nc                 L    g | ]!}t           j                            |          "S r'   r   r   s     r"   r-   z/_scalar_collective_roofline.<locals>.<listcomp>   s)    LLLtH",,T22LLLr$   c                 B    g | ]}t          j        d |j                  S )rH   )r   ShapedArraydtype)r    r)   s     r"   r-   z/_scalar_collective_roofline.<locals>.<listcomp>   s'    WWWut/ekBBWWWr$   )r1   Frp   )r1   r   rj   )r   rC   r;   r<   shapess        r"   _scalar_collective_rooflinerx      sa     MLs|LLL&WWPVWWWXXX#	"3	P	P	PDE	P	PR	P	PPr$   c                    t          | g|R d|i|}dt          t          t          f         dt          t          t          f         fd}t	          j         ||j                   ||j                            S )NrC   dr   c                 >    d |                                  D             S )Nc                      i | ]\  }}||d z  S )r%   r'   )r    kvs      r"   rL   z8_psum2_roofline.<locals>.double_dict.<locals>.<dictcomp>   s"    +++AAq1u+++r$   )items)rz   s    r"   double_dictz$_psum2_roofline.<locals>.double_dict   s    ++++++r$   rN   )rj   dictstrr:   r   r9   rO   rP   )r   rC   r;   r<   ring_roofliner   s         r"   _psum2_roofliner      s     ,CH$HHHTHRHH-,T#s(^ ,S#X , , , , 
	 k-122M566
 
 
 r$   rn   c                  	
 t          | |          x}r|S | j        j        t          j                            | j                  t          j        fd|D                       z  }t          |fd          d         }t          |          }|         |dz
  z  }|         dk    r|dz  }|dz  |z  	t          fd|D                       
t          j        	fd|D             
fd	|D             
          S )Nc                      g | ]
}|         S r'   r'   rV   s     r"   r-   z(_all_to_all_roofline.<locals>.<listcomp>   s0     E E EDJE E Er$   c                     |          S r   r'   rX   s    r"   rZ   z&_all_to_all_roofline.<locals>.<lambda>   s    $q' r$   )r[   r   rH   r%   c              3   .   K   | ]}|         d z  V  dS )r%   Nr'   rV   s     r"   r#   z'_all_to_all_roofline.<locals>.<genexpr>   s+      99tDJN999999r$   c                 0    i | ]}|t                    S r'   r^   r_   s     r"   rL   z(_all_to_all_roofline.<locals>.<dictcomp>   !    :::tS^^:::r$   c                 0    i | ]}|t                    S r'   r^   ra   s     r"   rL   z(_all_to_all_roofline.<locals>.<dictcomp>   #    >>>Ds;''>>>r$   rN   )rR   rF   r)   r   r   rb   r1   r4   r5   rc   rd   sumr9   )r   rn   r;   r<   re   r3   smallest_axisrh   bisection_bwrO   rP   rF   s            @@@r"   _all_to_all_roofliner      s\    5S)DDD\ 	$			+	+CL	9	9BG E E E E$E E E = = 
$ (9(9(9(9:::1=-^^(m$A6,	-1AL Qh%) 9999y99999+		 ::::	:::>>>>I>>>
 
 
 r$   permc          	      .   t          | |          x}r|S | j        j        fd|D             }t          j                            | j                  }t          t                    }d|D ]S\  }	}
|	|
k    rt          d t          j        |	|          D                       }t          d t          j        |
|          D                       }d}t          t          |                    D ]}||         }||         }||         }||k    r||z
  |z  }||z
  |z  }||k    rdnd}|}||k    rht          j        |||          }||z   |z  }t          j        |||          }|t          t!          ||g                    xx         dz  cc<   |}||k    ht#          ||          }||z  }t%          |          U|t%          |                                d          z  t          j        fd|D             fd	|D             
          S )Nc                 <    g | ]}                     |d           S rt   )getrV   s     r"   r-   z&_ppermute_roofline.<locals>.<listcomp>   s'    BBB$((4++BBBr$   r   c              3   4   K   | ]}t          |          V  d S r   r^   r    r+   s     r"   r#   z%_ppermute_roofline.<locals>.<genexpr>   (      HH!s1vvHHHHHHr$   c              3   4   K   | ]}t          |          V  d S r   r^   r   s     r"   r#   z%_ppermute_roofline.<locals>.<genexpr>   r   r$   rH   )defaultc                 0    i | ]}|t                    S r'   r^   r_   s     r"   rL   z&_ppermute_roofline.<locals>.<dictcomp>  r   r$   c                 0    i | ]}|t                    S r'   r^   ra   s     r"   rL   z&_ppermute_roofline.<locals>.<dictcomp>  r   r$   rN   )rR   rF   r)   r   r   rb   r1   r   floattupler4   unravel_indexrangerd   r   tuple_updaterc   minmaxvaluesr9   )r   rn   r   r;   r<   re   	mesh_dims
shard_sizeici_contentionsrcdst
src_coords
dst_coordsici_latency_for_permr+   dim_sizesrc_posdst_posclockwise_distcounter_dist	directioncurr_poscurr_coordsnext_posnext_coordsdistancerO   rP   rF   s                             @@@r"   _ppermute_roofliner      s    5S)DDD\ 	$BBBB	BBB)%11#,??*=H=O=O.+  9  9hc3
czzHHr'7Y'G'GHHHHHJHHr'7Y'G'GHHHHHJ 3y>>"" ) )1h1g1g	G		!G+x7')X5'<77AAR	'!!)*aBB+*h6()+q(CC+
v{K&@AABB
C
C
Cq
H
C
C
C( '!! ~|44(k#788KK3~4466BBBB)		 ::::	:::>>>>I>>>
 
 
 r$   );collectionsr   dataclassesr   	itertoolsitnumpyr4   jax._srcr   r   r   r   r	   r
   jax._src.laxr   r   r   r   r   r   lax_parallelr   r   r   jax.experimentalr   r   chain__dict__r   prim
isinstance	Primitiveregister_standard_rooflineregister_rooflinedot_general_pRooflineRuleContextDotDimensionNumbersr9   rB   r   r   rR   boolrj   reduce_scatter_pall_gather_prx   pmin_ppmax_ppsum2_pr   all_to_all_pr   
ppermute_pr:   r   r'   r$   r"   <module>r      sz   $ # # # # #                                              
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 & % % % % % & & & & & & BH	,,,/,-/		%%''  . .D Zdn%% .'H'--- C-..H#H ,H
 H H H /.H8	#	+0c?	t#	 	 	 	  	# # ### 	c3h# 	# # # # #L :  <8 9 9WW   6  <4 5 5   Q#Q 	c3hQ
 Q Q Q Q 0  <. / /0K L L L /  <. / /0K L L L I-..# 	c3h
    /." L566# 38_
    76B L3447#7 38_7 	eCHos"#	7 7 7 7 547 7 7r$   