
    VhrB                     
   d dl Z d dlZd dlZd dlZd dlmZ d dlmZ ddl	m
Z
 ddlmZmZ ddlmZ  G d d	ej                  Zd
 Zd Z G d dej                  Z G d dej                  Z G d dej                  ZdS )    N)nn)
functional   )capture_init)center_trimunfold)
LayerScalec                   *     e Zd ZdZd fd	Zd Z xZS )BLSTMz
    BiLSTM with same hidden units as input dim.
    If `max_steps` is not None, input will be splitting in overlapping
    chunks and the LSTM applied separately on each chunk.
    r   NFc                     t                                                       ||dz  dk    sJ || _        t          j        d|||          | _        t          j        d|z  |          | _        || _        d S )N   r   T)bidirectional
num_layershidden_size
input_size   )	super__init__	max_stepsr   LSTMlstmLinearlinearskip)selfdimlayersr   r   	__class__s        S/var/www/html/movieo_spanner_bot/venv/lib/python3.11/site-packages/demucs/demucs.pyr   zBLSTM.__init__   sw     IMQ$6$6$66"G$6s_bccc	iC--			    c           	      ~   |j         \  }}}|}d}| j        d|| j        k    rY| j        }|dz  }t          |||          }	|	j         d         }
d}|	                    dddd                              d||          }|                    ddd          }|                     |          d         }|                     |          }|                    ddd          }|rg }|                    |d||          }	|dz  }t          |
          D ]}|dk    r'|                    |	d d |d d d | f                    /||
dz
  k    r&|                    |	d d |d d |d f                    ^|                    |	d d |d d || f                    t          j
        |d          }|dd |f         }|}| j        r||z   }|S )	NFr   Tr   r      .)shaper   r   permutereshaper   r   rangeappendtorchcatr   )r   xBCTyframedwidthstrideframesnframesoutlimitks                 r   forwardzBLSTM.forward"   s   '1a>%!dn*<*<NEaZFAuf--Fl1oGFq!Q**222q%@@AIIaAIIaLLOKKNNIIaA 	CYYq"a//FaKE7^^ > >66JJvaaaAAAww&678888'A+%%JJvaaaAAAuvvo67777JJvaaaAAAueV|&;<====)C$$Cc2A2g,CA9 	AAr    )r   NF)__name__
__module____qualname____doc__r   r8   __classcell__r   s   @r   r   r      sV         
     ! ! ! ! ! ! !r    r   c                     | j                                                                         }||z  dz  }| j         xj        |z  c_        | j        | j        xj        |z  c_        dS dS )zTRescale initial weight scale. It is unclear why it helps but it certainly does.
          ?N)weightstddetachdatabias)conv	referencerB   scales       r   rescale_convrI   F   sk     +//


"
"
$
$C9_s"EKy	% r    c                     |                                  D ]N}t          |t          j        t          j        t          j        t          j        f          rt          ||           Od S N)modules
isinstancer   Conv1dConvTranspose1dConv2dConvTranspose2drI   )modulerG   subs      r   rescale_modulerT   P   s[    ~~ ) )cBIr'929bFXYZZ 	)i((() )r    c            	       B     e Zd ZdZ	 	 	 dded	ed
edef fdZd Z xZS )DConva  
    New residual branches in each encoder layer.
    This alternates dilated convolutions, potentially with LSTMs and attention.
    Also before entering each residual branch, dimension is projected on a smaller subspace,
    e.g. of dim `channels // compress`.
    r   r   -C6?TFr"   channelscompressdepthinitc                 x   t                                                       |dz  dk    sJ || _        || _        t	          |          | _        |dk    }d }|rd }t          ||z            }|
rt          j        }nt          j	        }t          j
        g           | _        t          | j                  D ]}|rd|z  nd}||dz  z  }t          j        |||||           ||           |            t          j        |d|z  d           |d|z            t          j        d          t          ||          g}|r&|                    dt#          |||                     |	r'|                    dt%          |dd	d
                     t          j        | }| j                            |           dS )a  
        Args:
            channels: input/output channels for residual branch.
            compress: amount of channel compression inside the branch.
            depth: number of layers in the residual branch. Each layer has its own
                projection, and potentially LSTM and attention.
            init: initial scale for LayerNorm.
            norm: use GroupNorm.
            attn: use LocalAttention.
            heads: number of heads for the LocalAttention.
            ndecay: number of decay controls in the LocalAttention.
            lstm: use LSTM.
            gelu: Use GELU activation.
            kernel: kernel size for the (dilated) convolutions.
            dilate: if true, use dilation, increasing with the depth.
        r   r   r   c                 (    t          j                    S rK   r   Identityds    r   <lambda>z DConv.__init__.<locals>.<lambda>y   s    BKMM r    c                 ,    t          j        d|           S )Nr   r   	GroupNormr`   s    r   rb   z DConv.__init__.<locals>.<lambda>{   s    Q 2 2 r    )dilationpaddingr"   )headsndecay   T)r   r   r   N)r   r   rX   rY   absrZ   intr   GELUReLU
ModuleListr   r'   rN   GLUr	   insert
LocalStater   
Sequentialr(   )r   rX   rY   rZ   r[   normattnrh   ri   r   gelukerneldilatenorm_fnhiddenactra   rf   rg   modslayerr   s                        r   r   zDConv.__init__]   s   ( 	zQ  ZZ
 *) 	322GX())  	'CC'CmB''tz"" 	& 	&A!'.qAvvQH&A+.G	(FFXwWWW	&!h,22H%%rvayy8T**D  OAz&fMMMNNN RAuVA4PPPQQQM4(EKu%%%%	& 	&r    c                 6    | j         D ]}| ||          z   }|S rK   )r   )r   r+   r}   s      r   r8   zDConv.forward   s+    [ 	 	EEE!HHAAr    )r   r   rW   TFr   r   FTr"   T)	r9   r:   r;   r<   rl   floatr   r8   r=   r>   s   @r   rV   rV   V   s          Z^LP"&8& 8& 8& 8&# 8&QV 8& 8& 8& 8& 8& 8&t      r    rV   c            	       <     e Zd ZdZd
dedededef fdZd	 Z xZS )rr   a  Local state allows to have attention based only on data (no positional embedding),
    but while setting a constraint on the time window (e.g. decaying penalty term).

    Also a failed experiments with trying to provide some frequency based attention.
    r   r   rX   rh   nfreqsri   c                 |   t                                                       ||z  dk    sJ ||f            || _        || _        || _        t          j        ||d          | _        t          j        ||d          | _        t          j        ||d          | _	        |rt          j        |||z  d          | _
        |r\t          j        |||z  d          | _        | j        j        xj        dz  c_        | j        j        J d| j        j        j        d d <   t          j        |||z  z   |d          | _        d S )Nr   r   g{Gz?)r   r   rh   r   ri   r   rN   contentquerykeyquery_freqsquery_decayrA   rD   rE   proj)r   rX   rh   r   ri   r   s        r   r   zLocalState.__init__   s:   %1$$$x&7$$$
y8Q77Yx155
9Xx33 	F!y56>1EED 	/!y56>1EED#((D0((#(444,.D!&qqq)Ih71EE			r    c                    |j         \  }}}| j        }t          j        ||j        |j                  }|d d d f         |d d d f         z
  }|                     |                              ||d|          }|                     |                              ||d|          }	t          j	        d|	|          }
|
|	j         d         dz  z  }
| j
        rt          j        d| j
        dz   |j        |j                  }t          j        dt          j        z  |z  |                    ddd          z            }|                     |                              ||d|          | j
        dz  z  }|
t          j	        d||          z  }
| j        rt          j        d| j        dz   |j        |j                  }|                     |                              ||d|          }t          j        |          dz  }|                    ddd           |                                z  | j        dz  z  }|
t          j	        d||          z  }
|
                    t          j        ||
j        t          j                  d           t          j        |
d	          }|                     |                              ||d|          }t          j	        d
||          }| j
        r-t          j	        d||          }t          j        ||gd          }|                    |d|          }||                     |          z   S )N)devicedtyper#   zbhct,bhcs->bhtsr   r@   r   zfts,bhfs->bhtsir   zbhts,bhct->bhcszbhts,fts->bhfs)r$   rh   r)   aranger   r   r   viewr   einsumr   cosmathpir   ri   r   sigmoidrk   masked_fill_eyeboolsoftmaxr   r*   r&   r   )r   r+   r,   r-   r.   rh   indexesdeltaquerieskeysdotsperiodsfreq_kernelfreq_qdecaysdecay_qdecay_kernelweightsr   resulttime_sigs                        r   r8   zLocalState.forward   s   '1a
,qAAA4 747#33**Q--$$Qr155xx{{5"a00|-tW==
1s""; 	Hl1dkAoahagVVVG)AK%$7',,r1a:P:P$PQQK%%a((--aA>>PSASSFEL!1;GGGD; 	J\!T[1_QXQWUUUF&&q))..q%Q??GmG,,q0G#[[Q222UYY[[@4;PSCSSLEL!1<IIID 	%)AdkLLLdSSS-!,,,,,q//&&q%Q77/'BB; 	6|$4g{KKHY1155F2q))499V$$$$r    )r   r   r   )r9   r:   r;   r<   rl   r   r8   r=   r>   s   @r   rr   rr      s         
F F FS Fc Fs F F F F F F&"% "% "% "% "% "% "%r    rr   c                   x     e Zd Ze	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d fd	            Zd Zd Zd fd	Z xZS )Demucsr   @          @   Tr      r   r   rW   皙?D  (   c           
         t                                                       || _        || _        || _        |
| _        |	| _        || _        || _        || _	        || _
        || _        || _        t          j                    | _        t          j                    | _        t          j                    | _        |rt          j        d          }d}nt          j                    }d}|rt          j        }nt          j        }|}d}t+          |          D ]}d } ||k    rfd} g }!|!t          j        ||||	           | |           |            gz  }!||k    }"||k    }#|dz  r|!t/          |||||"|#          gz  }!|r+|!t          j        |||z  d           | ||z            |gz  }!| j                            t          j        |!            g }$|dk    r|}%nt5          | j                  |z  }%|r3|$t          j        |||z  d|
z  dz   |
           | ||z            |gz  }$|dz  r|$t/          |||||"|#          gz  }$|$t          j        ||%||	|          gz  }$|dk    r|$ | |%           |            gz  }$| j                            dt          j        |$            |}t;          ||z            }|}|rt=          ||          | _        nd	| _        |rtA          | |
           d	S d	S )ab	  
        Args:
            sources (list[str]): list of source names
            audio_channels (int): stereo or mono
            channels (int): first convolution channels
            depth (int): number of encoder/decoder layers
            growth (float): multiply (resp divide) number of channels by that
                for each layer of the encoder (resp decoder)
            depth (int): number of layers in the encoder and in the decoder.
            rewrite (bool): add 1x1 convolution to each layer.
            lstm_layers (int): number of lstm layers, 0 = no lstm. Deactivated
                by default, as this is now replaced by the smaller and faster small LSTMs
                in the DConv branches.
            kernel_size (int): kernel size for convolutions
            stride (int): stride for convolutions
            context (int): kernel size of the convolution in the
                decoder before the transposed convolution. If > 1,
                will provide some context from neighboring time steps.
            gelu: use GELU activation function.
            glu (bool): use glu instead of ReLU for the 1x1 rewrite conv.
            norm_starts: layer at which group norm starts being used.
                decoder layers are numbered in reverse order.
            norm_groups: number of groups for group norm.
            dconv_mode: if 1: dconv in encoder only, 2: decoder only, 3: both.
            dconv_depth: depth of residual DConv branch.
            dconv_comp: compression of DConv branch.
            dconv_attn: adds attention layers in DConv branch starting at this layer.
            dconv_lstm: adds a LSTM layer in DConv branch starting at this layer.
            dconv_init: initial scale for the DConv branch LayerScale.
            normalize (bool): normalizes the input audio on the fly, and scales back
                the output by the same amount.
            resample (bool): upsample x2 the input and downsample /2 the output.
            rescale (float): rescale initial weights of convolutions
                to get their standard deviation closer to `rescale`.
            samplerate (int): stored as meta information for easing
                future evaluations of the model.
            segment (float): duration of the chunks of audio to ideally evaluate the model on.
                This is used by `demucs.apply.apply_model`.
        r   r   r   r   c                 (    t          j                    S rK   r^   r`   s    r   rb   z!Demucs.__init__.<locals>.<lambda>F  s     r    c                 .    t          j        |           S rK   rd   )ra   norm_groupss    r   rb   z!Demucs.__init__.<locals>.<lambda>H  s    BLa$@$@ r    )rZ   r[   rY   ru   r   )rg   N)rG   )!r   r   audio_channelssourceskernel_sizecontextr2   rZ   resamplerX   	normalize
sampleratesegmentr   ro   encoderdecoderskip_scalesrp   rn   rm   r'   rN   rV   r(   rs   lenrO   rq   rl   r   r   rT   )'r   r   r   rX   growthrZ   rewritelstm_layersr   r2   r   rv   glunorm_startsr   
dconv_modedconv_depth
dconv_comp
dconv_attn
dconv_lstm
dconv_initr   r   rescaler   r   
activationch_scaleact2in_channelsrg   indexry   encoderu   r   decodeout_channelsr   s'                 `                       r   r   zDemucs.__init__   s   X 	,&
  "$}}=?? 	AJHHJH 	7DD7D$5\\ (	. (	.E--G##@@@@F	+xfEE!! F
 J&DJ&DA~ M5:*44dL L L M M >Ih8(;Q??GHx/00*> > Lv 6777Fqyy*"4<00>A >Ih8(;Q[1_V]^^^GHx/00*> > A~ M5:*44dL L L M Mr)(L"FG= = = > >Fqyy77<00$$&&99L2=&#9:::"K6H,--HH 	h44DIIDI 	447333333	4 	4r    c                 h   | j         r|dz  }t          | j                  D ]9}t          j        || j        z
  | j        z            dz   }t          d|          }:t          | j                  D ]}|dz
  | j        z  | j        z   }| j         rt          j        |dz            }t          |          S )aX  
        Return the nearest valid length to use with the model so that
        there is no time steps left over in a convolution, e.g. for all
        layers, size of the input - kernel_size % stride = 0.

        Note that input are automatically padded if necessary to ensure that the output
        has the same length as the input.
        r   r   )	r   r'   rZ   r   ceilr   r2   maxrl   )r   length_idxs       r   valid_lengthzDemucs.valid_lengthx  s     = 	aKFtz"" 	$ 	$AY)9 9T[HIIAMFF^^FF$$ 	C 	CCqjDK/$2BBFF= 	+Yvz**F6{{r    c                    |}|j         d         }| j        rQ|                    dd          }|                    dd          }|                    dd          }||z
  d|z   z  }nd}d}|                     |          |z
  }t          j        ||dz  ||dz  z
  f          }| j        rt          j	        |dd          }g }| j
        D ]"}	 |	|          }|                    |           #| j        r|                     |          }| j        D ]5}
|                    d          }t          ||          } |
||z             }6| j        rt          j	        |dd          }||z  |z   }t          ||          }|                    |                    d          t%          | j                  | j        |                    d                    }|S )Nr#   r   T)r   keepdimgh㈵>r   r   )r$   r   meanrB   r   Fpadr   juliusresample_fracr   r(   r   r   popr   r   sizer   r   r   )r   mixr+   r   monor   rB   r   savedr   r   r   s               r   r8   zDemucs.forward  s   > 	884800D99T922D((r4(00CTdSj)AADC!!&))F2E!eqj%%1*"4566= 	.$Q1--Al 	 	Fq		ALLOOOO9 			!Al 	! 	!F99R==DtQ''Dq4x  AA= 	.$Q1--AGdN6""FF166!99c$,//1DaffRjjQQr    c                     t          | j                  D ]@}dD ];}dD ]6}| d| d| }| d| d| }||v r||vr|                    |          ||<   7<At                                          ||           d S )N)r   r   )rE   rA   .z.3.z.2.)strict)r'   rZ   r   r   load_state_dict)	r   stater   r   abnewoldr   s	           r   r   zDemucs.load_state_dict  s    $$ 	4 	4C+ 4 4+ 4 4A,,,,,,C,,,,,,Ce||5(8(8%*YYs^^c
	44 	f55555r    )r   r   r   r   Tr   r   r   r   TTr   r   r   r   r   r   r   rW   TTr   r   r   )T)	r9   r:   r;   r   r   r   r8   r   r=   r>   s   @r   r   r      s         !" !EY4 Y4 Y4 Y4 Y4 \Y4v  .% % %N	6 	6 	6 	6 	6 	6 	6 	6 	6 	6r    r   )r   typingtpr   r)   r   torch.nnr   r   statesr   utilsr   r   transformerr	   Moduler   rI   rT   rV   rr   r    r    r   <module>r      s{                $ $ $ $ $ $             & & & & & & & & # # # # # #/ / / / /BI / / /d     ) ) )D D D D DBI D D DN;% ;% ;% ;% ;% ;% ;% ;%|d6 d6 d6 d6 d6RY d6 d6 d6 d6 d6r    