
    h                         d Z ddlmZ ddlmZ ddlmZ ddlmZm	Z	 ddl
mZmZmZmZmZmZmZ dd	lZd
eeeef         eeeef                  f         defdZ G d de          Zd	S )zd
Support for the OpenAI Whisper speech recognition library.

See https://github.com/openai/whisper

   )
FormatBase   )SSAEvent)SSAFile)	make_timetimestamp_to_ms    )UnionListDictAnyOptionalSequenceTextIONresult_or_segmentsreturnc                    t          | t                    r	| d         }n't          | t                    r| }nt          d          t	                      }|D ]o}t          t          |d                   t          |d                             }|d                                         |_        |	                    |           p|S )a  
    Load subtitle file from OpenAI Whisper transcript

    Example:
        >>> import whisper
        >>> import pysubs2
        >>> model = whisper.load_model("base")
        >>> result = model.transcribe("audio.mp3")
        >>> subs = pysubs2.load_from_whisper(result)
        >>> subs.save("audio.ass")

    See also:
        https://github.com/openai/whisper

    Arguments:
        result_or_segments: Either a dict with a ``"segments"`` key
            that holds a list of segment dicts, or the segment list-of-dicts.
            Each segment is a dict with keys ``"start"``, ``"end"`` (float, timestamps
            in seconds) and ``"text"`` (str with caption text).

    Returns:
        :class:`pysubs2.SSAFile`

    segmentszAExpected either a dict with 'segments' key, or a list of segmentsstart)send)r   r   text)

isinstancedictlist	TypeErrorr   r   r   strip	plaintextappend)r   r   subssegmentevents        W/var/www/html/auto_sub_bot/venv/lib/python3.11/site-packages/pysubs2/formats/whisper.pyload_from_whisperr$      s    2 $d++ ]%j1	&	-	- ]%[\\\99D  y77+;<<<)gV[nB]B]B]^^^!&///11EK    c                      e Zd ZdZ ej        d          Z ej        d          Zede	de
e	         fd            Zede	de
e         fd            Zed	ee	         defd
            Zedddede	deddf
d            ZdS )WhisperJAXFormatzw
    Parser for Whisper JAX transcription, one event per line, eg. ``[00:02.880 -> 00:07.240]  transcribed text``

    z((?:(\d{1,2}):)?(\d{2}):(\d{2})[.](\d{3})z\[([^]]+) -> ([^]]+)] (.*)r   r   c                     |                                                                 D ]}|                     |          r dS  dS dS )z5See :meth:`pysubs2.formats.FormatBase.guess_format()`whisper_jaxN)lstrip
splitlines
parse_line)clsr   lines      r#   guess_formatzWhisperJAXFormat.guess_format@   sN     KKMM,,.. 	 	D~~d## $}}tttr%   r.   c                    | j                             |          }|d S | j                            |                    d                    }| j                            |                    d                    }|                    d                                          }||d S |                     |                                          }|                     |                                          }t          |||          S )Nr   r      )r   r   r   )	LINEmatch	TIMESTAMP	fullmatchgroupr   r   groupsr   )r-   r.   mm_startm_endr   start_msend_mss           r#   r,   zWhisperJAXFormat.parse_lineK   s    HNN4  94-))!''!**55''

33wwqzz!!?em4&&w~~'7'788$$U\\^^44hF>>>>r%   r7   c                 4    t          d | D                       S )Nc                     g | ]}|pd S )0 ).0xs     r#   
<listcomp>z4WhisperJAXFormat.timestamp_to_ms.<locals>.<listcomp>_   s    999QS999r%   )r   )r7   s    r#   r   z WhisperJAXFormat.timestamp_to_ms]   s    99&999:::r%   r    r   fpformat_kwargsNc                     |                                 D ]B}|                                }|                     |          }||                    |           CdS )zD
        See :meth:`pysubs2.formats.FormatBase.from_file()`
        N)	readlinesr   r,   r   )r-   r    rD   rE   rF   r.   es          r#   	from_filezWhisperJAXFormat.from_filea   sX    
 LLNN 	 	D::<<Dt$$A}A		 	r%   )__name__
__module____qualname____doc__recompiler4   r2   classmethodstrr   r/   r   r,   staticmethodr   intr   r   r   rJ   r@   r%   r#   r'   r'   8   s         
FGGI2:344D     [ ?c ?hx&8 ? ? ? [?" ; ;# ; ; ; \; Y F S C TX    [  r%   r'   )rN   baser   ssaeventr   ssafiler   timer   r   typingr
   r   r   r   r   r   r   rO   rR   r$   r'   r@   r%   r#   <module>rZ      s                      - - - - - - - - E E E E E E E E E E E E E E E E E E 				&%S#XT#s(^@T0T*U &Za & & & &R2 2 2 2 2z 2 2 2 2 2r%   