
    !Sh?                    F   d Z ddlmZ ddlZddlZddlZddlmZm	Z	 erddl
mZ ej                            d          Zej                            e          Zej                            e           eej        d<    ej        d          e_         ej        d	          e_         ej        d
          e_        ej        e_         ej        dej                  e_         ej        dej                  e_         ej        d          Z G d dej                  ZdS )a  
This module imports a copy of [`html.parser.HTMLParser`][] and modifies it heavily through monkey-patches.
A copy is imported rather than the module being directly imported as this ensures that the user can import
and  use the unmodified library for their own needs.
    )annotationsN)TYPE_CHECKINGSequence)Markdownzhtml.parser
htmlparserz<[a-zA-Z]|</>z\?>z&([a-zA-Z][-.a-zA-Z0-9]*);a  
  <[a-zA-Z][^`\t\n\r\f />\x00]*       # tag name <= added backtick here
  (?:[\s/]*                           # optional whitespace before attribute name
    (?:(?<=['"\s/])[^`\s/>][^\s/=>]*  # attribute name <= added backtick here
      (?:\s*=+\s*                     # value indicator
        (?:'[^']*'                    # LITA-enclosed value
          |"[^"]*"                    # LIT-enclosed value
          |(?!['"])[^`>\s]*           # bare value <= added backtick here
         )
         (?:\s*,)*                    # possibly followed by a comma
       )?(?:\s|/(?!>))*
     )*
   )?
  \s*                                 # trailing whitespace
a  
  [a-zA-Z][^`\t\n\r\f />]*           # tag name
  [\t\n\r\f /]*                     # optional whitespace before attribute name
  (?:(?<=['"\t\n\r\f /])[^`\t\n\r\f />][^\t\n\r\f /=>]*  # attribute name
    (?:=                            # value indicator
      (?:'[^']*'                    # LITA-enclosed value
        |"[^"]*"                    # LIT-enclosed value
        |(?!['"])[^>\t\n\r\f ]*     # bare value
       )
     )?
    [\t\n\r\f /]*                   # possibly followed by a space
   )*
   >?
z^([ ]*\n){2}c                  
    e Zd ZU dZd, fdZ fdZ fdZed-d	            Zd.dZ	d/dZ
d0dZd1dZd2dZd3dZd1dZd4dZd4dZd2dZd5 fdZd2dZd2d Zd2d!Zd6 fd"Zd6 fd#Zd7d8 fd&Zd'Zd(ed)<   d9d*Zd6d+Z xZS ):HTMLExtractorz
    Extract raw HTML from text.

    The raw HTML is stored in the [`htmlStash`][markdown.util.HtmlStash] of the
    [`Markdown`][markdown.Markdown] instance passed to `md` and the remaining text
    is stored in `cleandoc` as a list of strings.
    mdr   c                    d|vrd|d<   t          dg          | _        dg| _        d| _         t	                      j        |i | || _        d S )Nconvert_charrefsFhrr   )set
empty_tagslineno_start_cacheoverride_comment_updatesuper__init__r
   )selfr
   argskwargs	__class__s       Y/var/www/html/movieo_spanner_bot/venv/lib/python3.11/site-packages/markdown/htmlparser.pyr   zHTMLExtractor.__init__e   se    V++).F%& tf++#$#',$ 	$)&)))    c                    d| _         d| _        g | _        g | _        g | _        dg| _        t                                                       dS )z1Reset this instance.  Loses all unprocessed data.Fr   N)inrawintailstack_cachecleandocr   r   resetr   r   s    r   r    zHTMLExtractor.resett   sE    
 "
!##%#$#r   c                   t                                                       t          | j                  r[| j        r:| j        s3|                     t                              | j                             n|                     | j                   t          | j	                  rX| j
                            | j        j                            d                    | j	                                       g | _	        dS dS )zHandle any buffered data. N)r   closelenrawdatar   
cdata_elemhandle_datar   unescaper   r   appendr
   	htmlStashstorejoinr!   s    r   r$   zHTMLExtractor.close   s    t| 	/ $ /T_ /  !4!4T\!B!BCCCC  ...t{ 	M  !2!8!89M9M!N!NOOODKKK	 	r   returnintc                J   t          t          | j                  dz
  | j        dz
            D ]a}| j        |         }| j                            d|          }|dk    rt          | j                  }| j                            |dz              b| j        | j        dz
           S )zHReturns char index in `self.rawdata` for the start of the current line.    
)ranger%   r   linenor&   findr*   )r   iilast_line_start_poslf_poss       r   line_offsetzHTMLExtractor.line_offset   s     D344Q6AFF 	5 	5B"&"9""=\&&t-@AAF||T\**#**6!84444&t{1}55r   boolc                    | j         dk    rdS | j         dk    rdS | j        | j        | j        | j         z                                            dk    S )z
        Returns True if current position is at start of line.

        Allows for up to three blank spaces at start of line.
        r   T   Fr#   )offsetr&   r:   stripr   s    r   at_line_startzHTMLExtractor.at_line_start   sW     ;!4;??5|D,T-=-KKLRRTTXZZZr   tagstrc                    | j         | j        z   }t          j                            | j        |          }|r!| j        ||                                         S d                    |          S )z
        Returns the text of the end tag.

        If it fails to extract the actual text from the raw data, it builds a closing tag with `tag`.
        z</{}>)r:   r>   r   	endendtagsearchr&   endformat)r   rB   startms       r   get_endtag_textzHTMLExtractor.get_endtag_text   sb      4;. ''e<< 	'<aeegg.. >>#&&&r   attrsSequence[tuple[str, str]]c                   || j         v r|                     ||           d S | j                            |          rC| j        s|                                 r(| j        s!d| _        | j                            d           | 	                                }| j        r6| j
                            |           | j                            |           d S | j                            |           || j        v r|                                  d S d S )NTr2   )r   handle_startendtagr
   is_block_levelr   rA   r   r   r*   get_starttag_textr   r   CDATA_CONTENT_ELEMENTSclear_cdata_mode)r   rB   rL   texts       r   handle_starttagzHTMLExtractor.handle_starttag   s   $/!!##C///F7!!#&& 	'DK 	'D<N<N<P<P 	'Y]Yc 	'DJM  &&&%%'': 	(Jc"""Kt$$$$$M  &&&d111%%''''' 21r   c                   |                      |          }| j        rH| j                            |           || j        v r,| j        r%| j                                        |k    rn| j        %t          | j                  dk    rt                              | j	        | j
        | j        z   t          |          z   d                    r| j                            d           nd| _        d| _        | j                            | j        j                            d                    | j                                       | j                            d           g | _        d S d S | j                            |           d S )Nr   r2   TFr#   

)rK   r   r   r*   r   popr%   blank_line_rematchr&   r:   r>   r   r   r
   r+   r,   r-   )r   rB   rT   s      r   handle_endtagzHTMLExtractor.handle_endtag   sj   ##C((: 	'Kt$$$dj  j z~~''3.. j  4:!## &&t|D4Dt{4RUXY]U^U^4^4_4_'`aa 'K&&t,,,, #'DK"
$$TW%6%<%<RWWT[=Q=Q%R%RSSS$$V,,,  $# M  &&&&&r   datac                    | j         rd|v rd| _         | j        r| j                            |           d S | j                            |           d S )Nr2   F)r   r   r   r*   r   r   r\   s     r   r(   zHTMLExtractor.handle_data   s]    ; 	 44<<DK: 	'Kt$$$$$M  &&&&&r   is_blockc                   | j         s| j        r| j                            |           dS |                                 r |rt
                              | j        | j        | j	        z   t          |          z   d                   r|dz  }nd| _        | j        r| j        d         nd}|                    d          s/|                    d          r| j                            d           | j                            | j        j                            |                     | j                            d           dS | j                            |           dS )z Handle empty tags (`<data>`). Nr2   Tr3   r#   rW   )r   r   r   r*   rA   rY   rZ   r&   r:   r>   r%   r   endswithr
   r+   r,   )r   r\   r_   items       r   handle_empty_tagzHTMLExtractor.handle_empty_tag   sR   : 	' 	'Kt$$$$$!! 	'h 	'""4<0@4;0NQTUYQZQZ0Z0[0[#\]] # #(,=4=$$2D==(( +T]]4-@-@ +$$T***M  !2!8!8!>!>???M  (((((M  &&&&&r   c                    |                      |                                 | j                            |                     d S )Nr_   )rc   rQ   r
   rP   )r   rB   rL   s      r   rO   z HTMLExtractor.handle_startendtag  s>    d4466AWAWX[A\A\]]]]]r   namec                Z    |                      d                    |          d           d S )Nz&#{};Fre   rc   rH   r   rf   s     r   handle_charrefzHTMLExtractor.handle_charref	  s-    gnnT22UCCCCCr   c                Z    |                      d                    |          d           d S )Nz&{};Fre   rh   ri   s     r   handle_entityrefzHTMLExtractor.handle_entityref  s-    fmmD11EBBBBBr   c                   | j         | j        z   t          |          z   dz   }| j        ||dz            dk    r|                     d           d| _        d S |                     d                    |          d           d S )N   r=   z--><Tz	<!--{}-->re   )r:   r>   r%   r&   r(   r   rc   rH   )r   r\   is      r   handle_commentzHTMLExtractor.handle_comment  s    t{*SYY6:<!a% E))S!!!+/D(Fk0066FFFFFr   rp   jc                l    | j         rd| _         d}d}t                                          ||          S )NFr   r1   )r   r   	updatepos)r   rp   rr   r   s      r   rt   zHTMLExtractor.updatepos  s:    ' 	+0D(AAww  A&&&r   c                Z    |                      d                    |          d           d S )Nz<!{}>Tre   rh   r^   s     r   handle_declzHTMLExtractor.handle_decl  s-    gnnT22TBBBBBr   c                Z    |                      d                    |          d           d S )Nz<?{}?>Tre   rh   r^   s     r   	handle_pizHTMLExtractor.handle_pi"  s-    hood33dCCCCCr   c                    |                     d          rdnd}|                     d                    ||          d           d S )NzCDATA[z]]>z]>z<![{}{}Tre   )
startswithrc   rH   )r   r\   rG   s      r   unknown_declzHTMLExtractor.unknown_decl%  sK    x00:eedi..tS99DIIIIIr   c                    |                                  s| j        r!t                                          |          S |                     d           |dz   S )Nz<?   )rA   r   r   parse_pir(   )r   rp   r   s     r   r~   zHTMLExtractor.parse_pi)  sW     	'4; 	'77##A&&& 	1ur   c                   |                                  s| j        r| j        ||dz            dk    r]| j        ||dz            dk    sG|                     |          }|dk    r*|                     | j        ||dz                       |dz   S |S t                                          |          S |                     d           |dz   S )	Nr=   z<![	   z	<![CDATA[r3   r1   z<!r}   )rA   r   r&   parse_bogus_commentr(   r   parse_html_declaration)r   rp   resultr   s      r   r   z$HTMLExtractor.parse_html_declaration1  s     		54; 		5|AacE"e++DL1Q34G;4V4V 11!44R<<$$T\!AE'%:;;;q5L7711!444 	1ur   r   reportc                    t                                          ||          }|dk    rdS |                     | j        ||         d           |S )Nr3   Fre   )r   r   rc   r&   )r   rp   r   posr   s       r   r   z!HTMLExtractor.parse_bogus_commentA  sU     gg))!V44"992dl1S51EBBB
r   Nz
str | None_HTMLExtractor__starttag_textc                    | j         S )z)Return full source of start tag: `<...>`.)r   r@   s    r   rQ   zHTMLExtractor.get_starttag_textP  s    ##r   c                X   | j         ||dz            dk    r*|                     | j         ||dz                       |dz   S d | _        |                     |          }|dk     r*|                     | j         ||dz                       |dz   S | j         }|||         | _        g }t          j                            ||dz             }|s
J d            |                                }|                    d          	                                x| _
        }||k     rt          j                            ||          }|sn|                    ddd          \  }	}
}|
sd }nI|d d         dcxk    r|dd          k    s"n |d d         d	cxk    r|dd          k    rn n
|dd         }|rt                              |          }|                    |		                                |f           |                                }||k     |||                                         }|d
vr|                                 \  }}d| j        v rM|| j                            d          z   }t#          | j                  | j                            d          z
  }n|t#          | j                  z   }|                     |||                    |S |                    d          r|                     ||           n4|| j        v r|                     |           |                     ||           |S )Nr=   z</>r   r1   z#unexpected call to parse_starttag()r}   'r3   ")>/>r2   r   )r&   r(   r   check_for_whole_start_tagr   tagfind_tolerantrZ   rG   grouplowerlasttagattrfind_tolerantr)   r*   r?   getposcountr%   rfindra   rO   rR   set_cdata_moderU   )r   rp   endposr&   rL   rZ   krB   rJ   attrnamerest	attrvaluerG   r5   r>   s                  r   parse_starttagzHTMLExtractor.parse_starttagT  s`   <!a% E))T\!AE'2333q5L#//22A::T\!AE'2333q5L,&qx0 +11'1Q3??;;;;;uIIKK"[[^^11333s&jj,227A>>A ()1a(8(8%HdI , 		2A2$8888)BCC.88882A2#777723377777%adO	 ;&//	::	LL(..**I6777A &jj ah%%''k!!![[]]NFFt+++$"6"<"<T"B"BBT122/55d;;<  #d&:";";;WQvX.///M<< 	-##C//// d111##C(((  e,,,r   )r
   r   )r.   r/   )r.   r;   )rB   rC   r.   rC   )rB   rC   rL   rM   )rB   rC   )r\   rC   )r\   rC   r_   r;   )rf   rC   )rp   r/   rr   r/   r.   r/   )rp   r/   r.   r/   )r   )rp   r/   r   r/   r.   r/   )r.   rC   )__name__
__module____qualname____doc__r   r    r$   propertyr:   rA   rK   rU   r[   r(   rc   rO   rj   rl   rq   rt   rv   rx   r{   r~   r   r   r   __annotations__rQ   r   __classcell__)r   s   @r   r	   r	   \   sw              	 	 	 	 	     
6 
6 
6 X
6[ [ [ [' ' ' '( ( ( (*' ' ' '6' ' ' '' ' ' '.^ ^ ^ ^D D D DC C C CG G G G' ' ' ' ' 'C C C CD D D DJ J J J                  #'O&&&&$ $ $ $6 6 6 6 6 6 6 6r   r	   )r   
__future__r   reimportlib.util	importlibsystypingr   r   markdownr   util	find_specspecmodule_from_specr   loaderexec_modulemodulescompilestarttagopenpiclose	entityref
incompleteVERBOSElocatestarttagend_tolerantlocatetagendrY   
HTMLParserr	    r   r   <module>r      s  (  # " " " " " 				     



 * * * * * * * * "!!!!!!
 ~..^,,T22
   
 # # #&L  %"*_55
   RZ''
 !rz"?@@
  #,
 (2
 4 Z) )
 % %"* & Z 
 " 
?++n n n n nJ) n n n n nr   