
    KJiV                        d Z ddlZddlmZ ddlmZ ddlmZ ddlm	Z	 ddl
mZ ddlmZmZ d	d
lmZmZmZmZmZ d	dlmZ  ej        d          Z ej        d          Zd Z eej                   eej                  z   ZddZ e	j        d          Z 	 	 ddZ! ej        d          Z" e	j        d          Z# ej        d          Z$	 	 	 	 ddZ% G d d          Z&d Z' G d de&          Z(dS ) zm
This module provides miscellaneous utilities used by the components
in the :mod:`imdb.parser.http` package.
    N)Callable)	Character)Movie)logger)Person)
_Containerflatten   )ElementTreePathRuleRules
build_tree)xpathzE(\([0-9\?]{4}(?:/[IVXLCDM]+)?\)(?: \(mini\)| \(TV\)| \(V\)| \(VG\))?)z:(title/tt|name/nm|company/co|companies=co|user/ur)([0-9]+)c                     | sdS t                               |           }|sdS t          |                    d                    S )zReturn an imdbID from an URL.N   )	re_imdbidsearchstrgroup)hrefmatchs     V/var/www/html/auto_sub_bot/venv/lib/python3.11/site-packages/imdb/parser/http/utils.pyanalyze_imdbidr   ,   sG     tT""E tu{{1~~    c                 J   t          | t                    rt          t          |                     D ]}t          | |         t                    rL|t
          v rB|r|                    d| |                   | |<   |r|                    d| |                   | |<   it          | |         t          t          f          rt          | |         |||           dS t          | t                    rt          | 	                                          D ]\  }}|}t          |t                    r@|t
          v r6|r|                    d|          | |<   |r|                    d|          | |<   \t          |t          t          f          rt          | |         |||           dS dS )zIterate over the strings inside list items or dictionary values,
    substitutes movie titles and person names with the (qv) references.z	'\1' (qv)z	_\1_ (qv))lastKeyN)

isinstancelistrangelenr   _modify_keyssubdict_putRefsitems)d	re_titlesre_namesr   ikvs          r   r%   r%   9   s    !T Es1vv 	E 	EA!A$$$ El** @'||L!A$??!  A(}}\1Q4@@!AaD4,// E1y(GDDDD	E 	E 
At		 
EOO 		E 		EDAqG!S!! El** ='||L!<<!  >(}}\1==!Ad|,, E1y(GDDDD
E 
E		E 		Er   build_personhttpc                 R
   d}d}|                      d          dk    rd}	n5|                      d          dk    rd}	nd}	|                     ddd          } |                     |	d          }
t          |t                    rd |D             }|sdg}|
d	                                         }t          |
          d
k    rt                              d|
d                                                   }t          j
        dt          j        t          j        z  t          j        z            }|                    |          }|rK|                                r!||                                dz
           dk    r|                    d|          }|dd         dk    r|dd                                         }nU|dd         dk    r|dd                                         }n*|dd         dk    r|dd                                         }|]t          |t                    sE|                     d          }|dk    r'|d|                                         }||d         }n|}n|}n|}|dk    rd}g }t          |t                    r9|                    d          }g }|D ]}|                     d          }|dk    rM|                    |d|                                                    |                    ||d                    j|                    |           |                    d           t          |          }t          |          }||k    r|dg||z
  z  z  }n||k     r
|d|         }t%          |          D ]\  }}|t'          |          ||<   |dk    r|d	         }|d	         }|d	         pd}n|t'          |          }|t'          |          }|r|t(                              d|            |r6t          |t                    rd |D             }n|                                }|r6t          |t                    rd |D             }n|                                }i }|r||d<   t-          |||||||||	  	        }|rPt          |          t          |          k    r0t%          |j                  D ]\  }}||         r||         |_        n;|j        r4t          |j        t2                    r|j        j        s|r||j        _        |S )z`Return a Person instance from the tipical <tr>...</tr> strings
    found in the IMDb's web site. z....z...(z...(r
   c                     g | ]}||S  r4   .0rs     r   
<listcomp>z build_person.<locals>.<listcomp>j   s    )))q)!)))r   r   r    z(\d+ episodes.*)z(\1)Nz andz &iz& ..../zempty name or personID for "%s"c                 6    g | ]}|                                 S r4   stripr5   s     r   r8   z build_person.<locals>.<listcomp>   s     ,,,!AGGII,,,r   c                 6    g | ]}|                                 S r4   r>   )r6   ns     r   r8   z build_person.<locals>.<listcomp>   s     ...1QWWYY...r   headshot)	namepersonIDcurrentRoleroleIDnotes
billingPosmodFunctaccessSystemdata)findreplacesplitr   r   r?   r!   	re_spacesr#   recompileIMSr   startrstripappend	enumerater   _b_p_loggerdebugr   rE   rG   r   )txtrD   rH   rF   rJ   rI   rB   rG   rolesep	txt_splitrC   role_commentre_episodesep_matchcmt_idx	roleNotes	rolesplitr7   nidxlrlridr*   ridrK   personidxs                              r   r-   r-   V   s    ED
xx2	%B		 kk#vq))		#q!!I&$ ))V))) 	TFQ<D
9~~ }}S)A,77==??j!4bdRTkBD6HII%%l33 	BX^^-- 	Bhnn>N>NQR>R1SWZ1Z1Z&??7LAAL&&',3355LL"##$&&',3355LL"##(**',3355Lfd++ 	$&++C00b=='188::D(2EE (DD# !Ev~~I&$ JJsOO	 	' 	'A66#;;DrzzAeteHOO--...  455****A  &&&&YY6{{99tfr	**FF$YYCRC[F'' 	% 	%FAsHHq	777DAYFaL&BE		Vx== Bh&;SAAA  dD!! 	 ,,t,,,DD::<<D "eT"" 	".....EEKKMMED $#Zd!:%LtM M MF  )S^^s6{{22"6#566 	, 	,IC~ ,&s^
	, 
	 )
6+=y I I )"()-2)#( Mr   z[0-9]{7}build_moviez\s+Fc           
         |rd}nd}t                               d|                                           }|                    |d          }d}d}g }t	          |          dk    r4|d                                         }|d                                         }|dd	         d
k    r|d	d                                         }n|dd	         dk    r|d	d         dz   }|rB|                    |                                          r|d	t	          |           dz            }	 |rn|dd	         dk    r-|dd	         d
k    r|d	d                                         }>nD|                    d          }|dk    r||d	         	                    d          ||d	         	                    d          k    r_|d	|                             d          }|dk    r<||d	         	                    d          ||d	         	                    d          k    _|dk    rn||dz   |dz            }|
                                s|dk    r||dz   |dz            dv rnJ||dz   d         dv rn:|r||d	         d|}n
||d	         }|d	|                                         }|r}|                                }|dd	         dk    rS|                    d          }|dk    r8|r||d	         d|}n
||d	         }|d	|                                         }|d|d}|sd	}nt	          |          dk    r|d         }|sc|	rat          |t                    rLt                              |          }d                    d |	                    d          D                       }t          |t                     r/|                    d          }g }|D ]}|                    d          }|dk    rM|                    |d	|                                                    |                    ||d	                    j|                    |           |                    d	           t	          |          }t	          |          }||k    r|d	g||z
  z  z  }n||k     r
|d	|         }t'          |          D ]\  }}|t          |          ||<   |dk    r|d         }|d         }n|t          |          }|t          |          }|r|t(                              d|            |
rKd d |
                    d          D             D             }
|sg }nt          |t                     s|g}||
z  }|                                }|r9t                               d|                                          }|r|dz  }||z  }t-          ||||||||          }|r'd
|v rd |d!<   nd"|v rd#|d!<   nd$|v rd%|d!<   n	d&|v rd'|d!<   |rat	          |          t	          |          k    rAt'          |j                  D ],\  }}	 ||         r||         |_        # t2          $ r Y  nw xY w|r||d(<   |S ))z{Given a string as normally seen on the "categorized" page of
    a person on the IMDb's web site, returns a Movie instance.z ... z .... r9   r
   r0   r   r   iNz	TV SeriesizTV mini-seriesz (mini)Tr1   )r2      z????   )rm   r<   )TVVminiVGzTV moviez	TV seriesshortz (z / c                     g | ]}||S r4   r4   r6   _fs     r   r8   zbuild_movie.<locals>.<listcomp>'  s    CCC"C2CCCr   z@@r<   zempty title or movieID for "%s"c                     g | ]}||S r4   r4   rv   s     r   r8   zbuild_movie.<locals>.<listcomp>G  s    VVVbSUVrVVVr   c                 6    g | ]}|                                 S r4   r>   r6   xs     r   r8   zbuild_movie.<locals>.<listcomp>G  s     $O$O$O1QWWYY$O$O$Or   )titlemovieIDrG   rE   rF   roleIsPersonrI   rJ   z	tv serieskindz
Video Gamez
video gamezTV Movieztv moviezTV Shortztv shortstatus)rO   r#   r?   rN   r!   rV   lstripendswithrfindcountisdigitr   r   
_re_chrIDsfindalljoinr   rL   rW   rX   _b_m_loggererrorr   rE   rG   
IndexError)r[   r}   rF   r   rJ   rI   _parsingCharacter_parsingCompanyyearchrRolesrolesNoCharadditionalNotes_defSepr|   tsplitr\   rG   rc   re   first4fpIdxtmproler7   rf   rg   r*   rh   mrj   s                                r   rk   rk      s     MM#s##))++E[[!$$FDEI
6{{aq	  ""ay!!RSSz[  crc
!!## 
stt(	(	(dsdi' * 0 011 *(G}q(()!& 	:RSSz[((crc
))++  {{3bjjU455\//44dee8J8J38O8OOO$<%%c**D bjjU455\//44dee8J8J38O8OOO 2::taxq()NN 	& 0 0eD1HTAX<M6NR\6\6\"!\\\  	!$TUU|||UU3EE$%%LEete##%%C!&D  
*zz||:KK$$E{{ *',UVV}}}ee<EE!%&&MEfuf,,.."UUDDD) 	V		 EH EFC!8!8 E##F++zzCCt(<(<CCCDD&$ **S// 	' 	'A66#;;DrzzAeteHOO--...  455****A  &&&&YY6{{99tfr	**FF$YYCRC[F'' 	% 	%FAsHHq	777DAYF		Vg,, Bw;SAAA VV$O$O8I8I#8N8N$O$O$OVVV 	DDD$'' 	6DKKMME !#--_==CCEE 	SLE E7%T*;l	< 	< 	<A  #/))#AfII_,,$AfII?**"AfII?**"AfI S^^s6{{22"1=11 	 	ICS> 0!*3DJ     (Hs   Y
Y,+Y,c                       e Zd ZdZdZdZg Zg Z ej	        d          Z
d Zd Zd Zd Zdd	Zd
 Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd ZdS )DOMParserBasez;Base parser to handle HTML data from the IMDb's web server.F	domparserc                     d| _         d| _        | j        j        | _        |                                  |                                  dS )zInitialize the parser.Nr.   )	_modFunct_as	__class____name___cname_initresetselfs    r   __init__zDOMParserBase.__init__v  s:    n-



r   c                 J    i | _         i | _        |                                  dS )zReset the parser.N)
_namesRefs_titlesRefs_resetr   s    r   r   zDOMParserBase.reset~  s$     r   c                     dS z/Subclasses can override this method, if needed.Nr4   r   s    r   r   zDOMParserBase._init      r   c                     dS r   r4   r   s    r   r   zDOMParserBase._reset  r   r   Nc                 4   |                                   ||| _        n| j        | _        |                     |          }|r|                    dd          }|                     |          }	 |                     |          }n2# t          $ r% | j        	                    d| j
        d           Y nw xY w| j        rI	 |                     |           n2# t          $ r% | j                            d| j
        d           Y nw xY w|                     |          }ni }	 |                     |          }n2# t          $ r% | j        	                    d| j
        d           Y nw xY w| j        r|                     |           |                     |          }|S )	zReturn the dictionary generated from the given html string;
        getRefs can be used to force the gathering of movies/persons
        references.Nz&nbsp;r9   z&%s: caught exception preprocessing DOMTexc_infoz%s: unable to gather refsz(%s: caught exception postprocessing data)r   getRefs_defGetRefspreprocess_stringrM   get_dompreprocess_dom	Exception_loggerr   r   gather_refswarn	parse_dompostprocess_data_containsObjectsset_objects_paramsadd_refs)r   html_stringr   kwdsdomrK   s         r   parsezDOMParserBase.parse  s    	

"DLL+DL,,[99 	%--h<<K,,{++C?))#.. ? ? ?""#K#'; # ? ? ? ? ?? | BB$$S))))  B B BL%%&A&*kD & B B B B BB >>#&&DDD	;((..DD 	; 	; 	;LI#{T  ; ; ; ; ;	;   	*##D)))}}T""s6   .B ,B32B3>C ,DDD5 5,E$#E$c                    	 t          |d          }|/t          d          }| j                            d| j                   |S # t          $ r4 | j                            d| j        d           t          d          cY S w xY w)z+Return a dom object, from the given string.T)
force_htmlNr0   z%s: using a fake empty DOMz %s: caught exception parsing DOMr   )r   r   r   r   r   )r   r   r   s      r   r   zDOMParserBase.get_dom  s    		"[T:::C{ nn""#?MMMJ 	" 	" 	"LA#{T  ; ; ;b>>!!!	"s   AA ;BBc                     	 t          ||          S # t          $ r( | j                            d| j        |d           g cY S w xY w)z)Return elements matching the given XPath.z*%s: caught exception extracting XPath "%s"Tr   )piculet_xpathr   r   r   r   )r   elementpaths      r   r   zDOMParserBase.xpath  si    	 $/// 	 	 	LK#{D4  A A AIII	s    /AAc                     t          |t                    rt          |          S 	 t          j        |d          S # t          $ r& | j                            d| j        d           Y dS w xY w)z Convert the element to a string.utf8)encodingz%s: unable to convert to stringTr   r0   )r   r   r   tostringr   r   r   r   r   r   s     r   r   zDOMParserBase.tostring  s    gs## 	w<<"+GfEEEE   ""#D#'; # ? ? ?rrs   < ,A,+A,c                 F    t          |                     |                    S )zClone an element.)r   r   r   s     r   clonezDOMParserBase.clone  s    $--00111r   c                    |s|S 	 | j         }n# t          $ r |cY S w xY w|D ]\  }}t          t          |dd          t                    r|                    ||          }@t          |t                    r|                    ||          }lt          |t                    rA	  ||          }# t          $ r' d}| j	        
                    || j        d           Y w xY w|S )z0Here we can modify the text, before it's parsed.r#   Nz'%s: caught exception preprocessing htmlTr   )preprocessorsAttributeErrorr   getattrr   r#   r   rM   r   r   r   r   )r   r   r   srcr#   _msgs         r   r   zDOMParserBase.preprocess_string  s.    		 .MM 	 	 		% 	 	HC'#ud33X>> 
!ggc;77C%% )11#s;;C** "%#k"2"2KK    DDL&&tT[4&HHHH s    $B00.C! C!c                     t                      }| j        |_        | j        |_        |                    |          }|                    |          }|d         | _        |d         | _        dS )zCollect references.
names refstitles refsN)
GatherRefsr   r   r   r   r   r   )r   r   grParserrefss       r   r   zDOMParserBase.gather_refs  sc    <<x!^!!#&&((..|,.r   c                     |S )z<Last chance to modify the dom, before the rules are applied.r4   r   r   s     r   r   zDOMParserBase.preprocess_dom  s    
r   c                 P    t          | j                                      |          S )zCParse the given dom according to the rules specified in self.rules.)r   rulesextractr   s     r   r   zDOMParserBase.parse_dom  s     TZ  ((---r   c                     |S )zHere we can modify the data.r4   r   rK   s     r   r   zDOMParserBase.postprocess_data  s    r   c                 j    t          |dt                    D ]}| j        |_        | j        |_        dS )ziSet parameters of Movie/Person/... instances, since they are
        not always set in the parser's code.T)yieldDictKeysscalarN)r	   r   r   rJ   r   rI   )r   rK   objs      r   r   z DOMParserBase.set_objects_params  sA     4tJGGG 	* 	*C#xC>CLL	* 	*r   c                 
   | j         rdd                    d t          | j                                                  D                       z  }|dk    r t          j        |t
          j                  }nd}dd                    d t          | j                                                  D                       z  }|dk    r t          j        |t
          j                  }nd}t          |||           || j        | j        dS )z-Modify data according to the expected output.z(%s)|c                 6    g | ]}t          j        |          S r4   rP   escaperz   s     r   r8   z*DOMParserBase.add_refs.<locals>.<listcomp>  s     EEE!1EEEr   z()Nc                 6    g | ]}t          j        |          S r4   r   rz   s     r   r8   z*DOMParserBase.add_refs.<locals>.<listcomp>  s     DDD!1DDDr   )rK   
titlesRefs	namesRefs)
r   r   r   r   keysrP   rQ   Ur   r%   )r   rK   titl_rer(   nam_rer)   s         r   r   zDOMParserBase.add_refs  s
   < 	0EEtD,<,A,A,C,C'D'DEEE! ! G %Jw55		 	sxxDDtDO,@,@,B,B'C'CDDD    F :fbd33T9h///".!_  	r   N)r   
__module____qualname____doc__r   r   r   r   r   getChildr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r4   r   r   r   r   l  s6       EEKMEfok**G        $ $ $ $L" " "  
 
 
2 2 2  ./ / /  . . .  * * *    r   r   c                    |                     d          dk    rVt                              |          }|r:|                                dk    r"| d|d|                                         z  z  } |                     dd          |fS )zManage links to references.z	/title/ttr1   r   z %sN
r9   )rL   re_yearKind_indexr   rU   endrM   )textlinkinfoyearKs       r   
_parse_refr   %  s    yy##!''-- 	/U[[]]a''ED%))++...D<<c""D((r   c            	       
   e Zd ZdZ ed ed                     ed ed                     ed ed                    gZd	 Z ed
 edee                     ed edee                    gZ	d Z
d ZdS )r   z4Parser used to gather references to movies, persons.r   z./text())key	extractorr   z./@hrefr   z./following::text()[1]c                     t          |                     d          pd|                     d          pd|                     d          pd                                          S )Nr   r0   r   r   )r   getr?   )r{   s    r   <lambda>zGatherRefs.<lambda>?  sR    *	f	f	
v	"##%%# # r   r   z#//a[starts-with(@href, "/name/nm")])foreachr   	transformr   z$//a[starts-with(@href, "/title/tt")]c                 r   i }dD ]}i ||<   |                     |g           D ]\  }}|                                }|                                }|r|s2t          |          }|dk    rt          ||| j        | j                  }n#|dk    rt          ||| j        | j                  }|||         |<   |S )N)r   r   r   )rD   rC   rJ   rI   r   )r}   r|   rJ   rI   )r  r?   r   r   r   r   r   )r   rK   resultitemr+   r,   imdbIDr   s           r   r   zGatherRefs.postprocess_dataX  s    1 	& 	&DF4Lr** & &1GGIIGGII a '**<'' &q.2hQ Q QCC]**a-1XP P PC"%tQ& r   c                     |S r   r4   r   s     r   r   zGatherRefs.add_refsk  s    r   N)r   r   r   r   r   r   _common_rules_common_transformr   r   r   r   r4   r   r   r   r   .  s.       >>d:&&	
 	
 	
 	d9oo	
 	
 	
 	d344	
 	
 	
M  	e=#+  	
 	
 	
 	e>#+  	
 	
 	
E&  &    r   r   r   )NNNr.   NN)NNNr.   NFFNNNN))r   rP   collections.abcr   imdb.Characterr   
imdb.Movier   imdb.parser.http.loggingr   imdb.Personr   
imdb.utilsr   r	   piculetr   r   r   r   r   r   r   rQ   r   r   r   r   keys_tomodify_listr"   r%   r   rY   r-   r   r   rO   rk   r   r   r   r4   r   r   <module>r     sH  $ 
 
			 $ $ $ $ $ $ $ $ $ $ $ $       + + + + + +       * * * * * * * * ? ? ? ? ? ? ? ? ? ? ? ? ? ? + + + + + + BJL  
 BJTUU	   tE,--V5N0O0OOE E E E4 fon-- 15KOp p p pf RZ
##
fom,, BJv	 8<FK;?26X X X Xvv v v v v v v vr) ) )> > > > > > > > > >r   