
    KJi[9                     v   d Z ddlZddlmZ ddlmZ ddlmZ ddlZ	ddl	m
Z ddlmZ d	Zej        Zej        j        Z ei           Z G d
 d          Z G d de          Z G d de          Z G d d          Zd Zd Zd Zd$dZ G d d          ZeeedZ ee          Z	  ed           eej        d           eej        d           eej        d          d d dZ  ee           Z!	 e"e#e$e%ej&        ej'        ej(        ej)        ej*        ej+        d 
Z, ee,          Z-	 d! Z.d%d"Z/d# Z0dS )&a>  Piculet is a module for scraping XML and HTML documents using XPath queries.

It consists of this single source file with no dependencies other than
the standard library, which makes it very easy to integrate into applications.

For more information, please refer to the documentation:
https://piculet.readthedocs.io/
    N)partial)
itemgetter)MappingProxyType)etree   )jselz1.2b2c                   >    e Zd ZdZddZd Zd	dZed             ZdS )
	Extractorz?Abstract base extractor for getting data out of an XML element.Nc                 F    || _         	 |t          |          nd| _        dS )zInitialize this extractor.

        :param transform: Function to transform the extracted value.
        :param foreach: Path to apply for generating a collection of values.
        N)	transformXPathforeach)selfr   r   s      X/var/www/html/auto_sub_bot/venv/lib/python3.11/site-packages/imdb/parser/http/piculet.py__init__zExtractor.__init__5   s*     #8)0)<uW~~~$BB    c                      t          d          )zGet the raw data from an element using this extractor.

        :param element: Element to apply this extractor to.
        :return: Extracted raw data.
        z.Concrete extractors must implement this method)NotImplementedError)r   elements     r   applyzExtractor.applyA   s     ""RSSSr   Tc                     |                      |          }||t          u s|s|S | j        |n|                     |          S )a	  Get the processed data from an element using this extractor.

        :param element: Element to extract the data from.
        :param transform: Whether the transformation will be applied or not.
        :return: Extracted and optionally transformed data.
        )r   _EMPTYr   )r   r   r   values       r   extractzExtractor.extractI   sH     

7##Mu	L.uuDNN54I4IIr   c                 (   |                      d          }|d}n+t                               |          }|t          d          |                      d          }|                      d          }|Y|                      d          }|d}n+t                               |          }|t          d          t	          ||||          }nG|                      d	          }d
 |D             }	t          |	|                      d          ||          }|S )zGenerate an extractor from a description map.

        :param item: Extractor description.
        :return: Extractor object.
        :raise ValueError: When reducer or transformer names are unknown.
        r   NzUnknown transformerr   pathreducezUnknown reducerr   r   itemsc                 B    g | ]}t                               |          S  Rulefrom_map).0is     r   
<listcomp>z&Extractor.from_map.<locals>.<listcomp>t   s$    555!T]]1%%555r   section)r(   r   r   )gettransformers
ValueErrorreducersPathRules)
itemtransformerr   r   r   reducerr   	extractorr   ruless
             r   r$   zExtractor.from_mapU   s$    hh{++II$((55I  !6777((9%%xxhhx((G!g..>$%6777T6YPPPIIHHW%%E55u555EeTXXi-@-@(17D D DI r   )NN)T)	__name__
__module____qualname____doc__r   r   r   staticmethodr$   r!   r   r   r
   r
   2   sw        II
C 
C 
C 
CT T T
J 
J 
J 
J " " \" " "r   r
   c                   *     e Zd ZdZd fd	Zd Z xZS )r-   z4An extractor for getting text out of an XML element.Nc                     t                                          ||           t          |          | _        	 |t          j        }|| _        dS )a3  Initialize this extractor.

        :param path: Path to apply to get the data.
        :param reduce: Function to reduce selected texts into a single string.
        :param transform: Function to transform extracted value.
        :param foreach: Path to apply for generating a collection of data.
        r   N)superr   r   r   r,   concatr   )r   r   r   r   r   	__class__s        r   r   zPath.__init__~   sK     	9g>>>$KK	7>_FEEr   c                     |                      |          }t          |          dk    rd}n|                     |          }|S )zApply this extractor to an element.

        :param element: Element to apply this extractor to.
        :return: Extracted text.
        r   N)r   lenr   )r   r   selectedr   s       r   r   z
Path.apply   sC     99W%%x==AEEKK))Er   NNNr4   r5   r6   r7   r   r   __classcell__r=   s   @r   r-   r-   {   sX        >>F F F F F F&      r   r-   c                   *     e Zd ZdZd fd	Zd Z xZS )r.   z:An extractor for getting data items out of an XML element.Nc                     t                                          ||           || _        	 |t          |          nd| _        dS )a  Initialize this extractor.

        :param rules: Rules for generating the data items.
        :param section: Path for setting the root of this section.
        :param transform: Function to transform extracted value.
        :param foreach: Path for generating multiple items.
        r   N)r;   r   r3   r   r(   )r   r3   r(   r   r   r=   s        r   r   zRules.__init__   sH     	9g>>>
2)0)<uW~~~$HHr   c                 l   | j         |}nY|                      |          }t          |          dk    rt          S t          |          dk    rt          d          |d         }i }| j        D ],}|                    |          }|                    |           -t          |          dk    r|nt          S )zApply this extractor to an element.

        :param element: Element to apply the extractor to.
        :return: Extracted mapping.
        Nr   r   z.Section path should select exactly one element)r(   r?   r   r+   r3   r   update)r   r   subrootsubrootsdatarule	extracteds          r   r   zRules.apply   s     <GG||G,,H8}}!!8}}q   !QRRRqkGJ 	# 	#DW--IKK	""""4yy1}}tt&0r   rA   rB   rD   s   @r   r.   r.      sX        DDI I I I I I 1 1 1 1 1 1 1r   r.   c                   6    e Zd ZdZddZed             Zd ZdS )r#   z?A rule describing how to get a data item out of an XML element.Nc                 f    || _         	 || _        	 |t          |          nd| _        	 d| _        dS )zInitialize this rule.

        :param key: Name to distinguish this data item.
        :param extractor: Extractor that will generate this data item.
        :param foreach: Path for generating multiple items.
        N)keyr2   r   r   json_extractor)r   rP   r2   r   s       r   r   zRule.__init__   s?     1":)0)<uW~~~$<"FFr   c                     | d         }t          |t                    r|nt                              |          }t                              | d                   }t	          |||                     d                    S )zvGenerate a rule from a description map.

        :param item: Item description.
        :return: Rule object.
        rP   r   r   )rP   r2   r   )
isinstancestrr
   r$   r#   r)   )r/   item_keyrP   r   s       r   r$   zRule.from_map   sk     ;$Xs33Uhh9K9KH9U9U""4=11udhhy6I6IJJJJr   c                     i } j         |gn                      |          }|D ]C}t           j        t                    r j        n j                            |          }|A j        j         b j                            |          } j        r/t          |t                    rt          j        | j                  }|	|t          u r|||<    fd j                             |          D             }d |D             } j        r fd|D             }t          |          dk    r j        j        |n&t          t           j        j        |                    ||<   E|S )zExtract data out of an element using this rule.

        :param element: Element to extract the data from.
        :return: Extracted data.
        Nc                 H    g | ]}j                             |d           S )F)r   )r2   r   )r%   rr   s     r   r'   z Rule.extract.<locals>.<listcomp>   sD     H H H"# #n44Q%4HH H H Hr   c                 (    g | ]}||t           u|S N)r   )r%   vs     r   r'   z Rule.extract.<locals>.<listcomp>  s"    YYYAMQW!r   c                 D    g | ]}t          j        |j                  S r!   )r   selectrQ   )r%   r[   r   s     r   r'   z Rule.extract.<locals>.<listcomp>  s(    RRRadk!T-@AARRRr   r   )r   rS   rP   rT   r   r2   rQ   r   r]   r   r?   r   listmap)	r   r   rK   rJ   rI   rP   r   
raw_valuesvaluess	   `        r   r   zRule.extract   s     $ 4G99$,,w:O:O 	@ 	@G(377V$((TX=M=Mg=V=VC{~%-..w77& D:eS+A+A D Kt/BCCEMu!S		H H H H'+~'='=g'F'FH H H
YYZYYY& SRRRR6RRRFv;;!##&*n&>&FFFT^5v>>?? S		r   rZ   )r4   r5   r6   r7   r   r8   r$   r   r!   r   r   r#   r#      s_        IIG G G G& 	K 	K \	K    r   r#   c                      t          |          |           }t          |          dk    r,|D ]+}|                                                    |           *dS dS )zRemove selected elements from the tree.

    :param root: Root element of the tree.
    :param path: XPath to select the elements to remove.
    r   N)r   r?   	getparentremove)rootr   elementsr   s       r   remove_elementsrg     sn     uT{{4  H
8}}q 	0 	0G&&w//// 	0 	0r   c                 p    t          |          |           }|D ]}t          |t                    r|n,t                              |                              |          }|It          |t                    r|n,t                              |                              |          }|||j        |<   dS )a  Set an attribute for selected elements.

    :param root: Root element of the tree.
    :param path: XPath to select the elements to set attributes for.
    :param name: Description for name generation.
    :param value: Description for value generation.
    N)r   rS   rT   r
   r$   r   attrib)re   r   namer   rf   r   	attr_name
attr_values           r   set_element_attrrm     s     uT{{4  H / /&tS11 6DDt$$,,W55 	(44 7UUu%%--g66 	$.y!!/ /r   c                      t          |          |           }|D ]M}t          |t                    r|n,t                              |                              |          }||_        NdS )zSet the text for selected elements.

    :param root: Root element of the tree.
    :param path: XPath to select the elements to set attributes for.
    :param text: Description for text generation.
    N)r   rS   rT   r
   r$   r   text)re   r   ro   rf   r   element_texts         r   set_element_textrq   0  sw     uT{{4  H $ $)$44 6ttt$$,,W55 	 $	$ $r   Fc                 l    |rt           j                            |           S t          j        |           S )zBuild a tree from an XML document.

    :param document: XML document to build the tree from.
    :param force_html: Force to parse from HTML without converting.
    :return: Root element of the XML tree.
    )lxmlhtml
fromstringElementTree)document
force_htmls     r   
build_treery   ?  s3      .y##H---!(+++r   c                   $    e Zd ZdZd Zd Zd ZdS )Registryz$A simple, attribute-based namespace.c                 :    | j                             |           dS )z\Initialize this registry.

        :param entries: Entries to add to this registry.
        N)__dict__rH   )r   entriess     r   r   zRegistry.__init__N  s     
 	W%%%%%r   c                 6    | j                             |          S )zGet the value of an entry from this registry.

        :param item: Entry to get the value for.
        :return: Value of entry.
        )r}   r)   )r   r/   s     r   r)   zRegistry.getU  s     }  &&&r   c                     || j         |<   dS )zRegister a new entry in this registry.

        :param key: Key to search the entry in this registry.
        :param value: Value to store for the entry.
        N)r}   )r   rP   r   s      r   registerzRegistry.register]  s     #cr   N)r4   r5   r6   r7   r   r)   r   r!   r   r   r{   r{   K  sG        ..& & &' ' '# # # # #r   r{   )rd   set_attrset_text  |c                     t          j        ddd                    |                               dd                                                    S )Nz\s+r   r       )resubjoinreplacestripxss    r   <lambda>r   u  s:    vsBGGBKK,?,?,L,LMMSSUU r   c                     t          j        ddd                    |                                                               dd                    S )Nz
[^a-z0-9_]r   r   _)r   r   r   lowerr   r   s    r   r   r   v  s<    BF<RWWR[[5F5F5H5H5P5PQTVY5Z5Z[[ r   )firstr<   r   	pipe_joinclean	normalize)
intfloatboolr?   r   upper
capitalizelstriprstripr   c                    |D ]}|d         }|dk    rt          | |d                    '|dk    r&t          | |d         |d         |d                    S|dk    rt          | |d         |d	         
           xt          d          dS )zProcess a tree before starting extraction.

    :param root: Root of tree to process.
    :param pre: Descriptions for processing operations.
    oprd   r   r   rj   r   )rj   r   r   ro   )ro   zUnknown preprocessing operationN)rg   rm   rq   r+   )re   prestepr   s       r   
preprocessr     s      	@ 	@$Z>>D$v,////:T4<d6l$w-XXXXX:T4<d6lCCCCC>???	@ 	@r   c                 b    t          d |D             |          }|                    |           S )zExtract data from an XML element.

    :param element: Element to extract the data from.
    :param items: Descriptions for extracting items.
    :param section: Path to select the root element for these items.
    :return: Extracted data.
    c                 B    g | ]}t                               |          S r!   r"   )r%   r/   s     r   r'   zextract.<locals>.<listcomp>  s$    99944==&&999r   r(   )r.   r   )r   r   r(   r3   s       r   r   r     s7     9959997KKKE==!!!r   c                     t          |           }|                    d          }|t          ||           t          ||                    d          |                    d                    }|S )zExtract data from a document after optionally preprocessing it.

    :param document: Document to scrape.
    :param spec: Extraction specification.
    :return: Extracted data.
    r   Nr   r(   r   )ry   r)   r   r   )rw   specre   r   rK   s        r   scraper     sf     hD
((5//C
44'**DHHY4G4GHHHDKr   )FrZ   )1r7   r   	functoolsr   operatorr   typesr   	lxml.htmlrs   r   rv   r   r   __version__r   _Elementxpathr   r
   r-   r.   r#   rg   rm   rq   ry   r{   _PREPROCESSORSpreprocessorsrT   r   	_REDUCERSr,   r   r   r   r?   r   r   r   r   r   r   _TRANSFORMERSr*   r   r   r   r!   r   r   <module>r      s     
			             " " " " " "     % % % % % %       	" 
	"		F F F F F F F FR! ! ! ! !9 ! ! !H'1 '1 '1 '1 '1I '1 '1 '1T@ @ @ @ @ @ @ @F
0 
0 
0/ / /.$ $ $	, 	, 	, 	,# # # # # # # #8     ((  Z]]gch##GCHc""3''UU[[ 	 8I  YY.jjY  x&& @ @ @$	" 	" 	" 	"    r   