o
    Zh0hF-                     @   s  d Z ddlZddlmZ ddlmZmZmZ ddlmZ ddl	m
Z
 ddlmZ dd	lmZ dd
lmZ ddlmZ e
 Zeed dZeed dZeed dZeeedZeeedZeedZdZede dZdd Zdd Zi Z dd Z!dd Z"dd Z#e"dd d! Z$e"d"d#d$ Z%e"d%d&d' Z&e"d(d)d* Z'e"d+d,d- Z(e"d.d/d0 Z)d1d2 Z*d3d4 Z+ed5ej,Z-d6d7 Z.dS )8aV  Specific handling for some HTML elements, especially replaced elements.

Replaced elements (eg. <img> elements) are rendered externally and behave as an
atomic opaque box in CSS. In general, they may or may not have intrinsic
dimensions. But the only replaced elements currently supported in WeasyPrint
are images with intrinsic dimensions.

    N)files   )CSS
Attachmentcss)get_child_text)CounterStyle)boxes)SVGImage)LOGGER)get_url_attributezhtml5_ua.csszutf-8zhtml5_ua_form.csszhtml5_ph.css)stringcounter_styler   z 	
z[^z]+c                 C   s   |     S )a4  Transform (only) ASCII letters to lower case: A-Z is mapped to a-z.

    This is used for `ASCII case-insensitive
    <https://whatwg.org/C#ascii-case-insensitive>`_ matching.

    This is different from the :meth:`str.lower` method of Unicode strings
    which also affect non-ASCII characters,
    sometimes mapping them into the ASCII range:

    >>> keyword = 'Bac\N{KELVIN SIGN}ground'
    >>> assert keyword.lower() == 'background'
    >>> assert ascii_lower(keyword) != keyword.lower()
    >>> assert ascii_lower(keyword) == 'bac\N{KELVIN SIGN}ground'

    )encodelowerdecoder    r   E/var/www/html/rh/venv/lib/python3.10/site-packages/weasyprint/html.pyascii_lower$   s   r   c                    s(   t | dd}t fdd|D S )zDReturn whether element has a ``rel`` attribute with given link type.rel c                 3   s    | ]	}t | kV  qd S )N)r   ).0token	link_typer   r   	<genexpr>;   s    z(element_has_link_type.<locals>.<genexpr>)HTML_SPACE_SEPARATED_TOKENS_REfindallgetany)elementr   tokensr   r   r   element_has_link_type8   s   r#   c                 C   s$   |j tv rt| j | |||S |gS )zbHandle HTML elements that need special care.

    :returns: a (possibly empty) list of boxes.
    )element_tagHTML_HANDLERStag)r!   boxget_image_from_uribase_urlr   r   r   handle_elementB   s
   
r*   c                    s    fdd}|S )zDReturn a decorator registering a function handling ``tag`` elements.c                    s   | t  < | S )z;Decorator registering a function handling ``tag`` elements.)r%   )functionr&   r   r   	decoratorP   s   zhandler.<locals>.decoratorr   )r&   r-   r   r,   r   handlerN   s   r.   c                 C   s@   d|j d v r
tjntj}|| j|j | |}|j|_|j|_|S )zWrap an image in a replaced box.

    That box is either block-level or inline-level, depending on what the
    element should be.

    blockdisplay)styler	   BlockReplacedBoxInlineReplacedBoxr&   
string_setbookmark_label)r!   r'   imagetype_new_boxr   r   r   make_replaced_boxW   s   r9   imgc                 C   s   t | d|}| d}|r=|||jd d}|dur!t| ||gS |r/tj||g|_|gS |dkr5g S |du s;J g S |rKtj||g|_|gS g S )zHandle ``<img>`` elements.

    Return either an image or the alt-text.

    See: https://www.w3.org/TR/html5/embedded-content-1.html#the-img-element

    srcaltimage_orientation)urlorientationNr   )r   r   r1   r9   r	   TextBoxanonymous_fromchildren)r!   r'   r(   r)   r;   r<   r6   r   r   r   
handle_imgi   s&   	

rC   embedc                 C   sN   t | d|}| dd }|r%||||jd d}|dur%t| ||gS g S )zHandle ``<embed>`` elements, return either an image or nothing.

    See: https://www.w3.org/TR/html5/embedded-content-0.html#the-embed-element

    r;   typer   r=   r>   forced_mime_typer?   Nr   r   stripr1   r9   )r!   r'   r(   r)   r;   r7   r6   r   r   r   handle_embed   s   rJ   objectc                 C   sP   t | d|}| dd }|r%||||jd d}|dur%t| ||gS |gS )zHandle ``<object>`` elements, return either an image or the fallback.

    See: https://www.w3.org/TR/html5/embedded-content-0.html#the-object-element

    datarE   r   r=   rF   NrH   )r!   r'   r(   r)   rL   r7   r6   r   r   r   handle_object   s   rM   colgroupc                    s>   t  tjrtdd | D s fddt jD  _ gS )Handle the ``span`` attribute.c                 s   s    | ]}|j d kV  qdS )colNr,   )r   childr   r   r   r      s    z"handle_colgroup.<locals>.<genexpr>c                    s   g | ]	}t j g qS r   )r	   TableColumnBoxrA   )r   _r'   r   r   
<listcomp>   s    z#handle_colgroup.<locals>.<listcomp>)
isinstancer	   TableColumnGroupBoxr    rangespanrB   r!   r'   _get_image_from_uri	_base_urlr   rT   r   handle_colgroup   s   
r]   rP   c                    s4   t  tjr jdkr fddt jD S  gS )rO   r   c                    s   g | ]}   qS r   )copy)r   _irT   r   r   rU      s    zhandle_col.<locals>.<listcomp>)rV   r	   rR   rY   rX   rZ   r   rT   r   
handle_col   s   r`   z{http://www.w3.org/2000/svg}svgc              
   C   sx   |j d }|j d }z	t| |||}W n! ty4 } ztd| tjd|d g W  Y d}~S d}~ww t| ||gS )zUHandle ``<svg>`` elements.

    Return either an image or the fallback content.

    url_fetchercontextzFailed to load inline SVG: %szError while loading inline SVG:)exc_infoN)keywordsr
   	Exceptionr   errordebugr9   )r!   r'   r(   r)   ra   rb   r6   	exceptionr   r   r   
handle_svg   s   

ri   c                 C   s  d}d}d}g }g }d}d}g }i }	| j jdd}
| jdddD ]}|j }|jdkr5|du r5t|}q"|jdkrt|dd}|dd}|d	kratt	|
d
D ]}||vr_|| qTq"|dkrk|| q"|dkrv|du ru|}q"|dkr|du r|}q"|dkr|du rt||}q"|dkr|du rt||}q"|r||	vr||	|< q"|jdkrt|drt|d| j}|dd}|du rtd q"t||| jd}|| q"|||||||||
|	d
S )a2  Get metadata dictionary out of HTML object.

    Relevant specs:

    https://www.whatwg.org/html#the-title-element
    https://www.whatwg.org/html#standard-metadata-names
    https://wiki.whatwg.org/wiki/MetaExtensions
    https://microformats.org/wiki/existing-rel-values#HTML5_link_type_extensions

    Nlangtitlemetalinknamer   contentrd   ,authordescription	generatorzdcterms.createdzdcterms.modified
attachmenthrefz'Missing href in <link rel="attachment">)r>   rr   ra   )
rk   rr   rs   rd   authorscreatedmodifiedattachmentsrj   custom)etree_elementattribr   wrapper_element	query_allr&   r   r   mapstrip_whitespacesplitappendparse_w3c_dater#   r   r)   r   rf   r   ra   )htmlrk   rr   rs   rd   rv   rw   rx   ry   rz   rj   r!   rn   ro   keywordr>   attachment_titlert   r   r   r   get_html_metadata   s   





r   c                 C   s
   |  tS )zUse the HTML definition of "space character",
    not all Unicode Whitespace.

    https://www.whatwg.org/html#strip-leading-and-trailing-whitespace
    https://www.whatwg.org/html#space-character

    )rI   HTML_WHITESPACEr   r   r   r   r   &  s   
r   aG  
    ^
    [ 	
]*
    (?P<year>\d\d\d\d)
    (?:
        -(?P<month>0\d|1[012])
        (?:
            -(?P<day>[012]\d|3[01])
            (?:
                T(?P<hour>[01]\d|2[0-3])
                :(?P<minute>[0-5]\d)
                (?:
                    :(?P<second>[0-5]\d)
                    (?:\.\d+)?  # Second fraction, ignored
                )?
                (?:
                    Z |  # UTC
                    (?P<tz_hour>[+-](?:[01]\d|2[0-3]))
                    :(?P<tz_minute>[0-5]\d)
                )
            )?
        )?
    )?
    [ 	
]*
    $
c                 C   s    t |r|S td| | dS )zYParse datetimes as defined by the W3C.

    See https://www.w3.org/TR/NOTE-datetime

    z#Invalid date in <meta name="%s"> %rN)W3C_DATE_REmatchr   warning)	meta_namer   r   r   r   r   T  s
   
r   )/__doc__reimportlib.resourcesr   r   r   r   r   r   css.countersr   formatting_structurer	   imagesr
   loggerr   urlsr   HTML5_UA_COUNTER_STYLE	read_textHTML5_UAHTML5_UA_FORMHTML5_PHHTML5_UA_STYLESHEETHTML5_UA_FORM_STYLESHEETHTML5_PH_STYLESHEETr   compiler   r   r#   r%   r*   r.   r9   rC   rJ   rM   r]   r`   ri   r   r   VERBOSEr   r   r   r   r   r   <module>   s^    	
	
$




	
I