""" parquet compat """
from __future__ import annotations

import io
import json
import os
from typing import (
    TYPE_CHECKING,
    Any,
    Literal,
)
import warnings
from warnings import catch_warnings

from pandas._config import using_pyarrow_string_dtype
from pandas._config.config import _get_option

from pandas._libs import lib
from pandas.compat._optional import import_optional_dependency
from pandas.errors import AbstractMethodError
from pandas.util._decorators import doc
from pandas.util._exceptions import find_stack_level
from pandas.util._validators import check_dtype_backend

import pandas as pd
from pandas import (
    DataFrame,
    get_option,
)
from pandas.core.shared_docs import _shared_docs
from pandas.io._util import arrow_string_types_mapper

from pandas.io.common import (
    IOHandles,
    get_handle,
    is_fsspec_url,
    is_url,
    stringify_path,
)

if TYPE_CHECKING:
    from pandas._typing import (
        DtypeBackend,
        FilePath,
        ReadBuffer,
        StorageOptions,
        WriteBuffer,
    )


def get_engine(engine: str) -> BaseImpl:
    """return our implementation"""
    if engine == "auto":
        engine = get_option("io.parquet.engine")

    if engine == "auto":
        # try engines in this order
        engine_classes = [PyArrowImpl, FastParquetImpl]

        error_msgs = ""
        for engine_class in engine_classes:
            try:
                return engine_class()
            except ImportError as err:
                error_msgs += "\n - " + str(err)

        raise ImportError(
            "Unable to find a usable engine; "
            "tried using: 'pyarrow', 'fastparquet'.\n"
            "A suitable version of "
            "pyarrow or fastparquet is required for parquet "
            "support.\n"
            "Trying to import the above resulted in these errors:"
            f"{error_msgs}"
        )

    if engine == "pyarrow":
        return PyArrowImpl()
    elif engine == "fastparquet":
        return FastParquetImpl()

    raise ValueError("engine must be one of 'pyarrow', 'fastparquet'")


def _get_path_or_handle(
    path: FilePath | ReadBuffer[bytes] | WriteBuffer[bytes],
    fs: Any,
    storage_options: StorageOptions | None = None,
    mode: str = "rb",
    is_dir: bool = False,
) -> tuple[
    FilePath | ReadBuffer[bytes] | WriteBuffer[bytes], IOHandles[bytes] | None, Any
]:
    """File handling for PyArrow."""
    path_or_handle = stringify_path(path)
    if fs is not None:
        pa_fs = import_optional_dependency("pyarrow.fs", errors="ignore")
        fsspec = import_optional_dependency("fsspec", errors="ignore")
        if pa_fs is not None and isinstance(fs, pa_fs.FileSystem):
            if storage_options:
                raise NotImplementedError(
                    "storage_options not supported with a pyarrow FileSystem."
                )
        elif fsspec is not None and isinstance(fs, fsspec.spec.AbstractFileSystem):
            pass
        else:
            raise ValueError(
                f"filesystem must be a pyarrow or fsspec FileSystem, "
                f"not a {type(fs).__name__}"
            )
    if is_fsspec_url(path_or_handle) and fs is None:
        if storage_options is None:
            pa = import_optional_dependency("pyarrow")
            pa_fs = import_optional_dependency("pyarrow.fs")

            try:
                fs, path_or_handle = pa_fs.FileSystem.from_uri(path)
            except (TypeError, pa.lib.ArrowInvalid):
                pass
        if fs is None:
            fsspec = import_optional_dependency("fsspec")
            fs, path_or_handle = fsspec.core.url_to_fs(
                path_or_handle, **(storage_options or {})
            )
    elif storage_options and (not is_url(path_or_handle) or mode != "rb"):
        # can't write to a remote url
        # without making use of fsspec at the moment
        raise ValueError("storage_options passed with buffer, or non-supported URL")

    handles = None
    if (
        not fs
        and not is_dir
        and isinstance(path_or_handle, str)
        and not os.path.isdir(path_or_handle)
    ):
        # use get_handle only when we are very certain that it is not a directory
        # fsspec resources can also point to directories
        # this branch is used for example when reading from non-fsspec URLs
        handles = get_handle(
            path_or_handle, mode, is_text=False, storage_options=storage_options
        )
        fs = None
        path_or_handle = handles.handle
    return path_or_handle, handles, fs


class BaseImpl:
    @staticmethod
    def validate_dataframe(df: DataFrame) -> None:
        if not isinstance(df, DataFrame):
            raise ValueError("to_parquet only supports IO with DataFrames")

    def write(self, df: DataFrame, path, compression, **kwargs) -> None:
        raise AbstractMethodError(self)

    def read(self, path, columns=None, **kwargs) -> DataFrame:
        raise AbstractMethodError(self)


class PyArrowImpl(BaseImpl):
    def __init__(self) -> None:
        import_optional_dependency(
            "pyarrow", extra="pyarrow is required for parquet support."
        )
        import pyarrow.parquet

        # import utils to register the pyarrow extension types
        import pandas.core.arrays.arrow.extension_types  # noqa: F401

        self.api = pyarrow

    def write(
        self,
        df: DataFrame,
        path: FilePath | WriteBuffer[bytes],
        compression: str | None = "snappy",
        index: bool | None = None,
        storage_options: StorageOptions | None = None,
        partition_cols: list[str] | None = None,
        filesystem=None,
        **kwargs,
    ) -> None:
        self.validate_dataframe(df)

        from_pandas_kwargs: dict[str, Any] = {"schema": kwargs.pop("schema", None)}
        if index is not None:
            from_pandas_kwargs["preserve_index"] = index

        table = self.api.Table.from_pandas(df, **from_pandas_kwargs)

        if df.attrs:
            df_metadata = {"PANDAS_ATTRS": json.dumps(df.attrs)}
            existing_metadata = table.schema.metadata
            merged_metadata = {**existing_metadata, **df_metadata}
            table = table.replace_schema_metadata(merged_metadata)

        path_or_handle, handles, filesystem = _get_path_or_handle(
            path,
            filesystem,
            storage_options=storage_options,
            mode="wb",
            is_dir=partition_cols is not None,
        )
        if (
            isinstance(path_or_handle, io.BufferedWriter)
            and hasattr(path_or_handle, "name")
            and isinstance(path_or_handle.name, (str, bytes))
        ):
            if isinstance(path_or_handle.name, bytes):
                path_or_handle = path_or_handle.name.decode()
            else:
                path_or_handle = path_or_handle.name

        try:
            if partition_cols is not None:
                # writes to multiple files under the given path
                self.api.parquet.write_to_dataset(
                    table,
                    path_or_handle,
                    compression=compression,
                    partition_cols=partition_cols,
                    filesystem=filesystem,
                    **kwargs,
                )
            else:
                # write to single output file
                self.api.parquet.write_table(
                    table,
                    path_or_handle,
                    compression=compression,
                    filesystem=filesystem,
                    **kwargs,
                )
        finally:
            if handles is not None:
                handles.close()

    def read(
        self,
        path,
        columns=None,
        filters=None,
        use_nullable_dtypes: bool = False,
        dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
        storage_options: StorageOptions | None = None,
        filesystem=None,
        **kwargs,
    ) -> DataFrame:
        kwargs["use_pandas_metadata"] = True

        to_pandas_kwargs = {}
        if dtype_backend == "numpy_nullable":
            from pandas.io._util import _arrow_dtype_mapping

            mapping = _arrow_dtype_mapping()
            to_pandas_kwargs["types_mapper"] = mapping.get
        elif dtype_backend == "pyarrow":
            to_pandas_kwargs["types_mapper"] = pd.ArrowDtype  # type: ignore[assignment]
        elif using_pyarrow_string_dtype():
            to_pandas_kwargs["types_mapper"] = arrow_string_types_mapper()

        manager = _get_option("mode.data_manager", silent=True)
        if manager == "array":
            to_pandas_kwargs["split_blocks"] = True  # type: ignore[assignment]

        path_or_handle, handles, filesystem = _get_path_or_handle(
            path,
            filesystem,
            storage_options=storage_options,
            mode="rb",
        )
        try:
            pa_table = self.api.parquet.read_table(
                path_or_handle,
                columns=columns,
                filesystem=filesystem,
                filters=filters,
                **kwargs,
            )
            result = pa_table.to_pandas(**to_pandas_kwargs)

            if manager == "array":
                result = result._as_manager("array", copy=False)

            if pa_table.schema.metadata:
                if b"PANDAS_ATTRS" in pa_table.schema.metadata:
                    df_metadata = pa_table.schema.metadata[b"PANDAS_ATTRS"]
                    result.attrs = json.loads(df_metadata)
            return result
        finally:
            if handles is not None:
                handles.close()


class FastParquetImpl(BaseImpl):
    def __init__(self) -> None:
        # since pandas is a dependency of fastparquet
        # we need to import on first use
        fastparquet = import_optional_dependency(
            "fastparquet", extra="fastparquet is required for parquet support."
        )
        self.api = fastparquet

    def write(
        self,
        df: DataFrame,
        path,
        compression: Literal["snappy", "gzip", "brotli"] | None = "snappy",
        index=None,
        partition_cols=None,
        storage_options: StorageOptions | None = None,
        filesystem=None,
        **kwargs,
    ) -> None:
        self.validate_dataframe(df)

        if "partition_on" in kwargs and partition_cols is not None:
            raise ValueError(
                "Cannot use both partition_on and "
                "partition_cols. Use partition_cols for partitioning data"
            )
        if "partition_on" in kwargs:
            partition_cols = kwargs.pop("partition_on")

        if partition_cols is not None:
            kwargs["file_scheme"] = "hive"

        if filesystem is not None:
            raise NotImplementedError(
                "filesystem is not implemented for the fastparquet engine."
            )

        # cannot use get_handle as write() does not accept file buffers
        path = stringify_path(path)
        if is_fsspec_url(path):
            fsspec = import_optional_dependency("fsspec")

            # if filesystem is provided by fsspec, file must be opened in 'wb' mode.
            kwargs["open_with"] = lambda path, _: fsspec.open(
                path, "wb", **(storage_options or {})
            ).open()
        elif storage_options:
            raise ValueError(
                "storage_options passed with file object or non-fsspec file path"
            )

        with catch_warnings(record=True):
            self.api.write(
                path,
                df,
                compression=compression,
                write_index=index,
                partition_on=partition_cols,
                **kwargs,
            )

    def read(
        self,
        path,
        columns=None,
        filters=None,
        storage_options: StorageOptions | None = None,
        filesystem=None,
        **kwargs,
    ) -> DataFrame:
        parquet_kwargs: dict[str, Any] = {}
        use_nullable_dtypes = kwargs.pop("use_nullable_dtypes", False)
        dtype_backend = kwargs.pop("dtype_backend", lib.no_default)
        # We are disabling nullable dtypes for fastparquet pending discussion
        parquet_kwargs["pandas_nulls"] = False
        if use_nullable_dtypes:
            raise ValueError(
                "The 'use_nullable_dtypes' argument is not supported for the "
                "fastparquet engine"
            )
        if dtype_backend is not lib.no_default:
            raise ValueError(
                "The 'dtype_backend' argument is not supported for the "
                "fastparquet engine"
            )
        if filesystem is not None:
            raise NotImplementedError(
                "filesystem is not implemented for the fastparquet engine."
            )
        path = stringify_path(path)
        handles = None
        if is_fsspec_url(path):
            fsspec = import_optional_dependency("fsspec")

            parquet_kwargs["fs"] = fsspec.open(
                path, "rb", **(storage_options or {})
            ).fs
        elif isinstance(path, str) and not os.path.isdir(path):
            # use get_handle only when we are very certain that it is not a directory
            # fsspec resources can also point to directories
            # this branch is used for example when reading from non-fsspec URLs
            handles = get_handle(
                path, "rb", is_text=False, storage_options=storage_options
            )
            path = handles.handle

        try:
            parquet_file = self.api.ParquetFile(path, **parquet_kwargs)
            return parquet_file.to_pandas(columns=columns, filters=filters, **kwargs)
        finally:
            if handles is not None:
                handles.close()


@doc(storage_options=_shared_docs["storage_options"])
def to_parquet(
    df: DataFrame,
    path: FilePath | WriteBuffer[bytes] | None = None,
    engine: str = "auto",
    compression: str | None = "snappy",
    index: bool | None = None,
    storage_options: StorageOptions | None = None,
    partition_cols: list[str] | None = None,
    filesystem: Any = None,
    **kwargs,
) -> bytes | None:
    """
Write a DataFrame to the parquet format.

Parameters
----------
df : DataFrame
path : str, path object, file-like object, or None, default None
    String, path object (implementing ``os.PathLike[str]``), or file-like
    object implementing a binary ``write()`` function. If None, the result is
    returned as bytes. If a string, it will be used as Root Directory path
    when writing a partitioned dataset. The engine fastparquet does not
    accept file-like objects.
engine : {{'auto', 'pyarrow', 'fastparquet'}}, default 'auto'
    Parquet library to use. If 'auto', then the option
    ``io.parquet.engine`` is used. The default ``io.parquet.engine``
    behavior is to try 'pyarrow', falling back to 'fastparquet' if
    'pyarrow' is unavailable.

    When using the ``'pyarrow'`` engine and no storage options are provided
    and a filesystem is implemented by both ``pyarrow.fs`` and ``fsspec``
    (e.g. "s3://"), then the ``pyarrow.fs`` filesystem is attempted first.
    Use the filesystem keyword with an instantiated fsspec filesystem
    if you wish to use its implementation.
compression : {{'snappy', 'gzip', 'brotli', 'lz4', 'zstd', None}},
    default 'snappy'. Name of the compression to use. Use ``None``
    for no compression.
index : bool, default None
    If ``True``, include the dataframe's index(es) in the file output. If
    ``False``, they will not be written to the file.
    If ``None``, similar to ``True`` the dataframe's index(es)
    will be saved. However, instead of being saved as values,
    the RangeIndex will be stored as a range in the metadata so it
    doesn't require much space and is faster. Other indexes will
    be included as columns in the file output.
partition_cols : str or list, optional, default None
    Column names by which to partition the dataset.
    Columns are partitioned in the order they are given.
    Must be None if path is not a string.
{storage_options}

filesystem : fsspec or pyarrow filesystem, default None
    Filesystem object to use when reading the parquet file. Only implemented
    for ``engine="pyarrow"``.

    .. versionadded:: 2.1.0

kwargs
    Additional keyword arguments passed to the engine

Returns
-------
bytes if no path argument is provided else None
N)r[   r   r}   r1   r~   )r9   r(   r0   r   BytesIOr]   getvalue)rT   rF   r*   r[   r   r1   r}   r~   r\   implpath_or_bufs              r/   
    """
    if isinstance(partition_cols, str):
        partition_cols = [partition_cols]
    impl = get_engine(engine)

    path_or_buf: FilePath | WriteBuffer[bytes] = io.BytesIO() if path is None else path

    impl.write(
        df,
        path_or_buf,
        compression=compression,
        index=index,
        partition_cols=partition_cols,
        storage_options=storage_options,
        filesystem=filesystem,
        **kwargs,
    )

    if path is None:
        assert isinstance(path_or_buf, io.BytesIO)
        return path_or_buf.getvalue()
    else:
        return None


@doc(storage_options=_shared_docs["storage_options"])
def read_parquet(
    path: FilePath | ReadBuffer[bytes],
    engine: str = "auto",
    columns: list[str] | None = None,
    storage_options: StorageOptions | None = None,
    use_nullable_dtypes: bool | lib.NoDefault = lib.no_default,
    dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
    filesystem: Any = None,
    filters: list[tuple] | list[list[tuple]] | None = None,
    **kwargs,
) -> DataFrame:
    """
Load a parquet object from the file path, returning a DataFrame.

Parameters
----------
path : str, path object or file-like object
    String, path object (implementing ``os.PathLike[str]``), or file-like
    object implementing a binary ``read()`` function.
    The string could be a URL. Valid URL schemes include http, ftp, s3,
    gs, and file. For file URLs, a host is expected. A local file could be:
    ``file://localhost/path/to/table.parquet``.
    A file URL can also be a path to a directory that contains multiple
    partitioned parquet files. Both pyarrow and fastparquet support
    paths to directories as well as file URLs. A directory path could be:
    ``file://localhost/path/to/tables`` or ``s3://bucket/partition_dir``.
engine : {{'auto', 'pyarrow', 'fastparquet'}}, default 'auto'
    Parquet library to use. If 'auto', then the option
    ``io.parquet.engine`` is used. The default ``io.parquet.engine``
    behavior is to try 'pyarrow', falling back to 'fastparquet' if
    'pyarrow' is unavailable.

    When using the ``'pyarrow'`` engine and no storage options are provided
    and a filesystem is implemented by both ``pyarrow.fs`` and ``fsspec``
    (e.g. "s3://"), then the ``pyarrow.fs`` filesystem is attempted first.
    Use the filesystem keyword with an instantiated fsspec filesystem
    if you wish to use its implementation.
columns : list, default=None
    If not None, only these columns will be read from the file.
{storage_options}

    .. versionadded:: 1.3.0

use_nullable_dtypes : bool, default False
    If True, use dtypes that use ``pd.NA`` as missing value indicator
    for the resulting DataFrame. (only applicable for the ``pyarrow``
    engine)
    As new dtypes are added that support ``pd.NA`` in the future, the
    output with this option will change to use those dtypes.
    Note: this is an experimental option, and behaviour (e.g. additional
    support dtypes) may change without notice.

    .. deprecated:: 2.0

dtype_backend : {{'numpy_nullable', 'pyarrow'}}, default 'numpy_nullable'
    Back-end data type applied to the resultant :class:`DataFrame`
    (still experimental). Behaviour is as follows:

    * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame`
      (default).
    * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype`
      DataFrame.

    .. versionadded:: 2.0

filesystem : fsspec or pyarrow filesystem, default None
    Filesystem object to use when reading the parquet file. Only implemented
    for ``engine="pyarrow"``.

    .. versionadded:: 2.1.0

filters : List[Tuple] or List[List[Tuple]], default None
    To filter out data.
    Filter syntax: [[(column, op, val), ...],...]
    where op is [==, =, >, >=, <, <=, !=, in, not in]
    The innermost tuples are transposed into a set of filters applied
    through an `AND` operation.
    The outer list combines these sets of filters through an `OR`
    operation.
    A single list of tuples can also be used, meaning that no `OR`
    operation between set of filters is to be conducted.

    Using this argument will NOT result in row-wise filtering of the final
    partitions unless ``engine="pyarrow"`` is also specified.  For
    other engines, filtering is only performed at the partition level, that is,
    to prevent the loading of some row-groups and/or files.

    .. versionadded:: 2.1.0

**kwargs
    Any additional kwargs are passed to the engine.

Returns
-------
DataFrame

See Also
--------
DataFrame.to_parquet : Create a parquet object that serializes a DataFrame.

Examples
--------
>>> original_df = pd.DataFrame(
...     {{"foo": range(5), "bar": range(5, 10)}}
...    )
>>> original_df
   foo  bar
0    0    5
1    1    6
2    2    7
3    3    8
4    4    9
>>> df_parquet_bytes = original_df.to_parquet()
>>> from io import BytesIO
>>> restored_df = pd.read_parquet(BytesIO(df_parquet_bytes))
>>> restored_df
   foo  bar
0    0    5
1    1    6
2    2    7
3    3    8
4    4    9
>>> restored_df.equals(original_df)
True
>>> restored_bar = pd.read_parquet(BytesIO(df_parquet_bytes), columns=["bar"])
>>> restored_bar
    bar
0    5
1    6
2    7
3    8
4    9
>>> restored_bar.equals(original_df[['bar']])
True

The function uses `kwargs` that are passed directly to the engine.
In the following example, we use the `filters` argument of the pyarrow
engine to filter the rows of the DataFrame.

Since `pyarrow` is the default engine, we can omit the `engine` argument.
Note that the `filters` argument is implemented by the `pyarrow` engine,
which can benefit from multithreading and also potentially be more
economical in terms of memory.

>>> sel = [("foo", ">", 2)]
>>> restored_part = pd.read_parquet(BytesIO(df_parquet_bytes), filters=sel)
>>> restored_part
    foo  bar
0    3    8
1    4    9
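
The ``dtype_backend`` keyword selects the dtypes of the result; a sketch
(assumes pyarrow is installed; the dtypes shown are indicative):

>>> pd.read_parquet(
...     BytesIO(df_parquet_bytes), dtype_backend="pyarrow"
... ).dtypes  # doctest: +SKIP
foo    int64[pyarrow]
bar    int64[pyarrow]
dtype: object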
    """
    impl = get_engine(engine)

    if use_nullable_dtypes is not lib.no_default:
        msg = (
            "The argument 'use_nullable_dtypes' is deprecated and will be removed "
            "in a future version."
        )
        if use_nullable_dtypes is True:
            msg += (
                "Use dtype_backend='numpy_nullable' instead of "
                "use_nullable_dtypes=True."
            )
        warnings.warn(msg, FutureWarning, stacklevel=find_stack_level())
    else:
        use_nullable_dtypes = False
    check_dtype_backend(dtype_backend)

    return impl.read(
        path,
        columns=columns,
        filters=filters,
        storage_options=storage_options,
        use_nullable_dtypes=use_nullable_dtypes,
        dtype_backend=dtype_backend,
        filesystem=filesystem,
        **kwargs,
    )
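

# Usage sketch (illustrative, not part of the pandas API surface): round-trip a
# frame through an in-memory parquet buffer with the helpers defined above.
# Assumes pyarrow is installed; guarded so it only runs when executed directly.
if __name__ == "__main__":
    frame = pd.DataFrame({"foo": range(5), "bar": range(5, 10)})
    raw = to_parquet(frame)  # no path given, so the serialized bytes are returned
    assert raw is not None
    restored = read_parquet(io.BytesIO(raw), columns=["bar"])
    print(restored)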