
    Mh;              	      V   S r SSKJr  SSKJr  SSKJr  SSKrSSKJr  SSK	r	SSK
rSSKJr  SSKJr  SSKrSS	KJr  SS
KJr  \(       a  SSKJrJrJrJr  SrSrSrSr/ SQrSr Sr!Sr"Sr#S\  S\" S\! S\# S3	r$S\  S\! S3r%Sr&S!S jr'S"S jr(S r)S r* " S S \\RV                  5      r,g)#a-  
Read a SAS XPort format file into a Pandas DataFrame.

Based on code from Jack Cushman (github.com/jcushman/xport).

The file format is defined here:

https://support.sas.com/content/dam/SAS/support/en/technical-papers/record-layout-of-a-sas-version-5-or-6-data-set-in-sas-transport-xport-format.pdf
    )annotations)abc)datetimeN)TYPE_CHECKING)Appender)find_stack_level)
get_handle)
ReaderBase)CompressionOptionsDatetimeNaTTypeFilePath
ReadBufferzPHEADER RECORD*******LIBRARY HEADER RECORD!!!!!!!000000000000000000000000000000  zKHEADER RECORD*******MEMBER  HEADER RECORD!!!!!!!000000000000000001600000000zPHEADER RECORD*******DSCRPTR HEADER RECORD!!!!!!!000000000000000000000000000000  zPHEADER RECORD*******OBS     HEADER RECORD!!!!!!!000000000000000000000000000000  )ntypenhfunfield_lengthnvar0namelabelnformnflnum_decimalsnfjnfillniformniflnifdnpos_zParameters
----------
filepath_or_buffer : str or file-like object
    Path to SAS file or object implementing binary read method.zindex : identifier of index column
    Identifier of column that should be used as index of the DataFrame.
encoding : str
    Encoding for text data.
chunksize : int
    Read file `chunksize` lines at a time, returns iterator.zBformat : str
    File format, only `xport` is currently supported.z\iterator : bool, default False
    Return XportReader object for reading file incrementally.z#Read a SAS file into a DataFrame.


a  

Returns
-------
DataFrame or XportReader

Examples
--------
Read a SAS Xport file:

>>> df = pd.read_sas('filename.XPT')

Read a Xport file in 10,000 line chunks:

>>> itr = pd.read_sas('filename.XPT', chunksize=10000)
>>> for chunk in itr:
>>>     do_something(chunk)

z$Class for reading SAS Xport files.

z

Attributes
----------
member_info : list
    Contains information about the file
fields : list
    Contains information about the variables in the file
zRead observations from SAS Xport file, returning as data frame.

Parameters
----------
nrows : int
    Number of rows to read from data file; if None, read whole
    file.

Returns
-------
A DataFrame.
c                r     [         R                  " U S5      $ ! [         a    [        R                  s $ f = f)z1Given a date in xport format, return Python date.z%d%b%y:%H:%M:%S)r   strptime
ValueErrorpdNaT)datestrs    I/var/www/html/env/lib/python3.13/site-packages/pandas/io/sas/sas_xport.py_parse_dater'      s3      *;<< vvs    66c                ^    0 nSnU H  u  pEXX5-    R                  5       X$'   X5-  nM!     US	 U$ )z
Parameters
----------
s: str
    Fixed-length string to split
parts: list of (name, length) pairs
    Used to break up string, name '_' will be filtered from output.

Returns
-------
Dict of name:contents of string at given location.
r   r   )strip)spartsoutstartr   lengths         r&   _split_liner/      sF     CEen-335	  	CJ    c                    US:w  ah  [         R                  " [        U 5      [         R                  " S5      5      n[         R                  " SU SSU-
   35      nUR	                  US9nXS'   U$ U $ )N   S8Sz,Sdtypef0)npzeroslenr6   view)vecnbytesvec1r6   vec2s        r&   _handle_truncated_float_vecr@      sg     {xxC"((4.11VHBq6zl34yyuy%T
Jr0   c                r   [         R                  " S5      nU R                  US9nUS   nUS   nUS-  n[         R                  " [	        U 5      [         R
                  S9nSU[         R                  " US-  5      '   SU[         R                  " US	-  5      '   S
U[         R                  " US-  5      '   XV-  nXF-	  US-  SS
U-
  -   -  -  nUS-  nXSS-	  S-  S-
  S-  U-   S-   S-  US-  -  -  n[         R                  " [	        U5      4SS9nXXS'   XxS'   UR                  SS9nUR                  S5      nU$ )zZ
Parse a vector of float values representing IBM 8 byte floats into
native 8 byte floats.
z>u4,>u4r5   r7   f1i    i       i  @    i         l          A   i     l        z>f8f8)	r8   r6   r;   r9   r:   uint8whereemptyastype)	r<   r6   r>   xport1xport2ieee1shiftieee2ieees	            r&   _parse_float_vecrW      sZ   
 HHYE88%8 D$ZF$ZF ZE HHSXRXX.E+,E"((6J&
'(+,E"((6J&
'(+,E"((6J&
'( 
OE_&:"52U;K!LME 
ZE 
R<4'2-!3u<tCJ E 88SZM3DJJ9959!D;;tDKr0   c                      \ rS rSr\r    S         SS jjrSS jrS rSS jr	SS jr
SS jrSSS	 jjrS
 r\" \5      SSS jj5       rSrg)XportReader   Nc                    X0l         SU l        X l        X@l        [	        USUSUS9U l        U R
                  R                  U l         U R                  5         g ! [         a    U R                  5         e f = f)Nr   rbF)encodingis_textcompression)	_encoding_lines_read_index
_chunksizer	   handleshandlefilepath_or_buffer_read_header	Exceptionclose)selfrf   indexr]   	chunksizer_   s         r&   __init__XportReader.__init__  sv     "#!#
 #',,"5"5	 	JJL	s   A A5c                8    U R                   R                  5         g N)rd   ri   rj   s    r&   ri   XportReader.close  s    r0   c                T    U R                   R                  S5      R                  5       $ )NP   )rf   readdecoderq   s    r&   _get_rowXportReader._get_row   s"    &&++B/6688r0   c           
     (   U R                   R                  S5        U R                  5       nU[        :w  a  SU;   a  [	        S5      e[	        S5      eU R                  5       nSS/SS/S	S/S
S/SS//n[        X#5      nUS   S:w  a  [	        S5      e[        US   5      US'   X@l        U R                  5       n[        US S 5      US'   U R                  5       nU R                  5       nUR                  [        5      nU[        :H  n	U(       a  U	(       d  [	        S5      e[        USS 5      n
SS/SS/SS/SS/S	S/S
S/SS//n[        U R                  5       U5      nSS/S
S/SS/SS//nUR                  [        U R                  5       U5      5        [        US   5      US'   [        US   5      US'   Xl        SSS.n[        U R                  5       SS 5      nX-  nUS-  (       a  USUS-  -
  -  nU R                   R                  U5      n/ nSn[        U5      U
:  a  US U
 UU
S  nnUR!                  S5      n["        R$                  " SU5      n['        [)        [*        U5      5      nUS
	 UUS       US '   US!   nUS    S:X  a  US":  d  US:  a  S#U S$3n[-        U5      eUR/                  5        H  u  nn UR1                  5       UU'   M     UUS!   -  nUU/-  n[        U5      U
:  a  M  U R                  5       nU[4        :X  d  [	        S%5      eUU l        UU l        U R                   R;                  5       U l        U R?                  5       U l         U R6                   Vs/ s H  nUS&   RC                  5       PM     snU l"        [G        U R6                  5       VVs/ s H%  u  nnS'[I        U5      -   S([I        US!   5      -   4PM'     nnn[J        RL                  " U5      nUU l'        g ! [2         a     GMJ  f = fs  snf s  snnf ))Nr   z**COMPRESSED**z<Header record indicates a CPORT file, which is not readable.z#Header record is not an XPORT file.prefixrH   versionr2   OSr   created   zSAS     SAS     SASLIBz!Header record has invalid prefix.modifiedzMember header not foundset_namesasdatar   (   typenumericchar)rC   rD   6   :   rt      z>hhhh8s40s8shhh2s8shhl52sr   r   rD   zFloating field width z is not between 2 and 8.zObservation header not found.r   r*   r4   )(rf   seekrw   _correct_line1r"   r/   r'   	file_info
startswith_correct_header1_correct_header2intupdatemember_inforu   r:   ljuststructunpackdictzip
_fieldkeys	TypeErroritemsr)   AttributeError_correct_obs_headerfieldsrecord_lengthtellrecord_start_record_countnobsrv   columns	enumeratestrr8   r6   _dtype)rj   line1line2fifr   line3header1header2	headflag1	headflag2fieldnamelengthmemr   types
fieldcount
datalength	fielddatar   
obs_length
fieldbytesfieldstructfieldflmsgkvheaderxidtypelr6   s                                  r&   rg   XportReader._read_header#  sU   $$Q' N"5( !R  BCC"~	1~ay3)iQS_U+	X"::@AA*9Y+?@	)" +E#2J 7	* --/--/&&'78	//	i677gbn- qMONN1I"IO
 "$--/37B#rWbMFA;G;t}}<="-k*.E"FJ!,[-C!DI& &)B/0
$1
?"zB..J++00<	
)n/ *?+/*+ "J $))#.J --(CZPKZ56Ec
"5>2E'N~&BW~*aR!V-bT1IJn$1 wwyE!H & %//JugF7 )n/: ,,<==' 3388:&&(	48KK@Kq&	((*K@
 &dkk2
25 3q6\3U>%:!;;<2 	 
  / &  A
s   O7-P	*,P7
PPc                F    U R                  U R                  =(       d    SS9$ )NrC   nrows)ru   rc   rq   s    r&   __next__XportReader.__next__  s    yyt3!y44r0   c                   U R                   R                  SS5        U R                   R                  5       U R                  -
  nUS-  S:w  a  [        R
                  " S[        5       S9  U R                  S:  a3  U R                   R                  U R                  5        XR                  -  $ U R                   R                  SS5        U R                   R                  S5      n[        R                  " U[        R                  S9n[        R                  " US:H  5      n[        U5      S:X  a  SnOS	[        U5      -  nU R                   R                  U R                  5        X-
  U R                  -  $ )
z
Get number of records in file.

This is maybe suboptimal because we have to seek to the end of
the file.

Side effect: returns file position to record_start.
r   rD   rt   zxport file may be corrupted.)
stacklevelir5   l     @@  r2   )rf   r   r   r   warningswarnr   r   ru   r8   
frombufferuint64flatnonzeror:   )rj   total_records_lengthlast_card_bytes	last_cardixtail_pads         r&   r   XportReader._record_count  s6    	$$Q*#66;;=@Q@QQ"$)MM.+-
 "##(():):;'+=+===$$S!,1166r:MM/C	 ^^I)<<=r7a<H3r7{H$$T%6%67$/D4F4FFFr0   c                >    Uc  U R                   nU R                  US9$ )z
Reads lines from Xport file and returns as dataframe

Parameters
----------
size : int, defaults to None
    Number of lines to read.  If None, reads whole file.

Returns
-------
DataFrame
r   )rc   ru   )rj   sizes     r&   	get_chunkXportReader.get_chunk  s#     <??Dyyty$$r0   c                    UR                  SS9nUS   S:H  US   S:H  -  US   S:H  -  nUS   S:  US   S	:*  -  US   S
:H  -  US   S:H  -  nX4-  nU$ )Nzu1,u1,u2,u4r5   rB   r   f2f3r7   rJ   Z   _   .   )r;   )rj   r<   r   missmiss1s        r&   _missing_doubleXportReader._missing_double  s    HH=H)$14A.!D'Q,?go!D'T/2w$ w$  	
 	r0   c                   Uc  U R                   n[        XR                   U R                  -
  5      nX R                  -  nUS::  a  U R	                  5         [
        eU R                  R                  U5      n[        R                  " X@R                  US9n0 n[        U R                  5       H  u  pxUS[        U5      -      n	U R                  U   S   n
U
S:X  aJ  [        XR                  U   S   5      n	U R!                  U	5      n[#        U	5      n[        R$                  X'   OlU R                  U   S   S:X  aV  U	 Vs/ s H  oR'                  5       PM     nnU R(                  b*  U Vs/ s H  oR+                  U R(                  5      PM     nnUR-                  UW05        M     [.        R0                  " U5      nU R2                  c=  [.        R4                  " [7        U R                  U R                  U-   5      5      Ul        OUR;                  U R2                  5      nU =R                  U-  sl        U$ s  snf s  snf )Nr   )r6   countr*   r   r   r   r   )r   minra   r   ri   StopIterationrf   ru   r8   r   r   r   r   r   r   r@   r   rW   nanrstripr`   rv   r   r#   	DataFramerb   Indexrangerk   	set_index)rj   r   
read_linesread_lenrawdatadf_datajr   r<   r   r   r   ydfs                  r&   ru   XportReader.read  s   =IIE		D,<,< <=
 2 22q=JJL%%**84}}S:Fdll+DAsSV|$CKKN7+E	!1#{{1~n7UV++C0$S)&&Q(F2),-AXXZ->>-;<=1a$..11A=NNAq6" , \\'";;xxd&6&68H8H:8U VWBHdkk*BJ&	 . >s   ?H:+$H?)rc   r   r`   rb   ra   r   r   r   rf   rd   r   r   r   r   )Nz
ISO-8859-1Ninfer)
rf   zFilePath | ReadBuffer[bytes]r]   z
str | Nonerl   
int | Noner_   r   returnNone)r   r   )r   pd.DataFrame)r   r   rp   )r   r   r   r   )r   r   r   r   )__name__
__module____qualname____firstlineno___xport_reader_doc__doc__rm   ri   rw   rg   r   r   r   r   r   _read_method_docru   __static_attributes__ r0   r&   rY   rY      s    G
 + $*18 	
  ( 
89l\5$GL%"	 %  %r0   rY   )r%   r   r   r   )r*   r   )-r   
__future__r   collectionsr   r   r   typingr   r   numpyr8   pandas.util._decoratorsr   pandas.util._exceptionsr   pandasr#   pandas.io.commonr	   pandas.io.sas.sasreaderr
   pandas._typingr   r   r   r   r   r   r   r   r   _base_params_doc_params2_doc_format_params_doc_iterator_doc_read_sas_docr   r  r'   r/   r@   rW   IteratorrY   r  r0   r&   <module>r     s   #        , 4  ' . ' 
 R ' 
' 
(C @9 A
      2    	  ,&6r~*cll ~r0   