
    h"                        S SK r S SKJr  S SKrS SKrS SKJr  S SKJr  S SK	r	S SK
rS SK
Jr  S SKJr  S SKJrJrJr   S SKJr  S SKJrJr   S SKrS SKJr  S S	KJr  S S
KJ r    S SK!r"\	RF                  R(                  r$S r%S r&\	RF                  RB                  S 5       r'\	RF                  RB                  S 5       r(\	RF                  R2                  S 5       r)\	RF                  R2                  S 5       r*\	RF                  R2                  S 5       r+S r,S r-S r.\	RF                  R^                  S 5       r0\	RF                  R2                  S 5       r1\	RF                  R2                  S 5       r2S r3\	RF                  R2                  S 5       r4S r5\	RF                  Rm                  SS\Rn                  " 5       /5      \	RF                  Rm                  SS5      S 5       5       r8S r9S  r:S! r;S" r<S# r=S$ r>S% r?S& r@\	RF                  R2                  S' 5       rA\	RF                  R2                  S( 5       rB\	RF                  R2                  S) 5       rCS* rDS+ rE\	RF                  R2                  S, 5       rFS- rG\	RF                  R2                  \	RF                  R                  \	RF                  R                  S.5      \	RF                  R                  S/5      S0 5       5       5       5       rJ\	RF                  Rm                  S1S2 S3 S4 S5 /5      \	RF                  Rm                  S6S7S8/5      S9 5       5       rKS: rLS; rMS< rNS= rOS> rPS? rQ\	RF                  R                  S@ 5       rSg! \ a    Sr GNf = f! \ a    S=rr GNf = f! \ a    Sr" GN f = f)A    N)OrderedDict)copytree)Decimal)fs)util)_check_roundtrip_roundtrip_table_test_table)_read_table_write_table)dataframe_with_lists)alltypes_samplec                 8   [         R                  " S/ SQ05      n[        R                  " [        SS9   [        XS-  SS9  S S S 5        [        R                  " [        SS9   [        XS-  SS	9  S S S 5        g ! , (       d  f       N>= f! , (       d  f       g = f)
Na         z"Unsupported Parquet format versionmatchztest_version.parquetz2.2versionz%Unsupported Parquet data page version)data_page_version)patablepytestraises
ValueErrorr   )tempdirr   s     Z/home/james-whalen/.local/lib/python3.13/site-packages/pyarrow/tests/parquet/test_basic.pytest_parquet_invalid_versionr!   ;   s    HHc9%&E	z)M	NU&<<eL 
O	z *! 
"U&<<',	.
" 
" 
O	N
" 
"s   A:#B:
B
Bc                      [         R                  " / SQS-  5      n [         R                  R                  U /S/S9nSS/nU H  n[	        XS9  M     g )Nr   i f0namesi   i   )data_page_size)r   arrayTablefrom_arraysr   )arrt
page_sizestarget_page_sizes       r    test_set_data_page_sizer.   E   sQ    
((9v%
&C
cU4&1A 7#J&< '    c                  2    [        S5      n [        U SSSS9  g )Nd   
   r   2.4)r&   write_batch_sizer   )r
   r   r   s    r    test_set_write_batch_sizer6   O   s    Eb1er/   c                      [        S5      n [        U SSSS9  [        R                  " [        5         [        U SSSS9  S S S 5        g ! , (       d  f       g = f)Nr1   r   r2   r3   )dictionary_pagesize_limitr&   r   r   )r
   r   r   r   	TypeErrorr5   s    r    "test_set_dictionary_pagesize_limitr:   X   sH    EUa$&7 
y	!#(*E	; 
"	!	!s   A
Ac            
         / n [         R                  R                  [        SS95      nU R	                  [         R
                  R                  U/S-  5      5        [        5       u  p#[         R                  R                  U5      nU R	                  [         R
                  R                  U/S-  5      5        S H!  nS H  nU  H  n[        USUUS9  M     M     M#     g )Nr2   sizer   )z1.0z2.0)TF2.6)r   r   use_dictionary)	r   RecordBatchfrom_pandasr   appendr(   from_batchesr   r   )tablesbatchdf_r   r?   r   s          r    test_chunked_table_writerH   d   s     FNN&&B'?@E
MM"((''!45 "EBNN&&r*E
MM"((''!45++N 5&7#13   , ,r/   c                 D   [        SS9n[        R                  R                  U5      n[	        USS0SS9  [        U S-  5      n[        US5       n[        X$SS	9  S S S 5        [        R                  " USS
9nUR                  U5      (       d   eg ! , (       d  f       N<= f)Nr2   r<   
memory_mapTr>   read_table_kwargsr   tmp_filewbr   )rJ   r   r   r(   rA   r   stropenr   pqread_pandasequalsr   rF   r   filenamef
table_reads         r    test_memory_maprY   w   s    	b	!BHH  $EU|T.B"$ 7Z'(H	h	Uu- 
T:JU#### 
	   B
Bc                 D   [        SS9n[        R                  R                  U5      n[	        USS0SS9  [        U S-  5      n[        US5       n[        X$SS	9  S S S 5        [        R                  " US
S9nUR                  U5      (       d   eg ! , (       d  f       N<= f)Nr2   r<   buffer_sizei  r>   rK   rM   rN   r   i   )r\   rO   rU   s         r    test_enable_buffered_streamr]      s    	b	!BHH  $EU}d.C"$ 7Z'(H	h	Uu- 
d;JU#### 
	rZ   c                 \   [         R                  R                  [         R                  " S/5      /S/5      nSnX-  nUR	                  5       (       a   e[        U[        U5      5        UR	                  5       (       d   e[        [        U5      5      nUR                  U5      (       d   eg )N*   intsz	foo # bar)	r   r(   r)   r'   existsr   rP   r   rT   )r   r   rV   pathrX   s        r    test_special_chars_filenamerc      s    HH  "((B4.!1F8<EHD{{}}D	";;===SY'JU####r/   c                  (   [         R                  " [        SS9   [        R                  " S 5        S S S 5        [         R                  " [        SS9   [        R
                  " S 5        S S S 5        g ! , (       d  f       NG= f! , (       d  f       g = f)NNoner   )r   r   r9   rR   
read_tableParquetFile r/   r    test_invalid_sourceri      sY     
y	/
d 
0 
y	/
t 
0	/ 
0	/ 
0	/s   A2B2
B 
Bc                 d   SSK Jn   " S S5      nU S-  n[        R                  " S/ SQ05      n[	        XC5        UR                  SUS	9   [        R                  " [        S
S9   [        R                  " US/S9  S S S 5        [        R                  " [        SS9   [        R                  " USS/S9  S S S 5        [        R                  " [        SS9   [        R                  " X4R                  S9  S S S 5        [        R                  " [        5         [        R                  " U 5        S S S 5        [        R                  " U5      nXT:X  d   e S S S 5        g ! , (       d  f       N= f! , (       d  f       N= f! , (       d  f       N= f! , (       d  f       Nh= f! , (       d  f       g = f)Nr   )mockc                       \ rS rSrS rSrg);test_read_table_without_dataset.<locals>.MockParquetDataset   c                     [        S5      e)NMockParquetDataset)ImportError)selfargskwargss      r    __init__Dtest_read_table_without_dataset.<locals>.MockParquetDataset.__init__   s    233r/   rh   N)__name__
__module____qualname____firstlineno__ru   __static_attributes__rh   r/   r    rp   rm      s    	4r/   rp   test.parquetr   r   z#pyarrow.parquet.core.ParquetDataset)newzthe 'filters' keywordr   )integer=r   )filterszthe 'partitioning' keywordweekcolor)partitioningzthe 'schema' argumentschema)unittestrk   r   r   r   patchr   r   r   rR   rf   r   OSError)r   rk   rp   rb   r   results         r    test_read_table_without_datasetr      s$   4 4 ^#DHHc9%&E	9?Q	R]]:-DEMM$)<(=> F]]:-IJMM$fg->? K]]:-DEMM$||4 F ]]7#MM'" $t$ 
S	REEJJEE $# 
S	Rsl   F!%E<!F!E.5!F!E?5"F!F.%F!
E+	'F!.
E<	8F!?
F		F!
F	F!!
F/c                  j    [         R                  " [        [        S5      5      /S/S9n [	        U SS9  g )Ni@  r#   r$   r   )row_group_size)r   r   listranger   )r+   s    r    (test_file_with_over_int16_max_row_groupsr      s,     	$uU|$%dV4AQq)r/   c                  V   [        SS9n [        R                  R                  U 5      n[        R                  R	                  UR                  5        Vs/ s H  o"R                  S5      S S PM     snUR                  R                  S9nUR                  R                  S5      R                  [        R                  " 5       :X  d   eUR                  R                  S5      R                  [        R                  " [        R                  " 5       5      :X  d   e[        USS9  g s  snf )	Nr2   r<   r   r$   null	null_listr>   r   )r   r   r(   rA   r)   itercolumnschunkr   r%   fieldtyper   list_r   )rF   r   cols      r    test_empty_table_roundtripr      s    	b	!B HH  $EHH  %*%6%6%89%8c1bq	%89ll   ! "E <<f%**bggi777<<k*//288BGGI3FFFFu 	:s   D&c                      [         R                  " 5       n [        R                  R	                  U SS9n[        U5        g )NF)preserve_index)pd	DataFramer   r(   rA   r   )rF   emptys     r    test_empty_table_no_columnsr      s.    	BHH  E :EUr/   c            	      J   [        [        R                  " 5       [        R                  " [        R                  " 5       5      S9n / [        SSS9//nU Vs/ s H:  n[        R
                  " U[        R                  " U 5      S9R                  5       PM<     nnU Vs/ s H5  n[        R                  R                  U[        R                  " U 5      S9PM7     nn[        R                  R                  U[        R                  " U 5      5      n[        U5        g s  snf s  snf )N)int32list_stringr   )Gr   r   )r   r   r   r   stringr'   structflattenr@   r)   r   r(   rC   r   )colsdatarE   	my_arrays
my_batchestbls         r    1test_write_nested_zero_length_array_chunk_failurer      s    hhjHHRYY[)D 1&9<=D #$"U %biio6>>@"  $  )*(e ..,,U299T?,K(  *
((


BIIdO
<CS$*s   AD<D c                    U S-  n[         R                  " S[        R                  " S[        R                  S905      n[        X!5        [        U5      nUR                  5       n[        R                  " X$5        [        U 5      S-   n[         R                  " S[        R                  " S[        R                  S905      n[        X!5        [        U5      nUR                  5       n[        R                  " X$5        g )Nzzzz.parquetxr2   dtype)r   r   nparangeint64r   r   	to_pandastmassert_frame_equalrP   )r   rb   rF   rX   df_reads        r    test_multiple_path_typesr     s     ]"D	sBIIb9:	;BT"J""$G"& w<-'D	sBIIb9:	;BT"J""$G"&r/   c                 l   U S-  n[         R                  " S/ SQ05      n[        X!5        [        R                  " U5      n[        U5      nUR                  U5      (       d   e[        R                  " [        5         [        U[        R                  " 5       S9  S S S 5        g ! , (       d  f       g = f)Nr|   r   r   
filesystem)r   r   r   r   FSProtocolClassr   rT   r   r   r9   r   
FileSystem)r   rb   r   fs_protocol_objr   s        r    test_fspathr     s    ^#DHHc9%&E**40O)F== 
y	!O@ 
"	!	!s   >B%%
B3r   name)data.parquetu   例.parquetc                 l   [         R                  " S/ SQ05      nX-  n[        R                  " U[	        U5      5        [
        R                  " U 5         [        R                  " X!S9nS S S 5        WR                  U5      (       d   eUR                  5         UR                  5       (       a   e[
        R                  " U 5         [        R                  " X2US9  S S S 5        [        R                  " U5      nUR                  U5      (       d   eg ! , (       d  f       N= f! , (       d  f       NN= f)Nr   r   r   )r   r   rR   write_tablerP   r   
change_cwdrf   rT   unlinkra   )r   r   r   r   rb   r   s         r    test_relative_pathsr   $  s     HHc9%&E>D NN5#d)$		!t; 
"==KKM{{}} 
	!
uz: 
"]]4 F== 
"	! 
"	!s   DD%
D"%
D3c                      [         R                  " [        5         [        R                  " S5        S S S 5        g ! , (       d  f       g = f)Nzi-am-not-existing.parquet)r   r   FileNotFoundErrorrR   rf   rh   r/   r    test_read_non_existing_filer   =  s(    	(	)
12 
*	)	)s	   ;
A	c                       " S S[         R                  5      n [        R                  " [        SS9   [
        R                  " U " S5      5        S S S 5        g ! , (       d  f       g = f)Nc                        \ rS rSrS rS rSrg)3test_file_error_python_exception.<locals>.BogusFileiD  c                     [        S5      eNzorglubZeroDivisionErrorrr   rs   s     r    read8test_file_error_python_exception.<locals>.BogusFile.readE      #I..r/   c                     [        S5      er   r   r   s     r    seek8test_file_error_python_exception.<locals>.BogusFile.seekH  r   r/   rh   N)rw   rx   ry   rz   r   r   r{   rh   r/   r    	BogusFiler   D  s    	/	/r/   r   r   r   r/   )ioBytesIOr   r   r   rR   rf   )r   s    r     test_file_error_python_exceptionr   C  s?    /BJJ / 
(		:
in% 
;	:	:s   A
A'c                 &   [         R                  " S/ SQ05      n[        R                  " U[	        U S-  5      5        [        [	        U S-  5      S5       n[        R                  " U5      nS S S 5        WR                  U5      (       d   e[        [	        U S-  5      S5       n[        R                  " [         R                  " U5      5      nS S S 5        UR                  U5      (       d   eg ! , (       d  f       N= f! , (       d  f       N8= f)Nr   r   r   rb)	r   r   rR   r   rP   rQ   rf   rT   
PythonFile)r   r   rW   r   s       r    test_parquet_read_from_bufferr   P  s    HHc9%&ENN5#g678	c'N*+T	2aq! 
3==	c'N*+T	2ar}}Q/0 
3== 
3	2 
3	2s   C1%+D1
C?
Dc            
         [         R                  " [        [        [        [        S5      5      5      5      n [         R                  " [        [        [        [        S5      5      5      5      n[         R                  " SS/S-  5      nX /n[         R                  R                  USS/S9n[        XDSSSS	9  [        XDSS/S/S	9  [        XDSSS/SS/S	9  [         R                  R                  X X// S
QS9n[        XUSS/SS/S9  [         R                  R                  U/S/S9n[        R                  " [        SS9   [        XDSSS9  S S S 5        g ! , (       d  f       g = f)Nr1   TF2   r   br$   gzip)expectedcompressionr?   use_byte_stream_splitr   r   cdr   r   )r   r?   r   tmpBYTE_STREAM_SPLIT only supportsr   )r   r   r?   )r   r'   r   mapfloatr   intr(   r)   r   r   r   IOError)	arr_floatarr_intarr_bool
data_floatr   mixed_tables         r    test_byte_stream_splitr   ^  sJ   c%s456IhhtCU3Z012Gxxu*+H'JHH  C: >E U$)G
 U%(E,/52
 U%(#J,/:7
 ((&&	g'O-A ' CK[%(#J,/:7 HH  (E7 ;E	w&G	Hd(-	/ 
I	H	Hs   ?E
E"c           
      $   [         R                  " [        [        [        [        S5      5      5      [         R                  " SS5      S9n[         R                  " [        [        [        [        S5      5      5      [         R                  " SS5      S9n[         R                  " [        [        [        [        S5      5      5      [         R                  " SS5      S9n[         R                  " SS	/S
-  5      nXU/n[         R                  R                  U/ SQS9n[        UUSS	SS9  [        R                  R                  U S5      n[        R                  " XgSS	SS9  [        R                  " U5      nUR                   R#                  S5      n	UR                   R#                  S5      n
U	R$                  S:X  d   eU
R$                  S:X  d   e[        UUSS	SSSS.S9  [         R                  R                  XX4// SQS9n[        UUS	SS9  g )Nr1      r   r      	      TFr   r   r   r   r$   r   )r   r   r?   store_decimal_as_integerr|   )r   r?   r   r   r   INT32INT64DELTA_BINARY_PACKEDr   r   )r   r   r?   r   column_encodingr   )r   r?   r   )r   r'   r   r   r   r   
decimal128r(   r)   r   osrb   joinrR   r   rg   r   columnphysical_type)r   arr_decimal_1_9arr_decimal_10_18arr_decimal_gt18r   data_decimalr   pqtestfile_path
pqtestfilepqcol_decimal_1_9pqcol_decimal_10_18r   s               r    test_store_decimal_as_integerr    s   hhtCs$<=$&MM!Q$79Oc'5:&>!?&(mmB&:<xxS%*%= >%']]2q%9;xxu*+H#8HILHH  _ EE U#!'$).2	4 ggll7N;ONN5%"',02
 0J"))003$++2215**g555,,777 U#!'$).233& ((&&	-=H" ' $K [)$).24r/   c                     [         R                  " [        [        [        [        S5      5      5      5      n [         R                  " [        [        [        [        S5      5      5      5      n[         R                  " [        S5       Vs/ s H  n[        U5      PM     sn[         R                  " 5       S9n[         R                  " [        S5       Vs/ s H  n[        U5      R                  S5      PM     sn[         R                  " S5      S9n[         R                  " / SQS-  5      n[         R                  R                  XX4U// SQS9n[        XfSS	S	S
S	S.S9  [        XfSS
S9  [        XfSS
SS
S.S9  [        XfSS
SSS.S9  [        XfSS
SSSS.S9  [        XfSSS0S9  [        R                  " [        SS9   [        XfSS
S
S	S.S9  S S S 5        [        R                  " [         SS9   [        XfSSS
S
S.S9  S S S 5        [        R                  " ["        SS9   [        XfSSS9  S S S 5        [        R                  " ["        SS9   [        XfSSS0S9  S S S 5        [        R                  " ["        5         [        XfS/SS
0S9  S S S 5        [        R                  " ["        5         [        XfSS
0S9  S S S 5        [        R                  " ["        5         [        XfSS/SS	S
S.S9  S S S 5        [        R                  " ["        5         [        XfSSSS	S
S.S9  S S S 5        [        R                  " [$        5         [        XfSSS9  S S S 5        g s  snf s  snf ! , (       d  f       GN= f! , (       d  f       GN}= f! , (       d  f       GNb= f! , (       d  f       GNE= f! , (       d  f       GN&= f! , (       d  f       GN	= f! , (       d  f       N= f! , (       d  f       N= f! , (       d  f       g = f) Nr1   r   r2   )FTFF   )r   r   r   r   er$   FBYTE_STREAM_SPLITPLAINr   )r   r?   r  r  r   DELTA_LENGTH_BYTE_ARRAYDELTA_BYTE_ARRAYr  RLEr   r   )r   r   r  z)DELTA_BINARY_PACKED encoder only supportsz+'RLE_DICTIONARY' is already used by defaultRLE_DICTIONARYz/Unsupported column encoding: 'MADE_UP_ENCODING'r   MADE_UP_ENCODINGr   )r   r  )r   r?   r   r  T)r   r'   r   r   r   r   r   rP   binaryzfillr(   r)   r   r   r   r   r   r   r9   )r   r   r   arr_binarr_flbar   r   s          r    test_column_encodingr     s   c%s456IhhtCU3Z012Ghhc
3
1A
3"))+FGxx#(:.:aQb	:.RYYr]DHxx3b89H((&&	W9' ' )K [u+>+>+2+>&@A [$)%,.
 [$)+2+@+2&45 [$)+2+@+D&FG [$)+2+@+=+=&?@ [$)&)5\3 
w>
@(-/6/6/B*D	E
@ 
wH
J(-/D/6/6*8	9
J 
zJ
L(-)9	;
L 
zN
P(-*-/A)B	D
P 
z	"),*-w	9 
# 
z	"*-w	9 
# 
z	"(-03u/4/B/6*8	9 
# 
z	"(-/3/4/B/6*8	9 
# 
y	!(-)-	/ 
"	!o 4.`
@ 
@
J 
J
L 
L
P 
P 
#	" 
#	" 
#	" 
#	" 
"	!sx   
M$M  M%1M7"N	N?N-0N?OO"O3%
M47
N	
N
N*-
N<?
O
O"
O03
Pc            
         [         R                  " [        [        [        [        S5      5      5      5      n X /n[         R                  R                  USS/S9n[        X"SSS9  [        X"SSS9  [        X"SS	S
.S9  [        X"SSSS
.S9  [        X"SSS9  [        X"SSS9  / SQn[        R                  " 5       nU H9  u  pV[        R                  " [        [        45         [        X$UUS9  S S S 5        M;     g ! , (       d  f       MM  = f)N  r   r   r$   r   r   )r   r   compression_levelr   snappyr  )r   r   r   r   lz4r   ))r$     )r   i)re   i  )lzo   )r   r#  )r   r'   r   r   r   r   r(   r)   r   r   r   r   r   r   r   r   )r*   r   r   invalid_combinationsbufcodeclevels          r    test_compression_levelr-  :  s    
((4Ct-.
/C:DHH  c3Z 8E U'(*
 U'(* U'-H!=? U-.Q'79
 U'(* U'(*8
**,C.]]J01+02 21 /11s   !C::
D		c                      [         R                  " / SQ5      n Sn[         R                  R                  U /U/5      n[	        USS0S9nSnUR
                  S   R                  U:X  d   eg )N)r   r   r   r   r&  zprohib; ,	{}flavorspark)write_table_kwargsprohib______r   )r   r'   r(   r)   r	   r   r   )a0r   r   r   expected_names        r     test_sanitized_spark_field_namesr5  g  sb    	/	"BDHH  "v.Ee78KLF"M==  M111r/   c                  0   [        SS9n [        R                  R                  U 5      n[        R
                  " 5       n[        XSSS9  UR                  S5        [        USS9nUR                  S5        [        US	S9nUR                  U5      (       d   eg )
Ni'  r<   SNAPPYr>   )r   r   r   T)use_threadsF)
r   r   r(   rA   r   r   r   r   r   rT   )rF   r   r*  table1table2s        r    test_multithreaded_readr;  r  sz    	e	$BHH  $E
**,C5AHHQK$/FHHQK%0F==    r/   c                     [         R                  " [        R                  " S5      // SQS9n [        R
                  R                  U R                  5       5      n[        R                  " 5       n[        XSS9  UR                  S5        [        U5      nUR                  U5      (       d   e[        R                  " [         5         [        XSS9  S S S 5        g ! , (       d  f       g = f)Nr&  )ABCD)columns)
chunk_sizer   )r   r   r   r   r   r(   rA   reset_indexr   r   r   r   r   rT   r   r   r   )r   r   r*  r   s       r    test_min_chunksizerE    s    <<10DEDHH  !1!1!34E
**,C+HHQKF==	z	"UA. 
#	"	"s   C
C)c                 `   [         R                  " [        S5      [        [        SS5      5      [        R
                  " SS5      R                  S5      [        R
                  " SSS	S
9/ SQ[         R                  " [        S5      5      [         R                  " SSS9[         R                  " SSSS9[         R                  " SSSS9S.	5      n[        R                  R                  U5      nU S-  n [        X#SS9  UR                  5       (       a   eg ! [        R                   a     N.f = f)Nabcr   r&  r      u1      @      @float64r   TFT20130101periodsz
US/Eastern)rP  tzns)rP  freq)	r   r   r   r   r  rW   ghirM   r3   r   )r   r   r   r   r   r   astypeCategorical
date_ranger   r(   rA   r   ArrowExceptionra   )r   rF   pdfrV   s       r    (test_write_error_deletes_incomplete_filer\    s     
DK q!-IIaO2248IIc3i@/NN4;7MM*a@MM*a-9;MM*adK	M 	
NB ((

r
"C#H 	SE2        s   4
D D-,D-c                     Sn [         R                  " U5        g ! [         a  nXR                  S   ;   d   e S nAg S nAff = f)Nznonexistent-file.parquetr   )rR   rf   	Exceptionrs   )r   rb   r  s      r    test_read_non_existent_filer_    s<    %D!
d !vvay   !s    
A>Ac                     [         R                  " 5          [         R                  " SS9  [        R                  " U S-  5        S S S 5        g ! , (       d  f       g = f)Nerror)actionzv0.7.1.parquet)warningscatch_warningssimplefilterrR   rf   )datadirs    r    test_read_table_doesnt_warnrg    s:    		 	 	"W-
g 001 
#	"	"s   .A
Ac                  |   [         R                  R                  [         R                  " SS/5      /S/5      n [        R
                  " 5       n[        R                  " XSS9  UR                  S5        [        R                  " U5      n[        R                  " UR                  5       U R                  5       5        g )NrG  defsome_colr   r   r   )r   r(   r)   r'   r   r   rR   r   r   rf   r   r   r   )r   rW   	roundtrips      r    test_zlib_compression_bugrm    s~     HH  "((E5>":!;j\JE


ANN50FF1Ia I)--/1BCr/   c                 @   [        U S-  5      n[        R                  " [        R                  [
        4SS9   [        US5       n S S S 5        [        R                  " U5        S S S 5        [        R                  " [        R                  [
        4SS9   [        US5       nUR                  S5        S S S 5        [        R                  " U5        S S S 5        g ! , (       d  f       N= f! , (       d  f       N= f! , (       d  f       NO= f! , (       d  f       g = f)Nr|   zsize is 0 bytesr   rN   zsize is 4 bytess   ffff)
rP   r   r   r   ArrowInvalidr   rQ   rR   rf   write)r   rb   rW   s      r    test_parquet_file_too_smallrq    s    w'(D	1.
0$ 
d	
0 
1.
0$GGG 
d	
0 
0	 
0 
0 
0 
0sG   C-CC-D#C>5D
C*	&C--
C;>
D	D
Dzignore:RangeIndex:FutureWarningz.ignore:tostring:DeprecationWarning:fastparquetc                    [         R                  " S5      n[        R                  " [	        S5      [	        [        SS5      5      [        R                  " SSSS9/ S	Q[        R                  " S
SS9[        R                  " / SQ5      S.5      n[        R                  " U5      n[        U S-  5      n[        R                  " X4S S9  UR                  U5      nUR!                  5       n["        R$                  " X&5        [        U S-  5      nUR'                  Xr5        [        R(                  " U5      nUS   R+                  [,        5      US'   ["        R$                  " UR!                  5       U5        g )NfastparquetrG  r   r&  rJ  rK  rL  r   rM  rN  r   rO  )r   r   r   )r   r   r   r   r  rW   zcross_compat_arrow.parquetrk  z cross_compat_fastparquet.parquetrW   )r   importorskipr   r   r   r   r   r   rY  rX  r   r   rP   rR   r   rg   r   r   r   rp  rS   rW  object)	r   fprF   r   
file_arrowfp_filedf_fpfile_fastparquettable_fps	            r    $test_fastparquet_cross_compatibilityr|    s"   
 
		]	+B	eeAqk"395$z150		

B HHRLE W;;<JNN5$7nnZ(GE"$ 7%GGHHH"~~./H gnnV$BsG(,,.3r/   array_factoryc                  8    [         R                  " SS /S-  5      $ Nr   r2   r   r'   rh   r/   r    <lambda>r    s    BHHaY^$r/   c                  T    [         R                  " SS /S-  5      R                  5       $ r  r   r'   dictionary_encoderh   r/   r    r  r    s    BHHaY^$668r/   c                  8    [         R                  " SS /S-  5      $ N r2   r  rh   r/   r    r  r    s    BHHb$Z"_%r/   c                  T    [         R                  " SS /S-  5      R                  5       $ r  r  rh   r/   r    r  r    s    BHHb$Z"_%779r/   read_dictionaryFTc                    [         R                  R                  SU " 5       05      n[        R                  " 5       n[
        R                  " X#SS9  UR                  S5        U(       a  S/OS n[
        R                  " USUS9nUR                   HG  nUR                  u  nUR                  5       S   nUR                  5       UR                  S-  :X  a  MG   e   g )	Nr   T)r?   r   F)r8  r  r       )r   r(   from_pydictr   r   rR   r   r   rf   rA  chunksbuffers
to_pybytesr=   )r}  r  
orig_tablebior   r   r   r*  s           r    test_buffer_contentsr    s     %%umo&>?J
**,CNN:48HHQK!0ugdOMM#5*9;E }}**mmoa ~~388e#3333 r/   c                     [         R                  " [         R                  " [        S5      5      /S/S9nU S-  n[        R
                  " XSS9  [        R                  " U5      nUR                  U5      (       d   eg )Nr&  r`   r$   zarrow-10480.pyarrow.gzGZIPrk  )r   r   r'   r   rR   r   rf   rT   )r   r   rb   r   s       r    "test_parquet_compression_roundtripr    sb    
 HHbhhuQx()&:E--DNN5F3]]4 F==r/   c                    [         R                  R                  [         R                  " / SS9/S/5      nU S-  nSn[        R
                  " X!R                  5       n[        U5       H  nUR                  U5        M     S S S 5        [        R                  " U5      nUR                  R                  U:X  d   e[        U5       H*  nUR                  U5      R                  U5      (       a  M*   e   g ! , (       d  f       Nz= f)Nr   r   r#   zempty_row_groups.parquetr   )r   r(   r)   r'   rR   ParquetWriterr   r   r   rg   metadatanum_row_groupsread_row_grouprT   )r   r   rb   
num_groupswriterrV  readers          r    test_empty_row_groupsr  )  s    HH  "((2G"<!=vFE//DJ			$	-z"Au% # 
. ^^D!F??))Z777:$$Q'..u5555  
.	-s   $C55
Dc                     S /S-  nUR                  S/5        [        R                  R                  U/S/5      nU S-  n[        R
                  " X#5        [        R                  " U5      nX$:X  d   eg )Ni   r   r  zarrow-11607.parquet)rB   r   r(   r)   rR   r   rf   )r   r   r   rb   r:  s        r    test_reads_over_batchr  ;  sg    6WDKK HH  $(4E**DNN5]]4 F??r/   c                    U S-  nUR                  SS9  [        R                  " / SQ/ SQ/SS/S9n[        R                  " X!S	-  5        [        R                  " / S
Q/ SQ/SS/S9n[        R                  " X1S-  5        [        R
                  " [        U5      5      n[        R                  " / SQ/ SQ/SS/S9nXE:X  d   eg )N dataset_column_order_permutationT)exist_okr   )皙?皙?333333?r   r   r$   zdata1.parquet)皙?      ?333333?)r&  r   rH  zdata2.parquet)r   r   r   r&  r   rH  )r  r  r  r  r  r  )mkdirr   r   rR   r   rf   rP   )r   casedata1data2r   r:  s         r     test_permutation_of_column_orderr  I  s    77DJJJHHi.sCjAENN501HHlI.sCjAENN501MM#d)$EXX)57 #J(F ??r/   c                    U S-  n[         R                  " [        [        S5      5      5      nSn[         R                  " U/U-  [        U5       Vs/ s H  nSU 3PM
     snS9n[
        R                  " XQ5        [        R                  " [        SS9   [
        R                  " USU-  S	9  S S S 5        [        R                  " [        SS9   [
        R                  " XS
9  S S S 5        [
        R                  " USU-  S	9nXe:X  d   e[
        R                  " USU-  S
9nXe:X  d   e[
        R                  " U5      nXe:X  d   eg s  snf ! , (       d  f       N= f! , (       d  f       N= f)Nzlargethrift.parquetr2   r"  some_long_column_name_r$   z1Couldn't deserialize thrift:.*Exceeded size limitr   r   )thrift_string_size_limit)thrift_container_size_limitr1   r   )r   r'   r   r   r   rR   r   r   r   r   rf   )r   rb   r'   num_colsrV  r   gots          r    test_thrift_size_limitsr  \  s9   **DHHT%)_%EHHH	(5:8_E_'s+_EGE NN5	E
G 	dR(]C
G 
E
G 	dA
G
 --sX~
FC<<
--!h,
GC<<
--
C<<# F
G 
G
G 
Gs   E
EE
E
E,c                    U S-  n[         R                  " S/ SQ05      n[        R                  " X!SS9  [        R                  " USS9nX#:X  d   e[        UR                  5       5      nUS   US   :w  d   eUS   US   sUS'   US'   U S	-  nUR                  U5        [        R                  " US
S9nXb:w  d   eU[         R                  " S/ SQ05      :X  d   e[        R                  " [        SS9   [        R                  " USS9nSSS5        [        R                  " US
S9nUR                  5       n	X:w  d   eU	[         R                  " S/ SQ05      :X  d   e[        R                  " USS9n[        R                  " [        SS9   UR                  5       nSSS5        g! , (       d  f       N= f! , (       d  f       g= f)zQCheck that checksum verification works for datasets created with
pq.write_table()zcorrect.parquetr   r   r   r   r&  Twrite_page_checksumpage_checksum_verification   $   zcorrupted.parquetFr   r   r   r&  CRC checksum verificationr   N)r   r   rR   r   rf   	bytearray
read_byteswrite_bytesr   r   r   rg   r   )
r   original_path
table_origtable_checkbin_datacorrupted_pathtable_corruptrG   corrupted_pq_filetable_corrupt2s
             r    +test_page_checksum_verification_write_tabler  w  s   
 //M3-.JNN:$G --$OK$$$
 1134H B<8B<'''!)"x|HRL(2, 22Nx( MM.=BDM &&&BHHc<%89999 
w&A	BMM.TJ 
C
 ~BGI&++-N'''RXXsL&9:::: ~BFH 
w&A	B""$ 
C	B! 
C	B  
C	Bs   4F&F7&
F47
Gc                    [         R                  " S/ SQ05      nU S-  n[        R                  " UUSS9  [	        UR                  5       5      n[        U5      S:X  d   eUS   n[        R                  " USS9nX:X  d   e[        UR                  5       5      nUS	   US
   :w  d   eUS
   US	   sUS	'   US
'   U S-  n[        X'5        XtR                  -  nUR                  U5        [        R                  " USS9n	X:w  d   eU	[         R                  " S/ SQ05      :X  d   e[        R                  " [        SS9   [        R                  " USS9n
SSS5        g! , (       d  f       g= f)zTCheck that checksum verification works for datasets created with
pq.write_to_datasetr   r  correct_dirTr  r   r   r  r  r  corrupted_dirFr  r  r   N)r   r   rR   write_to_datasetr   iterdirlenrf   r  r  r   r   r  r   r   r   )r   r  original_dir_pathoriginal_file_path_listr  r  r  corrupted_dir_pathcorrupted_file_pathr  rG   s              r    test_checksum_write_to_datasetr    sw   
 3-.J  -/
),02
 ##4#<#<#>?&'1,,,+A.M--$OK$$$
 1134H B<8B<'''!)"x|HRL(2, !?23,/A/AA##H- MM"5=BDM &&&BHHc<%89999 
w&A	BMM-$O 
C	B	Bs   =E
E*)Tr  collectionsr   r   rc  shutilr   decimalr   r   pyarrowr   r   pyarrow.testsr   pyarrow.tests.parquet.commonr   r	   r
   pyarrow.parquetparquetrR   r   r   rq   pandasr   pandas.testingtestingr   pyarrow.tests.pandas_examplesr   r   numpyr   mark
pytestmarkr!   r.   r6   r:   rH   rY   r]   rc   ri   r   slowr   r   r   r   r   r   parametrizeLocalFileSystemr   r   r   r   r   r  r   r-  r5  r;  rE  r\  r_  rg  rm  rq  rs  filterwarningsr|  r  r  r  r  r  r  r  datasetr  rh   r/   r    <module>r     s  $ 
 # 	       7 7 F
B< [[  
.=   ; ; 3 3$ $ $ $ $$2 * *    8 ' '$A  "


(  !@A  B *3
& #/L34l}/@*2Z2 ! !" / /  ! !4!2 D D  =>LM!4 N ?  !4H $8%9	+  *UDM:4 ;4(	 6$&67%t 1P 1P]  	B  NB
  	Bs5   M M, $M= M)(M),
M:9M:=N	N	