hyb
2025-11-10 e0a856b5072c5a09f3f6de6da85abf90e00ee704
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
Ë
oñúh*ãó2—dZddlmZmZddlZddlZddlZddlZddl    Z    ddl
m Z m Z ddl mZe    j j#d«Ze    j j'd«Zd„Zed„«Zee    j j/d    d
d g«e    j j/d gd ¢«d„«««Zd„Zd„Ze    j j/ddie ddgi«fdddie ddgi«fdddgie dddgi«fddgddœe ddgi«fddgddœe dej6dgi«fg«d „«Zd!„Ze    j j/d"gd#¢«d$„«Ze    j j/d%ddg«d&„«Zd'„Z e    j j/d gd(¢«d)„«Z!e    j j/d gd*¢«d+„«Z"d,„Z#d-„Z$e    j j'd.«e    j j/d/d0d1g«d2„««Z%y)3zZ
Tests encoding functionality during parsing
for all of the parsers defined in parsers.py
é)ÚBytesIOÚ TextIOWrapperN)Ú    DataFrameÚread_csvz=ignore:Passing a BlockManager to DataFrame:DeprecationWarningÚ pyarrow_skipcó¸—d}|}tdj|««}|j|d|¬«}tddggddg¬    «}t    j
||«y)
NÚcp1255uשלום:1234
562:123ú:)ÚsepÚencodingi2é{uשלוםÚ1234©Úcolumns©rÚencoderrÚtmÚassert_frame_equal)Ú all_parsersr ÚparserÚdataÚresultÚexpecteds      úWH:\Change_password\venv_build\Lib\site-packages\pandas/tests/io/parser/test_encoding.pyÚtest_bytes_io_inputrs]€Ø€HØ €Fä Ð+×2Ñ2°8Ó<Ó =€DØ _‰_˜T s°Xˆ_Ó >€Fä˜3 ˜*˜°
¸FÐ/CÔD€HÜ×ј& (Õ+ócó¬—|}tdj««}|j|ddd¬«}tddgg«}t    j
||«y)Nu Łaski, Jan;1ú;úutf-8)r r Úheaderu Łaski, Janér)rrrrrs     rÚtest_read_csv_unicoder"(sT€à €FÜ Ð&×-Ñ-Ó/Ó 0€Dà _‰_˜T s°WÀTˆ_Ó J€FÜÐ,¨aÐ0Ð1Ó2€HÜ×ј& (Õ+rr ú,ú    r )úutf-16zutf-16lezutf-16becó8—|}djd|«}dtj«›d}|ddœ}d}tj|«5}|j |«}t |d«5}    |    j|«ddd«tt|j |««|¬    «5}
|j|fd
|i|¤Ž} |j|
fd
|i|¤Ž} ddd«tj  «ddd«y#1swYŒ€xYw#1swYŒ4xYw#1swYyxYw) Nz)skip this
skip this too
A,B,C
1,2,3
4,5,6r#Ú__z__.csvé)r ÚskiprowsrÚwb©r r ) ÚreplaceÚuuidÚuuid4rÚ ensure_cleanrÚopenÚwriterrrr) rr r rrÚpathÚkwargsÚutf8Ú
bytes_dataÚfÚ bytes_bufferrrs              rÚtest_utf16_bom_skiprowsr82s'€ð
€Fð     ÷
‰Ø ˆSó
ð        ð”—
‘
“ ˆ~˜VÐ $€DØ aÑ (€FØ €Dä     ‰˜Ó    ð    0 $Ø—[‘[ Ó*ˆ
ä $˜Ó ð      Ø G‰GJÔ ÷     ôœ7 4§;¡;¨tÓ#4Ó5ÀÔ Eð    NÈØ$V—_‘_ TÑG°HÐGÀÑGˆFØ&v—‘ |ÑM¸dÐMÀfÑMˆH÷    Nô     ×јf hÔ/÷    0ð    0÷     ð     ú÷    Nð    Nú÷     0ð    0ús<Á    DÁ'C8Á9-DÂ&+DÃDÃ8D    Ã=DÄD     Ä    DÄDcó—tjj|d«}|}|j|dd¬«}t    |«dk(sJ‚y)Nz utf16_ex.txtr%r$)r r é2)Úosr2ÚjoinrÚlen)rÚ csv_dir_pathr2rrs     rÚtest_utf16_exampler?OsA€Ü 7‰7<‰<˜  nÓ 5€DØ €FØ _‰_˜T¨H¸$ˆ_Ó ?€FÜ ˆv‹;˜"Ò ÐÑ rcó´—tjj|d«}|}|j|dd¬«}|j    d«}|dd}d}||k(sJ‚y)Núunicode_series.csvúlatin-1)r r rr!i`u$Á köldum klaka (Cold Fever) (1994))r;r2r<rÚ    set_index)rr>r2rrÚgotrs       rÚtest_unicode_encodingrEVs^€Ü 7‰7<‰<˜ Ð&:Ó ;€DØ €Fà _‰_˜T¨$¸ˆ_Ó C€FØ × Ñ ˜aÓ  €FØ
‰)D‰/€Cà9€HØ (Š?Љ?rzdata,kwargs,expectedza
1Úar!z"a"
1Ú    quotecharú"zb
1ÚnamesÚbÚ1ú
1T)rIÚskip_blank_linesFcóú‡‡    —|}dŠdŠ    ˆˆ    fd„}|jdk(r-|dk(r(|jdd«rtjd¬    «|j||«fd
‰    i|¤Ž}t j ||«y) NurcóB•—‰|zj‰«}t|«S)N)rr)Ú_dataÚbom_dataÚbomr4s  €€rÚ_encode_data_with_bomz,test_utf8_bom.<locals>._encode_data_with_bom{s"ø€Ø˜%‘K×'Ñ'¨Ó-ˆÜxӠРrÚpyarrowrLrMTz,https://github.com/apache/arrow/issues/38676)Úreasonr )ÚengineÚgetÚpytestÚskiprrr)
rrr3rÚrequestrrSrrRr4s
        @@rÚ test_utf8_bomr[bs|ù€ð*€FØ
€CØ €Dõ!ð
     ‰ ˜Ò"Ø EŠMØ J‰JÐ)¨4Ô 0ô     ‰ ÐIÕJà ˆV_‰_Ñ2°4Ó8Ñ RÀ4Ð RÈ6Ñ R€FÜ×ј& (Õ+rcóЗtdgdgdœ«}|}|j|«}dj|«}|jt    |«|¬«}t j ||«y)Ng333333@Útest)Úmb_numÚ    multibytezmb_num,multibyte
4.8,testr+)rÚformatrrrrr)rÚ    utf_valueÚ encoding_fmtrrr rrs        rÚtest_read_csv_utf_aliasesrc‹s`€ä S E¸¸ÑAÓB€HØ €Fà×"Ñ" 9Ó-€HØ '× .Ñ .¨xÓ 8€Dà _‰_œW T›]°Xˆ_Ó >€FÜ×ј& (Õ+rzfile_path,encoding)))ÚiorÚcsvz    test1.csvr))rdrrrArB))rdrrzsauron.SHIFT_JIS.csvÚshiftjiscóR—|}||Ž}|j||¬«}t||¬«5}|j|«}|jrJ‚    ddd«tj|«t|d¬«5}    |j|    |¬«}|    jrJ‚    ddd«tj||«t|dd¬«5}    |j|    |¬«}|    jrJ‚    ddd«tj||«y#1swYŒ½xYw#1swYŒ{xYw#1swYŒ8xYw)Nr+Úrb)Úmoder)riÚ    buffering)rr0Úclosedrr)
rÚ    file_pathr ÚdatapathrÚfpathrÚfarÚfbs
          rÚtest_binary_mode_file_buffersrq—s€ð€Fá iÐ  €E؏‰˜u¨xˆÓ8€Hä     ˆe˜hÔ    'ð¨2Ø—‘ Ó$ˆØ—9’9Ј}9÷ô×ј( FÔ+ä     ˆe˜$Ô    ð 2Ø—‘ ¨hÓ7ˆØ—9’9Ј}9÷ô×ј( FÔ+ä     ˆe˜$¨!Ô    ,ð°Ø—‘ ¨hÓ7ˆØ—9’9Ј}9÷ô×ј( FÕ+÷ðú÷
ðú÷
ðús#¨ DÁ4"DÃ"DÄDÄDÄD&Ú pass_encodingcó–—|}|j|«}|jdk(r|dur|dvrtjd«t    ddgi«}t j d|d¬«5}|jd    «|jd
«|j||r|nd¬ «}t j||«ddd«y#1swYyxYw) NrTT)éé zThese cases freezeÚfooÚbarzw+)rir Úreturn_filelikezfoo
barrr+) r`rVrXrYrrr/r1Úseekrr)    rrarbrrrr rr6rs             rÚtest_encoding_temp_filerz·s¶€ð€FØ×"Ñ" 9Ó-€Hà ‡}}˜    Ò! m°tÑ&;À    ÈXÑ@Uä ‰ Ð(Ô)ä˜% % Ð)Ó*€Hä     ‰˜d¨XÀtÔ    Lð0ÐPQØ    ‰
ÔØ    ‰ˆqŒ    à—‘ ¹©XÈDÓQˆÜ
×јf hÔ/÷ 0÷0ñ0ús Á&AB?Â?Ccó^—|}d}d}d}t||gi«}tj«5}|j|›d|›j    |««|j d«|j ||¬«}tj||«|jrJ‚    ddd«y#1swYyxYw)Nz    shift-jisu    ã¦ã™ã¨uこむú
rr+)
rÚtempfileÚNamedTemporaryFiler1rryrrrrk)rrr Útitlerrr6rs        rÚtest_encoding_named_temp_filer€Ës¦€à €FØ€Hà €EØ €Dä˜% $ ˜Ó)€Hä    ×    $Ñ    $Ó    &ð¨!Ø    ‰5'˜˜D˜6Ð"×)Ñ)¨(Ó3Ô4à    ‰ˆqŒ    à—‘ ¨XÓ6ˆÜ
×јf hÔ/Ø—8’8Ј|8÷÷ñús «A.B#Â#B,)rr%z    utf-16-bez    utf-16-lezutf-32có´—d}t|j|««}t|d|¬«}tddgddgdd    ggd
d g¬ «}t    j
||«y) Nua    b
:foo    0
bar    1
baz    2r$)Ú    delimiterr u:foorrwr!Úbazr(rFrJ)rrr)r rÚ encoded_datarrs     rÚ%test_parse_encoded_special_charactersr…ßsc€ð -€Dܘ4Ÿ;™; xÓ0Ó1€LÜ l¨d¸XÔ F€FäØ˜ˆm˜e Q˜Z¨%°¨Ð 4ؐc
ô€Hô×ј& (Õ+r)rNr%r    rBcó¾—|}tgd¢gd¢gd¢dœ«}tj«5}|j|d|¬«|jdk(rDd}t j t|¬    «5|j||d
¬ «ddd«    ddd«y|j||d
¬ «}ddd«tj|«y#1swYŒFxYw#1swYŒ,xYw) N)ÚRaphaelÚ    Donatelloz Miguel AngelÚLeonardo)ÚredÚpurpleÚorangeÚblue)Úsaizbo staffÚnunchunkÚkatana)ÚnameÚmaskÚweaponF)Úindexr rTúBThe 'memory_map' option is not supported with the 'pyarrow' engine©ÚmatchT)r Ú
memory_map)
rrr/Úto_csvrVrXÚraisesÚ
ValueErrorrr)rr rrÚfileÚmsgÚdfs       rÚtest_encoding_memory_maprŸðsۀð€FÜâHÚ7Ú?ñ    
ó€Hô
‰Ó    ð    G˜d؏‰˜ E°HˆÔ=à =‰=˜IÒ %ØVˆCÜ—‘œz°Ô5ñ JØ—‘ ¨xÀDÔI÷ Jà ÷    Gð    Gð_‰_˜T¨HÀˆ_Ó Fˆ÷    Gô×ј"˜hÕ'÷  Jð Jú÷     Gð    Gús*¬ACÁ-C    CÂCÃC    Ã CÃCcóÖ—|}tdgdz¬«}d|jd<tjd«5}|j    |ddd¬    «|j
d
k(rDd }t jt|¬ «5|j|d d¬«d d d «    d d d «y |j|d d¬«}d d d «tj|«y #1swYŒFxYw#1swYŒ,xYw)zO
    Chunk splits a multibyte character with memory_map=True
 
    GH 43540
    Úaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaai)ruaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaąiÿzbug-gh43540.csvFr©r”r r rTr•r–NT)r r˜) rÚilocrr/r™rVrXršr›rr)rrržÚfnamerÚdfrs      rÚ test_chunk_splits_multibyte_charr¦sæ€ð €Fä    ˜˜  dÑ*Ô    +€Bð%€B‡GGˆDMÜ     ‰Ð*Ó    +ð    C¨uØ
    ‰    %˜u¨U¸Wˆ    ÔEà =‰=˜IÒ %ØVˆCÜ—‘œz°Ô5ñ EØ—‘ ¨d¸tÔD÷ Eà ÷    Cð    Cðo‰o˜e¨D¸TˆoÓBˆ÷    Cô×ј#˜rÕ"÷  Eð Eú÷     Cð    Cús*·ACÁ9C    C CÃC    ÃCÃC(c    óê—g}d}d}d}tt|«t|«|«D]]}djt||dz«Dcgc] }t|«‘Œc}«dz}    |j    d«|j |«Œ_|}t|«}    tjd«5}
|    j|
ddd¬    «|jd
k(rEd } tjt| ¬ «5|j|
d dd¬«d d d «    d d d «y |j|
d dd¬«} d d d «tj |     «y cc}w#t
$rYŒ1wxYw#1swYŒ\xYw#1swYŒAxYw)zg
    GH 43787
 
    Test correct handling of UTF-8 chars when memory_map=True and encoding is UTF-8
    é€ú u𐂀Úr|rz utf8test.csvFr¢rTr•r–NT)r r˜r )ÚrangeÚordr<ÚchrrÚUnicodeEncodeErrorÚappendrrr/r™rVrXršr›rr) rÚlinesÚ line_lengthÚ
start_charÚend_charÚlnumÚcÚlinerržr¤rr¥s              rÚtest_readcsv_memmap_utf8r·"sq€ð €EØ€KØ€JØ€Hô”c˜*“o¤s¨8£}°kÓBòˆØw‰w¬¨d°D¸4±KÓ(@ÖA 1œ˜AÒAÓBÀTÑIˆð    Ø K‰K˜Ô  ð     ‰ TÕð ð€FÜ    5Ó    €BÜ     ‰˜Ó    (ð    U¨EØ
    ‰    %˜u¨U¸Wˆ    ÔEà =‰=˜IÒ %ØVˆCÜ—‘œz°Ô5ñ WØ—‘ ¨d¸tÈgÔV÷ Wà ÷    Uð    Uðo‰o˜e¨D¸TÈGˆoÓTˆ÷    Uô×ј"˜cÕ"ùò%Bøô"ò    Ú ð    ú÷ Wð Wú÷     Uð    UúsCÁE
Á$E Â*AE)Ã,EÄ    E)ÄE)Å     EÅEÅE&    Å"E)Å)E2Ú pyarrow_xfailrizw+bzw+tcó—|}d}d|vrd}tj|d¬«5}|j|«|jd«|j    |«}ddd«t gdg¬«}t j|«y#1swYŒ.xYw)NsabcdÚtÚabcdr)rir rr)r}ÚSpooledTemporaryFiler1ryrrrr)rrirÚcontentÚhandleržrs       rÚtest_not_readabler¿Ds†€ð€FØ€GØ
ˆd{؈ܠ   ×    &Ñ    &¨D¸7Ô    Cð%Àv؏ ‰ WÔØ ‰ AŒØ _‰_˜VÓ $ˆ÷%ô˜ f XÔ.€HÜ×ј"˜hÕ'÷ %ð%ús ¢4BÂB )&Ú__doc__rdrrr;r}r-ÚnumpyÚnprXÚpandasrrÚpandas._testingÚ_testingrÚmarkÚfilterwarningsÚ
pytestmarkÚ usefixturesÚ skip_pyarrowrr"Ú parametrizer8r?rEÚnanr[rcrqrzr€r…rŸr¦r·r¿©rrú<module>rÎsðñ÷ó
ÛÛ ãÛ ÷õà [‰[× 'Ñ 'ØCó€
ð{‰{×&Ñ& ~Ó6€ ò,ðñ,óð,ðØ‡×ј  d  Ó,؇×јÒ%GÓHñ0óIó-óð0ò4ò    ð‡×ÑØð
‘Y  a S˜zÓ*Ð+à    K Ð%¡y°#¸°s°Ó'<Ð=à    '˜C˜5Ð!¡9¨c°C¸°:Ð->Ó#?Ð@à    ˜3˜%°TÑ:¹IÀsÈQÈCÀjÓ<QÐRð Øe°Ñ 7Ù s˜RŸV™V Q˜KÐ(Ó )ð    
ðóñ&,ó'ð&,ò,    ,ð‡×ÑØòóñ,óð,ð0‡×ј¨4°¨-Ó8ñ0ó9ð0ò&ð(‡×ÑØÒGóñ ,óð ,ð‡×јÒ%SÓTñ(óUð(ò.#ò4#ðD‡×јÓ)؇×ј %¨ Ó0ñ (ó1ó*ñ (r