U
    b9                     @  s   d dl mZ d dlmZmZmZ d dlZd dlZd dl	m
  mZ d dlmZmZmZmZ d dlmZ d dlmZ d dlmZmZ d dlmZ d d	lmZ d d
lmZmZ d dlm Z  d dl!m"Z"m#Z# G dd de"Z$dddddZ%dddddZ&dS )    )annotations)HashableMappingSequenceN)	ArrayLikeDtypeArgDtypeObjReadCsvBuffer)DtypeWarning)find_stack_level)is_categorical_dtypepandas_dtype)union_categoricals)ExtensionDtype)Index
MultiIndex)ensure_index_from_sequences)
ParserBaseis_index_colc                      s   e Zd ZU ded< ded< dd fddZd	d
ddZd	d
ddZd dddddZdddddZdd Z	d!dddddZ
  ZS )"CParserWrapperbool
low_memoryzparsers.TextReader_readerzReadCsvBuffer[str])srcc                   s  t  | | _| }|dd _ jdk	|d<  j|d<  jj	|d< dD ]}||d  qPt
|dd |d< tj|f| _ jj _ jd k} jjd krd  _n"  jj j|\ _ _ _} jd kr jr fdd	t jjD  _ntt jj _ jd d   _ jrȈ  j j jd k	sLt jd
krxt jsx  j t  jt krfdd	t! jD  _t  jt k rȈ  j  " j  #   j _ j$sr jj%dkr<t& jr<d _' ( j j j\} _ _ jd kr<| _ jjd krr|sr jd k	s`td gt  j  _ jj%dk _)d S )Nr   FZallow_leading_colsusecolson_bad_lines)Zstorage_optionsencodingZ
memory_mapcompressionZerror_bad_linesZwarn_bad_linesdtypec                   s   g | ]} j  | qS  )prefix).0iselfr   F/tmp/pip-unpacked-wheel-ck39h295/pandas/io/parsers/c_parser_wrapper.py
<listcomp>f   s    z+CParserWrapper.__init__.<locals>.<listcomp>stringc                   s$   g | ]\}}| ks| kr|qS r   r   )r!   r"   nr   r   r%   r&      s    r   T)*super__init__kwdscopypopr   	index_colr   r   valueensure_dtype_objsgetparsersZ
TextReaderr   unnamed_colsnamesheaderZ_extract_multi_indexer_columnsindex_names	col_namesr    rangeZtable_widthlist
orig_names_evaluate_usecolsAssertionErrorZusecols_dtypesetissubsetZ_validate_usecols_nameslen	enumerateZ_validate_parse_dates_presence_set_noconvert_columns_has_complex_date_colleading_colsr   Z_name_processed_clean_index_namesZ_implicit_index)r$   r   r,   keyZpassed_namesr7   	__class__)r$   r   r%   r+   -   s    


	




	zCParserWrapper.__init__None)returnc                 C  s(   z| j   W n tk
r"   Y nX d S N)r   close
ValueErrorr#   r   r   r%   rL      s    zCParserWrapper.closec                   s^   | j dk	stdd t| j D   fdd| jD }| || j}|D ]}| j| qHdS )z
        Set the columns that should not undergo dtype conversions.

        Currently, any column that is involved with date parsing will not
        undergo such conversions.
        Nc                 S  s   i | ]\}}||qS r   r   )r!   r"   xr   r   r%   
<dictcomp>   s      z9CParserWrapper._set_noconvert_columns.<locals>.<dictcomp>c                   s   g | ]} | qS r   r   r!   rN   Z
names_dictr   r%   r&      s     z9CParserWrapper._set_noconvert_columns.<locals>.<listcomp>)r;   r=   rA   r5   Z_set_noconvert_dtype_columnsr   Zset_noconvert)r$   Zcol_indicesZnoconvert_columnscolr   rQ   r%   rB      s    z%CParserWrapper._set_noconvert_columnsNz
int | Nonez_tuple[Index | MultiIndex | None, Sequence[Hashable] | MultiIndex, Mapping[Hashable, ArrayLike]])nrowsrJ   c              
     s`  z,| j r| j|}t|}n| j|}W n tk
r   | jrd| _| | j}| j	|| j
| j| jdd\} }|  | j | jd k	r|    fdd| D }| |f Y S |    Y nX d| _| j}| jjr| jrtdg }t| jjD ]H}| j
d kr||}	n|| j
| }	| j|	|dd}	||	 qt|}| jd k	rj| |}| |}t| }
d	d t||
D }| ||\}}nt| }
| jd k	st t!| j}| |}| jd k	r| |}d
d |
D }| jd kr| "|| dd t||
D }| ||\}}| #|||\}}| || j}|||fS )NFr   r   c                   s   i | ]\}}| kr||qS r   r   )r!   kvcolumnsr   r%   rO      s       z'CParserWrapper.read.<locals>.<dictcomp>z file structure not yet supportedT)try_parse_datesc                 S  s   i | ]\}\}}||qS r   r   r!   rU   r"   rV   r   r   r%   rO     s    
  c                 S  s   g | ]}|d  qS )   r   rP   r   r   r%   r&   0  s     z'CParserWrapper.read.<locals>.<listcomp>c                 S  s   i | ]\}\}}||qS r   r   rZ   r   r   r%   rO   4  s    
  )$r   r   Zread_low_memory_concatenate_chunksreadStopIterationZ_first_chunkZ_maybe_dedup_namesr;   Z_get_empty_metar/   r7   r,   r2   Z_maybe_make_multi_index_columnsr8   r   _filter_usecolsitemsrL   r5   rD   rC   NotImplementedErrorr9   r.   _maybe_parse_datesappendr   sortedzipZ_do_date_conversionsr=   r:   Z_check_data_lengthZ_make_index)r$   rS   chunksdatar5   indexZcol_dictZarraysr"   valuesZ	data_tupsZ	date_dataZalldataZ
conv_namesr   rW   r%   r]      sn    









zCParserWrapper.readzSequence[Hashable])r5   rJ   c                   s@   |  | j|  d k	r<t|t kr< fddt|D }|S )Nc                   s$   g | ]\}}| ks| kr|qS r   r   )r!   r"   namer)   r   r%   r&   B  s      z2CParserWrapper._filter_usecols.<locals>.<listcomp>)r<   r   r@   rA   )r$   r5   r   r)   r%   r_   >  s    
zCParserWrapper._filter_usecolsc                 C  sL   t | jjd }d }| jjdkrD| jd k	rD| || j| j\}}| _||fS )Nr   )r:   r   r6   rD   r/   rE   r4   )r$   r5   Z	idx_namesr   r   r%   _get_index_namesG  s      zCParserWrapper._get_index_namesTint)rh   rY   c                 C  s   |r|  |r| |}|S rK   )Z_should_parse_datesZ
_date_conv)r$   ri   rh   rY   r   r   r%   rb   R  s    
z!CParserWrapper._maybe_parse_dates)N)T)__name__
__module____qualname____annotations__r+   rL   rB   r]   r_   rk   rb   __classcell__r   r   rG   r%   r   )   s   
  g	r   zlist[dict[int, ArrayLike]]dict)rf   rJ   c                   s
  t | d  }g }i }|D ]  fdd| D }dd |D }dd |D }t|dkr|t|g }|tkr||t  | }t	|rt
|dd	| < qt|tr| }	|	|| < qt|| < q|rd
|}
dd|
 dg}tj|tt d |S )z
    Concatenate chunks of data read with low_memory=True.

    The tricky part is handling Categoricals, where different chunks
    may have different inferred categories.
    r   c                   s   g | ]}|  qS r   )r.   )r!   chunkrj   r   r%   r&   d  s     z'_concatenate_chunks.<locals>.<listcomp>c                 S  s   h | ]
}|j qS r   rT   )r!   ar   r   r%   	<setcomp>f  s     z&_concatenate_chunks.<locals>.<setcomp>c                 S  s   h | ]}t |s|qS r   )r   rP   r   r   r%   rv   h  s      r[   F)Zsort_categories, z	Columns (zK) have mixed types. Specify dtype option on import or set low_memory=False.)
stacklevel)r:   keysr@   npZfind_common_typeobjectrc   strr.   r   r   
isinstancer   Zconstruct_array_typeZ_concat_same_typeZconcatenatejoinwarningswarnr
   r   )rf   r5   Zwarning_columnsresultZarrsZdtypesZnumpy_dtypesZcommon_typer   Z
array_typeZwarning_namesZwarning_messager   rt   r%   r\   X  s>    



r\   z*DtypeArg | dict[Hashable, DtypeArg] | Nonez*DtypeObj | dict[Hashable, DtypeObj] | None)r   rJ   c                   s0   t  tr fdd D S  dk	r,t S  S )zc
    Ensure we have either None, a dtype object, or a dictionary mapping to
    dtype objects.
    c                   s   i | ]}|t  | qS r   )r   )r!   rU   rT   r   r%   rO     s      z%ensure_dtype_objs.<locals>.<dictcomp>N)r~   rr   r   rT   r   rT   r%   r1     s
    
r1   )'
__future__r   typingr   r   r   r   Znumpyr{   Zpandas._libs.parsersZ_libsr3   Zpandas._typingr   r   r   r	   Zpandas.errorsr
   Zpandas.util._exceptionsr   Zpandas.core.dtypes.commonr   r   Zpandas.core.dtypes.concatr   Zpandas.core.dtypes.dtypesr   Zpandasr   r   Zpandas.core.indexes.apir   Zpandas.io.parsers.base_parserr   r   r   r\   r1   r   r   r   r%   <module>   s$     1@