U
    b                     @   s>  d dl m Z  d dlZd dlZd dlZd dlmZmZmZm	Z	m
Z
mZmZ d dlmZ ejfddZejejd ejejejejejejejejejejejd ejejejd ejejd ejdZdd Zd	d
 Zdd Zdd Zejdddgdd Zdd Zdd Zdd Zdd Zdd Zdd Zd d! Z ejdddgejd"ddgejd#ddgd$d% Z!d&d' Z"d(d) Z#d*d+ Z$d,d- Z%d.d/ Z&ejd0d1gee'd2d d3gd4d5gd6fee'd2(d7d8 d5gd d4gd6fee'd9d d3gd4d5gd6fgd:d; Z)d<d= Z*d>d? Z+d@dA Z,ejdBdCdDdCgfdEdFdGgfdHdFdCgfdIdDdGgfgdJdK Z-dLdM Z.dNdO Z/dPdQ Z0dRdS Z1dTdU Z2dVdW Z3dXdY Z4dZd[ Z5ejd\dee
j6ed4d4d5gd]d^d4d5d5ggd_d`gdad4d5d3gdbdcfdeed4d4d5gd]d^d4d5d5gd4d5d3gddfgdedf Z7ejdgdej8ej9gdhdi Z:ej;djdk Z<ejdldmdngdodp Z=ejdldmdngejd"ddgdqdr Z>ejdsde
6e	dtgd2 dugd2  d]dvdwe	dxdxdydydxdxdzdzgd]d{dwe	dHdIgd2 gd4d4d3d3d5d5d2d2gfde
j?edudtgdd|edxdzdygdd|e	dHdIggdvd{dgdad5d5d2d2ej8ej8d4d4ej8ej8d3d3gfde
j?edudtgdd|edxdzdygdd|e	dHdIggdvd{dgdad5d5d2d2ej8ej8d4d4ej8ej8d3d3gfgd}d~ Z@dd ZAejdd4d d gd d d ggdd ZBejCddd ZDdd ZEdd ZFdd ZGdd ZHejd"ddgdd ZIdd ZJejdejKd4d4d5gfejLd4d5d5gfgdd ZMdd ZNejddCdEgdd ZOdd ZPdd ZQdd ZRdd ZSdd ZTejddCdEgeUeVdddZWejddCdEgeUeVdddZXdd ZYejddCdEgdd ZZdd Z[dS )    )datetimeN)CategoricalCategoricalIndex	DataFrameIndex
MultiIndexSeriesqcutc                 C   s.   dd }t jt|||d}| j||d S )zpReindex to a cartesian production for the groupers,
    preserving the nature (Categorical) of each grouper
    c                 S   s4   t | ttfr0| j}tjtt||| jd} | S )N
categoriesordered)	
isinstancer   r   r   
from_codesnparangelenr   )ar    r   I/tmp/pip-unpacked-wheel-ck39h295/pandas/tests/groupby/test_categorical.pyf   s      z)cartesian_product_for_groupers.<locals>.fnames
fill_value)r   from_productmapreindex
sort_index)resultargsr   r   r   indexr   r   r   cartesian_product_for_groupers   s    r!   )allanycountcorrwithfirstZidxmaxZidxminlastZmadmaxmeanZmedianminnthnuniqueprodZquantileZsemsizeZskewstdsumvarc                 C   sB   t | jd}dd }| j|ddj|}|jjd dks>td S )N   c                 S   s   |   |  |  |  dS )Nr*   r(   r$   r)   r3   )groupr   r   r   	get_statsH   s
    z2test_apply_use_categorical_name.<locals>.get_statsFobservedr   C)r	   r8   groupbyDapplyr    r   AssertionError)dfcatsr5   r   r   r   r   test_apply_use_categorical_nameE   s    r?   c               
   C   s  t dddddddddg	ddddgdd} tddddddd	d
dg	| d}ttdddd}tdddd
tjgi|d}|jddd }t	|| t ddddgdddgdd}t ddddgdddgdd}t||ddd	d
gd}|jddd}tdddgddd}	tdt
d	ddg|	di}| }t	|| tddgddgddggddgd}
t |
j|
d< |
jdgdd}|dd }t	||
dg  |
d}|
jddg }t	|| d d! }||}|
jddg  }tddgdd"|_|d d#|d< t	|| tddd$d%gi}tj|jdd&d'd(d)gd*}|jj|ddt}t||d  t|jj|ddd+d |d  t	|j|ddt|dg  |j|dd}tjtd,dd- |d.d }W 5 Q R X t	||dg  td : |d/d }|t}|tjj}|d0d }W 5 Q R X tj	||dg dd1 tj	||dg dd1 t	||dg  t	||dg  t|jj|ddtj|d  t	|j|ddtj| tddd$d%d2gi}tj|jd3dd&d'd(d)gd*}|jj|ddt}t||d  t|jj|ddd4d |d  t	|j|ddt|dg  t	|j|ddd5d |dg  tdddddgi}tj|jdddd	d
gt tdd6}|j|ddt }t|j!j"|j!j#d7}t
ddddg|d}d|j_$t|| d8d9d:d;g}tj%j&dd
d<d=}t j'||dd7} ttj%(d<d
}|j| dd }|jt)| dd }t|| j"dd}	|*|	}t	|| |j| dd}|+ }| j,- }t)| .|}|.|}t |dd8d9d:d;gd>}|j|ddd?+ }t	|| t j't/d
0d@|dd7}t|}t1|2 j3d| tdAdBdCdDdEdFdGdHgd
 }t1|2 j3d| d S )INr   bcdTr
            r2      r   r@   abcdnamer   r    Fr6   zyABvaluesrO   rQ      r   zJohn P. Doez	Jane DoveZ	person_idperson_namecolumnsc                 S   s   | S Nr   xr   r   r   <lambda>u       ztest_basic.<locals>.<lambda>c                 S   s   |  djd S )NrS   r   )drop_duplicatesilocrW   r   r   r   r   |   s    ztest_basic.<locals>.frJ   object      
         (   )binsc                 S   s
   t | S rV   r   r0   xsr   r   r   rY      rZ   z
scalar maxmatchZcheck_stacklevelc                 S   s
   t | S rV   r   r(   rg   r   r   r   rY      rZ   c                 S   s   t j| ddS )Nr   Zaxisrk   rg   r   r   r   rY      rZ   c                 S   s   t j| S rV   )r   maximumreducerg   r   r   r   rY      rZ   )Zcheck_dtypeic                 S   s
   t | S rV   rf   rg   r   r   r   rY      rZ   c                 S   s
   t | S rV   rf   rg   r   r   r   rY      rZ   )labelsr   foobarbazquxd   r.   )r   r   sortr7      r$   r)   r/   r*   25%50%75%r(   )4r   r   r   listr   nanr9   r)   tmassert_frame_equalr   r0   rS   	transformr[   r\   r;   copyr   r    astypepdcutr   assert_series_equalassert_produces_warningFutureWarningr(   rm   rn   filterr"   r   rQ   r   r   rJ   randomrandintr   randnasarrayr   describecodesargsorttaker   repeatassert_index_equalstackget_level_values)r>   data	exp_indexexpectedr   cat1cat2r=   gbZexp_idxrX   gr   rA   Zgbcresult2Zresult3Zresult4Zresult5levelsr   groupeddesc_resultidx
ord_labelsord_dataZexp_catsexpcexpr   r   r   
test_basicT   s    
 

 "  
$ " $


  
r   c                 C   s   t tdddttddgtdgdgd dgd  tdgd	d
gdd}|jd	g| d}t tdddttddgtdgdgd tdgd	d
gdd}|d}t	|| d S )NrD      r   r@   ra   r   rF   rC   ZIndex1ZIndex2)r   r   r   r   r    levelr7      )
r   r   r   r   r   ranger9   	get_groupr   r   )r7   r=   r   r   r   r   r   r   test_level_get_group   s&    
r   c                  C   s   t dgd dgd  dddgd tdd	} t| jdddgd
d| _| dd  }|jdd
d}ddddddg}t|dddgd
d}ddddddgt|g}t	j
|dd gd}tdgd |dd}t|| d S )NrO      rP   highZmedlowr2   g      (@)r4   doseZoutcomesTr
   r4   r   r   )r   Zsort_remainingr   rD   r    rJ   )r   r   r   r   r   r9   Zvalue_countsr   r   r   from_arraysr   r   r   r=   r   r    r   r   r   r   (test_sorting_with_different_categoricals  s    r   r   TFc           	   	   C   s$  t td| d}t tdddg| d}tt|}t|||d}|jdd	gd
d}tj||gdd	gd}tdddg|dgd}t	j
tddd |dd }W 5 Q R X t	|| | }t	|| |tj}t	|| tj||gdd	gd}td|d}|dd }t	|| d S )Nabcrq   Zaaar   r@   r
   )missingdenserQ   r   r   Tr6   r   r   rC          @rQ   r    rU   zSelect only validFri   c                 S   s
   t | S rV   )r   r)   rW   r   r   r   rY   -  rZ   ztest_apply.<locals>.<lambda>rK   c                 S   s   dS NrC   r   rW   r   r   r   rY   9  rZ   )r   r~   r   r   r   r   r9   r   r   r   r   r   r;   r   r)   aggr   r   )	r   r   r   rQ   r=   r   r   r   r   r   r   r   
test_apply  s,      r   c              	   C   s  t ddddgdddgdd}t ddddgdddgdd}t||d	d
ddgd}ddgd
 |d< |jdddg| d}tj||ddgd
 gdddgd}tdtd	d
ddg|di }| }| st|||ddggt	ddd}t
|| |jddg| d}tj||gddgd}tdd	d
ddgi|d}| }| sRt|||gt	ddd}t
|| t ddddgdddgddd	d	d
d
gddddgd}t|}|jd | d}	|	 }tt	d!d t	d"dd#}td$d$gd%dgd&|d}| s
tt	d"d t	d"dd#}
||
}t
|| |jd d'g| d}|d(}td)d*d%d+gt ddddgdddgddd	d
d	d
gd,d d'g}| st||jjd	d
ggd d'g}t
|| d-D ]<}|\}}||}||j|k|j|k@  }t
|| qdd.dd.dd	d	gddddd/d0d1gddd2ddddgd3}t|}t|d tddd}||d4< |jd4d5gd6| d7}|d(}|jd4d5gd| d7}|d( }t
|| d S )8Nr   r@   rL   Tr
   rA   rB   rM   rC   rD   rE   r2   rN   rr   rs   r8   rO   rP   r6   r   rQ   rK   ABCr   r   ABra   rb   rc   rd   )catintsvalr   abr   )rJ   r   r   g      ?      4@)r   r   r   r)         $@g      >@g      D@)r   r   r   ))r   rC   )r@   rD   )r@   rC   )r   rD   rz   2   <   F   err   rs   rt   r   rt   Fas_indexr7   )r   r   r9   r   r   r   r   r0   r!   r~   r   r   r)   r   r   r   	set_indexr   rQ   r   r   r   r   r   linspacereset_index)r7   r   r   r=   r   r   r   r   rB   Zgroups_single_keyr    groups_double_keykeyrA   ir   groupsZgroups2r   r   r   test_observed=  s            
  

      



  
	  

r   c                 C   s   ddddgddddgdddd	gd
}t |}t|d ddddg}d|_|j|dg| d}tj|ddddggddgd}t ddddgddddgd|d}| st||jddddggddg}|	d}t
|| d S )NrE   r2   rF   rC   rD   ra   rv      "   )C1C2C3r   r   r   r   r6   r   g      @g      @g      @r   g      Y@g      i@g      A@)r   r   rK   r)   )r   r   r   rJ   r9   r   r   r!   rQ   r   r   r   )r7   rB   r=   rQ   r   r   r   r   r   r   r   test_observed_codes_remap  s$    $   
r   c                  C   s   t tjjddddtjjddddtjjdddddd} | jtd| d< | jdd	d
gdd}| }|j	j
d  | j kst|j	j
d  | j kst|j	j
d  | j kstd S )Nr      i0u  rw   '  )r   int_idother_idrr   categoryr   r   r   Tr6   rC   rD   )r   r   r   r   r   r   strr9   r$   r    r   r,   r<   r   r   )r=   r   r   r   r   r   test_observed_perf  s    r   c                 C   s   t dddgdddgd}t|dddgd}|jd	| d
}|j}| rftddgddtdgddd}n*tddgddtg ddtdgddd}t|| d S )Nr   rA   r@   r   rC   rD   rE   r   valsr   r6   r   int64dtype)r   rA   r   r@   rA   )r   r   r9   r   r   r   assert_dict_equal)r7   r   r=   r   r   r   r   r   r   test_observed_groups  s    "
r   c                 C   s   t tdtjdgdddgddddgd}|jd	| d
}|j}| rXdtddgddi}n(tddgddtg ddtg ddd}t|| d S )Nr   r@   rB   r   rC   rD   rE   r   r   r6   r   r   r   )r   r@   rB   )	r   r   r   r   r9   r   r   r   r   )r7   r=   r   r   r   r   r   r   test_observed_groups_with_nan  s    

r   c                  C   s   t dtjtjgdddgd} tdddg}t| |d}|jd	d
dd d}t dddgdddgd}tdtjtjg|dd}d	|j_t	
|| d S )Nr   r@   rA   r   rC   rD   rE   )r   serr   Fr6   r   r   r   )r   r   r   r   r   r9   r+   r    rJ   r   r   )r   r   r=   r   r    r   r   r   r   test_observed_nth  s    r   c                 C   s   t tjdtjdgdddgd}tddddg}t||d	}|jd
| d  }| rxtt dgdddgddgd	}n,tt dddgdddgddtjtjgd	}t	|| d S )Nr   r@   rA   r   rC   rD   rE   r2   )s1s2r   r6   )
r   r   r   r   r   r9   r&   r   r   r   )r7   r   r   r=   r   r   r   r   r   #test_dataframe_categorical_with_nan  s    r   r7   ry   c           	      C   s   t ddddddgddddg| d}tddddddg}t||d}|jd||dd	 d
}t|jjdd}t|j}|sd|| < t||ksd|  d| d| d| }dst	|d S )NrB   r   r@   r   r
   )labelr   r   )r7   ry   r   r&   r^   r   zDLabels and aggregation results not consistently sorted
for (ordered=z, observed=z, sort=z
)
Result:
F)
r   r   r   r9   	aggregater    arrayisnar"   r<   )	r   r7   ry   r   r   r=   r   Zaggrmsgr   r   r   0test_dataframe_categorical_ordered_observed_sort  s     	

r   c               	   C   s  t jddd} tjjdddd}tj|| dd}ttjdd}|j	|d	d

 }|j	t|d	d

 }|| }t|j|jdd|_t|| |j	|d	d
}| }|j }||}	||}
|
j	|	d	d
 }t|| t|j|j t|jd|jd tjtdd| dd}t|}t| jd| tddddddddgd }t| jd| d S )Nz
2014-01-01r2   )periodsr   rv   rw   Trq   Fr6   r
   rz   r$   r)   r/   r*   r{   r|   r}   r(   rC   )r   
date_ranger   r   r   r   r   r   r   r9   r)   r   r   r   r    r   r   r   r   r   r   r   r   r   r   r   r   )r   r   r>   r   r   r   r   r   r   r   r   r   r   r   r   r   test_datetimeB  s<    
  



 
r   c                  C   s(  t jd} ddddg}| jdddd	}tj||d
d}tt t dd	ddt
dd}||d< |djddd }|t
d j|jdd }ttjddddg|d
ddd|_t|| |jddd }|t
d j|jdd }ttjddddg|d
ddd|_t|| d S )Ni90  rr   rs   rt   ru   r   r2   rb   rw   Trq   rH   rT   r>   Fr   r6   rC   rD   rE   r]   )r   r   ZRandomStater   r   r   r   r   r   Zreshaper~   r   r9   r0   r   r   r    r   r   )sr   r   r>   r=   r   r   r   r   r   test_categorical_indexi  s(    &  r   c                  C   sz   t ddddgddddgdd} ttjdd| d	}|d
dddgd  }t|	 j
|  t|	 j
j| j d S )Nru   rr   rt   rs   Tr
   rb   r2   rT   rC   rD   rE   rF   )r   r   r   r   r   r9   r   r   r   r   rU   assert_categorical_equalrQ   )r>   r=   r   r   r   r   !test_describe_categorical_columns  s    

r   c                  C   s   t tdddgd tdd d} | d d	| d< | jd
dgddd   }| }tddgddd}t	
|j| t	|jj|j |d |d  }tddgtddgd
dd}t	|| d S )Nra   rO   rP   rF   ZXYXXYrD   )r   mediumartistr  r   r  Fr6   r   r   rJ   r   r2   XYr]   rK   )r   r   r~   r   r9   r$   unstackr   r   r   r   rU   r   rQ   r   r   r   )r=   Zgcatr   Zexp_columnsr   r   r   r   test_unstack_categorical  s    r  c                  C   s^   t tjtjddddddddg
} t|  jd}tjt	dd | 
|  W 5 Q R X d S )NrC   rD   rE   r2   z$Grouper and axis must be same lengthrj   )r   r   r   r   r   dropnarQ   pytestraises
ValueErrorr9   r)   )seriesre   r   r   r   test_bins_unequal_len  s     r  r  r   r2   rE   rC   rD   rO   rP   c                 C   s   | d S r   r   )r   r   r   r   rY     rZ   rY   rR   c                 C   sD   |  ttddd}|t}t|t| d}t|| d S )NZABBAr   r   rK   )r9   r   r~   r   r   keysr   r   )r  r   r9   r   r   r   r   r   test_categorical_series  s    
r  c                     s  t tdddgdddgdddgdddgd	  jd
dgddd } t tddg jjjdddgddgd	d
ddgd}t| |  fdd} jd
|gddd } t tddg jjjdddgddgd	d
ddgd}t| | tdddgd
d} jd
|gddd } t| | d
dg}t tddg jjjdddgddgd	d
ddgd}dD ]8}t	t
d|d _ j|ddd } t| | q`d S )NrC   rD   rE   ra      e   f   g   )r   rO   rP   r   rO   FTr   r      rP   rT   c                    s    j | df S )NrO   )loc)rr=   r   r   rY     rZ   ztest_as_index.<locals>.<lambda>r   r   r@   r]   )Nr  rP   r   )r   r   r9   r0   r   r   r   r   r   r   r~   r    )r   r   r   r   Zgroup_columnsrJ   r   r  r   test_as_index  sP    	r  c                  C   s   t d} tdtt d| ddi}t| | ddd}t|jdddd j| t|jdddd j| tdtt d| ddi}t| | ddd}tt d	t d	ddd}t|jdddd j| t|jdddd j| d S )
Nr   rO   baTr
   r  Frx   bac)	r~   r   r   r   r   r   r9   r&   r    )r   r=   r    r   Znosort_indexr   r   r   test_preserve_categories  s,        r  c               	   C   s   t dddddgdddddgttdtd	d
dttdtd	ddd} t ddtjgddtjgttd	td	d
dttd	td	ddd}dD ]V}| j|d
d
d }| j|dd
d  }|j|j	d}t
|| t
|| qd S )NrC   rD   ra      r      r   Zabaabr  Fr
   T)rO   rP   r   r   r   g      ?g      9@r   )r   r   )byr   r7   rT   )r   r   r~   r   r   r9   r)   r   r   rU   r   r   )r=   Zexp_fullcolZresult1r   r   r   r   r   test_preserve_categorical_dtype  s(    	

r"  zfunc, valuesr&   secondr'   fourththirdr*   r(   c                 C   s   t ddddgdd}tddddg|d	}|d
}t||  }tddgt||jdd	d
}t|| |d
d }t||  }|d }t	|| d S )Nr&   r#  r%  r$  Trq   r   )payloadr!  r'  r   r!  )
r   r   r9   getattrr   r   r   r   r   r   )funcrQ   rA   r=   r   r   r   Zsgbr   r   r   test_preserve_on_ordered_ops1  s    
r*  c                  C   s  t tjd} tdddddddddg	}tj|dddgdd}| j|dd }| j|dd }t	|j
|j|jd	|_
t|| tddddddd
d
d
g	}tj|dddd
gdd}| j|dd }| j|dd |j}t	|j
|j|jd	|_
t|| tdddddddddg	ddddgdd	}tddddddd
ddg	|d} | jddd }|d j}tdddtjg}t|| d S )N	   r   rC   rD   Trq   Fr6   r
   rE   r   r@   rA   rB   r2   rF   rG   )r   r   r   r   r   r   r   r9   r)   r   r    r   r   r   r   r   r   rQ   r   assert_numpy_array_equal)r   r   r>   r   r   r   r   r   test_categorical_no_compressM  s>        
 
r-  c                  C   sd   t d gd tdddgd} | d d }ttg ddgdtg ddd	dd
}t|| d S )NrE   Ztraintestr  rO   rP   r   r^   r   rJ   r   )r   r   r9   r&   r   r   r   r=   r   r   r   r   r    test_groupby_empty_with_categoryr  s    r1  c                  C   s   t dtjdddi} dd tdddD }t||}| jdgdd	} tj| j	tdd
dd|d| d< | j
dgddd  }|t|jdd d }t|j|jjd|_t|| d S )Nvaluer   r   rv   c                 S   s   g | ]}| d |d  qS )z - i  r   ).0r   r   r   r   
<listcomp>  s     ztest_sort.<locals>.<listcomp>i  T)r   Z	ascendingi)  F)rightrp   Zvalue_groupr6   c                 S   s   t |  d S )Nr   )floatsplitrW   r   r   r   rY     rZ   ztest_sort.<locals>.<lambda>)r   r]   )r   r   r   r   r   r   Zsort_valuesr   r   r2  r9   r$   sortedr    r   rJ   r   r   )r=   rp   Z
cat_labelsresr   r   r   r   	test_sort  s    
 
  
r:  c               
   C   s  t dddgdddgdddgdd	d
gdddgdddgdddggdddgd} t| d dd| d< tddddgddd}t ddgddgd	d
gddggddg|d}d}| j|ddd }t|| |}| j|ddd }t|| t| d dd| d< tddddgdd}t ddgddgd	d
gddggddg|d}tddddgddddgdd}t ddgddgd	d
gddgg|ddgd}d}| j|ddd }t|| | j|ddd }t|| d S )Nz	(7.5, 10]ra   rz   rb   z(2.5, 5]rF   rc   z(5, 7.5]r   rd   r2   r   z(0, 2.5]rC   r   rR   r   r   rr   rs   rT   Trq   rI   )rU   r    Frx   r]   r   rJ   r   )r   r   r   r9   r&   r   r   )r=   r    Zexpected_sortr!  result_sortZexpected_nosortresult_nosortr   r   r   
test_sort2  sj    	
    
   

  r>  c                  C   s  t tdddtdddtdddtdddtdddtdddtdddgddddd	ddgdd
dddddgddddgd} t| d dd| d< tdddtdddtdddtdddg}t ddgddgddgddggddgd}t|ddd|_tdddtdddtdddtdddg}t ddgddgddgddggddgd}t||ddd|_d}t|| j|ddd  t|| j|ddd  t| d dd| d< tdddtdddtdddtdddg}t ddgddgddgddggddgd}t|dd|_tdddtdddtdddtdddg}t ddgddgddgddggddgd}t||dd|_d}t|| j|ddd  t|| j|ddd  d S )Ni  rR   rC   rD   rF   ra   rz   r   r2   rb   rc   rd   r   r   r   )dtrr   rs   r?  rr   rs   rT   Trq   rI   )r   rJ   r   Frx   r]   r;  )	r   r   r   r   r    r   r   r9   r&   )r=   r    r<  r=  r!  r   r   r   test_sort_datetimelike  s    






	



 



      



 



   r@  c                  C   s   t tdddgdddgddddgd} tdddgdd	}| jdd
dj }tdddg|dd	}t|| | jdd
djjdd}tdddg|dd	}t|| | jdd
djjdd}tddt	j
g|dd	}t|| | jdd
djjdd}tdt	j
t	j
g|dd	}t|| d S )Nr   r@   rA   r   rC   rD   r  rO   r]   Fr6   rE   r   rP   Z	min_count)r   r   r   r9   rP   r0   r   r   r   r   r   r=   Zexpected_idxr   r   r   r   r   test_empty_sum,  s     "rC  c                  C   s   t tdddgdddgddddgd} tdddgdd	}| jdd
dj }tdddg|dd	}t|| | jdd
djjdd}tdddg|dd	}t|| | jdd
djjdd}tddt	j
g|dd	}t|| d S )Nr   r@   rA   r   rC   rD   r  rO   r]   Fr6   rP   r   rA  )r   r   r   r9   rP   r-   r   r   r   r   r   rB  r   r   r   test_empty_prodH  s    "rD  c                  C   s   t ttdtttjddddd tdd} | dd	g }t	j
td
ddgttjddddgdd	gd}t ddddddddtjdg	i|d}t|| d S )NZ	abcbabcbaz2018-06-01 00Z1TrE   )freqr   r+  )key1key2rQ   rF  rG  r   r@   rA   r   rQ   r   r2   rz   rF   r   rD   rK   )r   r   r~   r   r   r   r   r9   r)   r   r   r   r   r   )r=   r   r   r   r   r   r   ,test_groupby_multiindex_categorical_datetime`  s"    
	$rH  zas_index, expectedr   r   r   r@   r   rX   )r    r   rJ   r   r@   rX   c                 C   sV   t tdddgdddddgdddgd}|jddg| d	d
d  }t|| d S )NrC   rD   r   r   rE   rI  r   r@   Tr   rX   )r   r   r9   r0   r   assert_equal)r   r   r=   r   r   r   r   ,test_groupby_agg_observed_true_single_columny  s
    $rK  r   c                 C   sZ   t ddddgddddgdd}t d dddgddddgdd}|jd| d}t|| d S )	Nr   r@   rA   rB   Fr
   rC   r   )r   shiftr   rJ  )r   ctr   r9  r   r   r   
test_shift  s    
 
 
 
 rN  c                 C   s\   |   dd }|d d|d< |d d|d< tddddg|d	< |jd
gdd}|S )a  
    DataFrame with multiple categorical columns and a column of integers.
    Shortened so as not to contain all possible combinations of categories.
    Useful for testing `observed` kwarg functionality on GroupBy objects.

    Parameters
    ----------
    df: DataFrame
        Non-categorical, longer DataFrame from another fixture, used to derive
        this one

    Returns
    -------
    df_cat: DataFrame
    Nr2   rO   r   rP   rC   rD   rE   r8   r:   rl   )r   r   r   Zdrop)r=   df_catr   r   r   rO    s    rO  	operationr   r;   c                 C   s   t ddddg| d jdd}t ddddg| d jdd}t||g}td	d
ddg|dd}| jddgddd }t||t}t	|| d S )Nrr   rs   rO   r/  onetwothreerP   rC   rE   rD   r2   r8   r   r    rJ   Tr6   )
r   r   r   r   r   r9   r(  r0   r   r   )rO  rP  Zlev_aZlev_br    r   r   r   r   r   r    test_seriesgroupby_observed_true  s    rU  c                 C   s   t jtddgddtdddgddgdd	gd
 \}}tddtjdtjdg|dd}|dkrl|jddd}| jdd	g|dd }t	||t
}t|| d S )Nrs   rr   Frq   rQ  rS  rR  rO   rP   r   rD   r2   rC   rE   r8   rT  r   r   Zinfer)Zdowncastr6   )r   r   r   Z	sortlevelr   r   r   Zfillnar9   r(  r0   r   r   )rO  r7   rP  r    _r   r   r   r   r   r   )test_seriesgroupby_observed_false_or_none  s    rW  zobserved, index, datarr   rs   rO   r/  rQ  rR  rS  rP   rq   c                 C   s>   t ||dd}| jddg|dd dd }t|| d S )Nr8   rT  rO   rP   r6   c                 S   s   |   |  dS )Nr*   r(   rX  rW   r   r   r   rY     rZ   z8test_seriesgroupby_observed_apply_dict.<locals>.<lambda>)r   r9   r;   r   r   )rO  r7   r    r   r   r   r   r   r   &test_seriesgroupby_observed_apply_dict  s
    .rY  c                 C   s<   |  ddgd  }|  ddg d }t|| d S )NrO   rP   r8   )r9   r)   r   r   )rO  r   r   r   r   r   4test_groupby_categorical_series_dataframe_consistent  s    rZ  codec                 C   sr   t ddddgddddgd	d
ddgd}tj| tdd}|j|dd }|jj|dd j}t|| d S )NrC   rD   rE   r2   r   r&  rF   r   rR   rz   r   r   r   rl   r   )	r   r   r   r~   r9   r)   Tr   r   )r[  r=   r   r   r   r   r   r   test_groupby_categorical_axis_1  s
    (r_  z(ignore:.*Select only valid:FutureWarningc                 C   s\   t tddg|dddgdddgd	}| }|jd| d
jt jdd }t|| d S )NZBobZGregrq   rC   rD   )NameItemr`  ra  rT   r6   T)Zskipna)	r   r   r   r9   r   r0   r   r   r   )r7   r   r=   r   r   r   r   r   $test_groupby_cat_preserves_structure)  s     rb  c               	   C   sL   t ddddgtdd} tjtdd | ddd	  W 5 Q R X d S )
Nr   r@   r2   r1   r   z'vau'r  r1   c                 S   s&   t | jd d g| jd d gdS )Nr   r1   Zvaurc  )r   r\   )Zrowsr   r   r   rY   @  s    z/test_get_nonexistent_category.<locals>.<lambda>)r   r   r
  r  KeyErrorr9   r;   r  r   r   r   test_get_nonexistent_category;  s
    
re  c           
      C   s   | dkrt d | dkr4t jjdd}|j| tttdtddttd	d
 tdddgd d}ddgi	| g }|rdnd}|j
ddg|dd }t|| }|| }	t|	|kstd S )Nngroupngroup is not truly a reductionr%   6TODO: implemented SeriesGroupBy.corrwith. See GH 32293reasonAABBABCDr   r   rD   皙?r2   cat_1cat_2r2  r+   r   r  ro  rp  r6   r2  )r
  skipmarkxfailnode
add_markerr   r   r~   getr9   r(  r   r<   )
reduction_funcr7   requestrr  r=   r   Zexpected_lengthseries_groupbyr   r   r   r   r   0test_series_groupby_on_2_categoricals_unobservedF  s&    

rz  c                 C   s4  | dkrt d | dkr4t jjdd}|j| tttdtddttd	d
 tdddgd d}t	dt	dt	dt	dt	dg}ddgi
| g }|jddgddd }t|| }|| }t|  }	|D ].}
|j|
 }t|	rt|s||	kstq|	dkr0| dkr0t|jtjs0td S )Nrf  rg  r%   rh  ri  rk  r   r   r   rD   rm  r2   rn  ZACZBCCAZCBCCr+   r   ro  rp  Fr6   r2  r0   )r
  rq  rr  rs  rt  ru  r   r   r~   tuplerv  r9   r(  -_results_for_groupbys_with_missing_categoriesr  r   r   r<   r   Z
issubdtyper   integer)rw  rx  rr  r=   Z
unobservedr   ry  r   r   Zzero_or_nanr   r   r   r   r   ?test_series_groupby_on_2_categoricals_unobserved_zeroes_or_nansc  s0    
"

"r  c                 C   s   | dkrt d tttdtddttdtddddddgd	}d
dddg}|jddgdd}dg|gd| g }t|| | }|D ]}||jkst	qd S )Nrf  2ngroup does not return the Categories on the indexrk  r   r   111112rm  rn  rO   2rP   r  r8   1r8   r  ro  rp  Tr6   r   r+   r%   )
r
  rq  r   r   r~   r9   rv  r(  r    r<   )rw  r=   unobserved_catsdf_grpr   r9  r   r   r   r   >test_dataframe_groupby_on_2_categoricals_when_observed_is_true  s    

r  c           	      C   s   | dkrt d tttdtddttdtddddddgd	}d
dddg}|jddg|d}dg|gd| g }t|| | }t|  }|t	j
kr|j|    stn|j| |k  std S )Nrf  r  rk  r   r   r  r  rm  rn  r  r  r  r  ro  rp  r6   r   r  )r
  rq  r   r   r~   r9   rv  r(  r~  r   r   r  Zisnullr"   r<   )	rw  r7   rx  r=   r  r  r   r9  r   r   r   r   ?test_dataframe_groupby_on_2_categoricals_when_observed_is_false  s     	


r  c                  C   s   ddddgddddgdd	dd	gd
} t | }t|d tddd}||d< |jddgddd}|d d}|dd }t|| d S )Nra   rz   r2   rC   rb   rc   rd   rB   rA   r   rr   r   rF   r   rt   T)r   ry   r)   )	r   r   r   r   r   r9   r   r   r   )rB   r=   r   r   r   r   r   r   r   3test_series_groupby_categorical_aggregation_getitem  s    $r  zfunc, expected_valuesc              	   C   sv   t dddddgdddddgtdddddgdd}|d| }t d	|itdddgdd
d}t|| d S )Nr   rC   rD   rE   r2   )idr   r2  r  r   r2  r]   rK   )r   r   r   r9   r   r   r   r   )r)  Zexpected_valuesr=   r   r   r   r   r   $test_groupby_agg_categorical_columns  s     r  c                  C   s   t dtdddgdddgdi} t dddgiddgd}| dddgtj}t|| | dddg }t|| d S )	NrO   r   r@   rA   r   rD   rC   rK   )r   r   r9   r   r   r,   r   r   r=   r   r   r   r   r   test_groupby_agg_non_numeric  s     r  r)  c                 C   sl   t dgtdgddj d}|dd }t||  }tdgtdgddd|d jd	}t	|| d S )
Ni  r@   r   r   r  rO   rP   r]   r    rJ   r   )
r   r   r   Z
as_orderedr9   r(  r   r   r   r   )r)  r=   Z
df_groupedr   r   r   r   r   ;test_groupy_first_returned_categorical_instead_of_dataframe  s        r  c                  C   s   t ddg} d| j_tddddgtddddgt| dd}td	d
dgitddgddd}|jddd	 }t
|| d S )NrC   rD   FrE   rF   rR   r   rG   r   r   g      @r@   r]   r   ry   )r   r   flagsZ	writeabler   r   r   r   r9   r)   r   r   )r>   r=   r   r   r   r   r   test_read_only_category_no_sort  s    $
 r  c               
   C   s   t ddddddddgddddddddgd} | d djjd	dddgd
d| d< t ddddddddddddd}|jddd}td	dddgd	dddgd
ddd|_| ddg 	 }t
|| d S )Nsmalllarger  r8   rO   )rr   rs   rr   r   tinyTrq   r   )rO   r8   rC   rE   rD   )r  r  r  r  rs   r    rl   )r   r   rJ   r   )r   r   r   Zset_categoriesZrename_axisr   rU   r9   r.   r  r   r   r  r   r   r   #test_sorted_missing_category_values  sJ    

 

r  c                  C   s   t dddddgi} | d d| d< | dj }tdddgtdddgddd| d jd}t	|| | d
dd	i}| }t|| d S )
NZcol_numrC   rD   rE   r   col_catr]   r  r&   )r   r   r9   r  r&   r   r   r   r   r   r   to_framer   r0  r   r   r   1test_agg_cython_category_not_implemented_fallback@  s    r  c               	   C   s   t ddddgddddgddtjdgddddgddddgd	} | d
di} | ddgdd }tjddgddggdd}t ddgddgddgd|d}t	|| d S )NrC   rD   rm  g?g333333?rr   rs   Zfee)rO   rP   numerical_col
object_colcategorical_colr  r   rO   rP   c                 S   s   |    S rV   )r   r0   r  r   r   r   rY   d  rZ   z7test_aggregate_categorical_with_isnan.<locals>.<lambda>r  r   r   )r  r  r  r   )
r   r   r   r   r9   r   r   r   r   r   r   r   r   r   %test_aggregate_categorical_with_isnanV  s&    




r  c               	   C   s   t ddddddgddddddgd} tjdddgdd	}| d
 || d
< | dd
 t| d< |  }t ddddddgddddddgddddddgd}|d
 ||d
< |d ||d< t	|| d S )NrC   rD   rE   ZWaitingZOnTheWayZ	Delivered)
package_idstatusTr
   r  r  last_status)r  r  r  )
r   r   ZCategoricalDtyper   r9   r   r(   r   r   r   )r=   Zdelivery_status_typer   r   r   r   r   test_categorical_transformq  sN     	r  )r)  r7   c                 C   s   t ddddg}ddddg}t|||d}t ddg}tj||gddgd}tdtjtjdg|ddtdtjtjdg|ddd	}||  }|r| tj	}|j
ddg|d
d }	t|	|  }
t|
| d S Nr   rC   r   r   r@   r   rA   r]   )r&   r'   r6   )r   r   r   r   r   r   NaNr	  r   r   r9   r(  r   r   )r)  r7   r   r   r=   r   r   expected_dictr   Zsrs_grpr   r   r   r   Ftest_series_groupby_first_on_categorical_col_grouped_on_2_categoricals  s    r  c                 C   s   t ddddg}ddddg}t|||d}t ddg}tj||gddgd}tdtjtjdg|ddtdtjtjdg|ddd	}||   }|r| 	tj
}|jddg|d
}	t|	|  }
t|
| d S r  )r   r   r   r   r   r   r  r  r	  r   r   r9   r(  r   r   )r)  r7   r   r   r=   r   r   r  r   r  r   r   r   r   Btest_df_groupby_first_on_categorical_col_grouped_on_2_categoricals  s    r  c                  C   s   t tdddgdddgdtdd} | jddd	}|j}tjd
dgddtjdgddtjg ddd}| | ks~t| D ]}t	
|| ||  qd S )Nr@   r   rA   r   rE   )r   r!  r   Fr  r   rC   Zintpr   rD   )r@   r   rA   )r   r   r   r9   indicesr   r   r  r<   r   r,  )r=   r   r   r   r   r   r   r   2test_groupby_categorical_indices_unused_categories  s    r  c                 C   sp   t ddddgi}|d d|d< t|dd |  }ttdddgdtdddgddd}t|| d S )	Nr   rC   rD   rE   r   r@   r]   )rJ   r    )	r   r   r(  r9   r   r   r   r   r   )r)  r=   r   r   r   r   r   1test_groupby_last_first_preserve_categoricaldtype  s      r  c               	   C   s   t ddgddgddgd} | jdddd} | jd	d
gdd d }tddgttddgd	dtddgd
dgdd}t	|| d S )NrC   rD   ra   r  r   r   rG   r   r   r@   Tr6   rA   r]   r   )
r   r   r9   r,   r   r   r   r   r   r   r0  r   r   r   )test_groupby_categorical_observed_nunique  s    r  )\r   Znumpyr   r
  Zpandasr   r   r   r   r   r   r   r	   Zpandas._testingZ_testingr   r  r!   r~  r?   r   r   r   rr  Zparametrizer   r   r   r   r   r   r   r   r   r   r   r   r  r  r   renamer  r  r  r"  r*  r-  r1  r:  r>  r@  rC  rD  rH  r   rK  r   ZNaTrN  ZfixturerO  rU  rW  r   rY  rZ  r_  filterwarningsrb  re  rz  r  r  r  r  r,   r$   r  r  r  r  r  r  r  r  r   boolr  r  r  r  r  r   r   r   r   <module>   sx  $	! 
%l '
$

8



	
%:[ 




"
",
	
	

+
!

-5  
