@@ -622,6 +622,59 @@ def test_dataframe_groupby_value_counts(
622622 pd .testing .assert_frame_equal (pd_result , bf_result , check_dtype = False )
623623
624624
@pytest.mark.parametrize(
    ("numeric_only", "min_count"),
    [
        (False, 4),
        (True, 0),
    ],
)
def test_dataframe_groupby_first(
    scalars_df_index, scalars_pandas_df_index, numeric_only, min_count
):
    """Grouped ``first`` on ``scalars_df_index`` must equal the pandas result.

    Both frames are grouped by the parity of ``int64_col`` (``int64_col % 2``)
    and aggregated with ``first`` using the parametrized ``numeric_only`` and
    ``min_count``. The result from ``scalars_df_index`` is converted with
    ``to_pandas()`` and compared strictly (dtypes included) against the result
    from ``scalars_pandas_df_index``.
    """
    # min_count does not seem to work properly on older pandas, so skip there.
    pytest.importorskip("pandas", minversion="2.0.0")

    # pandas does not handle min_count properly for bytes and date columns,
    # so both are removed from each frame before grouping.
    excluded_columns = ["bytes_col", "date_col"]
    first_kwargs = {"numeric_only": numeric_only, "min_count": min_count}

    trimmed_df = scalars_df_index.drop(columns=excluded_columns)
    grouped_df = trimmed_df.groupby(scalars_df_index.int64_col % 2)
    actual = grouped_df.first(**first_kwargs).to_pandas()

    trimmed_pandas_df = scalars_pandas_df_index.drop(columns=excluded_columns)
    grouped_pandas_df = trimmed_pandas_df.groupby(
        scalars_pandas_df_index.int64_col % 2
    )
    expected = grouped_pandas_df.first(**first_kwargs)

    pd.testing.assert_frame_equal(expected, actual)
652+
653+
@pytest.mark.parametrize(
    ("numeric_only", "min_count"),
    [
        (True, 2),
        (False, -1),
    ],
)
def test_dataframe_groupby_last(
    scalars_df_index, scalars_pandas_df_index, numeric_only, min_count
):
    """Grouped ``last`` on ``scalars_df_index`` must equal the pandas result.

    Both frames are grouped by the parity of ``int64_col`` (``int64_col % 2``)
    and aggregated with ``last`` using the parametrized ``numeric_only`` and
    ``min_count``. The result from ``scalars_df_index`` is converted with
    ``to_pandas()`` and compared strictly (dtypes included) against the result
    from ``scalars_pandas_df_index``.
    """
    last_kwargs = {"numeric_only": numeric_only, "min_count": min_count}

    grouped_df = scalars_df_index.groupby(scalars_df_index.int64_col % 2)
    actual = grouped_df.last(**last_kwargs).to_pandas()

    grouped_pandas_df = scalars_pandas_df_index.groupby(
        scalars_pandas_df_index.int64_col % 2
    )
    expected = grouped_pandas_df.last(**last_kwargs)

    pd.testing.assert_frame_equal(expected, actual)
676+
677+
625678# ==============
626679# Series.groupby
627680# ==============
@@ -841,3 +894,48 @@ def test_series_groupby_value_counts(
841894 normalize = normalize , ascending = ascending , dropna = dropna
842895 )
843896 pd .testing .assert_series_equal (pd_result , bf_result , check_dtype = False )
897+
898+
@pytest.mark.parametrize(
    ("numeric_only", "min_count"),
    [
        (True, 2),
        (False, -1),
    ],
)
def test_series_groupby_first(
    scalars_df_index, scalars_pandas_df_index, numeric_only, min_count
):
    """Series-level grouped ``first`` must equal the pandas result.

    Each frame is grouped by ``string_col``, the ``int64_col`` column is
    selected, and ``first`` is applied with the parametrized ``numeric_only``
    and ``min_count``. The result from ``scalars_df_index`` is converted with
    ``to_pandas()`` and compared strictly against the pandas series.
    """
    first_kwargs = {"numeric_only": numeric_only, "min_count": min_count}

    grouped_column = scalars_df_index.groupby("string_col")["int64_col"]
    actual = grouped_column.first(**first_kwargs).to_pandas()

    grouped_pandas_column = scalars_pandas_df_index.groupby("string_col")[
        "int64_col"
    ]
    expected = grouped_pandas_column.first(**first_kwargs)

    pd.testing.assert_series_equal(expected, actual)
921+
922+
@pytest.mark.parametrize(
    ("numeric_only", "min_count"),
    [
        (False, 4),
        (True, 0),
    ],
)
def test_series_groupby_last(
    scalars_df_index, scalars_pandas_df_index, numeric_only, min_count
):
    """Series-level grouped ``last`` must equal the pandas result.

    Each frame is grouped by ``string_col``, the ``int64_col`` column is
    selected, and ``last`` is applied with the parametrized ``numeric_only``
    and ``min_count``. The result from ``scalars_df_index`` is converted with
    ``to_pandas()`` and compared strictly against the pandas series.
    """
    last_kwargs = {"numeric_only": numeric_only, "min_count": min_count}

    grouped_column = scalars_df_index.groupby("string_col")["int64_col"]
    actual = grouped_column.last(**last_kwargs).to_pandas()

    grouped_pandas_column = scalars_pandas_df_index.groupby("string_col")[
        "int64_col"
    ]
    expected = grouped_pandas_column.last(**last_kwargs)

    pd.testing.assert_series_equal(expected, actual)
0 commit comments