55import copy
66import warnings
77
8+ import string
9+
810import numpy as np
911from pandas .compat import range , lrange , lzip , zip , map , filter
1012import pandas .compat as compat
2830 is_list_like ,
2931 _ensure_int64 ,
3032 _ensure_float64 ,
31- _ensure_object )
33+ _ensure_object ,
34+ _get_dtype )
3235from pandas .types .missing import na_value_for_dtype
3336
3437from pandas .core .generic import NDFrame
@@ -271,8 +274,8 @@ def merge_asof(left, right, on=None,
271274 DataFrame whose 'on' key is less than or equal to the left's key. Both
272275 DataFrames must be sorted by the key.
273276
274- Optionally perform group-wise merge. This searches for the nearest match
275- on the 'on' key within the same group according to 'by '.
277+ Optionally match on equivalent keys with 'by' before searching for nearest
278+ match with 'on'.
276279
277280 .. versionadded:: 0.19.0
278281
@@ -299,16 +302,15 @@ def merge_asof(left, right, on=None,
299302
300303 .. versionadded:: 0.19.2
301304
302- by : column name
303- Group both the left and right DataFrames by the group column; perform
304- the merge operation on these pieces and recombine.
305+ by : column name or list of column names
306+ Match on these columns before performing merge operation.
305307 left_by : column name
306- Field name to group by in the left DataFrame.
308+ Field names to match on in the left DataFrame.
307309
308310 .. versionadded:: 0.19.2
309311
310312 right_by : column name
311- Field name to group by in the right DataFrame.
313+ Field names to match on in the right DataFrame.
312314
313315 .. versionadded:: 0.19.2
314316
@@ -997,27 +999,46 @@ def get_result(self):
997999 return result
9981000
9991001
1000- _asof_functions = {
1001- 'int64_t' : _join .asof_join_int64_t ,
1002- 'double' : _join .asof_join_double ,
1003- }
def _asof_function(on_type):
    """
    Look up the asof-join routine specialised for the 'on' key type.

    Parameters
    ----------
    on_type : str
        C type name of the 'on' key, e.g. 'int64_t' or 'double'.

    Returns
    -------
    callable
        The attribute of ``_join`` named ``asof_join_<on_type>``.

    Raises
    ------
    MergeError
        If ``_join`` has no attribute with that name.
    """
    func = getattr(_join, 'asof_join_%s' % on_type, None)
    if func is None:
        # fail here with a readable message; handing None back to the caller
        # would only surface later as "'NoneType' object is not callable"
        raise MergeError('no asof join implementation for type: %s'
                         % on_type)
    return func
1004+
1005+
def _asof_by_function(on_type, by_type):
    """
    Look up the asof-join routine specialised for an ('on', 'by') type pair.

    Parameters
    ----------
    on_type : str
        C type name of the 'on' key, e.g. 'int64_t' or 'double'.
    by_type : str
        C type name of the 'by' key, e.g. 'int64_t' or 'object'.

    Returns
    -------
    callable
        The attribute of ``_join`` named
        ``asof_join_<on_type>_by_<by_type>``.

    Raises
    ------
    MergeError
        If ``_join`` has no attribute with that name.
    """
    func = getattr(_join, 'asof_join_%s_by_%s' % (on_type, by_type), None)
    if func is None:
        # fail here with a readable message; handing None back to the caller
        # would only surface later as "'NoneType' object is not callable"
        raise MergeError('no asof join implementation for types: %s by %s'
                         % (on_type, by_type))
    return func
10041008
1005- _asof_by_functions = {
1006- ('int64_t' , 'int64_t' ): _join .asof_join_int64_t_by_int64_t ,
1007- ('double' , 'int64_t' ): _join .asof_join_double_by_int64_t ,
1008- ('int64_t' , 'object' ): _join .asof_join_int64_t_by_object ,
1009- ('double' , 'object' ): _join .asof_join_double_by_object ,
1010- }
10111009
# C type name -> function that coerces an array to that type
_type_casters = dict(
    int64_t=_ensure_int64,
    double=_ensure_float64,
    object=_ensure_object,
)
10171015
1016+ _cython_types = {
1017+ 'uint8' : 'uint8_t' ,
1018+ 'uint32' : 'uint32_t' ,
1019+ 'uint16' : 'uint16_t' ,
1020+ 'uint64' : 'uint64_t' ,
1021+ 'int8' : 'int8_t' ,
1022+ 'int32' : 'int32_t' ,
1023+ 'int16' : 'int16_t' ,
1024+ 'int64' : 'int64_t' ,
1025+ 'float16' : 'error' ,
1026+ 'float32' : 'float' ,
1027+ 'float64' : 'double' ,
1028+ }
1029+
10181030
def _get_cython_type(dtype):
    """
    Translate a dtype into a C type name such as 'int64_t' or 'double'.

    Dtype names missing from ``_cython_types`` map to 'object'; a name
    whose table entry is 'error' raises MergeError.
    """
    name = _get_dtype(dtype).name
    try:
        ctype = _cython_types[name]
    except KeyError:
        # not in the table: treat as a generic object key
        return 'object'
    if ctype == 'error':
        raise MergeError('unsupported type: ' + name)
    return ctype
1038+
1039+
1040+ def _get_cython_type_upcast (dtype ):
1041+ """ Upcast a dtype to 'int64_t', 'double', or 'object' """
10211042 if is_integer_dtype (dtype ):
10221043 return 'int64_t'
10231044 elif is_float_dtype (dtype ):
@@ -1084,11 +1105,6 @@ def _validate_specification(self):
10841105 if not is_list_like (self .right_by ):
10851106 self .right_by = [self .right_by ]
10861107
1087- if len (self .left_by ) != 1 :
1088- raise MergeError ("can only asof by a single key" )
1089- if len (self .right_by ) != 1 :
1090- raise MergeError ("can only asof by a single key" )
1091-
10921108 self .left_on = self .left_by + list (self .left_on )
10931109 self .right_on = self .right_by + list (self .right_on )
10941110
@@ -1142,6 +1158,13 @@ def _get_merge_keys(self):
11421158 def _get_join_indexers (self ):
11431159 """ return the join indexers """
11441160
def flip(xs):
    """
    Combine several equal-length arrays into one array of row tuples.

    Unlike np.transpose, the result is a one-dimensional structured array
    with one field per input array, each field keeping its array's dtype.

    Parameters
    ----------
    xs : sequence of ndarray

    Returns
    -------
    ndarray
        Structured array whose i-th element holds the i-th value of every
        input array.
    """
    dtypes = [x.dtype for x in xs]
    n = len(dtypes)
    # fields are named 'a', 'b', ... as before; past 26 arrays the alphabet
    # runs out, so continue with positional names rather than letting zip
    # silently drop the remaining fields
    labels = list(string.ascii_lowercase[:n])
    labels.extend('f%d' % i for i in range(len(labels), n))
    labeled_dtypes = list(zip(labels, dtypes))
    return np.array(list(zip(*xs)), labeled_dtypes)
1167+
11451168 # values to compare
11461169 left_values = (self .left .index .values if self .left_index else
11471170 self .left_join_keys [- 1 ])
@@ -1165,22 +1188,23 @@ def _get_join_indexers(self):
11651188
11661189 # a "by" parameter requires special handling
11671190 if self .left_by is not None :
1168- left_by_values = self .left_join_keys [0 ]
1169- right_by_values = self .right_join_keys [0 ]
1170-
1171- # choose appropriate function by type
1172- on_type = _get_cython_type (left_values .dtype )
1173- by_type = _get_cython_type (left_by_values .dtype )
1191+ if len (self .left_join_keys ) > 2 :
1192+ # get tuple representation of values if more than one
1193+ left_by_values = flip (self .left_join_keys [0 :- 1 ])
1194+ right_by_values = flip (self .right_join_keys [0 :- 1 ])
1195+ else :
1196+ left_by_values = self .left_join_keys [0 ]
1197+ right_by_values = self .right_join_keys [0 ]
11741198
1175- on_type_caster = _type_casters [on_type ]
1199+ # upcast 'by' parameter because HashTable is limited
1200+ by_type = _get_cython_type_upcast (left_by_values .dtype )
11761201 by_type_caster = _type_casters [by_type ]
1177- func = _asof_by_functions [(on_type , by_type )]
1178-
1179- left_values = on_type_caster (left_values )
1180- right_values = on_type_caster (right_values )
11811202 left_by_values = by_type_caster (left_by_values )
11821203 right_by_values = by_type_caster (right_by_values )
11831204
1205+ # choose appropriate function by type
1206+ on_type = _get_cython_type (left_values .dtype )
1207+ func = _asof_by_function (on_type , by_type )
11841208 return func (left_values ,
11851209 right_values ,
11861210 left_by_values ,
@@ -1190,12 +1214,7 @@ def _get_join_indexers(self):
11901214 else :
11911215 # choose appropriate function by type
11921216 on_type = _get_cython_type (left_values .dtype )
1193- type_caster = _type_casters [on_type ]
1194- func = _asof_functions [on_type ]
1195-
1196- left_values = type_caster (left_values )
1197- right_values = type_caster (right_values )
1198-
1217+ func = _asof_function (on_type )
11991218 return func (left_values ,
12001219 right_values ,
12011220 self .allow_exact_matches ,
0 commit comments