Skip to content

Commit 2ced5af

Browse files
committed
Update join to avoid in-place changes.
1 parent 6892d84 commit 2ced5af

File tree

1 file changed

+32
-3
lines changed

1 file changed

+32
-3
lines changed

bigframes/dataframe.py

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3559,7 +3559,7 @@ def _join_on_key(
35593559
rsuffix: str,
35603560
should_duplicate_on_key: bool,
35613561
) -> DataFrame:
3562-
left, right = self, other
3562+
left, right = self.copy(), other
35633563
# Replace all columns names with unique names for reordering.
35643564
left_col_original_names = left.columns
35653565
on_col_name = "bigframes_left_col_on"
@@ -3638,6 +3638,35 @@ def _add_join_suffix(
36383638
rsuffix: str = "",
36393639
extra_col: typing.Optional[str] = None,
36403640
):
3641+
"""Applies suffixes to overlapping column names to mimic a pandas join.
3642+
3643+
This method identifies columns that are common to both a "left" and "right"
3644+
set of columns and renames them using the provided suffixes. Columns that
3645+
are not in the intersection are kept with their original names.
3646+
3647+
Args:
3648+
left_columns (pandas.Index):
3649+
The column labels from the left DataFrame.
3650+
right_columns (pandas.Index):
3651+
The column labels from the right DataFrame.
3652+
lsuffix (str):
3653+
The suffix to apply to overlapping column names from the left side.
3654+
rsuffix (str):
3655+
The suffix to apply to overlapping column names from the right side.
3656+
extra_col (typing.Optional[str]):
3657+
An optional column name to prepend to the final list of columns.
3658+
This argument is used specifically to match the behavior of a
3659+
pandas join. When a join key (i.e., the 'on' column) exists
3660+
in both the left and right DataFrames, pandas creates two versions
3661+
of that column: one copy keeps its original name and is placed as
3662+
the first column, while the other instances receive the normal
3663+
suffix. Passing the join key's name here replicates that behavior.
3664+
3665+
Returns:
3666+
DataFrame:
3667+
A new DataFrame with the columns renamed to resolve overlaps.
3668+
"""
3669+
combined_df = self.copy()
36413670
col_intersection = left_columns.intersection(right_columns)
36423671
final_col_names = [] if extra_col is None else [extra_col]
36433672
for col_name in left_columns:
@@ -3651,8 +3680,8 @@ def _add_join_suffix(
36513680
final_col_names.append(f"{col_name}{rsuffix}")
36523681
else:
36533682
final_col_names.append(col_name)
3654-
self.columns = pandas.Index(final_col_names)
3655-
return self
3683+
combined_df.columns = pandas.Index(final_col_names)
3684+
return combined_df
36563685

36573686
@validations.requires_ordering()
36583687
def rolling(

0 commit comments

Comments
 (0)