合併DataFrame - 1/2¶
In [1]:
Copied!
import numpy as np
import pandas as pd
import numpy as np
import pandas as pd
In [2]:
Copied!
df1 = pd.DataFrame([['a', 1], ['b', 2]], columns=['col1', 'col2'], index=['row1', 'row2'])
df1 = pd.DataFrame([['a', 1], ['b', 2]], columns=['col1', 'col2'], index=['row1', 'row2'])
In [4]:
Copied!
df2 = pd.DataFrame([['c', 3], ['d', 4]], columns=['col1', 'col2'], index=['row1', 'row2'])
df2 = pd.DataFrame([['c', 3], ['d', 4]], columns=['col1', 'col2'], index=['row1', 'row2'])
In [5]:
Copied!
df3 = pd.DataFrame([['e', 1, 100], ['f', 2, 200]], columns=['col1', 'col2', 'col3'], index=['row1', 'row2'])
df3 = pd.DataFrame([['e', 1, 100], ['f', 2, 200]], columns=['col1', 'col2', 'col3'], index=['row1', 'row2'])
In [14]:
Copied!
df4 = pd.DataFrame([['g', 3, 300], ['h', 4, 400]], columns=['col1', 'col2', 'col3'])
df4 = pd.DataFrame([['g', 3, 300], ['h', 4, 400]], columns=['col1', 'col2', 'col3'])
In [3]:
Copied!
df1
df1
Out[3]:
col1 | col2 | |
---|---|---|
row1 | a | 1 |
row2 | b | 2 |
In [6]:
Copied!
df2
df2
Out[6]:
col1 | col2 | |
---|---|---|
row1 | c | 3 |
row2 | d | 4 |
In [7]:
Copied!
df3
df3
Out[7]:
col1 | col2 | col3 | |
---|---|---|---|
row1 | e | 1 | 100 |
row2 | f | 2 | 200 |
In [16]:
Copied!
df4
df4
Out[16]:
col1 | col2 | col3 | |
---|---|---|---|
0 | g | 3 | 300 |
1 | h | 4 | 400 |
.concat()¶
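沿著特定軸向合併DataFrame:預設axis=0,會把資料列上下堆疊;axis=1則會依照index左右並排。欄位(或index)對不上的位置會補上NaN,可以用join="inner"只保留共同的部分,或用ignore_index=True重新編號index。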
In [8]:
Copied!
pd.concat([df1, df2])
pd.concat([df1, df2])
Out[8]:
col1 | col2 | |
---|---|---|
row1 | a | 1 |
row2 | b | 2 |
row1 | c | 3 |
row2 | d | 4 |
In [9]:
Copied!
pd.concat([df1, df2], ignore_index=True)
pd.concat([df1, df2], ignore_index=True)
Out[9]:
col1 | col2 | |
---|---|---|
0 | a | 1 |
1 | b | 2 |
2 | c | 3 |
3 | d | 4 |
In [10]:
Copied!
pd.concat([df1, df3])
pd.concat([df1, df3])
Out[10]:
col1 | col2 | col3 | |
---|---|---|---|
row1 | a | 1 | NaN |
row2 | b | 2 | NaN |
row1 | e | 1 | 100.0 |
row2 | f | 2 | 200.0 |
In [11]:
Copied!
pd.concat([df1, df3], join="inner")
pd.concat([df1, df3], join="inner")
Out[11]:
col1 | col2 | |
---|---|---|
row1 | a | 1 |
row2 | b | 2 |
row1 | e | 1 |
row2 | f | 2 |
In [13]:
Copied!
pd.concat([df1, df2], axis=1)
pd.concat([df1, df2], axis=1)
Out[13]:
col1 | col2 | col1 | col2 | |
---|---|---|---|---|
row1 | a | 1 | c | 3 |
row2 | b | 2 | d | 4 |
In [15]:
Copied!
pd.concat([df1, df4], axis=1)
pd.concat([df1, df4], axis=1)
Out[15]:
col1 | col2 | col1 | col2 | col3 | |
---|---|---|---|---|---|
row1 | a | 1.0 | NaN | NaN | NaN |
row2 | b | 2.0 | NaN | NaN | NaN |
0 | NaN | NaN | g | 3.0 | 300.0 |
1 | NaN | NaN | h | 4.0 | 400.0 |
.append()¶
將一個DataFrame附加到另一個之後。
DataFrame.append()已在pandas 1.4被標示為棄用(deprecated),並在pandas 2.0正式移除;請改用pd.concat([df1, df2]),不要再用.append()。以下範例僅供對照舊版寫法。
In [17]:
Copied!
df1.append(df2)
df1.append(df2)
<ipython-input-17-8ab0723181fb>:1: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. df1.append(df2)
Out[17]:
col1 | col2 | |
---|---|---|
row1 | a | 1 |
row2 | b | 2 |
row1 | c | 3 |
row2 | d | 4 |
In [21]:
Copied!
df1.append(df2, verify_integrity=True)
df1.append(df2, verify_integrity=True)
<ipython-input-21-f4f92554bbfd>:1: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. df1.append(df2, verify_integrity=True)
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) <ipython-input-21-f4f92554bbfd> in <cell line: 1>() ----> 1 df1.append(df2, verify_integrity=True) /usr/local/lib/python3.10/dist-packages/pandas/core/frame.py in append(self, other, ignore_index, verify_integrity, sort) 9766 ) 9767 -> 9768 return self._append(other, ignore_index, verify_integrity, sort) 9769 9770 def _append( /usr/local/lib/python3.10/dist-packages/pandas/core/frame.py in _append(self, other, ignore_index, verify_integrity, sort) 9806 to_concat = [self, other] 9807 -> 9808 result = concat( 9809 to_concat, 9810 ignore_index=ignore_index, /usr/local/lib/python3.10/dist-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs) 329 stacklevel=find_stack_level(), 330 ) --> 331 return func(*args, **kwargs) 332 333 # error: "Callable[[VarArg(Any), KwArg(Any)], Any]" has no /usr/local/lib/python3.10/dist-packages/pandas/core/reshape/concat.py in concat(objs, axis, join, ignore_index, keys, levels, names, verify_integrity, sort, copy) 366 1 3 4 367 """ --> 368 op = _Concatenator( 369 objs, 370 axis=axis, /usr/local/lib/python3.10/dist-packages/pandas/core/reshape/concat.py in __init__(self, objs, axis, join, keys, levels, names, ignore_index, verify_integrity, copy, sort) 561 self.copy = copy 562 --> 563 self.new_axes = self._get_new_axes() 564 565 def get_result(self): /usr/local/lib/python3.10/dist-packages/pandas/core/reshape/concat.py in _get_new_axes(self) 631 def _get_new_axes(self) -> list[Index]: 632 ndim = self._get_result_dim() --> 633 return [ 634 self._get_concat_axis if i == self.bm_axis else self._get_comb_axis(i) 635 for i in range(ndim) /usr/local/lib/python3.10/dist-packages/pandas/core/reshape/concat.py in <listcomp>(.0) 632 ndim = self._get_result_dim() 633 return [ --> 634 self._get_concat_axis if i == self.bm_axis else self._get_comb_axis(i) 635 for i in range(ndim) 636 ] 
/usr/local/lib/python3.10/dist-packages/pandas/_libs/properties.pyx in pandas._libs.properties.CachedProperty.__get__() /usr/local/lib/python3.10/dist-packages/pandas/core/reshape/concat.py in _get_concat_axis(self) 695 ) 696 --> 697 self._maybe_check_integrity(concat_axis) 698 699 return concat_axis /usr/local/lib/python3.10/dist-packages/pandas/core/reshape/concat.py in _maybe_check_integrity(self, concat_index) 703 if not concat_index.is_unique: 704 overlap = concat_index[concat_index.duplicated()].unique() --> 705 raise ValueError(f"Indexes have overlapping values: {overlap}") 706 707 ValueError: Indexes have overlapping values: Index(['row1', 'row2'], dtype='object')
In [18]:
Copied!
df1.append(df2, ignore_index=True)
df1.append(df2, ignore_index=True)
<ipython-input-18-992fb3fde170>:1: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. df1.append(df2, ignore_index=True)
Out[18]:
col1 | col2 | |
---|---|---|
0 | a | 1 |
1 | b | 2 |
2 | c | 3 |
3 | d | 4 |
In [19]:
Copied!
df1.append(df3)
df1.append(df3)
<ipython-input-19-8d783e85260f>:1: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. df1.append(df3)
Out[19]:
col1 | col2 | col3 | |
---|---|---|---|
row1 | a | 1 | NaN |
row2 | b | 2 | NaN |
row1 | e | 1 | 100.0 |
row2 | f | 2 | 200.0 |
In [20]:
Copied!
df1.append(df4)
df1.append(df4)
<ipython-input-20-2eeefe8b6e74>:1: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. df1.append(df4)
Out[20]:
col1 | col2 | col3 | |
---|---|---|---|
row1 | a | 1 | NaN |
row2 | b | 2 | NaN |
0 | g | 3 | 300.0 |
1 | h | 4 | 400.0 |
.join()¶
會根據index把DataFrame左右合併在一起(預設為left join,以呼叫者的index為準)。如果兩個DataFrame有相同的欄位名稱,必須指定lsuffix/rsuffix(或先把欄位重新命名),否則會出現ValueError。
In [22]:
Copied!
df1
df1
Out[22]:
col1 | col2 | |
---|---|---|
row1 | a | 1 |
row2 | b | 2 |
In [23]:
Copied!
df2
df2
Out[23]:
col1 | col2 | |
---|---|---|
row1 | c | 3 |
row2 | d | 4 |
In [24]:
Copied!
df1.join(df2)
df1.join(df2)
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) <ipython-input-24-f8069890b6d0> in <cell line: 1>() ----> 1 df1.join(df2) /usr/local/lib/python3.10/dist-packages/pandas/core/frame.py in join(self, other, on, how, lsuffix, rsuffix, sort, validate) 9977 5 K1 A5 B1 9978 """ -> 9979 return self._join_compat( 9980 other, 9981 on=on, /usr/local/lib/python3.10/dist-packages/pandas/core/frame.py in _join_compat(self, other, on, how, lsuffix, rsuffix, sort, validate) 10016 validate=validate, 10017 ) > 10018 return merge( 10019 self, 10020 other, /usr/local/lib/python3.10/dist-packages/pandas/core/reshape/merge.py in merge(left, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy, indicator, validate) 122 validate=validate, 123 ) --> 124 return op.get_result(copy=copy) 125 126 /usr/local/lib/python3.10/dist-packages/pandas/core/reshape/merge.py in get_result(self, copy) 773 join_index, left_indexer, right_indexer = self._get_join_info() 774 --> 775 result = self._reindex_and_concat( 776 join_index, left_indexer, right_indexer, copy=copy 777 ) /usr/local/lib/python3.10/dist-packages/pandas/core/reshape/merge.py in _reindex_and_concat(self, join_index, left_indexer, right_indexer, copy) 727 right = self.right[:] 728 --> 729 llabels, rlabels = _items_overlap_with_suffix( 730 self.left._info_axis, self.right._info_axis, self.suffixes 731 ) /usr/local/lib/python3.10/dist-packages/pandas/core/reshape/merge.py in _items_overlap_with_suffix(left, right, suffixes) 2456 2457 if not lsuffix and not rsuffix: -> 2458 raise ValueError(f"columns overlap but no suffix specified: {to_rename}") 2459 2460 def renamer(x, suffix): ValueError: columns overlap but no suffix specified: Index(['col1', 'col2'], dtype='object')
In [25]:
Copied!
df1.join(df2, lsuffix='_df1', rsuffix='_df2')
df1.join(df2, lsuffix='_df1', rsuffix='_df2')
Out[25]:
col1_df1 | col2_df1 | col1_df2 | col2_df2 | |
---|---|---|---|---|
row1 | a | 1 | c | 3 |
row2 | b | 2 | d | 4 |
In [26]:
Copied!
df2.columns = ['col3', 'col4']
df2.columns = ['col3', 'col4']
In [27]:
Copied!
df1.join(df2)
df1.join(df2)
Out[27]:
col1 | col2 | col3 | col4 | |
---|---|---|---|---|
row1 | a | 1 | c | 3 |
row2 | b | 2 | d | 4 |