python 中 pandas 的 iterrows、itertuples、iteritems 的理解及遍历,用‘列名’和‘索引’方式访问,及速度比拼

一、iterrows 方法遍历 DataFrame

二、itertuples() 方式遍历DataFrame:

三、iteritems 旧方法和新方法(items)遍历 DataFrame

四、速度 比拼 下述代码可单独运行:都是运行30000行或列访问其中一个元素。

for df2index, df2_row in df2.iterrows():

其中 df2index 为行索引,取值从 0 到 len(df2)-1

在df2_row 这个row中,可以使用df2_row[0]、df2_row[1]、df2_row[2]、df2_row[3]

实际为row中每列的值

后为当前语句运行结果

import numpy as np  # required by np.arange below; the original snippet never imported it
import pandas as pd

inp = [{'c1': 10, 'c2': 100}, {'c1': 11, 'c2': 110}, {'c1': 12, 'c2': 123}]
df = pd.DataFrame(inp)

t4 = np.arange(12)
print('t4:' + str(t4))
# t4:[ 0  1  2  3  4  5  6  7  8  9 10 11]
print('t4.reshape(3,4):' + str(t4.reshape(3, 4)))
# t4.reshape(3,4):[[ 0  1  2  3]
#  [ 4  5  6  7]
#  [ 8  9 10 11]]

# df2 is built without column names, so its columns are labelled 0..3.
df2 = pd.DataFrame(t4.reshape(3, 4))
print('type(df):' + str(type(df)))
print(df)
print('type(t4):' + str(type(t4)))
print('t4:' + str(t4))
print('df2:' + df2.to_string())
# df2:   0  1   2   3
# 0  0  1   2   3
# 1  4  5   6   7
# 2  8  9  10  11

# iterrows() yields one (row label, row as a Series) pair per row.
for df2index, df2_row in df2.iterrows():
    print('df2index:' + str(df2index) + '#len(df2_row):' + str(len(df2_row)))
    # df2index:0#len(df2_row):4
    print('type(df2_row):' + str(type(df2_row)))
    # type(df2_row):<class 'pandas.core.series.Series'>
    print('df2_row:' + df2_row.to_string())
    # df2_row:0    0
    # 1    1
    # 2    2
    # 3    3

    # Uses the row label as a column label. This only works here because
    # the row labels (0..2) are a subset of the column labels (0..3).
    print('df2_row[' + str(df2index) + ']:' + str(df2_row[df2index]))
    # df2_row[0]:0

    # Access every value of the row through its column label 0..3.
    for i in range(len(df2_row)):
        print('df2_row[' + str(i) + ']:' + str(df2_row[i]))
        # df2_row[0]:0
        # df2_row[1]:1
        # df2_row[2]:2
        # df2_row[3]:3

完整运行结果:

t4:[ 0  1  2  3  4  5  6  7  8  9 10 11]
t4.reshape(3,4):[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
df2:   0  1   2   3
0  0  1   2   3
1  4  5   6   7
2  8  9  10  11
df2index:0#len(df2_row):4
df2_row:0    0
1    1
2    2
3    3
df2_row[0]:0
df2_row[0]:0
df2_row[1]:1
df2_row[2]:2
df2_row[3]:3
df2index:1#len(df2_row):4
df2_row:0    4
1    5
2    6
3    7
df2_row[1]:5
df2_row[0]:4
df2_row[1]:5
df2_row[2]:6
df2_row[3]:7
df2index:2#len(df2_row):4
df2_row:0     8
1     9
2    10
3    11
df2_row[2]:10
df2_row[0]:8
df2_row[1]:9
df2_row[2]:10
df2_row[3]:11

Process finished with exit code 0

因我没有设置列名(默认列名是整数 0、1、2、3,不是合法的属性名),itertuples 会自动改用“下划线加位置序号”的名字 _1、_2、_3、_4(位置 0 是 Index),用这些名字同样可以访问

# itertuples() yields one namedtuple per row; its first field is the row
# label ("Index"), followed by one field per column.
for row_tuple in df2.itertuples():
    print(f"df2index:{row_tuple!s}#type(df2_itertuples_index):{type(row_tuple)!s}")
    print(f"df2_row:{row_tuple!s}")
    # df2's column labels are the integers 0..3, which are not valid field
    # names, so the fields show up as _1.._4 (see the printed namedtuple).
    # Being a namedtuple, the row can also be read by position
    # (row_tuple[1] is the same value as row_tuple._1).
    print(row_tuple._1, row_tuple._2, row_tuple._3, row_tuple._4)
df2index:Pandas(Index=0, _1=0, _2=1, _3=2, _4=3)#type(df2_itertuples_index):
df2_row:Pandas(Index=0, _1=0, _2=1, _3=2, _4=3)
0 1 2 3
df2index:Pandas(Index=1, _1=4, _2=5, _3=6, _4=7)#type(df2_itertuples_index):
df2_row:Pandas(Index=1, _1=4, _2=5, _3=6, _4=7)
4 5 6 7
df2index:Pandas(Index=2, _1=8, _2=9, _3=10, _4=11)#type(df2_itertuples_index):
df2_row:Pandas(Index=2, _1=8, _2=9, _3=10, _4=11)
8 9 10 11
#三、 iteritems 老的方法 遍历 DataFrame
# NOTE: DataFrame.iteritems() is the legacy spelling of items(). It emits a
# FutureWarning on pandas 1.5 and no longer exists on pandas 2.0+, where this
# loop raises AttributeError. It is kept here only to show the old API.
# Each iteration yields (column label, column as a Series).
for col_label, col_values in df2.iteritems():
    print(f"df2_iteritems_index:{col_label!s}#len(df2_iteritems_row):{len(col_values)!s}")
    # e.g. df2_iteritems_index:0#len(df2_iteritems_row):3
    print(f"type(df2_iteritems_row):{type(col_values)!s}")
    # Read the three values of the column through their row labels 0, 1, 2.
    print(
        f"df2_iteritems_row[0]:{col_values[0]!s}"
        f"#df2_iteritems_row[1]{col_values[1]!s}"
        f"#str(df2_iteritems_row[2]){col_values[2]!s}"
    )
df2_iteritems_row[0]:1#df2_iteritems_row[1]5#str(df2_iteritems_row[2])9
df2_iteritems_index:2#len(df2_iteritems_row):3
type(df2_iteritems_row):
df2_iteritems_row[0]:2#df2_iteritems_row[1]6#str(df2_iteritems_row[2])10
df2_iteritems_index:3#len(df2_iteritems_row):3
type(df2_iteritems_row):
df2_iteritems_row[0]:3#df2_iteritems_row[1]7#str(df2_iteritems_row[2])11
D:\develop\pycharmCode\exericse.py:86: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.  告诉我们这个方法以后不要用了,要用 .items() 这个方法(pandas 2.0 起 iteritems 已被移除,继续调用会抛出 AttributeError)
  for df2_iteritems_index, df2_iteritems_row in df2.iteritems():
-----------------------------------------------------------------------------------
#三、 iteritems 新的方法遍历 DataFrame,无报错提示:
# items() is the supported replacement for iteritems() and raises no warning.
# Each iteration yields (column label, column as a Series).
for col_label, col_values in df2.items():
    print(f"df2_iteritems_index:{col_label!s}#len(df2_iteritems_row):{len(col_values)!s}")
    print(f"type(df2_iteritems_row):{type(col_values)!s}")
    # Read the three values of the column through their row labels 0, 1, 2.
    print(
        f"df2_iteritems_row[0]:{col_values[0]!s}"
        f"#df2_iteritems_row[1]{col_values[1]!s}"
        f"#str(df2_iteritems_row[2]){col_values[2]!s}"
    )
    # Output for the 3x4 frame holding 0..11, one group per column:
    # df2_iteritems_index:0#len(df2_iteritems_row):3
    # type(df2_iteritems_row):<class 'pandas.core.series.Series'>
    # df2_iteritems_row[0]:0#df2_iteritems_row[1]4#str(df2_iteritems_row[2])8
    # df2_iteritems_index:1#len(df2_iteritems_row):3
    # type(df2_iteritems_row):<class 'pandas.core.series.Series'>
    # df2_iteritems_row[0]:1#df2_iteritems_row[1]5#str(df2_iteritems_row[2])9
    # df2_iteritems_index:2#len(df2_iteritems_row):3
    # type(df2_iteritems_row):<class 'pandas.core.series.Series'>
    # df2_iteritems_row[0]:2#df2_iteritems_row[1]6#str(df2_iteritems_row[2])10
    # df2_iteritems_index:3#len(df2_iteritems_row):3
    # type(df2_iteritems_row):<class 'pandas.core.series.Series'>
    # df2_iteritems_row[0]:3#df2_iteritems_row[1]7#str(df2_iteritems_row[2])11

完整代码:

import pandas as pd
import numpy as np

# A 3x3 frame of string labels, indexed ONE / TWO / THREE.
df = pd.DataFrame(
    {
        'A': ['A1', 'A2', 'A3'],
        'B': ['B1', 'B2', 'B3'],
        'C': ['C1', 'C2', 'C3'],
    },
    index=['ONE', 'TWO', 'THREE'],
)
# s shares only the labels TWO and THREE with df's index.
s = pd.Series(['X2', 'X3', 'X4'], index=['TWO', 'THREE', 'FOUR'], name='X')

# insert(loc, column, value): loc is the 0-based position of the new column
# (0 = first, 2 = third). The scalar 0 is assigned to the whole column.
df.insert(loc=2, column='D', value=0)
print(df)
#         A   B  D   C
# ONE    A1  B1  0  C1
# TWO    A2  B2  0  C2
# THREE  A3  B3  0  C3

# loc == current number of columns appends the new column at the end.
# The Series is aligned on df's index: ONE has no match and becomes NaN,
# and the value stored under FOUR is not used.
df.insert(df.shape[1], 'E', s)
print(df)
#         A   B  D   C    E
# ONE    A1  B1  0  C1  NaN
# TWO    A2  B2  0  C2   X2
# THREE  A3  B3  0  C3   X3
------------------------------
import numpy as np
import pandas as pd

inp = [{'c1': 10, 'c2': 100}, {'c1': 11, 'c2': 110}, {'c1': 12, 'c2': 123}]
df = pd.DataFrame(inp)

t4 = np.arange(12)
print('t4:' + str(t4))
# t4:[ 0  1  2  3  4  5  6  7  8  9 10 11]
print('t4.reshape(3,4):' + str(t4.reshape(3, 4)))
# t4.reshape(3,4):[[ 0  1  2  3]
#  [ 4  5  6  7]
#  [ 8  9 10 11]]

# df2 is built without column names, so its columns are labelled 0..3.
df2 = pd.DataFrame(t4.reshape(3, 4))
print('type(df):'+str(type(df)))
print(df)
print('type(t4):'+str(type(t4)))
print('t4:'+str(t4))
print('df2:'+df2.to_string())
# df2:   0  1   2   3
# 0  0  1   2   3
# 1  4  5   6   7
# 2  8  9  10  11

# ---------------------------------
# 1. iterrows(): one (row label, row as a Series) pair per row.
for df2index, df2_row in df2.iterrows():
    print('df2index:'+str(df2index)+'#len(df2_row):'+str(len(df2_row)))
    # df2index:0#len(df2_row):4
    print('type(df2_row):'+str(type(df2_row)))
    # type(df2_row):<class 'pandas.core.series.Series'>

    # Uses the row label as a column label. This only works here because
    # the row labels (0..2) are a subset of the column labels (0..3).
    print('df2_row['+str(df2index)+']:'+str(df2_row[df2index]))
    # df2_row[0]:0
    for i in range(len(df2_row)):
        print('df2_row['+str(i)+']:'+str(df2_row[i]))
        # df2_row[0]:0
        # df2_row[1]:1
        # df2_row[2]:2
        # df2_row[3]:3

# ---------------------------------
# 2. itertuples(): one namedtuple per row, with Index first and then the
# columns. Integer column labels are not valid field names, so the fields
# are exposed as _1.._4. They can also be read by position (row[1], ...).
for df2_itertuples_row in df2.itertuples():
    print('df2index:'+str(df2_itertuples_row)+'#type(df2_itertuples_index):'+str(type(df2_itertuples_row)))
    print('df2_row:'+str(df2_itertuples_row))
    print(getattr(df2_itertuples_row, '_1'), getattr(df2_itertuples_row, '_2'),getattr(df2_itertuples_row, '_3'),getattr(df2_itertuples_row, '_4'))
# df2_row:Pandas(Index=0, _1=0, _2=1, _3=2, _4=3)
# 0 1 2 3
# df2_row:Pandas(Index=1, _1=4, _2=5, _3=6, _4=7)
# 4 5 6 7
# df2_row:Pandas(Index=2, _1=8, _2=9, _3=10, _4=11)
# 8 9 10 11

# ----------------------------------------------
# 3a. iteritems(): the legacy spelling of items(). pandas 1.5 prints
# "FutureWarning: iteritems is deprecated and will be removed in a future
# version. Use .items instead." and pandas 2.0+ no longer has the method,
# so the demonstration is skipped there instead of crashing the script.
if hasattr(df2, 'iteritems'):
    for df2_iteritems_index, df2_iteritems_row in df2.iteritems():
        print('df2_iteritems_index:'+str(df2_iteritems_index)+'#len(df2_iteritems_row):'+str(len(df2_iteritems_row)))
        print('type(df2_iteritems_row):'+str(type(df2_iteritems_row)))
        print('df2_iteritems_row[0]:'+str(df2_iteritems_row[0])+'#df2_iteritems_row[1]'+str(df2_iteritems_row[1])+'#str(df2_iteritems_row[2])'+str(df2_iteritems_row[2]))

# -----------------------------------------------------------------------------------
# 3b. items(): the supported replacement; one (column label, column as a
# Series) pair per column, with no warning.
for df2_iteritems_index, df2_iteritems_row in df2.items():
    print('df2_iteritems_index:'+str(df2_iteritems_index)+'#len(df2_iteritems_row):'+str(len(df2_iteritems_row)))
    # df2_iteritems_index:0#len(df2_iteritems_row):3
    print('type(df2_iteritems_row):'+str(type(df2_iteritems_row)))
    # type(df2_iteritems_row):<class 'pandas.core.series.Series'>
    # Read the three values of the column through their row labels 0, 1, 2.
    print('df2_iteritems_row[0]:'+str(df2_iteritems_row[0])+'#df2_iteritems_row[1]'+str(df2_iteritems_row[1])+'#str(df2_iteritems_row[2])'+str(df2_iteritems_row[2]))
    # df2_iteritems_row[0]:0#df2_iteritems_row[1]4#str(df2_iteritems_row[2])8
    # df2_iteritems_row[0]:1#df2_iteritems_row[1]5#str(df2_iteritems_row[2])9
    # df2_iteritems_row[0]:2#df2_iteritems_row[1]6#str(df2_iteritems_row[2])10
    # df2_iteritems_row[0]:3#df2_iteritems_row[1]7#str(df2_iteritems_row[2])11

代码:

import pandas as pd
import numpy as np

# A 3x3 frame of string labels, indexed ONE / TWO / THREE.
df = pd.DataFrame(
    {
        'A': ['A1', 'A2', 'A3'],
        'B': ['B1', 'B2', 'B3'],
        'C': ['C1', 'C2', 'C3'],
    },
    index=['ONE', 'TWO', 'THREE'],
)
# s shares only the labels TWO and THREE with df's index.
s = pd.Series(['X2', 'X3', 'X4'], index=['TWO', 'THREE', 'FOUR'], name='X')

# insert(loc, column, value): loc is the 0-based position of the new column
# (0 = first, 2 = third). The scalar 0 is assigned to the whole column.
df.insert(loc=2, column='D', value=0)
print(df)
#         A   B  D   C
# ONE    A1  B1  0  C1
# TWO    A2  B2  0  C2
# THREE  A3  B3  0  C3

# loc == current number of columns appends the new column at the end.
# The Series is aligned on df's index: ONE has no match and becomes NaN,
# and the value stored under FOUR is not used.
df.insert(df.shape[1], 'E', s)
print(df)
#         A   B  D   C    E
# ONE    A1  B1  0  C1  NaN
# TWO    A2  B2  0  C2   X2
# THREE  A3  B3  0  C3   X3
------------------------------
import numpy as np
import pandas as pd
from datetime import datetime
from time import perf_counter, time

# Number of rows (or, for the items() run, columns) every benchmark visits.
N_ROWS = 30000


def first_values_iterrows(frame: pd.DataFrame) -> list:
    """Return the value under column label 0 of every row, using iterrows().

    The original loop also read ``row[row_label]``, which raises KeyError as
    soon as the row label exceeds the largest column label (row 4 of a
    30000x4 frame), so only the fixed label 0 is read here.
    """
    return [row[0] for _, row in frame.iterrows()]


def first_values_itertuples(frame: pd.DataFrame) -> list:
    """Return the first column's value of every row, using itertuples().

    Position 0 of each namedtuple is the row label, position 1 the first
    column; for integer column labels that is the field shown as ``_1``.
    """
    return [row[1] for row in frame.itertuples()]


def first_values_items(frame: pd.DataFrame) -> list:
    """Return the value under row label 0 of every column, using items().

    items() replaces DataFrame.iteritems(), which is deprecated in pandas 1.5
    and removed in pandas 2.0, so the legacy method is not benchmarked.
    """
    return [column[0] for _, column in frame.items()]


def report(label: str, visit, frame: pd.DataFrame) -> float:
    """Run ``visit(frame)`` once, print ``label`` + elapsed seconds, return it.

    Nothing is printed inside the timed region, so each method is measured
    on the same amount of work: one element access per row or column.
    """
    started = perf_counter()
    visit(frame)
    elapsed = perf_counter() - started
    print(label + str(elapsed))
    return elapsed


if __name__ == "__main__":
    t4 = np.arange(N_ROWS * 4)
    print('t4:' + str(t4))

    # Row-wise methods walk N_ROWS rows of 4 columns.
    df2 = pd.DataFrame(t4.reshape(N_ROWS, 4))
    # Show only the head: to_string() on the full frame prints 30000 lines.
    print('df2:' + df2.head().to_string())

    report('iterrows方法耗时:', first_values_iterrows, df2)
    # Label reproduced byte-for-byte from the original script.
    report('iitertuples方法耗时:', first_values_itertuples, df2)

    # items() walks columns, so give it N_ROWS columns to visit the same
    # number of items as the row-wise runs.
    df2 = pd.DataFrame(t4.reshape(4, N_ROWS))
    report('items方法耗时:', first_values_items, df2)

items方法耗时:3.169365882873535

iterrows方法耗时:2.4688100814819336

iitertuples方法耗时:0.17905783653259277 (itertuples 返回的是 namedtuple,是只读的,不能通过它修改 DataFrame 里的值)

Original: https://blog.csdn.net/handsome1234/article/details/127353385
Author: handsome1234
Title: python 中pandask的 iterrows、itertuples、iteritems的理解及遍历,用‘列名’和‘索引’方式访问,及速度比拼

原创文章受到原创版权保护。转载请注明出处:https://www.johngo689.com/675377/

转载文章受原作者版权保护。转载请注明原作者出处!

(0)

大家都在看

亲爱的 Coder【最近整理,可免费获取】👉 最新必读书单  | 👏 面试题下载  | 🌎 免费的AI知识星球