三、iteritems 旧方法和 items 新方法遍历 DataFrame
四、速度 比拼 下述代码可单独运行:都是运行30000行或列访问其中一个元素。
for df2index, df2_row in df2.iterrows():
其中 df2index 为行索引,取值从 0 到 len(df2)-1
在df2_row 这个row中,可以使用df2_row[0]、df2_row[1]、df2_row[2]、df2_row[3]
实际为row中每列的值
后为当前语句运行结果
import numpy as np
import pandas as pd

# A small frame built from a list of dicts; the dict keys become the column names.
inp = [{'c1': 10, 'c2': 100}, {'c1': 11, 'c2': 110}, {'c1': 12, 'c2': 123}]
df = pd.DataFrame(inp)

t4 = np.arange(12)
print('t4:' + str(t4))
# t4:[ 0  1  2  3  4  5  6  7  8  9 10 11]
print('t4.reshape(3,4):' + str(t4.reshape(3, 4)))
# t4.reshape(3,4):[[ 0  1  2  3]
#  [ 4  5  6  7]
#  [ 8  9 10 11]]

# No column names are passed, so df2 gets the default integer labels 0..3.
df2 = pd.DataFrame(t4.reshape(3, 4))
print('type(df):'+str(type(df)))
print(df)
print('type(t4):'+str(type(t4)))
print('t4:'+str(t4))
print('df2:'+df2.to_string())
# df2:   0  1   2   3
# 0  0  1   2   3
# 1  4  5   6   7
# 2  8  9  10  11

# 1. iterrows(): every step yields (row label, row as a Series indexed by column label).
for df2index, df2_row in df2.iterrows():
    print('df2index:'+str(df2index)+'#len(df2_row):'+str(len(df2_row)))
    # df2index:0#len(df2_row):4
    print('type(df2_row):'+str(type(df2_row)))
    # type(df2_row):<class 'pandas.core.series.Series'>
    print('df2_row:'+df2_row.to_string())
    # df2_row:0    0
    # 1    1
    # 2    2
    # 3    3
    # Lookup by label. Using the row label here only works because the row
    # labels 0..2 happen to exist among the column labels 0..3 as well.
    print('df2_row['+str(df2index)+']:'+str(df2_row[df2index]))
    # df2_row[0]:0
    # Read every cell of the current row through its integer column label.
    for i in range(len(df2_row)):
        print('df2_row['+str(i)+']:'+str(df2_row[i]))
        # df2_row[0]:0
        # df2_row[1]:1
        # df2_row[2]:2
        # df2_row[3]:3
完整运行结果:
t4:[ 0 1 2 3 4 5 6 7 8 9 10 11]
t4.reshape(3,4):[[ 0 1 2 3]
[ 4 5 6 7]
[ 8 9 10 11]]
df2: 0 1 2 3
0 0 1 2 3
1 4 5 6 7
2 8 9 10 11
df2index:0#len(df2_row):4
df2_row:0 0
1 1
2 2
3 3
df2_row[0]:0
df2_row[0]:0
df2_row[1]:1
df2_row[2]:2
df2_row[3]:3
df2index:1#len(df2_row):4
df2_row:0 4
1 5
2 6
3 7
df2_row[1]:5
df2_row[0]:4
df2_row[1]:5
df2_row[2]:6
df2_row[3]:7
df2index:2#len(df2_row):4
df2_row:0 8
1 9
2 10
3 11
df2_row[2]:10
df2_row[0]:8
df2_row[1]:9
df2_row[2]:10
df2_row[3]:11
Process finished with exit code 0
因为我没有设置列名,默认的整数列名不是合法的 Python 标识符,itertuples 会把各字段重命名为"下划线加位置序号"(_1、_2……,位置 0 留给 Index),用这些名字同样可以访问
# 2. itertuples(): every row comes back as a namedtuple called "Pandas".
for df2_itertuples_row in df2.itertuples():
    print('df2index:'+str(df2_itertuples_row)+'#type(df2_itertuples_index):'+str(type(df2_itertuples_row)))
    print('df2_row:'+str(df2_itertuples_row))
    # df2_row:Pandas(Index=0, _1=0, _2=1, _3=2, _4=3)
    # The integer column labels are not valid identifiers, so the fields are
    # renamed by position (_1, _2, ...). A field can be read by name, as below,
    # or by position, e.g. df2_itertuples_row[1] for the first column.
    print(df2_itertuples_row._1, df2_itertuples_row._2, df2_itertuples_row._3, df2_itertuples_row._4)
    # 0 1 2 3
df2index:Pandas(Index=0, _1=0, _2=1, _3=2, _4=3)#type(df2_itertuples_index):
df2_row:Pandas(Index=0, _1=0, _2=1, _3=2, _4=3)
0 1 2 3
df2index:Pandas(Index=1, _1=4, _2=5, _3=6, _4=7)#type(df2_itertuples_index):
df2_row:Pandas(Index=1, _1=4, _2=5, _3=6, _4=7)
4 5 6 7
df2index:Pandas(Index=2, _1=8, _2=9, _3=10, _4=11)#type(df2_itertuples_index):
df2_row:Pandas(Index=2, _1=8, _2=9, _3=10, _4=11)
8 9 10 11
#三、 iteritems 老的方法 遍历 DataFrame
# Deprecated spelling, kept on purpose to show the warning: iteritems() emits a
# FutureWarning on pandas 1.5 and no longer exists in pandas 2.0 and later
# (AttributeError there). Use items() in new code.
# Each step yields (column label, column as a Series indexed by row label).
for df2_iteritems_index, df2_iteritems_row in df2.iteritems():
    print('df2_iteritems_index:'+str(df2_iteritems_index)+'#len(df2_iteritems_row):'+str(len(df2_iteritems_row)))
    # df2_iteritems_index:0#len(df2_iteritems_row):3
    print('type(df2_iteritems_row):'+str(type(df2_iteritems_row)))
    # type(df2_iteritems_row):<class 'pandas.core.series.Series'>
    # Read the three cells of the column through their row labels 0..2.
    print('df2_iteritems_row[0]:'+str(df2_iteritems_row[0])+'#df2_iteritems_row[1]'+str(df2_iteritems_row[1])+'#str(df2_iteritems_row[2])'+str(df2_iteritems_row[2]))
    # df2_iteritems_row[0]:0#df2_iteritems_row[1]4#str(df2_iteritems_row[2])8
df2_iteritems_row[0]:1#df2_iteritems_row[1]5#str(df2_iteritems_row[2])9
df2_iteritems_index:2#len(df2_iteritems_row):3
type(df2_iteritems_row):
df2_iteritems_row[0]:2#df2_iteritems_row[1]6#str(df2_iteritems_row[2])10
df2_iteritems_index:3#len(df2_iteritems_row):3
type(df2_iteritems_row):
df2_iteritems_row[0]:3#df2_iteritems_row[1]7#str(df2_iteritems_row[2])11
D:\develop\pycharmCode\exericse.py:86: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead. 告诉我们这个方法以后不要用了,要用.items() 这个方法
for df2_iteritems_index, df2_iteritems_row in df2.iteritems():
-----------------------------------------------------------------------------------
#三、 iteritems 新的方法遍历 DataFrame,无报错提示:
# items() is the supported replacement for iteritems(); it raises no warning.
# Each step yields (column label, column as a Series indexed by row label).
for df2_iteritems_index, df2_iteritems_row in df2.items():
    print('df2_iteritems_index:'+str(df2_iteritems_index)+'#len(df2_iteritems_row):'+str(len(df2_iteritems_row)))
    print('type(df2_iteritems_row):'+str(type(df2_iteritems_row)))
    # Read the three cells of the column through their row labels 0..2.
    print('df2_iteritems_row[0]:'+str(df2_iteritems_row[0])+'#df2_iteritems_row[1]'+str(df2_iteritems_row[1])+'#str(df2_iteritems_row[2])'+str(df2_iteritems_row[2]))
# Output for the 3x4 frame:
# df2_iteritems_index:0#len(df2_iteritems_row):3
# type(df2_iteritems_row):<class 'pandas.core.series.Series'>
# df2_iteritems_row[0]:0#df2_iteritems_row[1]4#str(df2_iteritems_row[2])8
# df2_iteritems_index:1#len(df2_iteritems_row):3
# type(df2_iteritems_row):<class 'pandas.core.series.Series'>
# df2_iteritems_row[0]:1#df2_iteritems_row[1]5#str(df2_iteritems_row[2])9
# df2_iteritems_index:2#len(df2_iteritems_row):3
# type(df2_iteritems_row):<class 'pandas.core.series.Series'>
# df2_iteritems_row[0]:2#df2_iteritems_row[1]6#str(df2_iteritems_row[2])10
# df2_iteritems_index:3#len(df2_iteritems_row):3
# type(df2_iteritems_row):<class 'pandas.core.series.Series'>
# df2_iteritems_row[0]:3#df2_iteritems_row[1]7#str(df2_iteritems_row[2])11
完整代码:
import pandas as pd
import numpy as np

df = pd.DataFrame({'A': ['A1', 'A2', 'A3'],
                   'B': ['B1', 'B2', 'B3'],
                   'C': ['C1', 'C2', 'C3']},
                  index=['ONE', 'TWO', 'THREE'])
s = pd.Series(['X2', 'X3', 'X4'], index=['TWO', 'THREE', 'FOUR'], name='X')

# insert(loc, column, value): loc is the 0-based position of the new column
# (0 = first column, 2 = third column); a scalar value fills the whole column.
df.insert(2, 'D', 0)
print(df)
#         A   B  D   C
# ONE    A1  B1  0  C1
# TWO    A2  B2  0  C2
# THREE  A3  B3  0  C3

# loc == len(df.columns) appends at the end. The Series is aligned on the row
# labels: 'ONE' has no match and becomes NaN, and the extra label 'FOUR' is dropped.
df.insert(len(df.columns), 'E', s)
print(df)
#         A   B  D   C    E
# ONE    A1  B1  0  C1  NaN
# TWO    A2  B2  0  C2   X2
# THREE  A3  B3  0  C3   X3
------------------------------
import numpy as np
import pandas as pd

# A small frame built from a list of dicts; the dict keys become the column names.
inp = [{'c1': 10, 'c2': 100}, {'c1': 11, 'c2': 110}, {'c1': 12, 'c2': 123}]
df = pd.DataFrame(inp)

t4 = np.arange(12)
print('t4:' + str(t4))
# t4:[ 0  1  2  3  4  5  6  7  8  9 10 11]
print('t4.reshape(3,4):' + str(t4.reshape(3, 4)))
# t4.reshape(3,4):[[ 0  1  2  3]
#  [ 4  5  6  7]
#  [ 8  9 10 11]]

# No column names are passed, so df2 gets the default integer labels 0..3.
df2 = pd.DataFrame(t4.reshape(3, 4))
print('type(df):'+str(type(df)))
print(df)
print('type(t4):'+str(type(t4)))
print('t4:'+str(t4))
print('df2:'+df2.to_string())
# df2:   0  1   2   3
# 0  0  1   2   3
# 1  4  5   6   7
# 2  8  9  10  11

# 1. iterrows(): every step yields (row label, row as a Series indexed by column label).
for df2index, df2_row in df2.iterrows():
    print('df2index:'+str(df2index)+'#len(df2_row):'+str(len(df2_row)))
    # df2index:0#len(df2_row):4
    print('type(df2_row):'+str(type(df2_row)))
    # type(df2_row):<class 'pandas.core.series.Series'>
    # Lookup by label. Using the row label here only works because the row
    # labels 0..2 happen to exist among the column labels 0..3 as well.
    print('df2_row['+str(df2index)+']:'+str(df2_row[df2index]))
    # df2_row[0]:0
    # Read every cell of the current row through its integer column label.
    for i in range(len(df2_row)):
        print('df2_row['+str(i)+']:'+str(df2_row[i]))
        # df2_row[0]:0
        # df2_row[1]:1
        # df2_row[2]:2
        # df2_row[3]:3
---------------------------------
二、itertuples方法遍历 DataFrame
# itertuples(): every row comes back as a namedtuple called "Pandas".
for df2_itertuples_row in df2.itertuples():
    print('df2index:'+str(df2_itertuples_row)+'#type(df2_itertuples_index):'+str(type(df2_itertuples_row)))
    print('df2_row:'+str(df2_itertuples_row))
    # df2_row:Pandas(Index=0, _1=0, _2=1, _3=2, _4=3)
    # The integer column labels are not valid identifiers, so the fields are
    # renamed by position (_1, _2, ...). A field can be read by name, as below,
    # or by position, e.g. df2_itertuples_row[1] for the first column.
    print(df2_itertuples_row._1, df2_itertuples_row._2, df2_itertuples_row._3, df2_itertuples_row._4)
    # 0 1 2 3
df2index:Pandas(Index=0, _1=0, _2=1, _3=2, _4=3)#type(df2_itertuples_index):
df2_row:Pandas(Index=0, _1=0, _2=1, _3=2, _4=3)
0 1 2 3
df2index:Pandas(Index=1, _1=4, _2=5, _3=6, _4=7)#type(df2_itertuples_index):
df2_row:Pandas(Index=1, _1=4, _2=5, _3=6, _4=7)
4 5 6 7
df2index:Pandas(Index=2, _1=8, _2=9, _3=10, _4=11)#type(df2_itertuples_index):
df2_row:Pandas(Index=2, _1=8, _2=9, _3=10, _4=11)
8 9 10 11
----------------------------------------------
#三、 iteritems 老的方法 遍历 DataFrame
# Deprecated spelling, kept on purpose to show the warning: iteritems() emits a
# FutureWarning on pandas 1.5 and no longer exists in pandas 2.0 and later
# (AttributeError there). Use items() in new code.
# Each step yields (column label, column as a Series indexed by row label).
for df2_iteritems_index, df2_iteritems_row in df2.iteritems():
    print('df2_iteritems_index:'+str(df2_iteritems_index)+'#len(df2_iteritems_row):'+str(len(df2_iteritems_row)))
    # df2_iteritems_index:0#len(df2_iteritems_row):3
    print('type(df2_iteritems_row):'+str(type(df2_iteritems_row)))
    # type(df2_iteritems_row):<class 'pandas.core.series.Series'>
    # Read the three cells of the column through their row labels 0..2.
    print('df2_iteritems_row[0]:'+str(df2_iteritems_row[0])+'#df2_iteritems_row[1]'+str(df2_iteritems_row[1])+'#str(df2_iteritems_row[2])'+str(df2_iteritems_row[2]))
    # df2_iteritems_row[0]:0#df2_iteritems_row[1]4#str(df2_iteritems_row[2])8
df2_iteritems_row[0]:1#df2_iteritems_row[1]5#str(df2_iteritems_row[2])9
df2_iteritems_index:2#len(df2_iteritems_row):3
type(df2_iteritems_row):
df2_iteritems_row[0]:2#df2_iteritems_row[1]6#str(df2_iteritems_row[2])10
df2_iteritems_index:3#len(df2_iteritems_row):3
type(df2_iteritems_row):
df2_iteritems_row[0]:3#df2_iteritems_row[1]7#str(df2_iteritems_row[2])11
D:\develop\pycharmCode\exericse.py:86: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead. 告诉我们这个方法以后不要用了,要用.items 这个方法?
for df2_iteritems_index, df2_iteritems_row in df2.iteritems():
-----------------------------------------------------------------------------------
#三、 iteritems 新的方法遍历 DataFrame,无报错提示:
# items() is the supported replacement for iteritems(); it raises no warning.
# Each step yields (column label, column as a Series indexed by row label).
for df2_iteritems_index, df2_iteritems_row in df2.items():
    print('df2_iteritems_index:'+str(df2_iteritems_index)+'#len(df2_iteritems_row):'+str(len(df2_iteritems_row)))
    print('type(df2_iteritems_row):'+str(type(df2_iteritems_row)))
    # Read the three cells of the column through their row labels 0..2.
    print('df2_iteritems_row[0]:'+str(df2_iteritems_row[0])+'#df2_iteritems_row[1]'+str(df2_iteritems_row[1])+'#str(df2_iteritems_row[2])'+str(df2_iteritems_row[2]))
# Output for the 3x4 frame:
# df2_iteritems_index:0#len(df2_iteritems_row):3
# type(df2_iteritems_row):<class 'pandas.core.series.Series'>
# df2_iteritems_row[0]:0#df2_iteritems_row[1]4#str(df2_iteritems_row[2])8
# df2_iteritems_index:1#len(df2_iteritems_row):3
# type(df2_iteritems_row):<class 'pandas.core.series.Series'>
# df2_iteritems_row[0]:1#df2_iteritems_row[1]5#str(df2_iteritems_row[2])9
# df2_iteritems_index:2#len(df2_iteritems_row):3
# type(df2_iteritems_row):<class 'pandas.core.series.Series'>
# df2_iteritems_row[0]:2#df2_iteritems_row[1]6#str(df2_iteritems_row[2])10
# df2_iteritems_index:3#len(df2_iteritems_row):3
# type(df2_iteritems_row):<class 'pandas.core.series.Series'>
# df2_iteritems_row[0]:3#df2_iteritems_row[1]7#str(df2_iteritems_row[2])11
代码:
import pandas as pd
import numpy as np

df = pd.DataFrame({'A': ['A1', 'A2', 'A3'],
                   'B': ['B1', 'B2', 'B3'],
                   'C': ['C1', 'C2', 'C3']},
                  index=['ONE', 'TWO', 'THREE'])
s = pd.Series(['X2', 'X3', 'X4'], index=['TWO', 'THREE', 'FOUR'], name='X')

# insert(loc, column, value): loc is the 0-based position of the new column
# (0 = first column, 2 = third column); a scalar value fills the whole column.
df.insert(2, 'D', 0)
print(df)
#         A   B  D   C
# ONE    A1  B1  0  C1
# TWO    A2  B2  0  C2
# THREE  A3  B3  0  C3

# loc == len(df.columns) appends at the end. The Series is aligned on the row
# labels: 'ONE' has no match and becomes NaN, and the extra label 'FOUR' is dropped.
df.insert(len(df.columns), 'E', s)
print(df)
#         A   B  D   C    E
# ONE    A1  B1  0  C1  NaN
# TWO    A2  B2  0  C2   X2
# THREE  A3  B3  0  C3   X3
------------------------------
import numpy as np
import pandas as pd
from datetime import datetime
from time import time

inp = [{'c1': 10, 'c2': 100}, {'c1': 11, 'c2': 110}, {'c1': 12, 'c2': 123}]
df = pd.DataFrame(inp)

# 120000 values -> a 30000-row x 4-column frame for the timing runs.
t4 = np.arange(120000)
print('t4:' + str(t4))
print('t4.reshape(30000,4):' + str(t4.reshape(30000, 4)))
# Both prints are abbreviated by numpy with "..." because the array is large.

# No column names are passed, so df2 gets the default integer labels 0..3.
df2 = pd.DataFrame(t4.reshape(30000, 4))
print('type(df):'+str(type(df)))
print(df)
print('type(t4):'+str(type(t4)))
print('t4:'+str(t4))
# to_string() renders all 30000 rows; the first lines look like:
# df2:           0       1       2       3
# 0           0       1       2       3
# 1           4       5       6       7
# 2           8       9      10      11
print('df2:'+df2.to_string())
start = time()
# 1. iterrows() timing run: read one element per row, like the other timing loops.
for df2index, df2_row in df2.iterrows():
    # df2_row only carries the column labels 0..3, so it must not be indexed with
    # the row label df2index: that raises KeyError from row 4 onwards on the
    # 30000-row frame. Read the first column instead.
    print('df2_row['+str(df2index)+']:'+str(df2_row[0]))
    # df2_row[0]:0
print('iterrows方法耗时:'+str(time()-start))
---------------------------------
start = time()
# 2. itertuples() timing run: read one element (the first column) per row.
for df2_itertuples_row in df2.itertuples():
    # Each row is a namedtuple such as Pandas(Index=0, _1=0, _2=1, _3=2, _4=3);
    # the integer column labels are exposed under the positional names _1.._4.
    print(getattr(df2_itertuples_row, '_1'))
print('iitertuples方法耗时:'+str(time()-start))
----------------------------------------------
#三、 iteritems 老的方法 遍历 DataFrame
# Deprecated iteritems() variant; it is not part of the timing comparison.
# The method emits a FutureWarning on pandas 1.5 and is absent from pandas 2.0
# and later, so the loop is skipped there instead of aborting the script.
if hasattr(df2, 'iteritems'):
    for df2_iteritems_index, df2_iteritems_row in df2.iteritems():
        # The original body was fully commented out; `pass` keeps the loop valid.
        pass
df2_iteritems_row[0]:1#df2_iteritems_row[1]5#str(df2_iteritems_row[2])9
df2_iteritems_index:2#len(df2_iteritems_row):3
type(df2_iteritems_row):
df2_iteritems_row[0]:2#df2_iteritems_row[1]6#str(df2_iteritems_row[2])10
df2_iteritems_index:3#len(df2_iteritems_row):3
type(df2_iteritems_row):
df2_iteritems_row[0]:3#df2_iteritems_row[1]7#str(df2_iteritems_row[2])11
D:\develop\pycharmCode\exericse.py:86: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead. 告诉我们这个方法以后不要用了,要用.items 这个方法?
for df2_iteritems_index, df2_iteritems_row in df2.iteritems():
-----------------------------------------------------------------------------------
#三、 iteritems 新的方法遍历 DataFrame,无报错提示:
# items() walks columns, so the frame is rebuilt as 4 rows x 30000 columns to
# give this loop the same 30000 iterations as the row-wise loops.
df2 = pd.DataFrame(t4.reshape(4, 30000))
start = time()
# 3. items() timing run: each step yields (column label, column as a Series).
for df2_iteritems_index, df2_iteritems_row in df2.items():
    # Read one element per column: the cell in row 0.
    print('df2_iteritems_row[0]:'+str(df2_iteritems_row[0]) )
    # df2_iteritems_row[0]:0
    # df2_iteritems_row[0]:1
    # df2_iteritems_row[0]:2
print('items方法耗时:'+str(time()-start))
items方法耗时:3.169365882873535
iterrows方法耗时:2.4688100814819336
iitertuples方法耗时:0.17905783653259277 据说这种方法不能更改里面的值
Original: https://blog.csdn.net/handsome1234/article/details/127353385
Author: handsome1234
Title: python 中pandask的 iterrows、itertuples、iteritems的理解及遍历,用‘列名’和‘索引’方式访问,及速度比拼
原创文章受到原创版权保护。转载请注明出处:https://www.johngo689.com/675377/
转载文章受原作者版权保护。转载请注明原作者出处!