import numpy as np
import pandas as pd
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
创建一个DataFrame,要求索引是a,b,c,d,列名是one,two,其中one列数据为1,2,3(d行缺失,显示为NaN),two列数据为1,2,3,4
# Column data keyed by column name. Column 'one' has no value for the
# fourth row (None), while column 'two' is fully populated.
data = dict(one=[1, 2, 3, None], two=[1, 2, 3, 4])
# Row labels, one per row of the column lists above.
index = list('abcd')
data
{'one': [1, 2, 3, None], 'two': [1, 2, 3, 4]}
a=pd.DataFrame(data,index=index)
a
   one  two
a  1.0    1
b  2.0    2
c  3.0    3
d  NaN    4
a.values
a.index
a.columns
a.shape
array([[ 1., 1.],
[ 2., 2.],
[ 3., 3.],
[nan, 4.]])
Index(['a', 'b', 'c', 'd'], dtype='object')
Index(['one', 'two'], dtype='object')
(4, 2)
查看df的索引名和列名
a.index
Index(['a', 'b', 'c', 'd'], dtype='object')
a.columns
Index(['one', 'two'], dtype='object')
列操作
a['three']=a['one']*a['two']
a
   one  two  three
a  1.0    1    1.0
b  2.0    2    4.0
c  3.0    3    9.0
d  NaN    4    NaN
a.pop('two')
a 1
b 2
c 3
d 4
Name: two, dtype: int64
   one  three
a  1.0    1.0
b  2.0    4.0
c  3.0    9.0
d  NaN    NaN
del a['three']
   one
a  1.0
b  2.0
c  3.0
d  NaN
基于位置和基于标签的索引
# Build a frame from three random Series whose row labels only partly
# overlap. The Series are created in the order one, two, three, so the
# random draws happen in that same order.
df1 = pd.DataFrame(dict(
    one=pd.Series(np.random.randn(3), index=list('abc')),
    two=pd.Series(np.random.randn(4), index=list('abcd')),
    three=pd.Series(np.random.randn(3), index=list('bcd')),
))
df1
        one       two     three
a  0.764493  0.194418       NaN
b  0.208509  0.058272 -0.296883
c  0.243678 -0.219339  0.007492
d       NaN  1.162892 -1.032575
df1.two
a 0.194418
b 0.058272
c -0.219339
d 1.162892
Name: two, dtype: float64
df1['two']
a 0.194418
b 0.058272
c -0.219339
d 1.162892
Name: two, dtype: float64
df1
        one       two     three
a  0.764493  0.194418       NaN
b  0.208509  0.058272 -0.296883
c  0.243678 -0.219339  0.007492
d       NaN  1.162892 -1.032575
df1.loc['b':'d':2,'one':'three':2]
        one     three
b  0.208509 -0.296883
d       NaN -1.032575
df1.iloc[::2,::2]
        one     three
a  0.764493       NaN
c  0.243678  0.007492
数据对齐及运算,观察运算结果,体会对齐原理,执行df3+df4的操作
# Two random frames with different shapes: df3 is 10x4 (columns A-D) and
# df4 is 7x3 (columns A-C). Both use the default integer row index.
df3 = pd.DataFrame(np.random.randn(10, 4), columns=list('ABCD'))
df4 = pd.DataFrame(np.random.randn(7, 3), columns=list('ABC'))
df3
df4
          A         B         C         D
0 -0.621413  0.294929 -0.084458  0.287200
1 -0.985321 -0.550060 -1.965892  1.166455
2 -0.173458  0.514720 -1.359982 -1.616528
3 -0.270726  0.823601 -0.102752  1.076850
4 -2.077785 -0.993348  0.595989 -0.147217
5 -0.526779 -1.695730 -0.038430  1.728069
6 -0.229735 -0.576460 -0.616993 -1.412160
7  0.023990 -0.423430  0.831251 -1.179647
8  0.228553  0.893980 -1.167045  0.074208
9  0.202145 -0.377218  1.654575 -0.006734
          A         B         C
0  1.216310  0.589830  0.985595
1  1.513245  0.820891 -1.304351
2  1.380050 -0.237665 -0.432952
3  0.148178  1.368246  1.401666
4 -1.887375 -1.880049 -0.544624
5  1.659860 -0.562001  0.720988
6  0.200254  0.832807 -0.515529
df3+df4
          A         B         C   D
0  0.594897  0.884759  0.901137 NaN
1  0.527925  0.270831 -3.270242 NaN
2  1.206592  0.277055 -1.792934 NaN
3 -0.122548  2.191847  1.298914 NaN
4 -3.965160 -2.873398  0.051365 NaN
5  1.133081 -2.257731  0.682558 NaN
6 -0.029481  0.256347 -1.132522 NaN
7       NaN       NaN       NaN NaN
8       NaN       NaN       NaN NaN
9       NaN       NaN       NaN NaN
查看数据框的形状、索引、列名、详细信息、简单统计描述
# Fix the seed so the random integers below are reproducible.
np.random.seed(0)
# 8x3 frame of integers drawn from [1, 11), with string row labels in a
# deliberately unsorted order and columns A, B, C.
df6 = pd.DataFrame(
    np.random.randint(1, 11, size=(8, 3)),
    index=["003", "001", "005", "006", "002", "008", "004", "007"],
    columns=['A', 'B', 'C'],
)
df6
      A  B   C
003   6  1   4
001   4  8  10
005   4  6   3
006   5  8   7
002   9  9   2
008   7  8   8
004   9  2   6
007  10  9  10
df6.shape
(8, 3)
df6.index
df6.columns
Index(['A', 'B', 'C'], dtype='object')
df6.info()
<class 'pandas.core.frame.DataFrame'>
Index: 8 entries, 003 to 007
Data columns (total 3 columns):
A    8 non-null int32
B    8 non-null int32
C    8 non-null int32
dtypes: int32(3)
memory usage: 160.0+ bytes
df6.describe()
              A         B          C
count   8.00000  8.000000   8.000000
mean    6.75000  6.375000   6.250000
std     2.37547  3.159453   3.058945
min     4.00000  1.000000   2.000000
25%     4.75000  5.000000   3.750000
50%     6.50000  8.000000   6.500000
75%     9.00000  8.250000   8.500000
max    10.00000  9.000000  10.000000
布尔索引与逻辑运算提取数据
df6.C>5
003 False
001 True
005 False
006 True
002 False
008 True
004 True
007 True
Name: C, dtype: bool
df6[df6.C>5]
      A  B   C
001   4  8  10
006   5  8   7
008   7  8   8
004   9  2   6
007  10  9  10
df6[(df6.A>5)&(df6.B>6)]
      A  B   C
002   9  9   2
008   7  8   8
007  10  9  10
isin方法
# Small frame with one numeric column and two string-label columns, used
# to demonstrate membership tests with isin.
df7 = pd.DataFrame(dict(
    vals=[1, 2, 3, 4],
    ids=list('abfn'),
    ids2=list('ancn'),
))
df7
   vals ids ids2
0     1   a    a
1     2   b    n
2     3   f    c
3     4   n    n
df7.ids.isin(['b'])
0 False
1 True
2 False
3 False
Name: ids, dtype: bool
df7[df7.ids.isin(['b','a'])]
   vals ids ids2
0     1   a    a
1     2   b    n
Original: https://blog.csdn.net/weixin_55263276/article/details/115154793
Author: April晓宇
Title: pandas 两列相乘 dataframe
原创文章受到原创版权保护。转载请注明出处:https://www.johngo689.com/674163/
转载文章受原作者版权保护。转载请注明原作者出处!