常用操作
import pandas as pd
import numpy as np
# Sample table with three columns (Amount, Category, Month): four categories
# repeated for each of three month labels, twelve rows in total.
# NOTE(review): 'Marth' is presumably a typo for 'March'; it is left unchanged
# here because the printed outputs that follow use the same spelling - confirm
# and fix both together.
example = pd.DataFrame({'Amount': [74., 235., 175., 100., 115., 245., 180., 90., 88., 129., 273., 300.],
'Category': ['Transportation', 'Grocery', 'Household', 'Entertainment',
'Transportation', 'Grocery', 'Household', 'Entertainment',
'Transportation', 'Grocery', 'Household', 'Entertainment'],
'Month': ['January', 'January', 'January', 'January',
'February', 'February', 'February', 'February',
'Marth', 'Marth', 'Marth', 'Marth']})
example
AmountCategoryMonth074.0TransportationJanuary1235.0GroceryJanuary2175.0HouseholdJanuary3100.0EntertainmentJanuary4115.0TransportationFebruary5245.0GroceryFebruary6180.0HouseholdFebruary790.0EntertainmentFebruary888.0TransportationMarth9129.0GroceryMarth10273.0HouseholdMarth11300.0EntertainmentMarth
example.sort_values(by=['Month', 'Amount'], ascending=[True, False])
AmountCategoryMonth5245.0GroceryFebruary6180.0HouseholdFebruary4115.0TransportationFebruary790.0EntertainmentFebruary1235.0GroceryJanuary2175.0HouseholdJanuary3100.0EntertainmentJanuary074.0TransportationJanuary11300.0EntertainmentMarth10273.0HouseholdMarth9129.0GroceryMarth888.0TransportationMarth
data = pd.DataFrame({'k1':[1,2,3,5,5,4,6,2,2], 'k2':[12,8,11,4,4,52,30,18,18]})
data
k1k2011212823113544545452663072188218
data.sort_values(by='k2')
k1k2354454128231101127218821866305452
data.drop_duplicates()
k1k201121282311354545266307218
data.drop_duplicates(subset=['k1'])
k1k20112128231135454526630
data2 = pd.DataFrame({'k1':[1,5,3,16,19], 'k2':['A1','A2', 'A1', 'B2', 'A2']})
data2
k1k201A115A223A1316B2419A2
def map(series):
    """Collapse a row's ``k2`` code into its group label.

    Meant to be applied row by row, e.g. ``data2.apply(map, axis=1)``.

    Args:
        series: A row (or any mapping) that can be indexed with ``'k2'``.

    Returns:
        ``'A'`` when ``series['k2']`` is ``'A1'`` or ``'A2'``; ``None`` for
        every other value.
    """
    # NOTE: this name shadows the built-in ``map``; it is kept so that code
    # passing the function by this name keeps working.
    if series['k2'] in ('A1', 'A2'):
        return 'A'
    # Codes without a mapping yield None explicitly rather than by fall-through.
    return None
data2['k2_map'] = data2.apply(map, axis=1)
data2
k1k2k2_map01A1A15A2A23A1A316B2None419A2A
df = pd.DataFrame({'t1':np.random.randn(5), 't2':np.random.randn(5)})
df
t1t200.909904-0.39409610.9431650.4802812-1.6162222.1531463-0.403424-1.16006040.8500741.280206
df2 = df.assign(ration = df['t1'] / df['t2'])
df2
t1t2ration00.909904-0.394096-2.30884010.9431650.4802811.9637792-1.6162222.153146-0.7506333-0.403424-1.1600600.34776240.8500741.2802060.664013
df2.drop('ration', axis=1)
t1t200.909904-0.39409610.9431650.4802812-1.6162222.1531463-0.403424-1.16006040.8500741.280206
age = pd.Series([18, 9, 6, 20, 26, 67, 52, 38, 40, 81, 36, 74, 88])
bins = [10, 40, 60, 90]
res = pd.cut(age, bins)
res
0 (10.0, 40.0]
1 NaN
2 NaN
3 (10.0, 40.0]
4 (10.0, 40.0]
5 (60.0, 90.0]
6 (40.0, 60.0]
7 (10.0, 40.0]
8 (10.0, 40.0]
9 (60.0, 90.0]
10 (10.0, 40.0]
11 (60.0, 90.0]
12 (60.0, 90.0]
dtype: category
Categories (3, interval[int64, right]): [(10, 40] < (40, 60] < (60, 90]]
# Count how many values fall into each bin. Series.value_counts() is used
# because the top-level pd.value_counts() is deprecated in recent pandas.
res.value_counts()
(10, 40] 6
(60, 90] 4
(40, 60] 1
dtype: int64
w = pd.cut(age, bins, labels=['youth', 'mid', 'old'])
# Count how many values received each label. Series.value_counts() is used
# because the top-level pd.value_counts() is deprecated in recent pandas.
w.value_counts()
youth 6
old 4
mid 1
dtype: int64
res.isnull()
0 False
1 True
2 True
3 False
4 False
5 False
6 False
7 False
8 False
9 False
10 False
11 False
12 False
dtype: bool
qw = pd.Series([np.nan, 1, 2, 3])
qw
0 NaN
1 1.0
2 2.0
3 3.0
dtype: float64
qw.fillna(5)
0 5.0
1 1.0
2 2.0
3 3.0
dtype: float64
Original: https://blog.csdn.net/D_Low/article/details/123765446
Author: One_T.
Title: pandas——常用操作
原创文章受到原创版权保护。转载请注明出处:https://www.johngo689.com/679521/
转载文章受原作者版权保护。转载请注明原作者出处!