pandas——常用操作

常用操作

import pandas as pd
import numpy as np
# Sample expense records: four spending categories for each of three months.
# NOTE(review): the third month is spelled 'Marth' -- presumably a typo for
# 'March'; the outputs shown below use the same spelling.
example = pd.DataFrame({'Amount': [74., 235., 175., 100., 115., 245., 180., 90., 88., 129., 273., 300.],
              'Category': ['Transportation', 'Grocery', 'Household', 'Entertainment',
                           'Transportation', 'Grocery', 'Household', 'Entertainment',
                           'Transportation', 'Grocery', 'Household', 'Entertainment'],
              'Month': ['January', 'January', 'January', 'January',
                        'February', 'February', 'February', 'February',
                        'Marth', 'Marth', 'Marth', 'Marth']})
example

    Amount        Category     Month
0     74.0  Transportation   January
1    235.0         Grocery   January
2    175.0       Household   January
3    100.0   Entertainment   January
4    115.0  Transportation  February
5    245.0         Grocery  February
6    180.0       Household  February
7     90.0   Entertainment  February
8     88.0  Transportation     Marth
9    129.0         Grocery     Marth
10   273.0       Household     Marth
11   300.0   Entertainment     Marth

# Sort by Month ascending, then by Amount descending within each month.
# Month holds strings, so the order is alphabetical, not calendar order.
example.sort_values(by=['Month', 'Amount'], ascending=[True, False])

    Amount        Category     Month
5    245.0         Grocery  February
6    180.0       Household  February
4    115.0  Transportation  February
7     90.0   Entertainment  February
1    235.0         Grocery   January
2    175.0       Household   January
3    100.0   Entertainment   January
0     74.0  Transportation   January
11   300.0   Entertainment     Marth
10   273.0       Household     Marth
9    129.0         Grocery     Marth
8     88.0  Transportation     Marth

# Small frame for the sorting and de-duplication examples; rows 3/4 and
# rows 7/8 are exact duplicates of each other.
data = pd.DataFrame({'k1':[1,2,3,5,5,4,6,2,2], 'k2':[12,8,11,4,4,52,30,18,18]})
data

   k1  k2
0   1  12
1   2   8
2   3  11
3   5   4
4   5   4
5   4  52
6   6  30
7   2  18
8   2  18

# Sort rows by the k2 column (ascending by default); the original index
# labels travel with their rows.
data.sort_values(by='k2')

   k1  k2
3   5   4
4   5   4
1   2   8
2   3  11
0   1  12
7   2  18
8   2  18
6   6  30
5   4  52

# Drop rows that repeat an earlier row in every column; the first
# occurrence is kept and a new frame is returned.
data.drop_duplicates()

   k1  k2
0   1  12
1   2   8
2   3  11
3   5   4
5   4  52
6   6  30
7   2  18

# Compare on k1 only: keep the first row seen for each distinct k1 value,
# regardless of what k2 holds.
data.drop_duplicates(subset=['k1'])

   k1  k2
0   1  12
1   2   8
2   3  11
3   5   4
5   4  52
6   6  30

# Frame whose k2 column holds string codes, used for the row-wise mapping
# example that follows.
data2 = pd.DataFrame({'k1':[1,5,3,16,19], 'k2':['A1','A2', 'A1', 'B2', 'A2']})
data2

   k1  k2
0   1  A1
1   5  A2
2   3  A1
3  16  B2
4  19  A2

def map(series):
    """Return the group label for one row, based on its ``k2`` code.

    Rows whose ``k2`` is ``'A1'`` or ``'A2'`` are labelled ``'A'``; every
    other code yields ``None``.

    NOTE: this name shadows the built-in ``map``. It is kept because the
    ``DataFrame.apply`` call in this file passes the function by that name.
    """
    code = series['k2']
    if code in ('A1', 'A2'):
        return 'A'
    return None
# Call map once per row (axis=1) and store the results in a new k2_map
# column; rows with no matching code get None.
data2['k2_map'] = data2.apply(map, axis=1)
data2

   k1  k2  k2_map
0   1  A1       A
1   5  A2       A
2   3  A1       A
3  16  B2    None
4  19  A2       A

# Two columns of five draws each from the standard normal distribution.
# No seed is set here, so the values differ from run to run.
df = pd.DataFrame({'t1':np.random.randn(5), 't2':np.random.randn(5)})
df

         t1        t2
0  0.909904 -0.394096
1  0.943165  0.480281
2 -1.616222  2.153146
3 -0.403424 -1.160060
4  0.850074  1.280206

# assign returns a new frame with an extra column: the element-wise
# quotient t1 / t2, stored under the name 'ration'.
# NOTE(review): 'ration' is presumably a typo for 'ratio'; the drop call
# later in the file uses the same spelling.
df2 = df.assign(ration = df['t1'] / df['t2'])
df2

         t1        t2    ration
0  0.909904 -0.394096 -2.308840
1  0.943165  0.480281  1.963779
2 -1.616222  2.153146 -0.750633
3 -0.403424 -1.160060  0.347762
4  0.850074  1.280206  0.664013

# Return a copy without the 'ration' column (axis=1 targets columns);
# df2 itself is not modified because the result is not assigned back.
df2.drop('ration', axis=1)

         t1        t2
0  0.909904 -0.394096
1  0.943165  0.480281
2 -1.616222  2.153146
3 -0.403424 -1.160060
4  0.850074  1.280206

# Bucket ages into the right-closed intervals (10, 40], (40, 60], (60, 90]
# defined by consecutive bin edges. Ages outside every interval (here 9
# and 6) come back as NaN.
age = pd.Series([18, 9, 6, 20, 26, 67, 52, 38, 40, 81, 36, 74, 88])
bins = [10, 40, 60, 90]
res = pd.cut(age, bins)
res
0     (10.0, 40.0]
1              NaN
2              NaN
3     (10.0, 40.0]
4     (10.0, 40.0]
5     (60.0, 90.0]
6     (40.0, 60.0]
7     (10.0, 40.0]
8     (10.0, 40.0]
9     (60.0, 90.0]
10    (10.0, 40.0]
11    (60.0, 90.0]
12    (60.0, 90.0]
dtype: category
Categories (3, interval[int64, right]): [(10, 40] < (40, 60] < (60, 90]]
# Count how many ages fall into each interval, most frequent first.
# The Series method is used because the top-level pd.value_counts function
# is deprecated since pandas 2.1.
res.value_counts()
(10, 40]    6
(60, 90]    4
(40, 60]    1
dtype: int64
# Same binning as before, but each interval gets a readable label instead
# of its numeric bounds.
w = pd.cut(age, bins, labels=['youth', 'mid', 'old'])
# Series.value_counts replaces the top-level pd.value_counts, which is
# deprecated since pandas 2.1.
w.value_counts()
youth    6
old      4
mid      1
dtype: int64
# Boolean mask that is True where an age fell outside every bin (NaN in res).
res.isnull()
0     False
1      True
2      True
3     False
4     False
5     False
6     False
7     False
8     False
9     False
10    False
11    False
12    False
dtype: bool
# Series with one missing value; the NaN makes the dtype float64.
qw = pd.Series([np.nan, 1, 2, 3])
qw
0    NaN
1    1.0
2    2.0
3    3.0
dtype: float64
# Replace missing values with 5; a new Series is returned and qw is not
# modified because the result is not assigned back.
qw.fillna(5)
0    5.0
1    1.0
2    2.0
3    3.0
dtype: float64

Original: https://blog.csdn.net/D_Low/article/details/123765446
Author: One_T.
Title: pandas——常用操作

原创文章受到原创版权保护。转载请注明出处:https://www.johngo689.com/679521/

转载文章受原作者版权保护。转载请注明原作者出处!

(0)

大家都在看

亲爱的 Coder【最近整理,可免费获取】👉 最新必读书单  | 👏 面试题下载  | 🌎 免费的AI知识星球