import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random
from pylab import mpl
mpl.rcParams["font.sans-serif"] = ["SimHei"]
mpl.rcParams["axes.unicode_minus"] = False
一、Pandas初识
二、Pandas数据结构
2.1 Series
2.1.1 Series创建
pd.Series(np.arange(10))
0 0
1 1
2 2
3 3
4 4
5 5
6 6
7 7
8 8
9 9
dtype: int32
pd.Series(['a','b','c','d','e'],index=[1,2,3,4,5])
1 a
2 b
3 c
4 d
5 e
dtype: object
d_s = pd.Series({'red':100,"blue":20,"green":50,"yellow":70},dtype=np.float32)
d_s
red 100.0
blue 20.0
green 50.0
yellow 70.0
dtype: float32
2.1.2 Series的属性
d_s.index
Index(['red', 'blue', 'green', 'yellow'], dtype='object')
d_s.values
array([100., 20., 50., 70.], dtype=float32)
2.2 DataFrame
2.2.1 DataFrame的创建
pd.DataFrame(np.random.randn(2,3))
          0         1         2
0  0.272134  0.434924 -1.632143
1  2.136276  0.413088 -1.156421
score = np.random.randint(40,100,(10,5))
score
array([[57, 81, 54, 94, 68],
[73, 83, 41, 40, 75],
[73, 99, 44, 51, 73],
[66, 72, 62, 62, 44],
[77, 81, 78, 73, 91],
[60, 65, 82, 61, 83],
[91, 78, 64, 95, 84],
[74, 63, 86, 47, 87],
[52, 85, 64, 51, 86],
[70, 96, 59, 70, 72]])
score_df = pd.DataFrame(score)
score_df
    0   1   2   3   4
0  57  81  54  94  68
1  73  83  41  40  75
2  73  99  44  51  73
3  66  72  62  62  44
4  77  81  78  73  91
5  60  65  82  61  83
6  91  78  64  95  84
7  74  63  86  47  87
8  52  85  64  51  86
9  70  96  59  70  72
subject =['语文','数学','英语','物理','政治']
stu = ['同学'+str(i) for i in range(score_df.shape[0])]
data = pd.DataFrame(score,columns=subject,index=stu)
data
       语文  数学  英语  物理  政治
同学0   57   81   54   94   68
同学1   73   83   41   40   75
同学2   73   99   44   51   73
同学3   66   72   62   62   44
同学4   77   81   78   73   91
同学5   60   65   82   61   83
同学6   91   78   64   95   84
同学7   74   63   86   47   87
同学8   52   85   64   51   86
同学9   70   96   59   70   72
2.2.2 DataFrame属性
data.values
array([[57, 81, 54, 94, 68],
[73, 83, 41, 40, 75],
[73, 99, 44, 51, 73],
[66, 72, 62, 62, 44],
[77, 81, 78, 73, 91],
[60, 65, 82, 61, 83],
[91, 78, 64, 95, 84],
[74, 63, 86, 47, 87],
[52, 85, 64, 51, 86],
[70, 96, 59, 70, 72]])
data.columns
Index(['语文', '数学', '英语', '物理', '政治'], dtype='object')
data.T
      同学0  同学1  同学2  同学3  同学4  同学5  同学6  同学7  同学8  同学9
语文    57    73    73    66    77    60    91    74    52    70
数学    81    83    99    72    81    65    78    63    85    96
英语    54    41    44    62    78    82    64    86    64    59
物理    94    40    51    62    73    61    95    47    51    70
政治    68    75    73    44    91    83    84    87    86    72
data.head(2)
       语文  数学  英语  物理  政治
同学0   57   81   54   94   68
同学1   73   83   41   40   75
data.tail(3)
       语文  数学  英语  物理  政治
同学7   74   63   86   47   87
同学8   52   85   64   51   86
同学9   70   96   59   70   72
三、基本数据操作
data = pd.read_excel('./5.Pandas_data.xlsx')
data.head(5)
城市景点图片地址景点资料0邢台天河山https://dimg06.c-ctrip.com/images/100b0q000000…景点介绍\n\n天河山位于邢台市西侧,距离市区车程约70公里,是一处山清水秀的自然风景区。这…1邢台邢台峡谷群https://dimg04.c-ctrip.com/images/10021f000001…景点介绍\n\n邢台峡谷群又称邢台大峡谷、太行奇峡群,位于邢台市西侧,距离邢台市区车程约70…2邢台九龙峡https://dimg04.c-ctrip.com/images/100u1f000001…景点介绍\n\n九龙峡景区位于河北省邢台市邢台县西侧,距离市区车程约80公里,是一处以峡谷风…3邢台天梯山景区https://dimg07.c-ctrip.com/images/10090h000000…景点介绍\n\n天梯山景区位于邢台市西郊,是邢台周边郊游登山的好去处。天梯山地貌险峻,石壁陡…4邢台崆山白云洞https://dimg04.c-ctrip.com/images/0100×1200085…景点介绍\n\n崆山白云洞风景名胜区,位于邢台市临城县中部,是以天然溶洞白云洞为主体修建的地…
3.1 索引操作
3.1.1 直接使用行列索引(先列后行)
data['城市'][5]
'邢台'
3.1.2 结合loc或者iloc使用索引
data.loc[300:305,"城市"]
data.loc[data.index[0:5],"城市"]
0 邢台
1 邢台
2 邢台
3 邢台
4 邢台
Name: 城市, dtype: object
data.iloc[:6,:2]
data.iloc[0:6,data.columns.get_indexer(['城市','景点'])]
   城市  景点
0  邢台  天河山
1  邢台  邢台峡谷群
2  邢台  九龙峡
3  邢台  天梯山景区
4  邢台  崆山白云洞
5  邢台  云梦山风景区
3.2 赋值
data['城市'][5] = 6
data['城市'].head(10)
0 邢台
1 邢台
2 邢台
3 邢台
4 邢台
5 6
6 邢台
7 正定
8 正定
9 正定
Name: 城市, dtype: object
3.3 排序
3.3.1 DataFrame排序
data.sort_values(by='城市',ascending=False).head(5)
data.sort_values(by=['城市','景点'])
城市景点图片地址景点资料341三明三亚湾https://dimg07.c-ctrip.com/images/100r17000001…景点介绍\n\n三亚湾是一片绵延22公里的海滩。虽然这里的沙质没有亚龙湾好,但这一带的住宿价…336三明亚龙湾https://dimg08.c-ctrip.com/images/fd/tg/g3/M02…景点介绍\n\n亚龙湾的海水蔚蓝清澈,沙质洁白如玉,年平均海水温度22-25.1°C,终年可…342三明亚龙湾热带天堂森林公园https://dimg08.c-ctrip.com/images/350618000001…景点介绍\n\n亚龙湾热带天堂森林公园景区位于亚龙湾国家旅游度假区内,电影《非诚勿扰II》、…339三明大东海https://dimg05.c-ctrip.com/images/10090f000000…景点介绍\n\n大东海是三亚离市中心比较近的一个海湾,交通、用餐和住宿都比较便利。虽然沙质和…338三明天涯海角https://dimg02.c-ctrip.com/images/350l15000000…景点介绍\n\n天涯海角位于三亚湾西端,是一片宁静美丽的海滩。海滩上耸立着许多形态各异的岩石………………153龙井天佛指山保护区https://dimg07.c-ctrip.com/images/100h1d000001…景点介绍\n\n吉林天佛指山自然保护区地处龙井市境内,东南部与朝鲜隔图们江相望。该区以保护松…152龙井汉王山https://dimg08.c-ctrip.com/images/tg/477/940/9…景点介绍\n\n汉王山主要景点有一线天、高阳坡、石官峡、鹿亭温泉、回龙寺、金鹿峡瀑布、金鹿长…147龙井琵岩山https://dimg08.c-ctrip.com/images/100g1f000001…景点介绍\n\n琵岩山位于龙井市西南部,距龙井市中心区3公里处,交通便利。它树木繁茂,江水环…151龙井裕龙湾旅游风景区https://dimg04.c-ctrip.com/images/10021f000001…景点介绍\n\n裕龙湾旅游风景区位于延吉市和龙井市西侧,是集山水自然景观、玻璃吊桥、旱滑道等…149龙井龙山朝鲜族民俗村https://dimg06.c-ctrip.com/images/100k1f000001…景点介绍\n\n 龙井市龙山朝鲜族民俗村在省人民政府和旅游部门的大力支持和帮助下,共投资60…
1121 rows × 4 columns
data.sort_index()
城市景点图片地址景点资料0邢台天河山https://dimg06.c-ctrip.com/images/100b0q000000…景点介绍\n\n天河山位于邢台市西侧,距离市区车程约70公里,是一处山清水秀的自然风景区。这…1邢台邢台峡谷群https://dimg04.c-ctrip.com/images/10021f000001…景点介绍\n\n邢台峡谷群又称邢台大峡谷、太行奇峡群,位于邢台市西侧,距离邢台市区车程约70…2邢台九龙峡https://dimg04.c-ctrip.com/images/100u1f000001…景点介绍\n\n九龙峡景区位于河北省邢台市邢台县西侧,距离市区车程约80公里,是一处以峡谷风…3邢台天梯山景区https://dimg07.c-ctrip.com/images/10090h000000…景点介绍\n\n天梯山景区位于邢台市西郊,是邢台周边郊游登山的好去处。天梯山地貌险峻,石壁陡…4邢台崆山白云洞https://dimg04.c-ctrip.com/images/0100×1200085…景点介绍\n\n崆山白云洞风景名胜区,位于邢台市临城县中部,是以天然溶洞白云洞为主体修建的地………………1116石河子人民公园https://dimg03.c-ctrip.com/images/100m0l000000…景点介绍\n\n石河子市人民公园坐落在北三路与西一路交汇处的西南端,占地1平方公里,内辟林业…1117石河子周恩来总理纪念碑https://dimg08.c-ctrip.com/images/100h0i000000…景点介绍\n\n周恩来总理纪念碑座落在石河子市北郊,距市中心3公里。纪念碑南侧是周恩来总理纪…1118石河子音乐文化广场https://dimg04.c-ctrip.com/images/tg/806/642/7…景点介绍\n\n广场集花草树木、音乐喷泉、雕塑于一身。是石河子市民喜欢光顾的地方,也是来石河…1119石河子音乐喷泉https://dimg01.c-ctrip.com/images/tg/710/942/2…景点介绍\n\n进入石河子游憩广场,首先映入眼帘的就是音乐喷泉。该音乐喷泉自1998年改建后…1120石河子北湖公园https://dimg07.c-ctrip.com/images/1A051b000001…景点介绍\n\n这里建起了长廊、亭台、水上码头,配置了快艇、游船、水上摩托、水上降落平台,开…
1121 rows × 4 columns
3.3.2 Series排序
data['城市'].sort_values(ascending=True).head()
336 三明
337 三明
338 三明
339 三明
340 三明
Name: 城市, dtype: object
data['城市'].sort_index(ascending=False).head()
1120 石河子
1119 石河子
1118 石河子
1117 石河子
1116 石河子
Name: 城市, dtype: object
四、DataFrame运算
data = pd.read_csv('./5.stock_day.csv',encoding='utf-8')
data.head(10)
openhighcloselowvolumeprice_changep_changema5ma10ma20v_ma5v_ma10v_ma20turnover2018-02-2723.5325.8824.1623.5395578.030.632.6822.94222.14222.87553782.6446738.6555576.112.392018-02-2622.8023.7823.5322.8060985.110.693.0222.40621.95522.94240827.5242736.3456007.501.532018-02-2322.8823.3722.8222.7152914.010.542.4221.93821.92923.02235119.5841871.9756372.851.322018-02-2222.2522.7622.2822.0236105.010.361.6421.44621.90923.13735397.5839904.7860149.600.902018-02-1421.4921.9921.9221.4823331.040.442.0521.36621.92323.25333590.2142935.7461716.110.582018-02-1321.4021.9021.4821.3130802.450.281.3221.34222.10323.38739694.6545518.1465161.680.772018-02-1220.7021.4021.1920.6332445.390.824.0321.50422.33823.53344645.1645679.9468686.330.812018-02-0921.2021.4620.3620.1954304.01-1.50-6.8621.92022.59623.64548624.3648982.3870552.471.362018-02-0821.7922.0921.8821.7527068.160.090.4122.37223.00923.83944411.9848612.1673852.450.682018-02-0722.6923.1121.8021.2953853.25-0.50-2.2422.48023.25823.92952281.2856315.1174925.331.35
4.1 算术运算
data['open'].add(1).head()
2018-02-27 24.53
2018-02-26 23.80
2018-02-23 23.88
2018-02-22 23.25
2018-02-14 22.49
Name: open, dtype: float64
data['open'].sub(1).head()
2018-02-27 22.53
2018-02-26 21.80
2018-02-23 21.88
2018-02-22 21.25
2018-02-14 20.49
Name: open, dtype: float64
4.2 逻辑运算
4.2.1 逻辑运算符号
(data["open"] > 23).head()
2018-02-27 True
2018-02-26 False
2018-02-23 False
2018-02-22 False
2018-02-14 False
Name: open, dtype: bool
data[data["open"] > 23].head()
openhighcloselowvolumeprice_changep_changema5ma10ma20v_ma5v_ma10v_ma20turnover2018-02-2723.5325.8824.1623.5395578.030.632.6822.94222.14222.87553782.6446738.6555576.112.392018-02-0123.7123.8622.4222.2266414.64-1.30-5.4823.64624.36524.27952812.3580394.4388480.921.662018-01-3123.8523.9823.7223.3149155.02-0.11-0.4624.03624.58324.41160348.9480496.4891666.751.232018-01-3023.7124.0823.8323.7032420.430.050.2124.35024.67124.36577485.5384805.2392943.350.812018-01-2924.4024.6323.7723.7265469.81-0.73-2.9824.68424.72824.29491842.6091692.7393456.221.64
data[(data["open"] > 23) & (data["open"] < 24)].head()
openhighcloselowvolumeprice_changep_changema5ma10ma20v_ma5v_ma10v_ma20turnover2018-02-2723.5325.8824.1623.5395578.030.632.6822.94222.14222.87553782.6446738.6555576.112.392018-02-0123.7123.8622.4222.2266414.64-1.30-5.4823.64624.36524.27952812.3580394.4388480.921.662018-01-3123.8523.9823.7223.3149155.02-0.11-0.4624.03624.58324.41160348.9480496.4891666.751.232018-01-3023.7124.0823.8323.7032420.430.050.2124.35024.67124.36577485.5384805.2392943.350.812018-01-1623.4024.6024.4023.30101295.420.964.1023.90824.05823.32182003.73101081.4774590.922.54
4.2.2 逻辑运算函数
data.query("open<24 & open>23").head()
openhighcloselowvolumeprice_changep_changema5ma10ma20v_ma5v_ma10v_ma20turnover2018-02-2723.5325.8824.1623.5395578.030.632.6822.94222.14222.87553782.6446738.6555576.112.392018-02-0123.7123.8622.4222.2266414.64-1.30-5.4823.64624.36524.27952812.3580394.4388480.921.662018-01-3123.8523.9823.7223.3149155.02-0.11-0.4624.03624.58324.41160348.9480496.4891666.751.232018-01-3023.7124.0823.8323.7032420.430.050.2124.35024.67124.36577485.5384805.2392943.350.812018-01-1623.4024.6024.4023.30101295.420.964.1023.90824.05823.32182003.73101081.4774590.922.54
data[data["open"].isin([23.85])]
             open   high  close    low    volume  price_change  p_change     ma5    ma10    ma20     v_ma5     v_ma10     v_ma20  turnover
2018-01-31  23.85  23.98  23.72  23.31  49155.02         -0.11     -0.46  24.036  24.583  24.411  60348.94   80496.48   91666.75      1.23
2015-11-26  23.85  24.08  23.53  23.50  51446.29         -0.31     -1.30  23.710  24.357  23.395  74550.04  110489.85  109815.49      1.76
4.3 统计运算
4.3.1 describe
data.describe()
openhighcloselowvolumeprice_changep_changema5ma10ma20v_ma5v_ma10v_ma20turnovercount643.000000643.000000643.000000643.000000643.000000643.000000643.000000643.000000643.000000643.000000643.000000643.000000643.000000643.000000mean21.27270621.90051321.33626720.77183599905.5191140.0188020.19028021.30236221.26765621.200946100008.642691100287.542177100797.5183982.936190std3.9309734.0775783.9428063.79196873879.1193540.8984764.0796983.8806403.8136023.68618662761.57832656759.08206050101.5756392.079375min12.25000012.67000012.36000012.2000001158.120000-3.520000-10.03000012.52000012.52000012.52000010804.74000019130.51000023311.0000000.04000025%19.00000019.50000019.04500018.52500048533.210000-0.390000-1.85000018.99000019.10200019.27500055794.98500059861.95500064103.6750001.36000050%21.44000021.97000021.45000020.98000083175.9300000.0500000.26000021.50400021.62300021.53000086133.33000089234.97000096291.7300002.50000075%23.40000024.06500023.41500022.850000127580.0550000.4550002.30500023.31800023.09850023.136000127655.655000124693.255000124001.0150003.915000max34.99000036.35000035.21000034.010000501915.4100003.03000010.03000033.69600032.18600029.998000404443.540000360028.160000269280.79000012.560000
4.3.2 统计函数
data.min(),data.idxmax()
(open 12.25
high 12.67
close 12.36
low 12.20
volume 1158.12
price_change -3.52
p_change -10.03
ma5 12.52
ma10 12.52
ma20 12.52
v_ma5 10804.74
v_ma10 19130.51
v_ma20 23311.00
turnover 0.04
dtype: float64,
open 2015-06-15
high 2015-06-10
close 2015-06-12
low 2015-06-12
volume 2017-10-26
price_change 2015-06-09
p_change 2015-08-28
ma5 2015-06-15
ma10 2015-06-18
ma20 2015-06-18
v_ma5 2017-10-26
v_ma10 2017-11-02
v_ma20 2017-11-15
turnover 2017-10-26
dtype: object)
4.3.3 累计统计函数
d_s = data.sort_index()
stock_rise = d_s['p_change'].cumsum()
import matplotlib.pyplot as plt
stock_rise.plot()
plt.show()
D:\Anaconda3\lib\site-packages\pandas\plotting\_matplotlib\core.py:1192: UserWarning: FixedFormatter should only be used together with FixedLocator
ax.set_xticklabels(xticklabels)
4.4 自定义运算
data[['open', 'close']].apply(lambda x: x.max() - x.min(), axis=0)
open 22.74
close 22.85
dtype: float64
五、资料网盘
百度云盘:https://pan.baidu.com/s/1puAwMn8QFGiUcrmQtnoiPw ;
提取码:echo
Original: https://blog.csdn.net/weixin_45160152/article/details/118547324
Author: Padaz
Title: 4.数据分析-Pandas_①
原创文章受到原创版权保护。转载请注明出处:https://www.johngo689.com/740666/
转载文章受原作者版权保护。转载请注明原作者出处!