Python的Numpy与Pandas包的使用

2023年7月16日上午9:34 • 人工智能 • 阅读 57

import numpy as np
import random
import matplotlib.pyplot as plt

# 安装 numpy, scipy, pandas
# numpy:  高级数组, 线性代数中的矩阵
# scipy:  科学计算基础公式库
# pandas:  数据分析工具
# matplotlib:  绘图

1.&#x4F7F;&#x7528;numpy&#x521B;&#x5EFA;&#x4E00;&#x7EF4;&#x6570;&#x7EC4; &#xFF08;list&#xFF0C;tuple&#xFF09;
# Candidate inputs for np.array(); only sequences (list, tuple) give a 1-D array.
lst = [1, 2, 3, 4]  # a list converts to a 1-D array
tup = (1, 2, 3, 4, 5)  # a tuple converts to a 1-D array
# Named num_set so that the builtin set() is not shadowed.
num_set = {1, 2, 3, 4, 5}  # a set is wrapped as one object, not a 1-D array
s = 'candle'  # a string becomes a single string scalar, not an array of chars
dic = {"key1": "value1"}
arr = np.array(lst)
print(arr[0])

2.&#x521B;&#x5EFA;&#x8FDE;&#x7EED;&#x533A;&#x95F4;&#x6570;&#x5B57;&#x7684;&#x4E00;&#x7EF4;&#x6570;&#x7EC4; np.arange&#xFF08;start,end,step&#xFF09;
lst = [i for i in range(1, 21)]
print(lst)
arr = np.array(lst)
print(arr)
&#x7EC3;&#x4E60;1&#xFF1A;&#x4EA7;&#x751F;&#x8FDE;&#x7EED;&#x6570;&#x5B57;1~20&#x7684;&#x6570;&#x7EC4;&#xFF08;&#x5217;&#x8868;&#x751F;&#x6210;&#x5F0F;&#xFF0C;arange&#xFF09;
arr = np.arange(1, 21)
print(arr)
&#x7EC3;&#x4E60;2&#xFF1A;&#x4EA7;&#x751F;1 4 7 &#x2026; 16&#x7684;&#x6570;&#x7EC4;
arr = np.arange(1, 17, 3)
print(arr)
&#x7EC3;&#x4E60;3&#xFF1A;&#x4EA7;&#x751F;9~1&#x7684;&#x6570;&#x7EC4;
arr = np.arange(9, 0, -1)
print(arr)

4.&#x521B;&#x5EFA;&#x968F;&#x673A;&#x6570;&#x7684;&#x6570;&#x7EC4;&#xFF08;1-9&#x8303;&#x56F4;&#x5185;&#xFF0C;&#x6570;&#x7EC4;&#x4E2D;&#x968F;&#x673A;10&#x4E2A;&#x6570;&#x5B57;&#xFF09;
lst = [ random.randint(1, 10) for i in range(1, 11)]
arr = np.random.randint(1, 10, size=10)
print(arr)

5.  &#x4E00;&#x7EF4;&#x6570;&#x7EC4;&#x5143;&#x7D20;&#x83B7;&#x53D6;&#x3010;index&#x3011; &#x53CA;&#x67E5;&#x770B;&#x5143;&#x7D20;&#x7C7B;&#x578B;dtype
arr = np.array([1, 2, 3, 4, 5, 6], dtype=np.int8)
print(arr[0].dtype)  # int8, because dtype=np.int8 is passed explicitly on the line above
# numpy 的 int 默认类型取决于平台(int32 或 int64)，可选位宽为 8~64(int8、int16、int32、int64)
arr = np.array([1, 2, 3, 4, 5.4, 6])  # [1.  2.  3.  4.  5.4 6. ]
print(arr[0].dtype)
# numpy 的 float 默认为 float64，可选 float16/float32/float64(部分平台另有 float128)
arr = np.array([1, 2, 3, 4.16, 'candle'])
print(arr)
print(arr[4].dtype)  # <u6 2 5 12 字符串类型 大小为6 # 6.查看维度ndim，元素个数size，数组形状shape arr="np.arange(1," 10) print(arr.ndim) 查看数组维度 print(arr.size) 查看数组中元素的个数 print(arr.shape) 查看数组形状 每个维度大小 7.numpy创建一个连续数字的二维数组（1~12） 多维数组,每个维度下,个数要一致 lst1="[[1," 2, 3], (4, 5, 6), [7, 8, 9]] 二维数组 lst2="[[1," 9, 10]] 一维数组 print(arr) 1. 一维升维度后变成二维 reshape(x,y,z) 13) 有12个成员 升维度为二维数组 1*12 2*6 3*4 4*3 6*2 12*1 (12,) arr2d="arr.reshape(3," 4) 3行4列 print(arr2d) print(arr2d.shape) 必须满足 行*列="&#x4E2A;&#x6570;" 6) valueerror: cannot reshape array of size into shape (6,6) -表示自动匹配剩余的维度 -1) can only specify one unknown dimension 2.创建时指定维度(随机数数组) 8.产生1-9随机数的9个元素的二维数组（要求3*3） 10, print(arr.reshape(3, 3)) 13, 9. 使用numpy创建矩阵 方法1： array()函数生成矩阵时数据只能为列表形式 [4, 6]]) brr="np.mat([[1," print(brr) crr="np.mat("1" 3;4 6") print(crr) 10. 创建特殊矩阵 5), dtype="np.int8)" 2)) 练习2：模拟抛掷500次硬币，正面表示1，反面表示0 练习3：模拟抛掷500次硬币，正面表示1，反面表示-1 =="0," -1, 1) 练习4：绘制随机漫步曲线（cumsum 数组累加求和） 对抛硬币结果进行累加求和 plt.plot(arr) plt.title("my title") plt.xlabel("my xlabel") plt.ylabel("my cumsum") plt.show() < code></u6>

其他使用

import numpy as np
import random

1  2  &#x4F7F;&#x7528;numpy&#x521B;&#x5EFA;&#x4E00;&#x7EF4;&#x6570;&#x7EC4;
&#x8981;&#x4F7F;&#x7528;list&#xFF0C;tuple
# Sequences (list, tuple) convert to 1-D arrays.
lst = [1, 2, 3, 4]
tup = (1, 2, 3, 4, 5)

# Non-sequence inputs; the names avoid shadowing the builtins set/str/dict.
num_set = {1, 2, 3, 4, 5}
text = 'candle'
ages = {"lihua": 28, "zm": 18}

arr1 = np.array(lst)  # [1 2 3 4]
arr2 = np.array(tup)  # [1 2 3 4 5]

# Each of these is wrapped as a single 0-d element instead of being unpacked.
arr3 = np.array(num_set)
arr4 = np.array(text)
arr5 = np.array(ages)
print(arr1)  # [1 2 3 4]
print(arr2)  # [1 2 3 4 5]
print(arr3)  # {1, 2, 3, 4, 5}
print(arr4)  # candle
print(arr5)  # {'lihua': 28, 'zm': 18}
3.&#x521B;&#x5EFA;&#x8FDE;&#x7EED;&#x533A;&#x95F4;&#x6570;&#x5B57;&#x7684;&#x4E00;&#x7EF4;&#x6570;&#x7EC4; np.arange&#xFF08;start,end,step&#xFF09;
&#x7EC3;&#x4E60;1&#xFF1A;&#x4EA7;&#x751F;&#x8FDE;&#x7EED;&#x6570;&#x5B57;1~20&#x7684;&#x6570;&#x7EC4;&#xFF08;&#x5217;&#x8868;&#x751F;&#x6210;&#x5F0F;&#xFF0C;arange&#xFF09;
lst2 = [i for i in range(1, 21)]
print(lst2)  # [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
arr6 = np.array(lst2)
print(arr6)  # [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20]
&#x7EC3;&#x4E60;2&#xFF1A;&#x4EA7;&#x751F;1 4 7 &#x2026; 16&#x7684;&#x6570;&#x7EC4;
arr7 = np.arange(1, 17, 3)
print(arr7)  # [ 1  4  7 10 13 16]
&#x7EC3;&#x4E60;3&#xFF1A;&#x4EA7;&#x751F;9~1&#x7684;&#x6570;&#x7EC4;
arr8 = np.arange(9, 0, -1)
print(arr8)  # [9 8 7 6 5 4 3 2 1]

4.&#x521B;&#x5EFA;&#x968F;&#x673A;&#x6570;&#x7684;&#x6570;&#x7EC4;&#xFF08;1-9&#x8303;&#x56F4;&#x5185;&#xFF0C;&#x6570;&#x7EC4;&#x4E2D;&#x968F;&#x673A;10&#x4E2A;&#x6570;&#x5B57;&#xFF09;
print("4***********")
lst9 = [random.randint(1, 10) for i in range(1, 11)]
print(lst9)  # [5, 5, 8, 8, 7, 8, 3, 3, 6, 10]
np.random.randint(low,high,size,dtype)
arr9 = np.random.randint(1, 10, size=10)
print(arr9) # [5 7 9 4 4 4 4 4 8 3]

5.  &#x4E00;&#x7EF4;&#x6570;&#x7EC4;&#x5143;&#x7D20;&#x83B7;&#x53D6;&#x3010;index&#x3011; &#x53CA;&#x67E5;&#x770B;&#x5143;&#x7D20;&#x7C7B;&#x578B;dtype
# (1) numpy 的 int 类型：默认类型取决于平台(int32 或 int64)，可选 int8~int64
arr9 = np.array([1, 2, 3, 4, 5.4, 6], dtype=np.int8)
print(arr9[0].dtype)  # int8

# (2) numpy 的 float 类型：默认为 float64，可选 float16/float32/float64(部分平台另有 float128)
&#x53D1;&#x73B0;&#x6709;&#x4E00;&#x4E2A;&#x4E3A;&#x6D6E;&#x70B9;&#x7C7B;&#x578B; 5.4 &#x5176;&#x4ED6;&#x5168;&#x90E8;&#x8F6C;&#x4E86;
arr10 = np.array([1, 2, 3, 4, 5.4, 6])
print(arr10)  # [1.  2.  3.  4.  5.4 6. ]
print(arr10[0].dtype)  # float64

(3)&#x5B57;&#x7B26;&#x4E32;&#x7C7B;&#x578B;
arr11 = np.array([1, 2, 3, 4.16, 'lihua'])
print(arr11) # ['1' '2' '3' '4.16' 'lihua']
print(arr11[2].dtype)  # <u1 1 2 3 4 5 6 7 8 9 10 11 12 大小为6 unicode编码 print(arr11[4].dtype) # <u5 大小为5 6.查看维度ndim，元素个数size，数组形状shape\ arr="np.arange(1," 10) print(arr) [1 9] 查看数组维度 print(arr.ndim) (9,) 查看数组中元素的个数 print(arr.size) 查看数组的星形状 行列 维度 print(arr.shape) 7. numpy创建一个连续数字的二维数组（1~12） print('numpy创建一个连续数字的二维数组（1~12）') 多维数组,每个维度下个数要一致(不然是一维数组) 二维数组 lst1="[[1," 2, 3], (4, 5, 6), [7, 8, 9]] (3, 3) 一维 有3个元素 每个二维里面有3个元素 print(lst1) [[1, 一维数组 lst2="[[1," [4, 6], 9, 10]] print(np.array(lst2).shape) (3,) print(lst2) (1) a.一维升维度后变成二维 reshape(x,y,z) 13) [ 12] (12,) arr2d="arr.reshape(3," 4) print(arr2d.shape) print(arr2d) [[ 4] 8] 12]] b.-1 表示自动匹配维度 print("b.-1 表示自动匹配维度") arr2="arr.reshape(-1," 6) print(arr2) 6] arr3="arr.reshape(3," -1) print(arr3) c.reshape(3,4) 参数相乘必须等于个数 valueerror: cannot reshape array of size into shape (2,4) (2)创建时指定维度(随机数数组) 直接再size中指定二维 10, 3)) [[1 [7 [6 5]] 8. 产生一个1-9随机数的9个元素的二维数组(要求3*3) print(arr.reshape(3, [[5 1] [5 8]] 三维数组 13, [[[ [10 11]] 9]]] #9.使用numpy创建矩阵 print("9******") #方法1： array()函数生成矩阵时数据只能为列表形式 2], [3, 4]]) 2] [3 4]] #方法2 ： mat()函数生成矩阵时(字符串或列表形式) brr="np.mat([[1," print(brr) crr="np.mat("1" 2;3 4") print(crr) 10. 创建特殊矩阵 numpy.ones(shape, dtype) 产生全1矩阵 numpy.zeros(shape, 产生全0矩阵 numpy.eye(shape, 产生下对角线1矩阵 5), dtype="np.int8)" 1]] 2)) [[0. 0.] [0. 0.]] [[1. 0. 1. 1.]] 11. numpy 常用替换函数 np.where(condition, x, y)：满足条件(condition)，输出x，不满足输出y。 np.where(condition)：满足条件返回给定数组索引 12. 课堂练习 练习1：产生1-9之间的所有数字乱序的3*3二维数组 print("12*****") arr9="np.random.randint(1," print(arr9) 3] 5] 练习2：模拟抛掷500次硬币，正面表示1，反面表示0 [0,1,0,1...] 练习3：模拟抛掷500次硬币，正面表示1，反面表示-1 0全部变为-1,是1数据就不变 练习4：绘制随机漫步曲线（cumsum 数组累加求和） #[1,-1,-1,-1,1,-1.....] 对抛硬币的结果进行累加求和 rleft [0,-1,-2...] 1-1 0-1 -1-1 ... import matplotlib.pyplot as plt 画图 plt.plot(arr) 设置图片的标题,x轴,y轴的标签 plt.title("my title") plt.xlabel("my xlabel") plt.ylabel("my ylabel") 在终端显示图片 plt.show() < code></u1>

1.1 ndarray对象

import numpy as np

python&#x4E2D;&#x6CA1;&#x6709;&#x539F;&#x751F;&#x6570;&#x7EC4;&#x6570;&#x636E;&#x7C7B;&#x578B;,ndarray&#x5C31;&#x662F;python&#x7684;&#x5F15;&#x7528;&#x6570;&#x636E;&#x7C7B;&#x578B;:&#x6570;&#x7EC4;
a = np.array([1, 2, 3])
print(a)  # [1 2 3]

&#x591A;&#x4E8E;&#x4E00;&#x4E2A;&#x7EF4;&#x5EA6;

a = np.array([[1, 2], [3, 4]])
print(a)

[[1 2]
[3 4]]

&#x6700;&#x5C0F;&#x7EF4;&#x5EA6;

a = np.array([1, 2, 3, 4, 5], ndmin=2)
print(a)  # [[1 2 3 4 5]]

dtype &#x53C2;&#x6570;

a = np.array([1, 2, 3], dtype=complex)
print(a)  # [1.+0.j 2.+0.j 3.+0.j]

1.2 numpy数据类型

import numpy as np

&#x4F7F;&#x7528;&#x6807;&#x91CF;&#x7C7B;&#x578B;
dt = np.dtype(np.int32)
print(dt)  # int32

int8, int16, int32, int64 &#x56DB;&#x79CD;&#x6570;&#x636E;&#x7C7B;&#x578B;&#x53EF;&#x4EE5;&#x4F7F;&#x7528;&#x5B57;&#x7B26;&#x4E32; 'i1', 'i2','i4','i8' &#x4EE3;&#x66FF;
dt = np.dtype('i4')
print(dt)  # int32

# NOTE(review): this snippet was collapsed into one HTML-mangled line; it is
# reconstructed from the surviving tokens and should be checked against the
# original article.

# Byte-order annotation: '<' marks little-endian, 'i4' a 4-byte integer
dt = np.dtype('<i4')
print(dt)  # int32

# First create a structured data type with one int8 field named 'age'
dt = np.dtype([('age', np.int8)])
print(dt)  # [('age', 'i1')]

# Apply the structured data type to an ndarray
a = np.array([(10,), (20,), (30,)], dtype=dt)
print(a)  # [(10,) (20,) (30,)]

# The field name can be used to read the actual 'age' column
print(a['age'])  # [10 20 30]

# A structured dtype is a list of (field name, field type) tuples; here the
# fields are name (20-byte string), age (int8) and marks (float32)
student = np.dtype([('name', 'S20'), ('age', 'i1'), ('marks', 'f4')])
print(student)  # [('name', 'S20'), ('age', 'i1'), ('marks', '<f4')]

a = np.array([('abc', 21, 50), ('xyz', 18, 75)], dtype=student)
print(a)  # [(b'abc', 21, 50.) (b'xyz', 18, 75.)]

1.3 numpy数组属性

import numpy as np

 ndarray.ndim &#x7528;&#x4E8E;&#x8FD4;&#x56DE;&#x6570;&#x7EC4;&#x7684;&#x7EF4;&#x6570;&#xFF0C;&#x7B49;&#x4E8E;&#x79E9;
a = np.arange(24)
print(a.ndim)  # a &#x73B0;&#x53EA;&#x6709;&#x4E00;&#x4E2A;&#x7EF4;&#x5EA6;   1
&#x73B0;&#x5728;&#x8C03;&#x6574;&#x5176;&#x5927;&#x5C0F;
b = a.reshape(2, 4, 3)  # b &#x73B0;&#x5728;&#x62E5;&#x6709;&#x4E09;&#x4E2A;&#x7EF4;&#x5EA6;
print(b.ndim)  # 3

# ndarray.shape 表示数组的维度，返回一个元组，这个元组的长度就是维度的数目，即 ndim 属性(秩)。比如，一个二维数组，其维度表示"行数"和"列数"。
a = np.array([[1, 2, 3], [4, 5, 6]])
print(a.shape)  # (2, 3) 2&#x884C;3&#x5217;&#x7684;&#x6570;&#x7EC4;

 &#x5C06;&#x6570;&#x7EC4;&#x8C03;&#x6574;&#x6210;&#x4E3A;3&#x884C;2&#x5217;&#x7684;&#x6570;&#x7EC4;
a = np.array([[1, 2, 3], [4, 5, 6]])
a.shape = (3, 2)
print(a)

[1,2,3]
[4,5,6]

[1 2]
[3 4]
[5 6]]

 NumPy &#x4E5F;&#x63D0;&#x4F9B;&#x4E86; reshape(&#x6539;&#x9020;) &#x51FD;&#x6570;&#x6765;&#x8C03;&#x6574;&#x6570;&#x7EC4;&#x5927;&#x5C0F;&#x3002;
a = np.array([[1, 2, 3], [4, 5, 6]])
b = a.reshape(3, 2)
print(b)

[[1 2]
[3 4]
[5 6]]

 ndarray.itemsize &#x4EE5;&#x5B57;&#x8282;&#x7684;&#x5F62;&#x5F0F;&#x8FD4;&#x56DE;&#x6570;&#x7EC4;&#x4E2D;&#x6BCF;&#x4E00;&#x4E2A;&#x5143;&#x7D20;&#x7684;&#x5927;&#x5C0F;,&#x5355;&#x4F4D;&#x5B57;&#x8282;&#x3002;
&#x6570;&#x7EC4;&#x7684; dtype &#x4E3A; int8&#xFF08;&#x4E00;&#x4E2A;&#x5B57;&#x8282;&#x7B49;&#x4E8E;8&#x4F4D;&#xFF09;
x = np.array([1, 2, 3, 4, 5], dtype=np.int8)
print(x.itemsize)  # 1

&#x6570;&#x7EC4;&#x7684; dtype &#x73B0;&#x5728;&#x4E3A; float64&#xFF08;&#x516B;&#x4E2A;&#x5B57;&#x8282;&#x7B49;&#x4E8E;64&#x4F4D;&#xFF09;
y = np.array([1, 2, 3, 4, 5], dtype=np.float64)
print(y.itemsize)  # 8

 ndarray.flags &#x8FD4;&#x56DE; ndarray &#x5BF9;&#x8C61;&#x7684;&#x5185;&#x5B58;&#x4FE1;&#x606F;
x = np.array([1, 2, 3, 4, 5])
print(x.flags)
C_CONTIGUOUS: True
F_CONTIGUOUS: True
OWNDATA: True
WRITEABLE: True
ALIGNED: True
WRITEBACKIFCOPY: False
UPDATEIFCOPY: False

1.4 numpy创建数组

import numpy as np

 (1)numpy.empty &#x65B9;&#x6CD5;&#x7528;&#x6765;&#x521B;&#x5EFA;&#x4E00;&#x4E2A;&#x6307;&#x5B9A;&#x5F62;&#x72B6;&#xFF08;shape&#xFF09;&#x3001;&#x6570;&#x636E;&#x7C7B;&#x578B;&#xFF08;dtype&#xFF09;&#x4E14;&#x672A;&#x521D;&#x59CB;&#x5316;&#x7684;&#x6570;&#x7EC4;&#xFF1A;
x = np.empty([3, 2], dtype=int)
print(x)  # &#x751F;&#x6210;&#x7684;3&#x884C;2&#x5217;&#x7684;&#x6570;&#x7EC4;&#x7684;&#x503C;&#x662F;&#x968F;&#x673A;&#x7684;
 &#x6CE8;&#x610F; &#x2212; &#x6570;&#x7EC4;&#x5143;&#x7D20;&#x4E3A;&#x968F;&#x673A;&#x503C;&#xFF0C;&#x56E0;&#x4E3A;&#x5B83;&#x4EEC;&#x672A;&#x521D;&#x59CB;&#x5316;
[[-1212020656       32764]
[-1212016064       32764]
[    6881397         100]]

(2)&#x521B;&#x5EFA;&#x6307;&#x5B9A;&#x5927;&#x5C0F;&#x7684;&#x6570;&#x7EC4;&#xFF0C;&#x6570;&#x7EC4;&#x5143;&#x7D20;&#x4EE5; 0 &#x6765;&#x586B;&#x5145;&#xFF1A;
&#x9ED8;&#x8BA4;&#x4E3A;&#x6D6E;&#x70B9;&#x6570;
x = np.zeros(5)
print(x)  # [0. 0. 0. 0. 0.]

# Set the element type to integer. The np.int alias was removed in NumPy 1.24,
# so the builtin int (the type np.int used to alias) is passed instead.
y = np.zeros((5,), dtype=int)
print(y)  # [0 0 0 0 0]

&#x81EA;&#x5B9A;&#x4E49;&#x7C7B;&#x578B; x ,y &#x7684;&#x6570;&#x636E;&#x7C7B;&#x578B;&#x662F;int32
z = np.zeros((2, 2), dtype=[('x', 'i4'), ('y', 'i4')])
print(z)

[[(0, 0) (0, 0)]
[(0, 0) (0, 0)]]

(3)&#x521B;&#x5EFA;&#x6307;&#x5B9A;&#x5F62;&#x72B6;&#x7684;&#x6570;&#x7EC4;&#xFF0C;&#x6570;&#x7EC4;&#x5143;&#x7D20;&#x4EE5; 1 &#x6765;&#x586B;&#x5145;&#xFF1A;
&#x9ED8;&#x8BA4;&#x4E3A;&#x6D6E;&#x70B9;&#x6570;
x = np.ones(5)
print(x)

[1. 1. 1. 1. 1.]

&#x81EA;&#x5B9A;&#x4E49;&#x7C7B;&#x578B; 2&#x884C;2&#x5217;&#x7684;&#x6570;&#x7EC4; &#x521D;&#x59CB;&#x503C;&#x4E3A;1
x = np.ones([2, 2], dtype=int)
print(x)

[[1 1]
 [1 1]]

x = np.ones([3, 2], dtype=int)  # 3&#x884C;&#x4E24;&#x5217;&#x7684;&#x6570;&#x7EC4;,&#x521D;&#x59CB;&#x503C;&#x4E3A;1
print(x)

[[1 1]
[1 1]
[1 1]]

1.5 numpy从已有的数据中创建数组

import numpy as np

 &#x5C06;&#x5217;&#x8868;&#x8F6C;&#x6362;&#x4E3A; ndarray:
x = [1, 2, 3]
a = np.asarray(x)
print(a)  # [1 2 3]

 &#x5C06;&#x5143;&#x7EC4;&#x8F6C;&#x6362;&#x4E3A; ndarray:
x = (1, 2, 3)
a = np.asarray(x)
print(a)  # [1 2 3]

 &#x5C06;&#x5143;&#x7EC4;&#x5217;&#x8868;&#x8F6C;&#x6362;&#x4E3A; ndarray:
# The tuples have different lengths, so they cannot form a regular 2-D array.
# dtype=object keeps each tuple as one element; without it NumPy >= 1.24
# raises ValueError for ragged input.
x = [(1, 2, 3), (4, 5)]
a = np.asarray(x, dtype=object)
print(a)  # [(1, 2, 3) (4, 5)]

 &#x8BBE;&#x7F6E;&#x4E86; dtype &#x53C2;&#x6570;&#xFF1A;
x = [1, 2, 3]
a = np.asarray(x, dtype=float)
print(a)  # [1. 2. 3.]

 numpy.frombuffer &#x7528;&#x4E8E;&#x5B9E;&#x73B0;&#x52A8;&#x6001;&#x6570;&#x7EC4;&#x3002;
python 3
s = b'Hello World'
a = np.frombuffer(s, dtype='S1')
print(a)  # [b'H' b'e' b'l' b'l' b'o' b' ' b'W' b'o' b'r' b'l' b'd']

 &#x6CE8;&#x610F;&#xFF1A;buffer &#x662F;&#x5B57;&#x7B26;&#x4E32;&#x7684;&#x65F6;&#x5019;&#xFF0C;Python3 &#x9ED8;&#x8BA4; str &#x662F; Unicode &#x7C7B;&#x578B;&#xFF0C;&#x6240;&#x4EE5;&#x8981;&#x8F6C;&#x6210; bytestring &#x5728;&#x539F; str &#x524D;&#x52A0;&#x4E0A; b&#x3002;

python 2
s = 'Hello World'
a = np.frombuffer(s, dtype='S1')
print(a)

 numpy.fromiter   &#x65B9;&#x6CD5;&#x4ECE;&#x53EF;&#x8FED;&#x4EE3;&#x5BF9;&#x8C61;&#x4E2D;&#x5EFA;&#x7ACB; ndarray &#x5BF9;&#x8C61;&#xFF0C;&#x8FD4;&#x56DE;&#x4E00;&#x7EF4;&#x6570;&#x7EC4;&#x3002;
# Create a range object (named so that the builtin list is not shadowed)
numbers = range(5)
it = iter(numbers)
print(it)  # <range_iterator object at 0x000001e45ad94f50>
# Build a 1-D ndarray from the iterator
x = np.fromiter(it, dtype=float)
print(x)  # [0. 1. 2. 3. 4.]

1.6 NumPy从数值范围创建数组

import numpy as np

1.numpy.arange  numpy &#x5305;&#x4E2D;&#x7684;&#x4F7F;&#x7528; arange &#x51FD;&#x6570;&#x521B;&#x5EFA;&#x6570;&#x503C;&#x8303;&#x56F4;&#x5E76;&#x8FD4;&#x56DE; ndarray &#x5BF9;&#x8C61;

 &#x751F;&#x6210; 0 &#x5230; 5 &#x7684;&#x6570;&#x7EC4;:
x = np.arange(5)
print(x)  # [0 1 2 3 4]

 &#x8BBE;&#x7F6E;&#x8FD4;&#x56DE;&#x7C7B;&#x578B;&#x4F4D; float:
&#x8BBE;&#x7F6E;&#x4E86; dtype
x = np.arange(5, dtype=float)
print(x)  # [0. 1. 2. 3. 4.]

 &#x8BBE;&#x7F6E;&#x4E86;&#x8D77;&#x59CB;&#x503C;&#x3001;&#x7EC8;&#x6B62;&#x503C;&#x53CA;&#x6B65;&#x957F;&#xFF1A;
x = np.arange(10, 20, 2)
print(x)  # [10 12 14 16 18]

2.numpy.linspace  numpy.linspace &#x51FD;&#x6570;&#x7528;&#x4E8E;&#x521B;&#x5EFA;&#x4E00;&#x4E2A;&#x4E00;&#x7EF4;&#x6570;&#x7EC4;&#xFF0C;&#x6570;&#x7EC4;&#x662F;&#x4E00;&#x4E2A;&#x7B49;&#x5DEE;&#x6570;&#x5217;&#x6784;&#x6210;&#x7684;

&#x4EE5;&#x4E0B;&#x5B9E;&#x4F8B;&#x7528;&#x5230;&#x4E09;&#x4E2A;&#x53C2;&#x6570;&#xFF0C;&#x8BBE;&#x7F6E;&#x8D77;&#x59CB;&#x70B9;&#x4E3A; 1 &#xFF0C;&#x7EC8;&#x6B62;&#x70B9;&#x4E3A; 10&#xFF0C;&#x6570;&#x5217;&#x4E2A;&#x6570;&#x4E3A; 10&#x3002;
a = np.linspace(1, 10, 10)
print(a)  # [ 1.  2.  3.  4.  5.  6.  7.  8.  9. 10.]

&#x8BBE;&#x7F6E;&#x5143;&#x7D20;&#x5168;&#x90E8;&#x662F;1&#x7684;&#x7B49;&#x5DEE;&#x6570;&#x5217;&#xFF1A;
a = np.linspace(1, 1, 10)
print(a)  # [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]

&#x5C06; endpoint(&#x7AEF;&#x70B9;) &#x8BBE;&#x4E3A; false&#xFF0C;&#x4E0D;&#x5305;&#x542B;&#x7EC8;&#x6B62;&#x503C;&#xFF1A;

a = np.linspace(10, 20, 5, endpoint=False)
print(a)  # [10. 12. 14. 16. 18.]

&#x5982;&#x679C;&#x5C06; endpoint(&#x7AEF;&#x70B9;) &#x8BBE;&#x4E3A; true&#xFF0C;&#x5219;&#x4F1A;&#x5305;&#x542B; 20&#x3002;

# 以下实例设置间距。retstep=True 会把等差数列的步长(公差)一并返回，结果是 (数组, 步长) 的元组；默认不返回步长。

a = np.linspace(1, 10, 10, retstep=True)
print(a)  # (array([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.]), 1.0)
a = np.linspace(10, 20, 5, retstep=True)  # (array([10. , 12.5, 15. , 17.5, 20. ]), 2.5)   (10&#x5230;20&#x4E4B;&#x95F4;&#x8981;&#x6709;5&#x4E2A;&#x6570;,&#x7B49;&#x5DEE;&#x662F;2.5 )

print(a)
&#x62D3;&#x5C55;&#x4F8B;&#x5B50;  1&#x884C;10&#x5217;,&#x91CD;&#x65B0;&#x53D8;&#x6210;10&#x884C;1&#x5217;
b = np.linspace(1, 10, 10).reshape([10, 1])
print(b)

'''
[ 1.]
 [ 2.]
 [ 3.]
 [ 4.]
 [ 5.]
 [ 6.]
 [ 7.]
 [ 8.]
 [ 9.]
 [10.]]
'''
# 3.numpy.logspace  numpy.logspace 函数用于创建一个等比数列

# 默认底数是 10。base 是幂的底数(元素为 base**指数)，并不是公比；等比数列的公比 q = base**((stop-start)/(num-1))
a = np.logspace(1.0, 2.0, num=10)
print(a)
10^1(10. )  ......      10^2(100.)
[ 10.          12.91549665  16.68100537  21.5443469   27.82559402
35.93813664  46.41588834  59.94842503  77.42636827 100.        ]

# Set the base of the log space to 2: the elements are 2**0, 2**1, ..., 2**9
a = np.logspace(0, 9, 10, base=2)
print(a)

0 1 2 3 4 5 6 7 8 9
  2^0  2^1  2^2  2^3  2^4  2^5  2^6  2^7  2^8  2^9
 [  1.   2.   4.   8.  16.  32.  64. 128. 256. 512.]

1.7 numpy的切片和索引

import numpy as np

# Reusable slice object: start at index 2, stop before index 7, step 2
s = slice(2, 7, 2)
a = np.arange(10)
selected = a[s]
print(selected)  # [2 4 6]

 &#x6211;&#x4EEC;&#x4E5F;&#x53EF;&#x4EE5;&#x901A;&#x8FC7;&#x5192;&#x53F7;&#x5206;&#x9694;&#x5207;&#x7247;&#x53C2;&#x6570; start:stop:step &#x6765;&#x8FDB;&#x884C;&#x5207;&#x7247;&#x64CD;&#x4F5C;&#xFF1A;
a = np.arange(10)
b = a[2:7:2]  # &#x4ECE;&#x7D22;&#x5F15; 2 &#x5F00;&#x59CB;&#x5230;&#x7D22;&#x5F15; 7 &#x505C;&#x6B62;&#xFF0C;&#x95F4;&#x9694;&#x4E3A; 2
print(b)  # [2 4 6]

a = np.arange(10)  # [0 1 2 3 4 5 6 7 8 9]
b = a[5]
print(b)  # 5

a = np.arange(10)
print(a[2:])  # [2 3 4 5 6 7 8 9]

a = np.arange(10)  # [0 1 2 3 4 5 6 7 8 9]
print(a[2:5])  # [2 3 4]

a = np.array([[1, 2, 3], [3, 4, 5], [4, 5, 6]])
print(a)
java&#x4E2D;&#x7684;&#x4E8C;&#x7EF4;&#x6570;&#x7EC4;,java&#x4E2D;&#x7684;&#x6570;&#x7EC4;&#x662F;&#x7528;(),&#x5728;python&#x4E2D;&#x7684;&#x6570;&#x7EC4;&#x662F;&#x7528; [ ]
 [[1 2 3]
 [3 4 5]
 [4 5 6]]

&#x4ECE;&#x67D0;&#x4E2A;&#x7D22;&#x5F15;&#x5904;&#x5F00;&#x59CB;&#x5207;&#x5272;
print('&#x4ECE;&#x6570;&#x7EC4;&#x7D22;&#x5F15; a[1:] &#x5904;&#x5F00;&#x59CB;&#x5207;&#x5272;')
print(a[1:])
[[3 4 5]
[4 5 6]]

a = np.array([[1, 2, 3], [3, 4, 5], [4, 5, 6]])
print(a)
[[1 2 3]
[3 4 5]
[4 5 6]]
&#x4E0B;&#x6807;&#x8FD8;&#x662F;&#x4ECE;0&#x5F00;&#x59CB;&#x7684;,1&#x662F;&#x7B2C;2&#x4E2A;&#x5143;&#x7D20;&#x4E86; [ &#x884C;&#x53C2;&#x6570;,&#x5217;&#x53C2;&#x6570; ]
print(a[..., 1])  # &#x7B2C;2&#x5217;&#x5143;&#x7D20;    [2 4 5]
print(a[1, ...])  # &#x7B2C;2&#x884C;&#x5143;&#x7D20;    [3 4 5]
print(a[..., 1:])  # &#x7B2C;2&#x5217;&#x53CA;&#x5269;&#x4E0B;&#x7684;&#x6240;&#x6709;&#x5143;&#x7D20;
[[2 3]
[4 5]
[5 6]]

1.8 numpy高级索引

1.&#x6574;&#x6570;&#x6570;&#x7EC4;&#x7D22;&#x5F15;
&#x4EE5;&#x4E0B;&#x5B9E;&#x4F8B;&#x83B7;&#x53D6;&#x6570;&#x7EC4;&#x4E2D;(0,0)&#xFF0C;(1,1)&#x548C;(2,0)&#x4F4D;&#x7F6E;&#x5904;&#x7684;&#x5143;&#x7D20;&#x3002;

import numpy as np

x = np.array([[1, 2], [3, 4], [5, 6]])
# Row and column indices are paired element-wise: (0, 0), (1, 1), (2, 0)
row_idx, col_idx = [0, 1, 2], [0, 1, 0]
y = x[row_idx, col_idx]
print(y)  # [1 4 5]

&#x4EE5;&#x4E0B;&#x5B9E;&#x4F8B;&#x83B7;&#x53D6;&#x4E86; 4X3 &#x6570;&#x7EC4;&#x4E2D;&#x7684;&#x56DB;&#x4E2A;&#x89D2;&#x7684;&#x5143;&#x7D20;&#x3002; &#x884C;&#x7D22;&#x5F15;&#x662F; [0,0] &#x548C; [3,3]&#xFF0C;&#x800C;&#x5217;&#x7D22;&#x5F15;&#x662F; [0,2] &#x548C; [0,2]&#x3002;
x = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10, 11]])
print('&#x6211;&#x4EEC;&#x7684;&#x6570;&#x7EC4;&#x662F;&#xFF1A;')
print(x)
print('\n')
'''
&#x6211;&#x4EEC;&#x7684;&#x6570;&#x7EC4;&#x662F;&#xFF1A;
[[ 0  1  2]
 [ 3  4  5]
 [ 6  7  8]
 [ 9 10 11]]
'''
rows = np.array([[0, 0], [3, 3]])
cols = np.array([[0, 2], [0, 2]])
y = x[rows, cols]

[[0, 0], [3, 3]],[[0, 2], [0, 2]]

 [ [0,0] [0,2]] [[3,0],[3,2]]

   0     2        9    11
print('&#x8FD9;&#x4E2A;&#x6570;&#x7EC4;&#x7684;&#x56DB;&#x4E2A;&#x89D2;&#x5143;&#x7D20;&#x662F;&#xFF1A;')
print(y)
[[ 0  2]
[ 9 11]]

&#x53EF;&#x4EE5;&#x501F;&#x52A9;&#x5207;&#x7247; : &#x6216; &#x2026; &#x4E0E;&#x7D22;&#x5F15;&#x6570;&#x7EC4;&#x7EC4;&#x5408;&#x3002;&#x5982;&#x4E0B;&#x9762;&#x4F8B;&#x5B50;&#xFF1A;
a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
b = a[1:3, 1:3]
c = a[1:3, [1, 2]]
d = a[..., 1:]
print(b)
print(c)
print(d)
[1,2],[1,2]  -> [1,1] [1,2] [2,1] [2,2]
[[5 6]
[8 9]]

[[5 6]
[8 9]]

 &#x8F93;&#x51FA;2,3 &#x5217;
[[2 3]
[5 6]
[8 9]]

 2.&#x5E03;&#x5C14;&#x7D22;&#x5F15;
&#x4EE5;&#x4E0B;&#x5B9E;&#x4F8B;&#x83B7;&#x53D6;&#x5927;&#x4E8E; 5 &#x7684;&#x5143;&#x7D20;&#xFF1A;
x = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10, 11]])
print('&#x6211;&#x4EEC;&#x7684;&#x6570;&#x7EC4;&#x662F;&#xFF1A;')
print(x)
print('\n')
'''
&#x6211;&#x4EEC;&#x7684;&#x6570;&#x7EC4;&#x662F;&#xFF1A;
[[ 0  1  2]
 [ 3  4  5]
 [ 6  7  8]
 [ 9 10 11]]
'''

&#x73B0;&#x5728;&#x6211;&#x4EEC;&#x4F1A;&#x6253;&#x5370;&#x51FA;&#x5927;&#x4E8E; 5 &#x7684;&#x5143;&#x7D20;   &#x4E8C;&#x7EF4;&#x6570;&#x7EC4;&#x53D8;&#x6210;&#x4E00;&#x7EF4;&#x5EA6;&#x6570;&#x7EC4;&#x4E86;
print('&#x5927;&#x4E8E; 5 &#x7684;&#x5143;&#x7D20;&#x662F;&#xFF1A;')
print(x[x > 5])

&#x5927;&#x4E8E; 5 &#x7684;&#x5143;&#x7D20;&#x662F;&#xFF1A;
[ 6  7  8  9 10 11]

&#x4EE5;&#x4E0B;&#x5B9E;&#x4F8B;&#x4F7F;&#x7528;&#x4E86; ~&#xFF08;&#x53D6;&#x8865;&#x8FD0;&#x7B97;&#x7B26;&#xFF09;&#x6765;&#x8FC7;&#x6EE4; NaN&#x3002;
a = np.array([np.nan, 1, 2, np.nan, 3, 4, 5])
print(a[~np.isnan(a)])  # [1. 2. 3. 4. 5.]

&#x4EE5;&#x4E0B;&#x5B9E;&#x4F8B;&#x6F14;&#x793A;&#x5982;&#x4F55;&#x4ECE;&#x6570;&#x7EC4;&#x4E2D;&#x8FC7;&#x6EE4;&#x6389;&#x975E;&#x590D;&#x6570;&#x5143;&#x7D20;&#x3002;
a = np.array([1, 2 + 6j, 5, 3.5 + 5j])
print(a[np.iscomplex(a)])  # [2. +6.j 3.5+5.j]

3.&#x82B1;&#x5F0F;&#x7D22;&#x5F15;

(1)&#x4F20;&#x5165;&#x987A;&#x5E8F;&#x7D22;&#x5F15;&#x6570;&#x7EC4;
x = np.arange(32).reshape((8, 4))

print(x)  # 0-31&#x7684;8&#x884C;4&#x5217;&#x6570;&#x7EC4;
'''
[[ 0  1  2  3]   -8
 [ 4  5  6  7]   -7
 [ 8  9 10 11]   -6
 [12 13 14 15]   -5
 [16 17 18 19]   -4
 [20 21 22 23]   -3
 [24 25 26 27]   -2
 [28 29 30 31]]  -1
'''
print(x[[4, 2, 1, 7]])  # &#x53D6;&#x4E0A;&#x9762;&#x76F8;&#x5E94;&#x7684;&#x884C;

[[16 17 18 19]
[ 8  9 10 11]
[ 4  5  6  7]
[28 29 30 31]]

(2)&#x4F20;&#x5165;&#x5012;&#x5E8F;&#x7D22;&#x5F15;&#x6570;&#x7EC4;
x = np.arange(32).reshape((8, 4))
print(x[[-4, -2, -1, -7]])
[[16 17 18 19]
[24 25 26 27]
[28 29 30 31]
[ 4  5  6  7]]

(3)&#x4F20;&#x5165;&#x591A;&#x4E2A;&#x7D22;&#x5F15;&#x6570;&#x7EC4;&#xFF08;&#x8981;&#x4F7F;&#x7528;np.ix_&#xFF09;
x = np.arange(32).reshape((8, 4))
print(x[np.ix_([1, 5, 7, 2], [0, 3, 1, 2])])
[1,0] 4
[1,3] 7
....

[2,2] 10

[[ 4  7  5  6]
[20 23 21 22]
[28 31 29 30]
[ 8 11  9 10]]

1.9 NumPY广播(Broadcast)

&#x5E7F;&#x64AD;&#x5C31;&#x662F;&#x5728;&#x8FDB;&#x884C;&#x6570;&#x7EC4;&#x7684; + - *  &#x9006;&#x7684;&#x65F6;&#x5019;,&#x53EF;&#x4EE5;&#x81EA;&#x52A8;&#x8865;&#x9F50;

 4x3 &#x7684;&#x4E8C;&#x7EF4;&#x6570;&#x7EC4;&#x4E0E;&#x957F;&#x4E3A; 3 &#x7684;&#x4E00;&#x7EF4;&#x6570;&#x7EC4;&#x76F8;&#x52A0;&#xFF0C;&#x7B49;&#x6548;&#x4E8E;&#x628A;&#x6570;&#x7EC4; b &#x5728;&#x4E8C;&#x7EF4;&#x4E0A;&#x91CD;&#x590D; 4 &#x6B21;&#x518D;&#x8FD0;&#x7B97;&#xFF1A;

import numpy as np

# 4x3 matrix whose rows are filled with 0, 10, 20 and 30 respectively
a = np.array([[row_value] * 3 for row_value in (0, 10, 20, 30)])
b = np.array([1, 2, 3])
# np.tile(b, (4, 1)) repeats b four times along axis 0, giving shape (4, 3),
# so the addition below is a plain element-wise sum of two 4x3 arrays
bb = np.tile(b, (4, 1))
total = a + bb
print(total)

[[ 1  2  3]
[11 12 13]
[21 22 23]
[31 32 33]]

a = np.array([[0, 0, 0],
              [10, 10, 10],
              [20, 20, 20],
              [30, 30, 30]])
b = np.array([1, 2, 3])
print(a + b)

[[ 1  2  3]
[11 12 13]
[21 22 23]
[31 32 33]]

1.10 numpy迭代数组

import numpy as np

# Iterating a and a.T with nditer visits the elements in the same order:
# the transpose is only a view over the same row-major buffer
a = np.arange(6).reshape(2, 3)
for x in np.nditer(a.T):
    print(x, end=", ")  # 0, 1, 2, 3, 4, 5,
print('\n')
# a.T.copy(order='C') stores the transpose in its own C-order buffer, so the
# iteration now walks down the columns of a
for x in np.nditer(a.T.copy(order='C')):
    print(x, end=", ")  # 0, 3, 1, 4, 2, 5,
print('\n')

a.T &#x662F;a&#x7684;&#x8F6C;&#x7F6E;

1.11 numpy数组操作

numpy.rollaxis   numpy.swapaxes  &#x8FD9;&#x4E2A;&#x662F;&#x4E0D;&#x61C2; ?????

numpy.rollaxis numpy.rollaxis &#x51FD;&#x6570;&#x5411;&#x540E;&#x6EDA;&#x52A8;&#x7279;&#x5B9A;&#x7684;&#x8F74;&#x5230;&#x4E00;&#x4E2A;&#x7279;&#x5B9A;&#x4F4D;&#x7F6E;

import numpy as np

&#x521B;&#x5EFA;&#x4E86;&#x4E09;&#x7EF4;&#x7684; ndarray
a = np.arange(8).reshape(2, 2, 2)

print('&#x539F;&#x6570;&#x7EC4;&#xFF1A;')
print(a)
print('\n')
&#x5C06;&#x8F74; 2 &#x6EDA;&#x52A8;&#x5230;&#x8F74; 0&#xFF08;&#x5BBD;&#x5EA6;&#x5230;&#x6DF1;&#x5EA6;&#xFF09;

print('&#x8C03;&#x7528; rollaxis &#x51FD;&#x6570;&#xFF1A;')
print(np.rollaxis(a, 2))  # np.rollaxis(a, 2,0)) &#x9ED8;&#x8BA4;&#x662F;0
# 将轴 2 滚动到轴 1：（宽度到高度）
print('\n')

print('&#x8C03;&#x7528; rollaxis &#x51FD;&#x6570;&#xFF1A;')
print(np.rollaxis(a, 2, 1))
'''
&#x539F;&#x6570;&#x7EC4;&#xFF1A;
[[[0 1]
  [2 3]]

 [[4 5]
  [6 7]]]

[[[0 1] [2 3]] [[4 5][6 7]]]

&#x8C03;&#x7528; rollaxis &#x51FD;&#x6570;&#xFF1A;
&#x4E0A;&#x9762;&#x7684;&#x6570;&#x7EC4;  &#x5148;&#x9009;&#x62E9; 0 &#x8DF3;2&#x4E2A;&#x4F4D;&#x7F6E; 2  &#x8DF3;2&#x4E2A;&#x4F4D;&#x7F6E; 4  &#x8DF3;2&#x4E2A;&#x4F4D;&#x7F6E; 6

[[[0 2]
  [4 6]]

 [[1 3]
  [5 7]]]

&#x8C03;&#x7528; rollaxis &#x51FD;&#x6570;&#xFF1A;

[[[0 2]
  [1 3]]

 [[4 6]
  [5 7]]]

'''

1.13 NumPy5种常见函数

&#x5B57;&#x7B26;&#x4E32;&#x51FD;&#x6570;  &#x6570;&#x5B66;&#x51FD;&#x6570;  &#x7B97;&#x672F;&#x51FD;&#x6570;  &#x7EDF;&#x8BA1;&#x51FD;&#x6570;  &#x6392;&#x5E8F;&#x6761;&#x4EF6;&#x7B5B;&#x9009;&#x51FD;&#x6570;

&#x7EDF;&#x8BA1;&#x51FD;&#x6570;
import numpy as np

a = np.array([[3, 7, 5], [8, 4, 3], [2, 4, 9]])
print('&#x6211;&#x4EEC;&#x7684;&#x6570;&#x7EC4;&#x662F;&#xFF1A;')
print(a)
print('\n')
print('&#x8C03;&#x7528; amin() &#x51FD;&#x6570;&#xFF1A;')
print(np.amin(a, 1))
print('\n')
print('&#x518D;&#x6B21;&#x8C03;&#x7528; amin() &#x51FD;&#x6570;&#xFF1A;')
print(np.amin(a, 0))
print('\n')
print('&#x8C03;&#x7528; amax() &#x51FD;&#x6570;&#xFF1A;')
print(np.amax(a))
print('\n')
print('&#x518D;&#x6B21;&#x8C03;&#x7528; amax() &#x51FD;&#x6570;&#xFF1A;')
print(np.amax(a, axis=0))

'''
&#x6211;&#x4EEC;&#x7684;&#x6570;&#x7EC4;&#x662F;&#xFF1A;
[[3 7 5]
 [8 4 3]
 [2 4 9]]

&#x8C03;&#x7528; amin() &#x51FD;&#x6570;&#xFF1A;  np.amin(a, 1) &#x6BCF;&#x884C;&#x7684;&#x6700;&#x5C0F;&#x503C;    &#x884C;&#x662F;&#x8F74;1
[3 3 2]

&#x518D;&#x6B21;&#x8C03;&#x7528; amin() &#x51FD;&#x6570;&#xFF1A; np.amin(a, 0) &#x6BCF;&#x5217;&#x7684;&#x6700;&#x5C0F;&#x503C;   &#x5217;&#x662F;&#x8F74;0
[2 4 3]

&#x8C03;&#x7528; amax() &#x51FD;&#x6570;&#xFF1A;  np.amax(a) &#x6240;&#x6709;&#x5143;&#x7D20;&#x4E2D;&#x7684;&#x6700;&#x5927;&#x503C;
9

&#x518D;&#x6B21;&#x8C03;&#x7528; amax() &#x51FD;&#x6570;&#xFF1A;   np.amax(a, axis=0) &#x8F74;&#x4E3A;0&#x7684;&#x6700;&#x5927;&#x503C;   &#x6BCF;&#x5217;&#x7684;&#x6700;&#x5927;&#x503C;
[8 7 9]
'''

numpy.percentile() &#x767E;&#x5206;&#x4F4D;&#x6570; &#x4E2D;&#x4F4D;&#x6570;

1.17 numpy线性代数

import numpy as np
from matplotlib import pyplot as plt

1.

x = np.arange(1, 11)
y = 2 * x + 5
plt.title("Matplotlib demo")
plt.xlabel("x axis caption")
plt.ylabel("y axis caption")
plt.plot(x, y)
plt.show()

 2.&#x56FE;&#x5F62;&#x4E2D;&#x6587;&#x663E;&#x793A;
import numpy as np
from matplotlib import pyplot as plt
import matplotlib

fname &#x4E3A; &#x4F60;&#x4E0B;&#x8F7D;&#x7684;&#x5B57;&#x4F53;&#x5E93;&#x8DEF;&#x5F84;&#xFF0C;&#x6CE8;&#x610F; SimHei.ttf &#x5B57;&#x4F53;&#x7684;&#x8DEF;&#x5F84;
zhfont1 = matplotlib.font_manager.FontProperties(fname="SimHei.ttf")

x = np.arange(1, 11)
y = 2 * x + 5
plt.title("&#x83DC;&#x9E1F;&#x6559;&#x7A0B; - &#x6D4B;&#x8BD5;", fontproperties=zhfont1)

fontproperties &#x8BBE;&#x7F6E;&#x4E2D;&#x6587;&#x663E;&#x793A;&#xFF0C;fontsize &#x8BBE;&#x7F6E;&#x5B57;&#x4F53;&#x5927;&#x5C0F;
plt.xlabel("x &#x8F74;", fontproperties=zhfont1)
plt.ylabel("y &#x8F74;", fontproperties=zhfont1)
plt.plot(x, y)
plt.show()

3.&#x8981;&#x663E;&#x793A;&#x5706;&#x6765;&#x4EE3;&#x8868;&#x70B9;&#xFF0C;&#x800C;&#x4E0D;&#x662F;&#x4E0A;&#x9762;&#x793A;&#x4F8B;&#x4E2D;&#x7684;&#x7EBF;&#xFF0C;&#x8BF7;&#x4F7F;&#x7528; ob &#x4F5C;&#x4E3A; plot() &#x51FD;&#x6570;&#x4E2D;&#x7684;&#x683C;&#x5F0F;&#x5B57;&#x7B26;&#x4E32;&#x3002;
import numpy as np
from matplotlib import pyplot as plt

x = np.arange(1, 11)
y = 2 * x + 5
plt.title("Matplotlib demo")
plt.xlabel("x axis caption")
plt.ylabel("y axis caption")
plt.plot(x, y, "ob")
plt.show()

4.&#x7ED8;&#x5236;&#x6B63;&#x5F26;&#x6CE2; &#x4EE5;&#x4E0B;&#x5B9E;&#x4F8B;&#x4F7F;&#x7528; matplotlib &#x751F;&#x6210;&#x6B63;&#x5F26;&#x6CE2;&#x56FE;

import numpy as np
import matplotlib.pyplot as plt

&#x8BA1;&#x7B97;&#x6B63;&#x5F26;&#x66F2;&#x7EBF;&#x4E0A;&#x70B9;&#x7684; x &#x548C; y &#x5750;&#x6807;
x = np.arange(0, 3 * np.pi, 0.1)
y = np.sin(x)
plt.title("sine wave form")
&#x4F7F;&#x7528; matplotlib &#x6765;&#x7ED8;&#x5236;&#x70B9;
plt.plot(x, y)
plt.show()

# 5.subplot()：subplot() 函数允许你在同一图中绘制不同的东西。

import numpy as np
import matplotlib.pyplot as plt

&#x8BA1;&#x7B97;&#x6B63;&#x5F26;&#x548C;&#x4F59;&#x5F26;&#x66F2;&#x7EBF;&#x4E0A;&#x7684;&#x70B9;&#x7684; x &#x548C; y &#x5750;&#x6807;
x = np.arange(0, 3 * np.pi, 0.1)
y_sin = np.sin(x)
y_cos = np.cos(x)
&#x5EFA;&#x7ACB; subplot(&#x6B21;&#x8981;&#x60C5;&#x8282;) &#x7F51;&#x683C;&#xFF0C;&#x9AD8;&#x4E3A; 2&#xFF0C;&#x5BBD;&#x4E3A; 1
&#x6FC0;&#x6D3B;&#x7B2C;&#x4E00;&#x4E2A; subplot
plt.subplot(2, 1, 1)
&#x7ED8;&#x5236;&#x7B2C;&#x4E00;&#x4E2A;&#x56FE;&#x50CF;
plt.plot(x, y_sin)
plt.title('Sine')
&#x5C06;&#x7B2C;&#x4E8C;&#x4E2A; subplot(&#x6B21;&#x8981;&#x60C5;&#x8282;) &#x6FC0;&#x6D3B;&#xFF0C;&#x5E76;&#x7ED8;&#x5236;&#x7B2C;&#x4E8C;&#x4E2A;&#x56FE;&#x50CF;
plt.subplot(2, 1, 2)
plt.plot(x, y_cos)
plt.title('Cosine')
&#x5C55;&#x793A;&#x56FE;&#x50CF;
plt.show()

6.bar()
pyplot &#x5B50;&#x6A21;&#x5757;&#x63D0;&#x4F9B; bar() &#x51FD;&#x6570;&#x6765;&#x751F;&#x6210;&#x6761;&#x5F62;&#x56FE;&#x3002;
&#x4EE5;&#x4E0B;&#x5B9E;&#x4F8B;&#x751F;&#x6210;&#x4E24;&#x7EC4; x &#x548C; y &#x6570;&#x7EC4;&#x7684;&#x6761;&#x5F62;&#x56FE;&#x3002;

from matplotlib import pyplot as plt

x = [5, 8, 10]
y = [12, 16, 6]
x2 = [6, 9, 11]
y2 = [6, 15, 7]
plt.bar(x, y, align='center')
plt.bar(x2, y2, color='g', align='center')
plt.title('Bar graph')
plt.ylabel('Y axis')
plt.xlabel('X axis')
plt.show()

7.numpy.histogram()
import numpy as np

a = np.array([22, 87, 5, 43, 56, 73, 55, 54, 11, 20, 51, 5, 79, 31, 27])
# np.histogram returns (counts per bin, bin edges). It is called once and
# unpacked; the earlier call whose result was discarded has been removed.
hist, bins = np.histogram(a, bins=[0, 20, 40, 60, 80, 100])
print(hist)
print(bins)

# 8.plt.hist()
# Matplotlib 可以将直方图的数字表示转换为图形。pyplot 子模块的 hist() 函数将包含数据的数组和 bin 数组作为参数，并转换为直方图。
from matplotlib import pyplot as plt
import numpy as np

a = np.array([22, 87, 5, 43, 56, 73, 55, 54, 11, 20, 51, 5, 79, 31, 27])
plt.hist(a, bins=[0, 20, 40, 60, 80, 100])
plt.title("histogram")
plt.show()

1.18 numpy IO

import numpy as np

a = np.array([1, 2, 3, 4, 5])

&#x4FDD;&#x5B58;&#x5230; outfile.npy &#x6587;&#x4EF6;&#x4E0A;
np.save('outfile.npy', a)

&#x4FDD;&#x5B58;&#x5230; outfile2.npy &#x6587;&#x4EF6;&#x4E0A;&#xFF0C;&#x5982;&#x679C;&#x6587;&#x4EF6;&#x8DEF;&#x5F84;&#x672B;&#x5C3E;&#x6CA1;&#x6709;&#x6269;&#x5C55;&#x540D; .npy&#xFF0C;&#x8BE5;&#x6269;&#x5C55;&#x540D;&#x4F1A;&#x88AB;&#x81EA;&#x52A8;&#x52A0;&#x4E0A;
np.save('outfile2', a)

b = np.load('outfile.npy')
print(b)  # [1 2 3 4 5]

a = np.array([[1, 2, 3], [4, 5, 6]])
b = np.arange(0, 1.0, 0.1)
c = np.sin(b)
c &#x4F7F;&#x7528;&#x4E86;&#x5173;&#x952E;&#x5B57;&#x53C2;&#x6570; sin_array
np.savez("runoob.npz", a, b, sin_array=c)
r = np.load("runoob.npz")
print(r.files)  # &#x67E5;&#x770B;&#x5404;&#x4E2A;&#x6570;&#x7EC4;&#x540D;&#x79F0;  ['sin_array', 'arr_0', 'arr_1']
print(r["arr_0"])  # &#x6570;&#x7EC4; a [[1 2 3] [4 5 6] ]
print(r["arr_1"])  # &#x6570;&#x7EC4; b   [0.  0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9]
print(r["sin_array"])  # &#x6570;&#x7EC4; c
[0.         0.09983342 0.19866933 0.29552021 0.38941834 0.47942554
0.56464247 0.64421769 0.71735609 0.78332691]

a = np.array([1, 2, 3, 4, 5])
np.savetxt('out.txt', a)
b = np.loadtxt('out.txt')

print(b)  # [1. 2. 3. 4. 5.]

a = np.arange(0, 10, 0.5).reshape(4, -1)
np.savetxt("out.txt", a, fmt="%d", delimiter=",")  # &#x6539;&#x4E3A;&#x4FDD;&#x5B58;&#x4E3A;&#x6574;&#x6570;&#xFF0C;&#x4EE5;&#x9017;&#x53F7;&#x5206;&#x9694;
b = np.loadtxt("out.txt", delimiter=",")  # load &#x65F6;&#x4E5F;&#x8981;&#x6307;&#x5B9A;&#x4E3A;&#x9017;&#x53F7;&#x5206;&#x9694;
print(b)  # [[0. 0. 1. 1. 2.]
[2. 3. 3. 4. 4.]
[5. 5. 6. 6. 7.]
[7. 8. 8. 9. 9.]]

import numpy as np
import random

1.&#x5207;&#x7247; &#x548C;&#x5E8F;&#x5217;&#x4E00;&#x81F4;
arr = np.arange(3, 15)
print(arr[2:4])
print(arr[-1])
print(arr[::2])

# Plain assignment only binds a second name to the same ndarray object.
brr = arr
print(brr)
print(id(brr), id(arr))
# Unlike a list, a basic slice such as arr[:] is a *view* that shares arr's
# data buffer, so writes through it would also change arr. copy() gives an
# independent array.
brr = arr.copy()
print(brr)
print(id(brr), id(arr))

2.&#x9AD8;&#x7EA7;&#x5207;&#x7247; &#x5E03;&#x5C14;&#x7C7B;&#x578B;&#x5207;&#x7247;
print("*"*100)
print(arr > 6)  # arr > 6 &#x6570;&#x7EC4;&#x4E2D;&#x6BCF;&#x4E2A;&#x6210;&#x5458;&#x7684;&#x503C;&#x548C;&#x8868;&#x8FBE;&#x8FD0;&#x7B97; ,&#x7ED3;&#x679C;&#x751F;&#x6210;&#x4E00;&#x4E2A;&#x65B0;&#x7684;&#x6570;&#x7EC4;
print(arr[arr > 6])  # &#x4E3A;True&#x6210;&#x5458;&#x4FDD;&#x7559;  &#x4E3A;False&#x5220;&#x9664;
print(arr[arr != 9])
&#x5207;&#x7247; &#x5927;&#x4E8E;9 &#x5C0F;&#x4E8E;12   & | ~
print(arr[(arr > 9) & (arr < 12)] )

#3. Numpy&#x5411;&#x91CF;&#x8FD0;&#x7B97;&#xFF08;&#xFF09;
print("*"*100)
arr = np.array([1, 2, 3, 4, 5])
brr = np.array([3, 1, 4, 1, 5])
print(arr + brr)  # &#x4E24;&#x4E2A;&#x6570;&#x7EC4;&#x8FD0;&#x7B97;,&#x5BF9;&#x5E94;&#x7684;&#x6210;&#x5458;&#x8FDB;&#x884C;&#x8FD0;&#x7B97; &#x4E24;&#x4E2A;&#x6570;&#x7EC4;shape&#x76F8;&#x540C;&#x65F6;&#x5019;&#x624D;&#x53EF;&#x4EE5;&#x8FD0;&#x7B97;

# 4. Numpy统计方法(轴方向axis : 0,1)（sum,max,min,argmax,mean,cumsum,cumprod）
 &#x9488;&#x5BF9;&#x4E8C;&#x7EF4;&#x6570;&#x7EC4;
arr = np.arange(1, 10).reshape(3, 3)
print(arr)
print(arr.sum())  # &#x6CA1;&#x6709;&#x8BBE;&#x7F6E;&#x8F74;&#x65B9;&#x5411;,&#x628A;&#x6240;&#x6709;&#x7684;&#x6210;&#x5458;&#x8FDB;&#x884C;&#x7D2F;&#x52A0;
print(arr.sum(axis=0))  # &#x6C42;&#x6BCF;&#x4E00;&#x5217;&#x7684;&#x603B;&#x548C;   [12 15 18]
print(arr.sum(axis=1))  # &#x6C42;&#x6BCF;&#x4E00;&#x884C;&#x603B;&#x548C;   [6 15 24]
print(arr.max(axis=0))
print(arr.max(axis=1))
print(arr.mean(axis=0))
print(arr.argmax())  # &#x627E;&#x51FA;&#x6700;&#x5927;&#x503C;&#x6240;&#x5728;&#x4F4D;&#x7F6E;
print(arr.cumsum())  # &#x7D2F;&#x52A0;&#x6C42;&#x548C;
print(np.cumprod(arr, axis=1))
print("*"*100)

arr = np.arange(1, 13).reshape(2,2, 3)
print(arr)
print(arr.sum(axis=2))

5.Numpy&#x53BB;&#x91CD; np.unique(&#x8FED;&#x4EE3;&#x5BF9;&#x8C61;)
arr = np.array([3, 1, 4, 1, 5, 3, 5])
arr = np.array([[1, 2, 3, 4], [5, 5, 6, 6]])
print(arr)
print(np.unique(arr))  # [1 2 3 4 5 6]
arr = np.array([1.8, 2.6, 3.1])
print(np.ceil(arr))  # &#x5411;&#x4E0A;&#x53D6;&#x6574;
print(np.floor(arr))  # &#x5411;&#x4E0B;&#x53D6;&#x6574;
&#x56DB;&#x820D;&#x4E94;&#x5165;?

&#x7EC3;&#x4E60;1: &#x4EA7;&#x751F;1-9&#x968F;&#x673A;&#x6570;&#xFF0C;&#x8981;&#x6C42;3*3&#x4E8C;&#x7EF4;&#x6570;&#x7EC4;
arr = np.random.randint(1, 10, size=(3, 3))
print(arr)
&#x7EC3;&#x4E60;2: &#x7EDF;&#x8BA1;&#x6BCF;&#x5217;&#x4E2D;&#x5927;&#x4E8E;6&#x7684;&#x5143;&#x7D20;&#x4E2A;&#x6570;
print(arr>6)
brr = arr > 6

&#x7EC3;&#x4E60;3&#xFF1A;&#x7EDF;&#x8BA1;&#x4E00;&#x884C;&#x4E2D;&#x4E09;&#x4E2A;&#x503C;&#x90FD;&#x662F;&#x5927;&#x4E8E;3&#x7684;&#x884C;&#x7684;&#x4E2A;&#x6570;
print("*"*100)
print(arr)
crr = arr>3
drr = crr.sum(axis=1)
err = drr == 3
print(err.sum())
 all&#x8868;&#x793A;&#x6240;&#x6709;&#x5FC5;&#x987B;&#x90FD;&#x6EE1;&#x8DB3;&#x65F6;&#x5019;&#x624D;&#x8FD4;&#x56DE;True
brr = arr > 3
print(brr)
print(brr.all(axis=1))
print(brr.all(axis=1).sum())

&#x521B;&#x5EFA;&#x4E00;&#x4E2A;&#x7531;1~9&#x968F;&#x673A;&#x751F;&#x6210; 3*3&#x4E8C;&#x7EF4;&#x6570;&#x7EC4;,&#x6BCF;&#x4E2A;&#x6570;&#x7EC4;&#x53EA;&#x51FA;&#x73B0;&#x4E00;&#x6B21;
arr = np.arange(1, 10)
# Shuffle in place with NumPy's own routine. The stdlib random.shuffle is
# written for plain Python sequences, and np.random.shuffle matches the
# later version of this exercise in the same tutorial.
np.random.shuffle(arr)
print(arr)
print(arr.reshape(3, 3))

其他使用

import numpy as np
1.Numpy &#x666E;&#x901A;&#x5207;&#x7247;  &#x548C;&#x5E8F;&#x5217;&#x4E00;&#x81F4;
arr = np.arange(1, 10)
print(arr)  # [1 2 3 4 5 6 7 8 9]
print(arr[2:4])  # [3 4]
print(arr[-1])  # 9
print(arr[::2])  # [1 3 5 7 9]  front to back with a step of 2

brr = arr
print(id(brr))  # same object as arr
# arr[:] would create a new ndarray object, but only as a view of the same
# data; copy() is what actually duplicates the values.
brr = arr.copy()
print(id(brr))  # a different object that owns its own data

a = np.arange(1, 10).reshape(3, 3)
print(a)
[[1 2 3]
 [4 5 6]
 [7 8 9]]
print(a[2])
[7 8 9]
print(a[:2])
[[1 2 3]
 [4 5 6]]
print(a[2, 1])   # &#x4E8C;&#x7EF4;&#x6570;&#x7EC4;&#x5C31;&#x662F;&#x5C31;&#x5750;&#x6807;   &#x5750;&#x6807;&#x786E;&#x5B9A;   &#x5728;&#x884C;&#x4F4D;0&#x8F74;&#x7684;&#x4F4D;&#x7F6E;&#x5728;2  1&#x8F74;&#x7684;&#x4F4D;&#x7F6E;&#x5728;1
8
print(a[:2, :1]) # &#x4E24;&#x4E2A;&#x7EF4;&#x5EA6;&#x4E0A;&#x5207;&#x7247;
[[1]
 [4]]
print(a[1, :2]) # &#x4E00;&#x4E2A;&#x7EF4;&#x5EA6;&#x7D22;&#x5F15;,&#x4E00;&#x4E2A;&#x7EF4;&#x5EA6;&#x5207;&#x7247;
[4 5]
print(a[:, :1])
[[1]
 [4]
 [7]]
a[:2, :1] = 0  # &#x5207;&#x7247;&#x8D4B;&#x503C;

.2. Numpy &#x82B1;&#x5F0F;&#x5207;&#x7247; (&#x8303;&#x56F4;,  & |  !&#xFF0C;&#x5E03;&#x5C14;&#x7D22;&#x5F15;)
print("*" * 100)
(1)&#x5E03;&#x5C14;&#x7D22;&#x5F15;   arr>6 &#x751F;&#x6210;&#x4E00;&#x4E2A;&#x65B0;&#x7684;&#x6570;&#x7EC4;,&#x6570;&#x7EC4;&#x4E2D;&#x7684;&#x6BCF;&#x4E2A;&#x6210;&#x5458;&#x548C;&#x8868;&#x8FBE;&#x5F0F;&#x8FD0;&#x7B97;&#x7684;&#x7ED3;&#x679C;
arr = np.arange(3, 10)
print(arr)  # [3 4 5 6 7 8 9]
print(arr > 6)  # [False False False False  True  True  True]
print(arr[arr > 6])  # [7 8 9]   elements whose mask value is True are kept
print(arr[arr != 3])  # [4 5 6 7 8 9]  the element equal to 3 is dropped

# Select values greater than 4 and less than 8; masks combine with & | ~.
# print(arr[arr > 4 and arr < 8])  # ValueError: `and` needs one truth value
# print(arr[arr > 4 & arr < 8])    # ValueError: & binds tighter than > and <
print(arr[(arr > 4) & (arr < 8)])  # [5 6 7]

3 Numpy&#x5411;&#x91CF;&#x8FD0;&#x7B97;&#xFF08;&#xFF09;
print("3****")
arr = np.array([1, 2, 3, 4, 5])
brr = np.array([1, 2, 3, 4, 5])
crr = np.array([1, 2, 3, 4, 5, 6])

print(arr + 2)  # [3 4 5 6 7]
print(arr + brr)  # [ 2  4  6  8 10]
# Element-wise arithmetic needs matching (or broadcastable) shapes. (5,) and
# (6,) are not compatible, so the addition raises ValueError; catch it so the
# rest of the script keeps running.
try:
    print(arr + crr)
except ValueError as exc:
    print(exc)

4   Numpy&#x7EDF;&#x8BA1;&#x65B9;&#x6CD5;(&#x8F74;&#x65B9;&#x5411;axis : 0,1)
（sum,max,min,argmax,mean,cumsum,cumprod）
print("4*****")
arr = np.arange(1, 10).reshape(3, 3)
print(arr)
[[1 2 3]
 [4 5 6]
 [7 8 9]]
print(arr.sum())  # 45
print(arr.sum(axis=0))
[12 15 18]
print(arr.sum(axis=1))
[ 6 15 24]

# `a` is the 3x3 array built earlier from np.arange(1, 10) with a[:2, :1] set
# to 0, i.e. [[0 2 3] [0 5 6] [7 8 9]].
print(a.max(axis=0))  # [7 8 9]
print(a.min())  # 0
print(a.mean(axis=1))  # [1.66666667 3.66666667 8.        ]
print(a.std())  # 3.1661792997277796
print(a.var())  # 10.024691358024691
print(a.cumsum())  # [ 0  2  5  5 10 16 23 31 40]
print(a.cumsum(axis=1))  # [[ 0  2  5] [ 0  5 11] [ 7 15 24]]  running sum along each row
print(a.cumprod())  # [0 0 0 0 0 0 0 0 0]
&#x627E;&#x51FA;&#x6700;&#x5927;&#x7684;&#x6700;&#x5728;&#x4F4D;&#x7F6E;
print(arr.max())  # &#x6700;&#x5927;&#x503C; 9
print(arr.argmax())  # 8    &#x4E0B;&#x6807;&#x4ECE;0&#x5F00;&#x59CB;

&#x4E09;&#x7EF4;&#x6570;&#x7EC4;
print('***&#x4E09;&#x7EF4;&#x6570;&#x7EC4;***')
arr = np.arange(1, 13).reshape(2, 2, 3)
print(arr)
[[[ 1  2  3]
  [ 4  5  6]]
#
 [[ 7  8  9]
  [10 11 12]]]

print(arr.sum(axis=0))
[[ 8 10 12]
 [14 16 18]]
print(arr.sum(axis=1))
[[ 5  7  9]
 [17 19 21]]
print(arr.sum(axis=2))
[[ 6 15]
 [24 33]]

5.Numpy&#x53BB;&#x91CD; np.unique(&#x8FED;&#x4EE3;&#x5BF9;&#x8C61;)
arr = np.array([1, 2, 3, 2, 3, 4, 5, 6, 4])
brr = np.array([[1, 2, 3, 4], [1, 2, 2, 3]])
print(arr)  # [1 2 3 2 3 4 5 6 4]
print(np.unique(arr))  # [1 2 3 4 5 6]

print(brr)
print(np.unique(brr))

&#x6D6E;&#x70B9;&#x7C7B;&#x578B;&#x7684;&#x64CD;&#x4F5C;
arr = np.array([1.2, 2.6, 3.1])
print(np.ceil(arr))  # [2. 3. 4.]
print(np.floor(arr))  # [1. 2. 3.]
print(np.round(arr))  # [1. 3. 3.]

&#x7EC3;&#x4E60;1: &#x4EA7;&#x751F;1-9&#x968F;&#x673A;&#x6570;&#xFF0C;&#x8981;&#x6C42;3*3&#x4E8C;&#x7EF4;&#x6570;&#x7EC4;
arr = np.random.randint(1, 10, size=(3, 3))
print(arr)
[[4 2 2]
 [8 3 8]
 [6 5 7]]
&#x7EC3;&#x4E60;2: &#x7EDF;&#x8BA1;&#x6BCF;&#x5217;&#x4E2D;&#x5927;&#x4E8E;6&#x7684;&#x5143;&#x7D20;&#x4E2A;&#x6570;
brr=arr>6
print(brr)
[[False False False]
 [ True False  True]
 [False False  True]]
print(brr.sum(axis=0)) # [1 0 2]

&#x7EC3;&#x4E60;3&#xFF1A;&#x7EDF;&#x8BA1;&#x4E00;&#x884C;&#x4E2D;&#x4E09;&#x4E2A;&#x503C;&#x90FD;&#x662F;&#x5927;&#x4E8E;3&#x7684;&#x884C;&#x7684;&#x4E2A;&#x6570;
&#x7EC3;&#x4E60;3 &#x65B9;&#x6CD5;1
arr = np.random.randint(1, 10, size=(3, 3))
print(arr)
crr=arr>3
print(crr)
[[ True False  True]
 [ True  True False]
 [ True  True  True]]
drr=crr.sum(axis=1)
print(drr) # [2 2 3]
print(drr==3)  # [False False  True]
print(drr[drr==3].sum()/3)

&#x7EC3;&#x4E60;3 &#x65B9;&#x6CD5;2
print('&#x7EC3;&#x4E60;3********')
crr=arr>3
drr=crr.sum(axis=1)
err = drr==3
print(crr)
[[ True  True False]
 [ True  True  True]
 [ True  True  True]]
print(drr) # False &#x4E3A;0 True&#x4E3A;1 &#x6C42;&#x548C;
[2 2 3]
print(err)
[False  True  True]
print(err.sum()) # False &#x4E3A;0 True&#x4E3A;1 &#x6C42;&#x548C;
2

&#x7EC3;&#x4E60;3 &#x7B80;&#x5355;&#x65B9;&#x6CD5;  &#x8C03;&#x7528;all &#x8868;&#x793A;&#x6240;&#x6709;&#x5FC5;&#x9700;&#x6EE1;&#x8DB3; &#x624D;&#x8FD4;&#x56DE;True
arr = np.random.randint(1, 10, size=(3, 3))
print(arr)
[[9 8 7]
 [2 2 2]
 [5 5 2]]
brr=arr>3
print(brr)
[[ True  True  True]
 [False False False]
 [ True  True False]]
print(brr.all(axis=1))   # &#x6BCF;&#x4E00;&#x884C;&#x4E3A;true &#x5C31;&#x4E3A;true    &#x8FD9;&#x91CC;&#x7684;axis=1
[ True False False]
print(brr.all(axis=1).sum()) # 1

&#x7EC3;&#x4E60;4 :&#x521B;&#x5EFA;&#x4E00;&#x4E2A;&#x7531;1-9&#x968F;&#x673A;&#x751F;&#x6210; 3*3 &#x4E8C;&#x7EF4;&#x6570;&#x7EC4;,&#x6BCF;&#x4E2A;&#x6570;&#x5B57;&#x53EA;&#x51FA;&#x73B0;&#x4E00;&#x6B21;
print('&#x7EC3;&#x4E60;4***********')
arr=np.arange(1,10)
print(arr) # [1 2 3 4 5 6 7 8 9]
&#x628A;arr&#x6570;&#x7EC4;&#x968F;&#x673A;&#x6392;&#x5217;
np.random.shuffle(arr)
print(arr) # [7 4 9 5 3 2 8 6 1]
print(arr.reshape(3,3))
[[7 4 9]
 [5 3 2]
 [8 6 1]]
</8])></8])>

3.1 Pandas的使用Series序列

&#x5E8F;&#x5217;
import pandas as pd
import numpy as np

1.&#x521B;&#x5EFA;&#x5E8F;&#x5217;
lst = [1, 2, 3, 4, 5]
tup = (4, 5, 6)
dic = {'name':'candle', 'age':18}  # &#x5B57;&#x5178;&#x7684;&#x952E;&#x5C31;&#x662F;&#x5E8F;&#x5217;&#x884C;&#x7D22;&#x5F15;
arr = np.array(lst)
pd.Series(arr)
# 2.自定义索引
se = pd.Series(range(1, 5), index=list('abcd'))  # &#x7D22;&#x5F15;&#x7684;&#x4E2A;&#x6570;&#x8981;&#x548C;&#x5143;&#x7D20;&#x7684;&#x4E2A;&#x6570;&#x8981;&#x4E00;&#x81F4;
# 索引可以相同,创建时候没有问题,取值的时候会有问题
print(se)
se = pd.Series(range(1, 10))
se.index = list("abcdefghi")
print(se)
# print(se.values)  # values of the Series as a one-dimensional ndarray
# print(list(se.index))  # index labels of the Series

# 4. Read single values from the Series.
# Use .iloc for positions: with a string index, se[2] relies on a deprecated
# fallback that treats the integer key as a position.
print(se.iloc[2])  # by position
print(se['c'])  # by label

5 &#x82B1;&#x5F0F;&#x7D22;&#x5F15;
&#x5207;&#x7247;  &#x9ED8;&#x8BA4;&#x6570;&#x5B57;&#x7D22;&#x5F15;
print(se[2:])
&#x7D22;&#x5F15;&#x5217;&#x8868; [[ ]]
lst = ['a', 'e', 'f']
print(se[['a', 'e', 'f']])
bool&#x7C7B;&#x578B;&#x6570;&#x636E; se[se > 5]
print(se[se > 5])
print('e' in se.index)  # &#x5224;&#x65AD;&#x5E8F;&#x5217;&#x4E2D;&#x662F;&#x5426;&#x5B58;&#x5728;&#x6307;&#x5B9A;&#x7D22;&#x5F15;&#xFF1A; idx in &#x5E8F;&#x5217;.index

&#x5206;&#x7C7B;&#x7EDF;&#x8BA1;
se = pd.Series(['aa', 'bb', 'cc', 'aa', 'aa', 'cc'])
print(se.value_counts())  # &#x9ED8;&#x8BA4;&#x964D;&#x5E8F;
print(se.value_counts(ascending=True))

&#x68C0;&#x6D4B;&#x7F3A;&#x5931;&#x503C;
se = pd.Series(['1', '2', '3', '4', '5'], index=list('ABCDE'))
# The index holds string labels, so address the slots by position explicitly
# with .iloc instead of a bare integer key.
se.iloc[1] = None
se.iloc[3] = np.nan
print(se)

print(se.dropna())  # dropna() removes the missing entries (None / NaN)

lst = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
print(pd.DataFrame(lst))
    id age name
1
2
3

2_1 pandas的使用_Series(序列)

import pandas as pd
import numpy as np
Python&#x6700;&#x5F3A;&#x5927;&#x6570;&#x636E;&#x5206;&#x6790;&#x5DE5;&#x5177;&#xFF0C;&#x5305;&#x542B;&#x4E86;&#x4E24;&#x79CD;&#x91CD;&#x8981;&#x7684;&#x6570;&#x636E;&#x7C7B;&#x578B;&#xFF1A;Series, DataFrame
1. Series&#xFF1A;&#x5E8F;&#x5217; &#x7531;&#x7D22;&#x5F15;&#x5217;+&#x503C;      (&#x7C7B;&#x4F3C;&#x5B57;&#x5178;)
(1) &#x5E8F;&#x5217;&#x7684;&#x5B9A;&#x4E49; s = pd.Series(data, index)
    data&#x4E3A;&#x5217;&#x8868;&#xFF0C;&#x5143;&#x7EC4;&#x6216;&#x5B57;&#x5178;&#xFF08;key&#x662F;&#x884C;&#x7D22;&#x5F15;&#xFF09;   numpy&#x4E2D;&#x53EA;&#x80FD;&#x7528;&#x5217;&#x8868;&#x548C;&#x5143;&#x7EC4;&#x521B;&#x5EFA;

lst=[1,2,3,4,5]
print(pd.Series(lst))

0    1
1    2
2    3
3    4
4    5
dtype: int64

tup=(4,5,6)
print(pd.Series(tup))
0    4
1    5
2    6
dtype: int64

&#x5E8F;&#x5217;&#x7684;&#x952E;&#x5C31;&#x662F;&#x884C;&#x7684;&#x7D22;&#x5F15;
dic={"lihua":18,"zm":10}
print(pd.Series(dic))
lihua    18
zm       10
dtype: int64

arr=np.array([1,2,3,4,5])
print(pd.Series(arr))
0    1
1    2
2    3
3    4
4    5
dtype: int32

lst2=[[1,2,3],[4,5,6],[7,8,9]] # &#x7C7B;&#x4F3C;&#x8868;&#x683C;
print(pd.DataFrame(lst2))
   0  1  2
0  1  2  3
1  4  5  6
2  7  8  9

(2)&#x9ED8;&#x8BA4;&#x7D22;&#x5F15;&#x4ECE;0&#x5F00;&#x59CB;&#xFF0C;&#x53EF;&#x4EE5;&#x81EA;&#x5B9A;&#x4E49;&#x7D22;&#x5F15;
(2)_1
print(type(list('abcd'))) # <class 'list'>
se=pd.Series(range(1,5),index=list('abcd'))
print(se)
a    1
b    2
c    3
d    4
dtype: int64

#a.&#x81EA;&#x5B9A;&#x4E49;&#x7D22;&#x5F15;&#x5FC5;&#x987B;&#x4E0E;&#x6570;&#x5B57;&#x4E2A;&#x6570;&#x4E00;&#x81F4;
se=pd.Series(range(1,5),index=list('abcde'))
    .format(val=len(data), ind=len(index)))
ValueError: Length of passed values is 4, index implies 5

#b.&#x81EA;&#x5B9A;&#x4E49;&#x7D22;&#x5F15;&#x76F8;&#x540C;,&#x521B;&#x5EFA;&#x7684;&#x65F6;&#x5019;&#x6CA1;&#x6709;&#x95EE;&#x9898;,&#x53D6;&#x503C;&#x7684;&#x65F6;&#x5019;&#x6709;&#x95EE;&#x9898;
se=pd.Series(range(1,5),index=list('aade'))
print(se)
a    1
a    2
d    3
e    4
dtype: int64

#(2)_2
se=pd.Series(range(1,5))
print(se)
0    1
1    2
2    3
3    4

list('abcd') &#x8FD9;&#x4E2A;&#x662F;list&#x7684;&#x58F0;&#x660E;   dic()  tuple()
se.index=list('abcd')
print(se)
a    1
b    2
c    3
d    4

#&#x83B7;&#x53D6;&#x5E8F;&#x5217;&#x7684;&#x7D22;&#x5F15;
print(se.index)
Index(['a', 'b', 'c', 'd'], dtype='object')
&#x83B7;&#x53D6;&#x5E8F;&#x5217;&#x4E2D;&#x7684;&#x503C;  &#x5E8F;&#x5217;&#x4E2D;&#x7684;&#x503C;&#x5B9E;&#x9645;&#x4E0A;&#x5C31;&#x662F;&#x4E00;&#x7EF4;&#x6570;&#x7EC4;
print(se.values)
[1 2 3 4]

2. &#x901A;&#x8FC7;&#x7D22;&#x5F15;&#x83B7;&#x53D6;&#x5E8F;&#x5217;&#x4E2D;&#x7684;&#x503C; &#xFF08;&#x9ED8;&#x8BA4;&#x7684;&#x7D22;&#x5F15;&#x6216;&#x81EA;&#x5B9A;&#x4E49;&#x7D22;&#x5F15;&#xFF09;
(1)&#x53EF;&#x4EE5;&#x901A;&#x8FC7;&#x7CFB;&#x7EDF;&#x9ED8;&#x8BA4;&#x7684;&#x7D22;&#x5F15;,&#x83B7;&#x53D6;&#x5143;&#x7D20;

a    1
b    2
c    3
d    4
print(se.iloc[2])  # 3 (by position; se[2] on a string-labelled index is deprecated)

print(se[2:])
c    3
d    4
print(se['c']) # 3

3 &#x5206;&#x522B;&#x67E5;&#x770B;&#x7D22;&#x5F15;index&#x548C;&#x503C;values
list('abcd') &#x8FD9;&#x4E2A;&#x662F;list&#x7684;&#x58F0;&#x660E;   dic()  tuple()
se.index=list('abcd')
print(se)
a    1
b    2
c    3
d    4

#&#x83B7;&#x53D6;&#x5E8F;&#x5217;&#x7684;&#x7D22;&#x5F15;
print(se.index)
Index(['a', 'b', 'c', 'd'], dtype='object')
&#x83B7;&#x53D6;&#x5E8F;&#x5217;&#x4E2D;&#x7684;&#x503C;  &#x5E8F;&#x5217;&#x4E2D;&#x7684;&#x503C;&#x5B9E;&#x9645;&#x4E0A;&#x5C31;&#x662F;&#x4E00;&#x7EF4;&#x6570;&#x7EC4;
print(se.values)
[1 2 3 4]

#4. &#x82B1;&#x5F0F;&#x7D22;&#x5F15;&#xFF1A;
(1)&#x5E8F;&#x5217;[index]   index&#x662F;&#x5217;&#x8868;&#xFF08;&#x53EF;&#x4EE5;&#x7B5B;&#x9009;&#x591A;&#x5217;&#x503C;&#xFF09;
se=pd.Series(range(1,5),index=list('abcd'))
print(se)
a    1
b    2
c    3
d    4
print(se[2:])
c    3
d    4

(2)&#x5217;&#x8868;&#x7D22;&#x5F15; []
lst=['a','b']
print(se[lst])
a    1
b    2

print(se[['a','c']])
a    1
c    3

(3)&#x5E03;&#x5C14;&#x7D22;&#x5F15;
print(se>2)
a    False
b    False
c     True
d     True
print(se[se>2])
c    3
d    4

(4)&#x53EA;&#x4F1A;&#x5BF9;&#x5143;&#x7D20;&#x53BB;&#x91CD;&#xFF0C;&#x5E8F;&#x5217;.unique()
# size=4 draws one random value per index label. Without it a single scalar
# is broadcast to every row and unique() always returns exactly one value.
se2 = pd.Series(np.random.randint(1, 10, size=4), index=list('abcd'))
print(se2)
print(se2.unique())

(5)&#x5224;&#x65AD;&#x5E8F;&#x5217;&#x4E2D;&#x662F;&#x5426;&#x5B58;&#x5728;&#x6307;&#x5B9A;&#x7D22;&#x5F15;&#xFF1A; idx in &#x5E8F;&#x5217;.index
print('e' in se.index) # False

5.&#x7EDF;&#x8BA1;&#x5143;&#x7D20;&#x51FA;&#x73B0;&#x7684;&#x6B21;&#x6570;&#xFF1A;  # &#x5927;&#x6570;&#x636E; &#x5206;&#x5E03;&#x5F0F;&#x624D;&#x662F;&#x6838;&#x5FC3;   1&#x4E2A;&#x8282;&#x70B9;&#x6570;&#x636E;&#x5B58;&#x4E0D;&#x4E0B;,&#x5904;&#x7406;&#x4E0D;&#x4E86;
    # s.value_counts() &#x5206;&#x7C7B;&#x7EDF;&#x8BA1;&#xFF08;&#x9ED8;&#x8BA4;&#x4E3A;&#x964D;&#x5E8F;&#xFF09;
    #s.value_counts(ascending=True)
se=pd.Series(['aa','bb','cc','aa','bb','dd','aa'])
print(se.value_counts()) # &#x9ED8;&#x8BA4;&#x964D;&#x5E8F;
aa    3
bb    2
dd    1
cc    1
print(se.value_counts(ascending=True))
cc    1
dd    1
bb    2
aa    3

6> ***&#x68C0;&#x6D4B;&#x662F;&#x5426;&#x6709;&#x7F3A;&#x5931;&#x503C;&#xFF1A;  None  np.nan
s.isna() / s.isnull()：逐元素返回 bool，缺失值（None、NaN）处为 True，不会删除任何元素（isnull 是 isna 的别名）
s.dropna()：将缺失的元素剔除后返回新的序列

se = pd.Series(['1', '2', '3', '4', '5'], index=list('ABCDE'))
# Positional writes go through .iloc because the index labels are strings.
se.iloc[1] = None
se.iloc[3] = np.nan
print(type(None))  # <class 'NoneType'>
print(type(np.nan))  # <class 'float'>
# s.isna() / s.isnull()：逐元素返回 bool，缺失值（None、NaN）处为 True，不会删除任何元素
# s.dropna()：将缺失的元素剔除后返回新的序列

print(se)
A       1
B    None
C       3
D     NaN
E       5
print(se.isna())
A    False
B     True
C    False
D     True
E    False
print(se.isnull())
A    False
B     True
C    False
D     True
E    False

&#x5305;&#x62EC;None &#x548C; np.nan&#x7684;&#x5168;&#x90E8;&#x5254;&#x9664;
print(se.dropna())
A    1
C    3
E    5
</class></class></class>

3.2 Pandas的使用DataFrame

3.1 DataFrame基础知识
import pandas as pd
import numpy as np

# 1.DataFrame结构类似于数据库表结构的数据结构，其含有行索引index和列索引columns
lst = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
dic = {"name": "candle", "age": 18, "addr": "上海"}  # dict keys become column labels
arr = np.arange(1, 21).reshape(5, 4)
df = pd.DataFrame(arr, index=list('abcde'), columns=list('ABCD'))
print(df)

# 2. Select a single column.
print(df.A)
print(df['A'])

# 3. Select several columns.
print(df[['A', 'B', 'D']])

# 4. Add a column.
df['E'] = [5, 6, 7, 8, 9]
print(df)
print(df['A'])
print(df[:2])

# 5. Select rows. DataFrame.ix was removed in pandas 1.0: use .loc for
# labels and .iloc for positions.
print(df.loc[['a', 'd']])  # rows a and d
print(df.loc['c':'e'])  # label slices include both ends: rows c, d, e
print(df[1:4])  # positional slices are half-open: rows b, c, d

# 6. Append a row by assigning to a new label.
df.loc['f'] = [10, 20, 30, 40, 50]
print(df)

# 7. Look up cells with df.loc[row_label, column_label].
# df['c', 'A'] would raise KeyError because plain [] looks up a column label.
print(df.loc['c', 'A'])
print(df.loc[['c', 'f'], ['B', 'C']])

# 8. drop() returns a new DataFrame; df itself is left unchanged.
print(df.drop('a', axis=0))  # drop a row
print(df.drop('B', axis=1))  # drop a column
# Drop several rows or several columns at once.
print(df.drop(['b', 'c', 'e'], axis=0))
print(df.drop(['A', 'D'], axis=1))

# 9. Inspect the data.
print(df.iloc[:3])  # first three rows by position
print(df.head(3))
print(df.tail(3))
print(list(df.index))  # row labels
print(list(df.columns))  # column labels
print(df.values)  # underlying values as a 2-D ndarray
print(df.shape)  # (rows, columns)
print(df.describe())  # per-column summary statistics

10 &#x6392;&#x5E8F;&#x548C;&#x8F6C;&#x7F6E;
arr = np.arange(1, 21).reshape(5, 4)
df = pd.DataFrame(arr, index=list('cadbf'), columns=list('DBCA'))
&#x6392;&#x5E8F;
print(df)

&#x884C;/&#x5217;&#x7D22;&#x5F15;&#x6392;&#x5E8F;&#xFF08;axis&#x63A7;&#x5236;&#x884C;&#x5217;&#xFF0C;ascending&#x63A7;&#x5236;&#x5347;&#x964D;&#x5E8F;&#xFF09;
print(df.sort_index(axis=0, ascending=False))  # &#x9ED8;&#x8BA4;&#x6309;&#x7167;&#x884C;&#x6392;&#x5E8F;
print(df.sort_index(axis=1))
#
print(df.sort_values(by= 'A', ascending=False))  # &#x6309;&#x7167;&#x7D22;&#x5F15;A&#x5217;&#x7684;&#x6570;&#x636E;&#x6392;&#x5E8F;
#
print(df.T)  # &#x884C;&#x548C;&#x5217;&#x5012;&#x7F6E;

11 &#x4ECE;csv&#x683C;&#x5F0F;&#x6587;&#x4EF6;
df = pd.read_csv('users.csv')
print(df)

2_2 pandas的使用_DataFrame

import pandas as pd
import numpy as np

2. DataFrame&#x7ED3;&#x6784;&#x7C7B;&#x4F3C;&#x4E8E;&#x6570;&#x636E;&#x5E93;&#x8868;&#x7ED3;&#x6784;&#x7684;&#x6570;&#x636E;&#x7ED3;&#x6784;&#xFF0C;&#x5176;&#x542B;&#x6709;&#x884C;&#x7D22;&#x5F15;index&#x548C;&#x5217;&#x7D22;&#x5F15;columns

1> &#x521B;&#x5EFA;&#xFF1A;&#x5217;&#x8868;&#xFF0C;&#x6570;&#x7EC4;&#xFF0C;&#x5B57;&#x5178;&#xFF08;name,age,gender,tel&#xFF09;
#   NB: &#x5B57;&#x5178;&#x7684;key&#x4E3A;&#x5217;&#x7D22;&#x5F15;&#xFF0C;&#x503C;&#x5E94;&#x8BE5;&#x4E3A;&#x591A;&#x884C;&#x6570;
lst = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
print(pd.DataFrame(lst))
   0  1  2
0  1  2  3
1  4  5  6
2  7  8  9

print(pd.DataFrame(lst, index=list('abc'), columns=list('123')))
   1  2  3
a  1  2  3
b  4  5  6
c  7  8  9

&#x5B57;&#x5178;&#x7684;&#x952E;&#x9ED8;&#x8BA4;&#x662F;&#x5217;&#x7D22;&#x5F15;
dic = {"name": "lihua", "age": 18, "addr": "&#x4E0A;&#x6D77;"}
print(pd.DataFrame(dic, index=list("abc")))
    name  age   addr
a  lihua   18   &#x4E0A;&#x6D77;
b  lihua   18   &#x4E0A;&#x6D77;
c  lihua   18   &#x4E0A;&#x6D77;

2> &#x83B7;&#x53D6;&#x5355;&#x5217;&#x6570;&#x636E;
    df.&#x5217;&#x540D;     df[&#x2018;&#x5217;&#x540D;&#x2019;]
arr = np.arange(1, 21).reshape(5, 4)
df = pd.DataFrame(arr, index=list('abcde'), columns=list("ABCD"))
print(df)
    A   B   C   D
a   1   2   3   4
b   5   6   7   8
c   9  10  11  12
d  13  14  15  16
e  17  18  19  20
print(df.A)
a     1
b     5
c     9
d    13
e    17
print(df['A'])
a     1
b     5
c     9
d    13
e    17

3> &#x83B7;&#x53D6;&#x591A;&#x5217;&#x6570;&#x636E;
    df[[&#x5217;&#x540D;&#x7684;&#x5217;&#x8868;]]
print(df[['A', 'B', 'D']])
    A   B   D
a   1   2   4
b   5   6   8
c   9  10  12
d  13  14  16
e  17  18  20

4> &#x65B0;&#x589E;&#x4E00;&#x5217;&#x6570;&#x636E;
    df[&#x65B0;&#x5217;&#x540D;] = 3  &#xFF08;&#x8FD9;&#x4E00;&#x5217;&#x6240;&#x6709;&#x503C;&#x90FD;&#x4E3A;3&#xFF09;
    df[&#x65B0;&#x5217;&#x540D;] = [1, 2, 3]  &#xFF08;&#x4F9D;&#x6B21;&#x7ED9;&#x8FD9;&#x4E00;&#x5217;&#x7684;&#x6BCF;&#x4E00;&#x884C;&#x8D4B;&#x503C;&#xFF09;

df['E'] = [5, 6, 7, 8, 9]
print(df)

    A   B   C   D  E
a   1   2   3   4  5
b   5   6   7   8  6
c   9  10  11  12  7
d  13  14  15  16  8
e  17  18  19  20  9
print(df['A'])  # &#x83B7;&#x53D6;&#x7684;&#x662F;&#x6307;&#x5B9A;&#x5217;
a     1
b     5
c     9
d    13
e    17
print(df[:2])  # &#x83B7;&#x53D6;&#x7684;&#x662F;&#x884C; &#x7D22;&#x5F15;&#x4E3A; 0 1 &#x4F4D;&#x7F6E;&#x7684;&#x884C;
   A  B  C  D  E
a  1  2  3  4  5
b  5  6  7  8  6

5> &#x83B7;&#x53D6;&#x5355;&#x884C;&#x6570;&#x636E;
    df.loc[行索引]  按行标签取值；df.iloc[位置] 按位置取值（df.ix 已在 pandas 1.0 中移除）
&#x83B7;&#x53D6;b&#x884C;&#x7684;&#x6570;&#x636E;
print(df.loc['b'])  # .loc selects the row by label (DataFrame.ix was removed in pandas 1.0)
A    5
B    6
C    7
D    8
E    6

6> &#x83B7;&#x53D6;&#x591A;&#x884C;&#x8FDE;&#x7EED;&#x6570;&#x636E;&#x53CA;&#x95F4;&#x65AD;&#x6570;&#x636E;
    df.loc[[行索引列表]]
    df[0:3]  &#x5DE6;&#x95ED;&#x53F3;&#x5F00;   df[&#x2018;a&#x2019;:&#x2019;c&#x2019;]  &#x8FDE;&#x7EED;&#x5207;&#x7247;&#xFF0C;&#x4E24;&#x95ED;
&#x83B7;&#x53D6;a&#x884C;&#x548C;d&#x884C;&#x7684;&#x6570;&#x636E;
print(df.loc[['a', 'd']])  # list of row labels via .loc (DataFrame.ix was removed in pandas 1.0)
    A   B   C   D  E
a   1   2   3   4  5
d  13  14  15  16  8

&#x6307;&#x5B9A;&#x4E86;&#x5B57;&#x7B26;&#x4E32;&#x7684;&#x5207;&#x7247;    &#x5DE6;&#x95ED;&#x53F3;&#x95ED; &#x83B7;&#x53D6;&#x4ECE;c d  e&#x4E09;&#x884C;&#x7684;
print(df.loc['c':'e'])  # label slice via .loc, both ends included (DataFrame.ix was removed in pandas 1.0)
    A   B   C   D  E
c   9  10  11  12  7
d  13  14  15  16  8
e  17  18  19  20  9
&#x6570;&#x5B57;&#x5207;&#x7247;:&#x5DE6;&#x95ED;&#x53F3;&#x5F00;
print(df[1:4])
    A   B   C   D  E
b   5   6   7   8  6
c   9  10  11  12  7
d  13  14  15  16  8
7> &#x65B0;&#x589E;&#x5355;&#x884C;&#x6570;&#x636E;
    df.loc[行索引] = ['google', 20, 'M', '1686666']
# Assigning to a new label through .loc appends the row (DataFrame.ix was
# removed in pandas 1.0).
df.loc['f'] = [10, 20, 30, 40, 50]
print(df)
    A   B   C   D   E
a   1   2   3   4   5
b   5   6   7   8   6
c   9  10  11  12   7
d  13  14  15  16   8
e  17  18  19  20   9
f  10  20  30  40  50

8> &#x5220;&#x9664;&#x5217;&#xFF0C;&#x5220;&#x9664;&#x884C;&#xFF08;&#x8FD4;&#x56DE;&#x88AB;&#x5220;&#x9664;&#x6307;&#x5B9A;&#x5217;&#x884C;&#x540E;&#x7684;dataframe&#xFF09;
    df.drop(&#x884C;&#x7D22;&#x5F15;)  &#x9ED8;&#x8BA4;&#x5220;&#x9664;&#x7684;&#x5C31;&#x662F;&#x884C;&#x7D22;&#x5F15;
    df.drop(&#x5217;&#x7D22;&#x5F15;,axis=1)  &#x5220;&#x9664;&#x4E00;&#x5217;&#x503C;   &#x77E5;&#x9053;&#x8F74;&#x7684;&#x65B9;&#x5411;
&#x5220;&#x9664;&#x884C;
print(df.drop('a', axis=0))
    A   B   C   D   E
b   5   6   7   8   6
c   9  10  11  12   7
d  13  14  15  16   8
e  17  18  19  20   9
f  10  20  30  40  50
&#x5220;&#x9664;&#x5217;
print(df.drop('B', axis=1))
    A   C   D   E
a   1   3   4   5
b   5   7   8   6
c   9  11  12   7
d  13  15  16   8
e  17  19  20   9
f  10  30  40  50

&#x5220;&#x9664;&#x591A;&#x884C;
print(df.drop(['a', 'b', 'd']))
    A   B   C   D   E
c   9  10  11  12   7
e  17  18  19  20   9
f  10  20  30  40  50
&#x5220;&#x9664;&#x591A;&#x5217;
print(df.drop(['A', 'E'], axis=1))
    B   C   D
a   2   3   4
b   6   7   8
c  10  11  12
d  14  15  16
e  18  19  20
f  20  30  40

9> &#x9501;&#x5B9A;&#x67D0;&#x4E2A;&#x5177;&#x4F53;&#x7684;&#x5143;&#x7D20;
    df.loc[行索引，列索引]   也可以使用切片
# Cell and sub-table lookup by labels with .loc (DataFrame.ix was removed in
# pandas 1.0).
print(df.loc['c', 'A'])  # 9
print(df.loc[['c', 'd'], ['A', 'B']])
    A   B
c   9  10
d  13  14

3. DataFrame&#x5E38;&#x7528;&#x7684;&#x67E5;&#x770B;&#x65B9;&#x6CD5;
1> &#x8FD4;&#x56DE;&#x524D;&#x51E0;&#x6761; df.head(num)
print(df.iloc[:3])  # first three rows by position (DataFrame.ix was removed in pandas 1.0)
print(df.head(3))
   A   B   C   D  E
a  1   2   3   4  5
b  5   6   7   8  6
c  9  10  11  12  7

2> &#x8FD4;&#x56DE;&#x540E;&#x51E0;&#x6761; df.tail(num)
print(df.tail(3))
    A   B   C   D   E
d  13  14  15  16   8
e  17  18  19  20   9
f  10  20  30  40  50

3> &#x83B7;&#x53D6;&#x884C;&#x7D22;&#x5F15;indx,&#x83B7;&#x53D6;&#x5217;&#x7D22;&#x5F15;columns
print(list(df.index))  # ['a', 'b', 'c', 'd', 'e', 'f']
print(list(df.columns))  # ['A', 'B', 'C', 'D', 'E']

4> &#x83B7;&#x53D6;dataframe&#x4E2D;&#x7684;&#x503C;values&#xFF0C;&#x662F;&#x4E8C;&#x7EF4;&#x6570;&#x7EC4;
print(df.values)
[[ 1  2  3  4  5]
 [ 5  6  7  8  6]
 [ 9 10 11 12  7]
 [13 14 15 16  8]
 [17 18 19 20  9]
 [10 20 30 40 50]]
&#x8F93;&#x51FA;DataFram shape
print(df.shape)  # (6, 5)   6&#x884C;  5&#x5217;

5> &#x67E5;&#x770B;dataframe&#x6570;&#x636E;&#x7684;&#x5206;&#x5E03;&#x60C5;&#x51B5;&#xFF1A; df.describe()   &#x6309;&#x5217;&#x7EDF;&#x8BA1;&#x7684;&#x6570;&#x636E;  axis=1&#x65B9;&#x5411;
print(df.describe())
               A          B          C          D          E
count   6.000000   6.000000   6.000000   6.000000   6.000000
mean    9.166667  11.666667  14.166667  16.666667  14.166667
std     5.671567   6.976150   9.600347  12.754084  17.611549
min     1.000000   2.000000   3.000000   4.000000   5.000000
25%     6.000000   7.000000   8.000000   9.000000   6.250000
50%     9.500000  12.000000  13.000000  14.000000   7.500000
75%    12.250000  17.000000  18.000000  19.000000   8.750000
max    17.000000  20.000000  30.000000  40.000000  50.000000

6> &#x884C;/&#x5217;&#x7D22;&#x5F15;&#x6392;&#x5E8F;&#xFF08;axis&#x63A7;&#x5236;&#x884C;&#x5217;&#xFF0C;ascending&#x63A7;&#x5236;&#x5347;&#x964D;&#x5E8F;&#xFF09;
    df = df.sort_index() &#x9ED8;&#x8BA4;&#x6309;&#x7167;&#x884C;&#x7D22;&#x5F15;&#x5B57;&#x5178;&#x5347;&#x5E8F;
    df = df.sort_index(axis=1) &#x6309;&#x7167;&#x5217;&#x7D22;&#x5F15;&#x5B57;&#x5178;&#x5347;&#x5E8F;
arr = np.arange(1, 21).reshape(5, 4)
df = pd.DataFrame(arr, index=list("cbaed"), columns=list("DBAC"))
print(df)
   D   B   A   C
c   1   2   3   4
b   5   6   7   8
a   9  10  11  12
e  13  14  15  16
d  17  18  19  20
print(df.sort_index())
    D   B   A   C
a   9  10  11  12
b   5   6   7   8
c   1   2   3   4
d  17  18  19  20
e  13  14  15  16
print(df.sort_index(axis=0,ascending=False))
    D   B   A   C
e  13  14  15  16
d  17  18  19  20
c   1   2   3   4
b   5   6   7   8
a   9  10  11  12

print(df.sort_index(axis=1))
    A   B   C   D
c   3   2   4   1
b   7   6   8   5
a  11  10  12   9
e  15  14  16  13
d  19  18  20  17

7> 按列的值排序 sort_values（by指定排序的列，ascending控制升降序）
    df.sort_values(by="A") &#x5355;&#x5217;&#x503C;&#x5347;&#x5E8F;
    df.sort_values(by="A",ascending=False) &#x5355;&#x5217;&#x503C;&#x964D;&#x5E8F;
    df.sort_values(by=['A', 'B']) &#x591A;&#x5217;&#x6392;&#x5E8F;
&#x6309;&#x7D22;&#x5F15;A&#x5217;&#x7684;&#x6570;&#x636E;&#x6392;&#x5E8F;
print(df.sort_values(by="A"))
    D   B   A   C
c   1   2   3   4
b   5   6   7   8
a   9  10  11  12
e  13  14  15  16
d  17  18  19  20
print(df.sort_values(by="A",ascending=False))
    D   B   A   C
d  17  18  19  20
e  13  14  15  16
a   9  10  11  12
b   5   6   7   8
c   1   2   3   4

8> &#x77E9;&#x9635;&#x8F6C;&#x7F6E; df.T   &#x884C;&#x4E0E;&#x5217;&#x6539;&#x53D8;
print(df.T)
   c  b   a   e   d
D  1  5   9  13  17
B  2  6  10  14  18
A  3  7  11  15  19
C  4  8  12  16  20

#9> 读取csv文件中的数据
print(pd.read_csv('users.csv'))

import numpy as np
import pandas as pd

5. DataFrame&#x5904;&#x7406;&#x7F3A;&#x5931;&#x6570;&#x636E;

&#x4F7F;&#x7528;pandas&#x521B;&#x5EFA;&#x5982;&#x4E0B;&#x683C;&#x5F0F;&#x7684;&#x6570;&#x636E;&#xFF1A;
#
# The cells addressed here are rows 0-1 of column 'D' and row 3 of column 'C'.
# .loc is the label-based indexer; DataFrame.ix was removed in pandas 1.0.
df.loc[0:1, 'D']
df.loc[3, 'C']

# 5x4 frame holding 1..20 with the default integer row index and columns A-D.
arr = np.arange(start=1, stop=21).reshape((5, 4))
df = pd.DataFrame(data=arr, columns=[*"ABCD"])
print(df)
    A   B   C   D
0   1   2   3   4
1   5   6   7   8
2   9  10  11  12
3  13  14  15  16
4  17  18  19  20

# Blank out three cells so the frame contains missing values to work with.
# .loc is the label-based indexer; DataFrame.ix was removed in pandas 1.0.
df.loc[3,'C']=np.nan
df.loc[[0,1],'D']=np.nan
print(df)
    A   B     C     D
0   1   2   3.0   NaN
1   5   6   7.0   NaN
2   9  10  11.0  12.0
3  13  14   NaN  16.0
4  17  18  19.0  20.0

(1)&#x5220;&#x9664;nan&#x7684;&#x884C;
print(df.dropna())
print(df.dropna(axis=0))
    A   B     C     D
2   9  10  11.0  12.0
4  17  18  19.0  20.0
(2)&#x5220;&#x9664;nan&#x7684;&#x5217;
print(df.dropna(axis=1))
    A   B
0   1   2
1   5   6
2   9  10
3  13  14
4  17  18
(3)&#x586B;&#x5145;&#x6240;&#x6709;nan&#x7684;&#x6570;&#x636E;   &#x76F8;&#x540C;&#x5217;&#x7684;&#x6570;&#x636E;&#x7C7B;&#x578B;&#x4E00;&#x6837;
print(df.fillna(0.0))
    A   B     C     D
0   1   2   3.0   0.0
1   5   6   7.0   0.0
2   9  10  11.0  12.0
3  13  14   0.0  16.0
4  17  18  19.0  20.0

(4)&#x4F7F;&#x7528;map&#x8FDB;&#x884C;&#x6307;&#x5B9A;&#x5217;nan&#x586B;&#x5145;&#x6570;&#x636E;
print(df.fillna({'C': 0.0, 'D': -1}))
    A   B     C     D
0   1   2   3.0  -1.0
1   5   6   7.0  -1.0
2   9  10  11.0  12.0
3  13  14   0.0  16.0
4  17  18  19.0  20.0

import numpy as np
import pandas as pd

df = pd.DataFrame({'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'],
                   'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'],
                   'C': np.random.randint(8, size=8),
                   'D': np.random.randint(8, size=8)})

print(df)
     A      B  C  D
0  foo    one  2  4
1  bar    one  3  1
2  foo    two  5  0
3  bar  three  4  4
4  foo    two  0  1
5  bar    two  7  6
6  foo    one  6  0
7  foo  three  0  3

&#x8BED;&#x6CD5; &#xFF1A; df.groupby(&#x7EDF;&#x8BA1;&#x7684;&#x5217;).sum()
按B列分组（one/two/three）      统计的是C,D列数据的和
print(df.groupby(by='B').sum())

        C   D
B
one     6  19
three   9  10
two    13   9

先按A列分组，再按B列分组统计     统计的是C,D列的数据
print(df.groupby(by=['A','B']).sum())
A   B
bar one     7  6
    three   7  2
    two     6  4
foo one    11  6
    three   2  6
    two     5  9

import numpy as np
import pandas as pd
1.  &#x8BFB;&#x53D6;users.csv&#x6587;&#x4EF6;&#x5230;dataframe&#x4E2D;     # &#x9017;&#x53F7;&#x5206;&#x9694; csv  &#x9017;&#x53F7;&#x5206;&#x9694;&#x6587;&#x4EF6;
read_csv = _make_parser_function('read_csv', default_sep=',')   &#x9ED8;&#x8BA4;&#x7684;&#x5206;&#x9694;&#x7B26;,&#x4F20;&#x7684;&#x662F;&#x9017;&#x53F7;
df=pd.read_csv("users.csv",encoding="utf-8")
print(df)

2.  &#x83B7;&#x53D6;&#x524D;20&#x6761;&#x8BB0;&#x5F55;
print(df.head(20))
3.  &#x8FD9;&#x4E2A;&#x6570;&#x636E;&#x96C6;&#x4E00;&#x5171;&#x6709;&#x591A;&#x5C11;&#x6761;&#x6570;&#x636E;
print(df.shape[0])  # 943
print(df.index.size)  # 943
4.  &#x4E00;&#x5171;&#x6709;&#x51E0;&#x5217;
print(df.shape[1]) # 5
print(df.columns.size) # 5
5.  &#x83B7;&#x53D6;&#x6240;&#x6709;&#x7684;&#x5217;&#x540D;&#x53CA;&#x884C;&#x7D22;&#x5F15;&#x540D;
print(list(df.index)) # [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 
417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 588, 589, 590, 591, 592, 593, 594, 595, 596, 597, 598, 599, 600, 601, 602, 603, 604, 605, 606, 607, 608, 609, 610, 611, 612, 613, 614, 615, 616, 617, 618, 619, 620, 621, 622, 623, 624, 625, 626, 627, 628, 629, 630, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 664, 665, 666, 667, 668, 669, 670, 671, 672, 673, 674, 675, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 714, 715, 716, 717, 718, 719, 720, 721, 722, 723, 724, 725, 726, 727, 728, 729, 730, 731, 732, 733, 734, 735, 736, 737, 738, 739, 740, 741, 742, 743, 744, 745, 746, 747, 748, 749, 750, 751, 752, 753, 754, 755, 756, 757, 758, 759, 760, 761, 762, 763, 764, 765, 766, 767, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 780, 781, 782, 783, 784, 785, 786, 787, 788, 789, 790, 791, 792, 793, 794, 795, 796, 797, 798, 799, 800, 801, 802, 803, 804, 805, 806, 807, 808, 809, 810, 811, 812, 813, 814, 815, 816, 
817, 818, 819, 820, 821, 822, 823, 824, 825, 826, 827, 828, 829, 830, 831, 832, 833, 834, 835, 836, 837, 838, 839, 840, 841, 842, 843, 844, 845, 846, 847, 848, 849, 850, 851, 852, 853, 854, 855, 856, 857, 858, 859, 860, 861, 862, 863, 864, 865, 866, 867, 868, 869, 870, 871, 872, 873, 874, 875, 876, 877, 878, 879, 880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 890, 891, 892, 893, 894, 895, 896, 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909, 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927, 928, 929, 930, 931, 932, 933, 934, 935, 936, 937, 938, 939, 940, 941, 942]
print(list(df.columns))  # ['user_id', 'age', 'gender', 'occupation', 'zip_code']

6.  &#x6253;&#x5370;&#x51FA;occupation&#x8FD9;&#x4E00;&#x5217;&#x7684;&#x5185;&#x5BB9;
print(df['occupation'])
print(df.occupation)
7.  &#x4E00;&#x5171;&#x6709;&#x591A;&#x5C11;&#x4E2A;&#x4E0D;&#x540C;&#x7684;&#x804C;&#x4F4D;
print(df['occupation'].unique())
print(df['occupation'].unique().size) # 21

8.  &#x6700;&#x591A;&#x51FA;&#x73B0;&#x7684;&#x804C;&#x4F4D;&#x6709;&#x54EA;&#x4E9B;
print(df['occupation'].value_counts().head(10))
student          196
other            105
educator          95
administrator     79
engineer          67
programmer        66
librarian         51
writer            45
executive         32
scientist         31

9.  &#x6240;&#x6709;user&#x7684;&#x5E73;&#x5747;&#x5E74;&#x9F84;
print(df['age'].mean())

10. &#x6240;&#x6709;&#x6027;&#x522B;&#x4E3A;&#x7537;&#x6027;&#x7684;&#x5E73;&#x5747;&#x5E74;&#x9F84;
df=df[df['gender']=='M']
print(df)
print(df['age'].mean()) # 34.149253731343286

6.1 绘图基础知识1

import numpy as np
import pandas as pd

1.  &#x8BFB;&#x53D6;Pokemon.csv
df = pd.read_csv('Pokemon.csv', encoding='utf-8')

2.  将列名统一转成小写
df_index = df.columns.str.lower()  # &#x83B7;&#x53D6;&#x8868;&#x683C;&#x7684;&#x5217;&#x7D22;&#x5F15;
df.columns = df_index
print(df)

3.  &#x5C06;#&#x5217;&#x5220;&#x9664;
df = df.drop('#', axis=1)

4.  &#x5C06;name&#x5217;&#x8BBE;&#x7F6E;&#x4E3A;&#x884C;&#x7684;&#x7D22;&#x5F15;&#x5217;
# Move the 'name' column into the row index in one step.
df = df.set_index('name')
# Equivalent two-step form.  Use one form or the other, never both: once
# either has run there is no 'name' column left and the other raises KeyError.
# df.index = df['name'].values
# df = df.drop('name', axis=1)

5.  &#x9009;&#x51FA;legendary&#x7684;&#x5BA0;&#x7269;
df[df['legendary'] == True]

6.  &#x67E5;&#x770B;Pikachu&#x5BA0;&#x7269;&#x7684;&#x6240;&#x6709;&#x5C5E;&#x6027;
# Label-based row lookup; DataFrame.ix was removed in pandas 1.0.
df.loc['Pikachu']

7.  &#x67E5;&#x770B;&#x4E00;&#x5171;&#x6709;&#x591A;&#x5C11;&#x79CD;&#x5BA0;&#x7269;&#x7684;&#x7C7B;&#x578B;
df['type 1'].unique()

8.  &#x7B5B;&#x9009;&#x51FA;&#x65E2;&#x662F;&#x706B;&#x7CFB;&#x53C8;&#x662F;&#x9F99;&#x7CFB;&#x7684;&#x5BA0;&#x7269;
df[((df['type 1']=='Fire') & (df['type 2']=='Dragon')) |
  ((df['type 1']=='Dragon') & (df['type 2']=='Fire'))]

9.  &#x603B;&#x5C5E;&#x6027;&#x503C;&#x6700;&#x9AD8;&#x7684;&#x4E09;&#x4E2A;
df.sort_values(by='total', ascending=False).head(3)

10. &#x706B;&#x7CFB;&#x4E2D;&#x653B;&#x51FB;&#x529B;&#x6700;&#x9AD8;&#x7684;&#x4E09;&#x4E2A;&#x5BA0;&#x7269;
data = df[(df['type 1']=='Fire') | (df['type 2']=='Fire')]
data.sort_values(by='attack', ascending=False).head(3)

11. &#x8BA1;&#x7B97;&#x6BCF;&#x4E2A;&#x7C7B;&#x578B;&#x5BA0;&#x7269;&#x7684;&#x6570;&#x91CF;&#xFF08;&#x5206;&#x7C7B;&#x7EDF;&#x8BA1;&#xFF09;
df['type 1'].value_counts()
df['type 2'].value_counts()

12. &#x7EDF;&#x8BA1;Water&#x7CFB;&#x5BA0;&#x7269;&#x7684;&#x6570;&#x91CF;
water = df[(df['type 1']=='Water') | (df['type 2']=='Water')]
print(water.shape[0])

6.2 绘图基础知识2

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
plt.rcParams['font.sans-serif'] = ['Microsoft YaHei']

1.&#x8BFB;&#x53D6;csv&#x6587;&#x4EF6;&#x5E76;&#x5904;&#x7406;&#x6570;&#x636E;
df = pd.read_csv(r'Pokemon.csv', encoding='utf-8')
df = df.drop("#", axis=1)
df = df.set_index('Name')  # &#x5C06;Name&#x4F5C;&#x4E3A; &#x884C;&#x7684;&#x7D22;&#x5F15;

2.&#x7ED8;&#x5236;&#x76F4;&#x65B9;&#x56FE; &#x7ED8;&#x5236;&#x5BA0;&#x7269;&#x653B;&#x51FB;&#x529B;&#x76F4;&#x65B9;&#x56FE;
print(df['Attack'])
def draw_hist():
    """Draw a histogram of the ``Attack`` column of the module-level ``df``.

    Bars are blue with yellow edges, and a dashed red vertical line marks the
    mean attack value.  The figure is displayed with ``plt.show()``.
    """
    attack = df['Attack']
    # range() excludes its stop value, so the stop must be 201 for the last
    # bin edge to be 200.  With a stop of 200 the top edge was 175 and any
    # larger attack value was silently left out of the histogram.
    bins = range(0, 201, 25)
    # width=18 makes each bar narrower than its 25-wide bin so bars do not touch.
    plt.hist(attack, bins=bins, width=18, color="b", edgecolor='y')
    plt.title('&#x5BA0;&#x7269;&#x653B;&#x51FB;&#x529B;&#x5206;&#x5E03;&#x76F4;&#x65B9;&#x56FE;')
    plt.xlabel('&#x653B;&#x51FB;&#x529B;')
    plt.ylabel('&#x6570;&#x91CF;')
    # axvline draws a vertical line across the axes at the given x position.
    plt.axvline(attack.mean(), linestyle="dashed", color="r")
    plt.show()

draw_hist()

def draw_scatter():
    """Scatter Attack against Defense for Water and Fire rows of ``df``.

    A row belongs to a type when either its ``Type 1`` or its ``Type 2``
    column equals that type.  Water points are blue; Fire points are red
    star markers.
    """
    type_cols = df[['Type 1', 'Type 2']]
    water_rows = df[type_cols.eq('Water').any(axis=1)]
    fire_rows = df[type_cols.eq('Fire').any(axis=1)]

    plt.scatter(water_rows['Attack'], water_rows['Defense'],
                label='Water', color='blue')
    plt.scatter(fire_rows['Attack'], fire_rows['Defense'],
                label='Fire', color='red', marker='*')
    # The legend entries come from the label= values passed to scatter().
    plt.legend()
    plt.title("&#x6C34;&#x7CFB;&#x706B;&#x7CFB;&#x5BA0;&#x7269;&#x653B;&#x51FB;&#x529B;/&#x9632;&#x5FA1;&#x529B;&#x5206;&#x5E03;&#x6563;&#x70B9;&#x56FE;", color='red', fontsize=20)
    plt.xlabel("&#x653B;&#x51FB;&#x529B;")
    plt.ylabel("&#x9632;&#x5FA1;&#x529B;")
    plt.show()

draw_scatter()

&#x6563;&#x70B9;&#x76F4;&#x65B9;&#x56FE;
def draw_jointplot():
    """Draw a seaborn joint plot of Attack vs. Defense for Water rows of ``df``.

    A row is treated as Water when either ``Type 1`` or ``Type 2`` is 'Water'.
    """
    water = df[(df['Type 1'] == 'Water') | (df['Type 2'] == 'Water')]
    # Pass x and y by keyword: seaborn 0.12+ no longer accepts them
    # positionally.
    sns.jointplot(x=water.Attack, y=water.Defense)
    plt.show()

def draw_countplot():
    """Draw a seaborn count plot of the ten most frequent ``Type 1`` values.

    Reads the module-level ``df`` but does not modify it: the filtered rows
    are kept in a local variable.  The earlier version rebound the global
    ``df`` to the filtered frame, so every later use of ``df`` silently saw
    only the top-ten types.
    """
    top_types = list(df['Type 1'].value_counts().head(10).index)
    top_rows = df[df['Type 1'].isin(top_types)]
    sns.set(style="darkgrid")
    # Pass hue=<column name> as well to split each bar by a second column.
    sns.countplot(x='Type 1', data=top_rows)
    plt.show()

#&#x997C;&#x72B6;&#x56FE;
def draw_pie():
    """Draw a pie chart of how many rows of ``df`` have each ``Type 1`` value.

    The earlier version passed the raw ``Type 1`` strings as wedge sizes and
    the row index as labels; a pie chart needs one numeric size per category,
    which ``value_counts()`` provides together with the category labels.
    """
    counts = df['Type 1'].value_counts()
    plt.pie(counts.values, labels=list(counts.index))
    plt.show()

6.3 绘图基础知识3

import numpy as np
import pandas as pd
import  matplotlib as mb
import  seaborn as sb

&#x6570;&#x636E;&#x5206;&#x6790;Pandas&#x5F3A;&#x5316;
1.  &#x8BFB;&#x53D6;Pokemon.csv
df =pd.read_csv('Pokemon.csv',encoding='utf-8')
print(df)
2.  将列名统一转成小写
print(df.columns)
Index(['#', 'Name', 'Type 1', 'Type 2', 'Total', 'HP', 'Attack', 'Defense',
       'Sp. Atk', 'Sp. Def', 'Speed', 'Generation', 'Legendary'],
      dtype='object')
print(df.columns.str) # <pandas.core.strings.stringmethods object at 0x0000000001ecf518>

&#x83B7;&#x53D6;&#x8868;&#x683C;&#x7684;&#x5217;&#x7D22;&#x5F15;
df_index=df.columns.str.lower()
在此重新赋值给列索引
df.columns=df_index
print(df.columns)
Index(['#', 'name', 'type 1', 'type 2', 'total', 'hp', 'attack', 'defense',
       'sp. atk', 'sp. def', 'speed', 'generation', 'legendary'],
      dtype='object')
3.  &#x5C06;#&#x5217;&#x5220;&#x9664;
df=df.drop("#",axis=1)
print(df)
4.  &#x5C06;name&#x5217;&#x8BBE;&#x7F6E;&#x4E3A;&#x884C;&#x7684;&#x7D22;&#x5F15;&#x5217;
# Move the 'name' column into the row index in one step.
df=df.set_index('name')
print(df)

# Equivalent two-step form.  Use one form or the other, never both: once
# either has run there is no 'name' column left and the other raises KeyError.
# df.index=df['name'].values
# df=df.drop('name',axis=1)

5.  &#x9009;&#x51FA;legendary&#x7684;&#x5BA0;&#x7269;
print(df[df['legendary']==True])

6.  &#x67E5;&#x770B;Pikachu&#x5BA0;&#x7269;&#x7684;&#x6240;&#x6709;&#x5C5E;&#x6027;
# Label-based row lookup; DataFrame.ix was removed in pandas 1.0.
print(list(df.loc['Pikachu']))
['Electric', nan, 320, 35, 55, 40, 50, 50, 90, 1, False]

7.  &#x67E5;&#x770B;&#x4E00;&#x5171;&#x6709;&#x591A;&#x5C11;&#x79CD;&#x5BA0;&#x7269;&#x7684;&#x7C7B;&#x578B;  &#x5B57;&#x6BCD;&#x6211;&#x5DF2;&#x7ECF;&#x5168;&#x90E8;&#x8F6C;&#x5C0F;&#x5199;&#x4E86;(&#x5206;&#x5927;&#x5C0F;&#x5199;&#x7684;)   &#x4E2D;&#x95F4;&#x6709;&#x7A7A;&#x683C;&#x6CE8;&#x610F;
print(df['type 1'].unique())
['Grass' 'Fire' 'Water' 'Bug' 'Normal' 'Poison' 'Electric' 'Ground'
 'Fairy' 'Fighting' 'Psychic' 'Rock' 'Ghost' 'Ice' 'Dragon' 'Dark' 'Steel'
 'Flying']

8.  &#x7B5B;&#x9009;&#x51FA;&#x65E2;&#x662F;&#x706B;&#x7CFB;&#x53C8;&#x662F;&#x9F99;&#x7CFB;&#x7684;&#x5BA0;&#x7269;
print("8*************")
fire_dradon=df[((df['type 1']=='Fire')&(df['type 2']=='Dragon'))|((df['type 1']=='Dragon')&(df['type 2']=='Fire'))]
print(fire_dradon)
                           type 1  type 2  total  ...  speed  generation  legendary
name                                              ...

CharizardMega Charizard X    Fire  Dragon    634  ...    100           1      False
Reshiram                   Dragon    Fire    680  ...     90           5       True

9.  &#x603B;&#x5C5E;&#x6027;&#x503C;&#x6700;&#x9AD8;&#x7684;&#x4E09;&#x4E2A;
以总属性降序排序,并取出前三个
print("9***********")
print(df.sort_values(by='total',ascending=False).head(3))
                        type 1    type 2  total  ...  speed  generation  legendary
name                                             ...

RayquazaMega Rayquaza   Dragon    Flying    780  ...    115           3       True
MewtwoMega Mewtwo Y    Psychic       NaN    780  ...    140           1       True
MewtwoMega Mewtwo X    Psychic  Fighting    780  ...    130           1       True

10. &#x706B;&#x7CFB;&#x4E2D;&#x653B;&#x51FB;&#x529B;&#x6700;&#x9AD8;&#x7684;&#x4E09;&#x4E2A;&#x5BA0;&#x7269;
print("10***************")
typefire=df[(df['type 1']=='Fire')| (df['type 2']=='Fire')]

print(typefire.sort_values(by='attack',ascending=False).head(3))

                         type 1    type 2  total  ...  speed  generation  legendary
name                                              ...

GroudonPrimal Groudon    Ground      Fire    770  ...     90           3       True
BlazikenMega Blaziken      Fire  Fighting    630  ...    100           3      False
DarmanitanStandard Mode    Fire       NaN    480  ...     95           5      False

11. &#x8BA1;&#x7B97;&#x6BCF;&#x4E2A;&#x7C7B;&#x578B;&#x5BA0;&#x7269;&#x7684;&#x6570;&#x91CF;&#xFF08;&#x5206;&#x7C7B;&#x7EDF;&#x8BA1;&#xFF09;
print("11******")
print(df['type 1'].value_counts())
Water       112
Normal       98
Grass        70
Bug          69
Psychic      57
Fire         52
Rock         44
Electric     44
Dragon       32
Ground       32
Ghost        32
Dark         31
Poison       28
Steel        27
Fighting     27
Ice          24
Fairy        17
Flying        4
print(df['type 2'].value_counts())

12. &#x7EDF;&#x8BA1;Water&#x7CFB;&#x5BA0;&#x7269;&#x7684;&#x6570;&#x91CF;
typewater=df[(df['type 1']=='Water')| (df['type 2']=='Water')]
print(typewater.shape[0])  # 126

6.4 基于Matplotlib数据分析制图

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

&#x6570;&#x636E;&#x5206;&#x6790;Pandas&#x5F3A;&#x5316;
1.&#x8BFB;&#x53D6;Pokemon.csv
df = pd.read_csv('Pokemon.csv', encoding='utf-8')
&#x5220;&#x9664;&#x7B2C;&#x4E00;&#x5217;&#x7684; #
df = df.drop("#", axis=1)
&#x5C06;name&#x4F5C;&#x4E3A;&#x884C;&#x7684;&#x7D22;&#x5F15;
df = df.set_index('Name')
&#x6CE8;&#x610F;&#x5C5E;&#x6027;&#x548C;&#x5B57;&#x6BB5;&#x7684;&#x540D;&#x5B57;&#x5927;&#x5C0F;&#x5199;
print(df)

&#x4E2D;&#x6587;&#x4E71;&#x7801;&#x7684;&#x5904;&#x7406;
plt.rcParams['font.sans-serif'] = ['Microsoft YaHei']  # SimHei&#x9ED1;&#x4F53;
plt.rcParams['axes.unicode_minus'] = False

(1)&#x76F4;&#x65B9;&#x56FE; hist &#xFF1A;
&#x7ED8;&#x5236;&#x6240;&#x6709;&#x5BA0;&#x7269;&#x653B;&#x51FB;&#x529B;&#x76F4;&#x65B9;&#x56FE;

def draw_hist():
    """Draw a histogram of the ``Attack`` column of the module-level ``df``.

    Bars are blue with yellow edges, and a dashed red vertical line marks the
    mean attack value.  The figure is displayed with ``plt.show()``.
    """
    attack = df['Attack']
    # Bin edges every 20 from 0 to 200.  range() excludes its stop value, so
    # the stop must be 201; with a stop of 200 the top edge was 180 and any
    # larger attack value was silently left out of the histogram.
    bins = range(0, 201, 20)
    # width=18 makes each bar narrower than its 20-wide bin so bars do not
    # touch; color is the bar fill and edgecolor the bar outline.
    plt.hist(attack, bins, width=18, color="b", edgecolor='y')
    plt.title('&#x5BA0;&#x7269;&#x653B;&#x51FB;&#x529B;&#x5206;&#x5E03;&#x76F4;&#x65B9;&#x56FE;')
    plt.xlabel('&#x653B;&#x51FB;&#x529B;')
    plt.ylabel('&#x6570;&#x91CF;')
    # Mark the mean attack value with a dashed red vertical line.
    plt.axvline(attack.mean(), linestyle="dashed", color="r")
    plt.show()

draw_hist()

(2)&#x6563;&#x70B9;&#x56FE; scatter&#xFF1A;
&#x6C34;&#x7CFB;&#x706B;&#x7CFB;&#x5BA0;&#x7269;&#x653B;&#x51FB;&#x529B;/&#x9632;&#x5FA1;&#x529B;&#x5206;&#x5E03;&#x6563;&#x70B9;&#x56FE;
def draw_scatter():
    """Scatter Attack (x) against Defense (y) for Water and Fire rows of ``df``.

    A row belongs to a type when either ``Type 1`` or ``Type 2`` equals it.
    Water points are blue; Fire points are red star markers.
    """
    def rows_of(kind):
        # Rows whose primary or secondary type is `kind`.
        primary = df['Type 1'] == kind
        secondary = df['Type 2'] == kind
        return df[primary | secondary]

    water_rows = rows_of('Water')
    fire_rows = rows_of('Fire')
    plt.scatter(water_rows['Attack'], water_rows['Defense'],
                label='Water', color='blue')
    plt.scatter(fire_rows['Attack'], fire_rows['Defense'],
                label='Fire', color='red', marker='*')
    # The legend entries come from the label= values passed to scatter().
    plt.legend()
    plt.title("&#x6C34;&#x7CFB;&#x706B;&#x7CFB;&#x5BA0;&#x7269;&#x653B;&#x51FB;&#x529B;/&#x9632;&#x5FA1;&#x529B;&#x5206;&#x5E03;&#x6563;&#x70B9;&#x56FE;", color='red', fontsize=18)
    plt.xlabel("&#x653B;&#x51FB;&#x529B;")
    plt.ylabel("&#x9632;&#x5FA1;&#x529B;")
    # Display the figure.
    plt.show()

(3) &#x57FA;&#x4E8E;seaborn&#x7684;&#x6563;&#x70B9;&#x76F4;&#x65B9;&#x56FE; jointplot&#xFF1A;
&#x6C34;&#x7CFB;&#x5BA0;&#x7269;&#x653B;&#x51FB;&#x529B;/&#x9632;&#x5FA1;&#x529B;&#x5206;&#x5E03;&#x6563;&#x70B9;&#x76F4;&#x65B9;&#x56FE;     &#x5728;&#x6563;&#x70B9;&#x56FE;&#x4E0A;&#x9762;&#x52A0;&#x4E86;&#x76F4;&#x65B9;&#x56FE;(&#x770B;&#x7740;&#x5F88;&#x4E11;)
def draw_jointplot():
    """Draw a seaborn joint plot of Attack vs. Defense for Water rows of ``df``.

    A row is treated as Water when either ``Type 1`` or ``Type 2`` is 'Water'.
    """
    # Select the Water rows.
    water = df[(df['Type 1'] == 'Water') | (df['Type 2'] == 'Water')]
    # Attack goes on the x axis and Defense on the y axis.  Both are passed
    # by keyword: seaborn 0.12+ no longer accepts them positionally.
    sns.jointplot(x=water.Attack, y=water.Defense)
    plt.show()

(4)&#x57FA;&#x4E8E;seaborn&#x7684;&#x5206;&#x7C7B;&#x7EDF;&#x8BA1;&#x56FE; countplot&#xFF1A;
&#x5BA0;&#x7269;&#x7C7B;&#x578B;&#x6570;&#x91CF;&#x6392;&#x540D;&#x524D;&#x5341;&#x5206;&#x7C7B;&#x7EDF;&#x8BA1;&#x56FE;
def draw_countplot(df):
    """Draw a seaborn count plot of the ten most frequent ``Type 1`` values.

    Args:
        df: frame with a ``Type 1`` column.  It is taken as a parameter so
            that the filtering below stays local to this function.
    """
    # value_counts() sorts by frequency, so the first ten labels are the
    # ten most common types.
    frequencies = df['Type 1'].value_counts()
    top_ten = frequencies.head(10).index.tolist()
    subset = df.loc[df['Type 1'].isin(top_ten)]
    # Available seaborn styles: white, dark, whitegrid, darkgrid, ticks.
    sns.set(style="darkgrid")
    # One bar per 'Type 1' value found in the filtered rows.
    sns.countplot(x='Type 1', data=subset)
    plt.show()

print("&#x57FA;&#x4E8E;seaborn&#x5206;&#x7C7B;&#x7EDF;&#x8BA1;&#x56FE;")
draw_countplot(df)

data=df['Type 1'].value_counts().head(10)
index=list(data.index)   # ['Water', 'Normal', 'Grass', 'Bug', 'Psychic', 'Fire', 'Rock', 'Electric', 'Ghost', 'Ground']
df = df[df['Type 1'].isin(index)]
print(data)
print(index)
print(df)

draw_countplot(df)

(5)&#x997C;&#x72B6;&#x56FE;
def draw_pie():
    """Draw a pie chart of the share of each ``Type 1`` value in ``df``.

    The third wedge (in descending-count order) is pulled out of the pie.
    """
    counts = df['Type 1'].value_counts()
    # Category names and their row counts, in descending-count order.
    labels = counts.index
    sizes = counts.values
    # explode must have exactly one entry per wedge.  Build it from the actual
    # number of categories instead of hard-coding 18 entries, which raised
    # ValueError as soon as df had been filtered to fewer types.
    explode = [0.1 if position == 2 else 0 for position in range(len(sizes))]
    # autopct formats the percentage written inside each wedge: one decimal
    # place followed by a literal percent sign.
    plt.pie(sizes, explode=explode, labels=labels, autopct="%1.1f %%")
    # Equal axis scaling so the pie is drawn as a circle.
    plt.axis('equal')
    plt.title("&#x4E0D;&#x540C;&#x7C7B;&#x578B;&#x5BA0;&#x7269;&#x6BD4;&#x4F8B;")
    plt.show()

draw_pie()

Info = df['Type 1'].value_counts()
labels =df['Type 1'].value_counts().index
size = df['Type 1'].value_counts().values
print(Info)
print(labels)
print(size)

(6)&#x7BB1;&#x7EBF;&#x56FE;
&#x6240;&#x6709;&#x5BA0;&#x7269;&#x5404;&#x5C5E;&#x6027;&#x503C; &#x7BB1;&#x578B;&#x56FE; &#x5206;&#x5E03;
def draw_box():
    """Draw one box per remaining column of ``df`` on a shared y axis.

    The 'Generation', 'Total' and 'Legendary' columns are dropped first.
    """
    excluded = ['Generation', 'Total', 'Legendary']
    stats = df.drop(columns=excluded)
    # whis is the whisker length as a multiple of the box length; points
    # beyond it are drawn as outliers.  1.5 is also the default.
    sns.boxplot(data=stats, whis=1.5)
    # Limit the y axis to 0..300.
    plt.ylim(0, 300)
    plt.show()

#draw_box()
df2=df.drop(['Generation','Total','Legendary'],axis=1)
#print(df2)  # Type 1  Type 2   HP  ...  Sp. Atk  Sp. Def  Speed    &#x6709;&#x8FD9;&#x4E48;&#x591A;&#x5B57;&#x6BB5;,&#x5176;&#x4E2D;Type1 Type2&#x4E0D;&#x662F;&#x6570;&#x636E;&#x7C7B;&#x578B;,&#x662F;&#x53EF;&#x4EE5;&#x81EA;&#x52A8;&#x4E0D;&#x5230;&#x60F3;&#x5148;&#x56FE;&#x91CC;&#x9762;&#x7684;

&#x5BA0;&#x7269;&#x5C5E;&#x6027;&#x5206;&#x7C7B;&#x7684;&#x7BB1;&#x7EBF;&#x56FE;
def draw_box2():
    """Show Attack grouped by ``Type 1`` as a box plot, then as a violin plot.

    Both figures use the module-level ``df`` and a y axis limited to 0..200.
    """
    plt.title('&#x4EE5;Tpye1&#x5206;&#x7C7B;')
    # One box per 'Type 1' value on the x axis; Attack values on the y axis.
    sns.boxplot(x="Type 1",y='Attack',data=df)
    # Limit the y axis to 0..200.  This was written as plt.ylim(0.200), which
    # passes the single float 0.2 and therefore only moved the lower limit.
    plt.ylim(0, 200)
    plt.show()
    # A violin plot serves a similar purpose to the box plot but draws the
    # estimated distribution of the values rather than the values themselves.
    sns.violinplot(x="Type 1",y="Attack",data=df)
    # Same y-axis limits as the box plot above.
    plt.ylim(0, 200)
    plt.show()

draw_box2()

(7)&#x5C0F;&#x63D0;&#x7434;&#x56FE;
&#x4E24;&#x4EE3;&#x4E4B;&#x95F4;&#x5BA0;&#x7269;&#x5C5E;&#x6027;&#x5BF9;&#x6BD4;
'Generation' 中只取1,2两代, 最后的图就是这两代的对比
def draw_violin():
    """Draw split violins of ``Total`` per ``Type 1``, one half per generation.

    Only rows of ``df`` with Generation 1 or 2 and a ``Type 1`` of Fire,
    Water, Grass or Dragon are plotted.
    """
    in_generation = df['Generation'].isin([1, 2])
    in_type = df['Type 1'].isin(['Fire', 'Water', 'Grass', 'Dragon'])
    selected = df[in_generation & in_type]
    # x groups by 'Type 1', y is 'Total', and hue splits each violin by
    # 'Generation'.
    sns.violinplot(
        x='Type 1',
        y='Total',
        hue='Generation',
        data=selected,
        split=True,
    )
    plt.show()

#draw_violin()
data=df[df['Generation'].isin([1,2])]
print(data)
data=data[data['Type 1'].isin(['Fire','Water','Grass','Dragon'])]
print(data)

#(8)&#x7C7B;&#x522B;&#x6563;&#x5E03;&#x56FE;
def draw_swarm():
    """Draw a swarm plot of HP per ``Type 1`` for the ten most common types.

    Each point is one row of ``df``, coloured by its ``Legendary`` value.  A
    dashed red horizontal line marks the mean HP of the plotted rows.
    """
    # The ten most frequent 'Type 1' values.
    common_types = df['Type 1'].value_counts().head(10).index
    plotted = df[df['Type 1'].isin(common_types)]
    sns.swarmplot(x='Type 1', y='HP', data=plotted, hue='Legendary')
    # Mean line.
    mean_hp = plotted['HP'].mean()
    plt.axhline(mean_hp, color='red', linestyle='dashed')
    plt.show()

draw_swarm()
top_types=df['Type 1'].value_counts()[:10]
print(top_types)
Water       112
Normal       98
Grass        70
Bug          69
Psychic      57
Fire         52
Rock         44
Electric     44
Ground       32
Ghost        32
# Keep only the rows whose 'Type 1' is one of the top ten types.
df1 =df[df['Type 1'].isin(top_types.index)]
print(df1)
print("mean**************")
# Call mean(): without the parentheses this printed the bound method object
# instead of the average HP.
print(df1['HP'].mean())

(9)&#x6298;&#x7EBF;&#x56FE;
def draw_group():
    """Plot, per generation, how many rows of ``df`` have each selected type.

    One line is drawn for each of Water, Fire, Grass, Dragon, Normal, Rock
    and Electric, with Generation on the x axis.
    """
    # Count the non-null 'Total' entries for every (Generation, Type 1) pair.
    per_pair = (
        df.groupby(['Generation', 'Type 1'])['Total']
        .count()
        .reset_index()
    )
    # Reshape to wide form: one row per generation, one column per type,
    # with the count as the cell value.
    wide = per_pair.pivot(index='Generation', columns='Type 1', values='Total')
    # Plot only a subset of the type columns.
    shown = ['Water', 'Fire', 'Grass', 'Dragon', 'Normal', 'Rock', 'Electric']
    wide[shown].plot(marker='o')
    plt.show()

draw_group()   &#x4EE5;'Generation'&#x4E3A;&#x7B2C;&#x4E00;&#x4E2A;&#x5206;&#x7C7B;    &#x4EE5;'Type 1' &#x4E3A;&#x5728;&#x7B2C;&#x4E00;&#x4E2A;&#x7C7B;&#x4E0B;&#x7684;&#x7B2C;&#x4E8C;&#x4E2A;&#x5206;&#x7C7B;
a=df.groupby(['Generation','Type 1']).count()
print(a)
&#x5C06;Generation&#x524D;&#x9762;&#x7684;1 2 3 ...&#x5168;&#x90E8;&#x52A0;&#x4E0A;&#x53BB;
a=df.groupby(['Generation','Type 1']).count().reset_index()
print(a)
draw_group()

#(10)&#x8BCD;&#x4E91;&#x56FE;

#(11)&#x96F7;&#x8FBE;&#x56FE;

6.5 自定义图片词云图

from PIL import Image
from wordcloud import WordCloud, ImageColorGenerator
import matplotlib.pyplot as plt
import numpy as np
import  jieba
def GetWordCloud(path_txt='jack3.txt', path_img="timg2.jpg",
                 font_path="C:/Windows/Fonts/simfang.ttf"):
    """Render and display a word cloud shaped and coloured by an image.

    Args:
        path_txt: Path of the UTF-8 text file whose words fill the cloud.
        path_img: Path of the image used both as the mask and as the source
            of the word colours.
        font_path: Path of the font file used to draw the words. Without a
            suitable font, Chinese text is rendered as empty boxes.

    The defaults reproduce the previously hard-coded values, so calling
    ``GetWordCloud()`` with no arguments behaves as before.
    """
    # Use context managers so the text file and the image file are closed
    # deterministically instead of being left to the garbage collector.
    with open(path_txt, 'r', encoding='UTF-8') as text_file:
        text = text_file.read()
    with Image.open(path_img) as img:
        background_image = np.array(img)

    # Segment the text with jieba and join the tokens with spaces; a correct
    # Chinese word cloud cannot be generated from unsegmented text.
    cut_text = " ".join(jieba.cut(text))

    wordcloud = WordCloud(
        font_path=font_path,
        background_color="white",
        # The mask image defines the shape; when a mask is given, separately
        # configured width/height have no effect.
        mask=background_image,
    ).generate(cut_text)

    # Derive the word colours from the background image.
    image_colors = ImageColorGenerator(background_image)

    # Display the recoloured cloud without axes.
    plt.imshow(wordcloud.recolor(color_func=image_colors), interpolation="bilinear")
    plt.axis("off")
    plt.show()

# Build and show the word cloud only when this file is executed as a script.
if __name__ == "__main__":
    GetWordCloud()

Original: https://blog.csdn.net/lihuazaizheli/article/details/124896712
Author: 黄土高坡上的独孤前辈
Title: Python的Numpy与Pandas包的使用

原创文章受到原创版权保护。转载请注明出处：https://www.johngo689.com/696139/

转载文章受原作者版权保护。转载请注明原作者出处！

人工智能

【自取】最近整理的，有需要可以领取学习：

Linux核心资料大放送~

全栈面试题汇总（持续更新&可下载）

一个提高学习100%效率的工具！

【超详细】深度学习面试题目！

LeetCode Python刷题答案下载！

LeetCode Java版刷题答案下载！

LeetCode C++ 版本，抓紧保存！

LeetCode GO语言刷题答案下载！

回归预测 | MATLAB实现LSTM(长短期记忆神经网络)多输入单输出

回归预测 | MATLAB实现LSTM(长短期记忆神经网络)多输入单输出程序设计环境准备清理工作区间及命令窗口 clc;clear; warning off; 导入数据准备输入…

人工智能 2023年6月18日
0090
python 总结

1.1 列表包括列表的排序、 zip函数、交集、并集等。 1.2 元组包括元组的创建和常用方法（ count方法） 1.3 字典包括字典的创建和操作（访问、插入、替换、检查、…

人工智能 2023年7月17日
0035
【深度学习】基于卷积神经网络（tensorflow）的人脸识别项目（四）

目录前言基本思路测试人脸识别效果 * 设计思路详细代码 – 加载模型人脸预测主要逻辑测试效果总结所有代码 * face_predict_use_ker…

人工智能 2023年7月29日
0056
使用Keras的面部表情识别

使用Keras的面部表情识别项目实施… 介绍和概述 Keras是一个非常强大的开源Python库，它运行在TensorFlow、Theano等其他开源机器库之上，用于…

人工智能 2023年6月27日
0062
Power BI数据查询编辑

数据导入使用Power BI Desktop进行数据分析，需要先获取数据，Power BI Desktop支持从文件、数据库、Power Platform等多种数据源获取数据。为…

人工智能 2023年6月11日
0095
图像处理（一）图像灰度化的三种方式

图像处理的第一步操作基本都是进行灰度化，而灰度化的方式有很多种，可以根据自己的具体需要进行参数调整，基本分为四大种，分别为 YUV亮度灰度化、最大值灰度化、平均值灰度化、 Ga…

人工智能 2023年6月17日
0074
开源SPL强化MongoDB计算

MongoDB是NoSQL数据库的典型代表，支持文档结构的存储方式数据存储和使用更为便捷，数据存取效率也很高，但计算能力较弱，实际使用中涉及MongoDB的计算尤其是复杂计算会很麻…

人工智能 2023年7月30日
0033
推荐系统笔记（十）：InfoNCE Loss 损失函数

背景对比学习损失函数有多种，其中比较常用的一种是InfoNCE loss。最近学习实现了SGL推荐系统算法，对InfoNCE Loss做一个总结。 InfoNCE Loss损失函…

人工智能 2023年7月4日
0071
Python dataframe 多条件筛选/过滤数据的方法及函数isin，query，contains，loc的使用介绍

1. 背景概述日常的数据分析中，经常要根据各种不同的条件从数据集中筛选出相应的数据记录，再进行提取、替换、修改和分析等操作。因此筛选是数据分析中使用频率最高的操作之一。在刚开始做…

人工智能 2023年7月4日
0067
python 中，sklearn包下的f1_score、precision、recall使用方法，Accuracy、Precision、Recall和F1-score公式，TP、FP、TN、FN的概念

目录 sklearn.metrics.f1_score sklearn.metrics.precision_score sklearn.metrics.recall_score A…

人工智能 2023年7月5日
00113
深度学习（初识tensorflow2.版本）之三好学生成绩问题（1）

🔝🔝🔝🔝🔝🔝🔝🔝🔝🔝🔝🔝🥰 博客首页：knighthood2001😗 欢迎点赞👍评论🗨️❤️ 热爱python，期待与大家一同进步成长！！❤️👀 给大家推荐一款很火爆的刷题、面试求…

人工智能 2023年6月15日
0060
本科生学深度学习，搭建环境，再不入坑就晚了

1、目的 2、心理准备 3、IDE的选择 4、AI框架的选择 5、安装环境 6、总结最近没怎么写游戏了，一直在写python，是因为我对深度学习感兴趣，想学习一下，同时也觉得AI…

人工智能 2023年7月23日
0067
.pb文件转换为tflite文件遇到问题汇总

1、AttributeError: type object ‘TFLiteConverterV2’ has no attribute ‘from…

人工智能 2023年5月26日
00102
聚类和分类算法的区别

抵扣说明： 1.余额是钱包充值的虚拟货币，按照1:1的比例进行支付金额的抵扣。2.余额无法直接购买下载，可以购买VIP、C币套餐、付费专栏及课程。 Original: https:…

人工智能 2023年6月2日
0066
基于时间序列的残差自回归模型

实验数据来源于课本课后习题： 1 、首先加载所需的数据包，并画出时序图：时序图可以看出数据呈现上升趋势。 2 、所以我们先对趋势进行拟合，首先通过时间 t 作为解释变量对趋势进行…

人工智能 2023年6月17日
0089
TensorBoard详解之安装使用和代码介绍

目录 * – 1.TensorBoard详解 – + 1.1 环境 + 1.2 安装 + 1.3 展示 + 1.4说明 – 2.使用 &#821…

人工智能 2023年7月28日
0060

2024 年 4 月
一	二	三	四	五	六	日
1	2	3	4	5	6	7
8	9	10	11	12	13	14
15	16	17	18	19	20	21
22	23	24	25	26	27	28
29	30

Python的Numpy与Pandas包的使用

1.1 ndarray对象

1.2 numpy数据类型

1.3 numpy数组属性

1.4 numpy创建数组

1.5 numpy从已有的数据中创建数组

1.6 NumPy从数值范围创建数组

1.7 numpy的切片和索引

1.8 numpy高级索引

1.9 NumPY广播(Broadcast)

1.10 numpy迭代数组

1.11 numpy数组操作

1.13 NumPy5种常见函数

1.17 numpy线性代数

1.18 numpy IO

3.1 Pandas的使用Series序列

3.2 Pandas的使用DataFrame

6.1 绘图基础知识1

6.2 绘图基础知识2

6.3 绘图基础知识3

6.4 基于Matplotlib数据分析制图

6.5 自定义图片词云图

大家都在看