Pandas知识点
导入:
import pandas as pd
df=pd.read_excel("data/梁山108将.xlsx")
打印数据的数据结构
显示数据的形状(行、列)/每一列索引
/通过索引输出一列/查看列数据类型
Series结构;DataFrame结构
创建dataframe
基本操作
data=[{'座次':'1','姓名':'宋江'},{'座次':'2','姓名':'卢俊义'},{'座次':'3','姓名':'吴用'}]
df=pd.DataFrame(data,index=['rank1','rank2','rank3'])
df['绰号']=['及时雨','玉麒麟','智多星']
df['绰号']=['呼保义','玉麒麟','智多星']
df['姓名']
del df['绰号']
删除列:
del 直接删除源数据,没有返回值
df.pop() 直接删除源数据,返回删除的series
df.drop() 默认不删除源数据,而是返回删除后的新DataFrame(只有指定inplace=True时才直接修改源数据,此时返回None)
df.loc['rank1']
rank2=df.iloc[1]
df.loc['rank4'] = ['4','公孙胜','入云龙']
df.loc['rank5'] = {'姓名':'关胜','绰号':'大刀','座次':'5'}
df.loc['rank4']={'姓名':'关胜','绰号':'大刀关胜','座次':'4'}
运行结果截图:
df1 = df.drop(index = ['rank3','rank5'])
df.drop(index = ['rank3','rank5'],inplace=True)
关于索引
以梁山108将为例
df=pd.read_excel("data/梁山108将.xlsx")
df.head()
df.tail()
df.index
df.columns
df[0:11]
df['姓名']
df[['姓名','绰号','梁山泊职位']]
df.loc[5:10,['姓名','绰号']]
df.loc[5:10,'姓名':'梁山泊职位']
df.iloc[5:10,1:4]
注意loc和iloc的区别:按范围取值时,loc包含结束位置,iloc不包含结束位置
df.reindex(columns=['绰号','姓名'])
df.reindex(index=[9,99],columns=['绰号','姓名'])
算数运算
1·Series
s1=pd.Series([1,2,3,4,5],index=['a','b','c','d','e'])
s2=pd.Series([11,12,13,14,15],index=['a','b','c','d','e'])
s=s1+s2
2·DataFrame
import numpy as np
df1=pd.DataFrame(np.arange(9).reshape(3,3),index=['宋江','李逵','武松'],columns=['语文','数学','英语'])
df2=pd.DataFrame(np.arange(9).reshape(3,3),index=['宋江','李逵','武松'],columns=['语文','数学','物理'])
df1['总成绩']=df1['语文']+df1['数学']+df1['英语']
df=df1.add(df2,fill_value=0)
加法add,减法sub,乘法mul,除法 div
函数运用
import numpy as np
import pandas as pd
df1=pd.DataFrame(np.arange(9).reshape(3,3),index=['宋江','李逵','武松'],columns=['语文','数学','英语'])
np.max(df1)
np.max(df1,axis=1)
numpy数学函数
排序
df1.rename({'宋江':'songjiang','李逵':'likui','武松':'wusong'},axis='index')
df1.rename({'宋江':'songjiang','李逵':'likui','武松':'wusong'},axis='index',inplace=True)
df1.sort_index()
df1.sort_index(ascending=False)
df1.rename({'语文':'yuwen','数学':'shuxue','英语':'yingyu'},axis=1,inplace=True)
df1.sort_index(axis=1)
df1.sort_index(axis=1,ascending=False)
df1.sort_values(by=['shuxue'])
df1.sort_values(by=['shuxue','yingyu'])
处理缺失值
df1['yingyu'] = [np.nan,5,8]
df1.isnull()
df1.notnull()
df1.info()
处理方式1:删除缺失值
df1.dropna()
df1.dropna(axis=1)
df1.dropna(how='all')
处理方式2:填充缺失值
df1.fillna(0)
df1.fillna(df1.mean() )
读取和存储数据
movies = pd.read_csv('douban20.txt',
    sep='|',
    encoding="utf-8-sig",
    header=None,
    names = ['电影名','评分','评论数'],
    nrows = 10,
    na_values=0,
    skiprows=10,
)
movies.columns=['电影名','评分','评论数']
movies.to_csv('douban250.txt')
merge合并问题
zoo = pd.read_csv('Data/zoo.csv')
zoo_eat = pd.read_csv('Data/zoo_eat.csv')
pd.merge(zoo,zoo_eat)
pd.merge(zoo,zoo_eat,on='animal',how='inner')
pd.merge(zoo,zoo_eat,left_on='animal',right_on='animals',how='outer')
pd.merge(zoo,zoo_eat,left_on='animal',right_on='animals',how='outer').fillna(0)
pd.merge(zoo,zoo_eat,left_on='animal',right_on='animals',how='left')
pd.merge(zoo,zoo_eat,left_on='animal',right_on='animals',how='right')
concat连接
zoo1 = pd.concat([zoo,zoo_eat])
zoo1 = pd.concat([zoo,zoo_eat],
axis=1,
ignore_index = True
)
去重
data = pd.concat([zoo,zoo_eat])
data.drop_duplicates(inplace=True)
data.drop_duplicates(inplace=True,keep='first')
data.drop_duplicates(inplace=True,keep='last')
data.drop_duplicates(subset=['animal','water_need'],inplace=True,keep='last')
Pandas与MySQL
import pymysql
conn = pymysql.connect(
host='localhost',
user='root',
password='admin123',
db='world',
charset='utf8',
cursorclass=pymysql.cursors.DictCursor)
import pandas as pd
data = pd.read_sql('select * from city',conn)
import pymysql
from sqlalchemy import create_engine
import pandas as pd
engine = create_engine('mysql+pymysql://root:admin123@localhost:3306/world')
conn = engine.connect()
data = pd.read_sql('select * from city',conn)
conn.close()
MySQL查询语句大全 https://www.cnblogs.com/mofujin/p/11355517.html
MySQL基础语句
插入数据
insert into 表名(字段名1,字段名2) VALUES(字段值1,字段值2)
删除数据
delete from 表名 where 条件
修改数据
update 表名 set 字段赋值 where 条件
查询数据
select 字段名 from 表名 where 条件
可对以上查询限制返回结果的条数(使用 LIMIT)
模糊查询
name LIKE…
正则表达式
数字
字符串
Original: https://blog.csdn.net/An_0330/article/details/120819963
Author: ximu VS code
Title: Python-Pandas知识点整理
原创文章受到原创版权保护。转载请注明出处:https://www.johngo689.com/675928/
转载文章受原作者版权保护。转载请注明原作者出处!