【笔记】电商订单数据分析实战

文章目录

一、数据清洗


import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

%config InlineBackend.figure_format = 'svg'

ec_df = pd.read_csv('datas/ecomm/某电商平台2021年订单数据.csv', index_col=0)

ec_df['orderTime'] = pd.to_datetime(ec_df['orderTime'])
ec_df['payTime'] = pd.to_datetime(ec_df['payTime'])
ec_df

ec_df = ec_df[ec_df['orderTime'].dt.year == 2021]
ec_df

ec_df = ec_df[(ec_df['payTime'] - ec_df['orderTime']).dt.total_seconds()  1800]
ec_df = ec_df[ec_df['payTime'] >= ec_df['orderTime']]
ec_df = ec_df[(ec_df['payment'] >= 0) & (ec_df['orderAmount'] >= 0)]
ec_df

ec_df['chanelID'] = ec_df['chanelID'].fillna(ec_df['chanelID'].mode()[0])
ec_df.info()

ec_df = ec_df.rename(columns={'chanelID': 'channelID', 'platfromType': 'platformType'})
ec_df

ser = ec_df['platformType'].replace(r'[\s·]', '', regex=True)
ser = ser.str.title()
ec_df['platformType'] = ser.replace(['薇信', 'Vx', '网页', '网站'],
                                    ['微信', '微信', 'Web', 'Web'])

temp_df = ec_df[ec_df['payment']  ec_df['orderAmount']]
mean_discount = (temp_df['payment'] / temp_df['orderAmount']).mean()
ec_df['payment'] = np.round(
    ec_df['payment'].where(
        ec_df['payment']  ec_df['orderAmount'],
        ec_df['orderAmount'] * mean_discount
    )
)

二、数据分析

2.1 计算总体指标


GMV = ec_df['orderAmount'].sum()
print(GMV)

all_payment = ec_df['payment'].sum()
print(all_payment)

true_payment = ec_df[ec_df['chargeback'] == False]['payment'].sum()
print(true_payment)

chargeback_ratio = f"{100 * len(ec_df[ec_df['chargeback']]) / len(ec_df):.2f}%"
print(chargeback_ratio)

ARPU = true_payment / ec_df['userID'].nunique()
print(ARPU)

2.2 计算每月GMV及趋势分析


ec_df['month'] = ec_df['orderTime'].dt.month
ec_df

temp_df = ec_df
temp_df['true_payment'] = temp_df['payment'].where(ec_df['chargeback'] == False, 0)

monthly_gmv = pd.pivot_table(temp_df, index='month', values=['orderAmount', 'true_payment'], aggfunc='sum')
monthly_gmv = monthly_gmv.applymap(lambda x: np.round(x/10000, 2))

import pyecharts.options as opts
from pyecharts.charts import Line

gmv_line = Line()

gmv_line.add_xaxis(monthly_gmv.index.to_list())

gmv_line.add_yaxis('GMV', monthly_gmv['orderAmount'].to_list())

gmv_line.add_yaxis('实际销售额', monthly_gmv['true_payment'].to_list())

gmv_line.set_global_opts(
    title_opts=opts.TitleOpts(title="2021年按月销售额图"),
    yaxis_opts=opts.AxisOpts(name='GMV(单位:万元)'),
    xaxis_opts=opts.AxisOpts(name='时间(单位:月)')
)

gmv_line.render_notebook()

【笔记】电商订单数据分析实战

2.3 流量渠道来源拆解GMV占比


channel_gmv = pd.pivot_table(temp_df, index='channelID', values=['orderAmount'], aggfunc='sum')
channel_gmv = channel_gmv.applymap(lambda x: x/ 10000)
channel_gmv

from pyecharts.charts import Pie

channel_pie = Pie()

channel_pie.add(
    '',
    channel_gmv.reset_index().values.tolist(),
    radius=["50%", "75%"]
)

channel_pie.set_global_opts(
        title_opts=opts.TitleOpts(title="各渠道GMV占比"),
        legend_opts=opts.LegendOpts(orient="vertical", pos_top="15%", pos_left="2%")
)

channel_pie.set_series_opts(label_opts=opts.LabelOpts(formatter="{b}:  {d}%"))

channel_pie.render_notebook()

【笔记】电商订单数据分析实战

2.4 按星期几统计下单量


temp_df['weekday'] = temp_df['orderTime'].dt.weekday.map(lambda x: f'星期{x + 1}')
temp_df

weekday_count = pd.pivot_table(temp_df, index='weekday', values='orderID', aggfunc='nunique')
weekday_count

from pyecharts.charts import Bar
from pyecharts.globals import ThemeType

weekday_bar = Bar({"theme": ThemeType.LIGHT})

weekday_bar.add_xaxis(weekday_count.index.tolist())

weekday_bar.add_yaxis("下单量", weekday_count['orderID'].tolist())

weekday_bar.set_global_opts(title_opts=opts.TitleOpts(title="各星期数下单量"))

weekday_bar.set_series_opts(
    label_opts=opts.LabelOpts(is_show=True),
    markline_opts=opts.MarkLineOpts(
        data=[
            opts.MarkLineItem(type_="min", name="最小值"),
            opts.MarkLineItem(type_="max", name="最大值")
        ]
    ),
)

weekday_bar.render_notebook()

【笔记】电商订单数据分析实战

2.5 根据下单时段统计下单量(30分钟一段)


temp_df['hour'] = temp_df['orderTime'].dt.floor('30T').dt.strftime('%H:%M')
temp_df

time_count = pd.pivot_table(temp_df, index='hour', values='orderID', aggfunc='nunique')
time_count

weekday_bar = Bar({"theme": ThemeType.ESSOS})

weekday_bar.add_xaxis(time_count.index.tolist())

weekday_bar.add_yaxis("下单量", time_count['orderID'].tolist())

weekday_bar.set_global_opts(
    title_opts=opts.TitleOpts(title="各时段下单量"),
    datazoom_opts=opts.DataZoomOpts()
)

weekday_bar.set_series_opts(
    label_opts=opts.LabelOpts(is_show=True),
    markline_opts=opts.MarkLineOpts(
        data=[
            opts.MarkLineItem(type_="min", name="最小值"),
            opts.MarkLineItem(type_="max", name="最大值")
        ]
    ),
)

weekday_bar.render_notebook()

【笔记】电商订单数据分析实战

2.6 按月统计复购率


multiple_bought = pd.pivot_table(temp_df, index='userID', columns='month', values='orderID', aggfunc='nunique')
multiple_bought = multiple_bought.applymap(
    lambda x: np.nan if np.isnan(x) else (
        0 if x == 1 else 1
    )
)
np.round(100 * multiple_bought.sum() / multiple_bought.count(), 2)

三、RFM模型

3.1 数据预处理


temp_df = pd.pivot_table(
    ec_df[ec_df['chargeback'] == False],
    index='userID',
    values=['orderTime', 'orderID', 'payment'],
    aggfunc={'orderTime': 'max', 'orderID': 'nunique', 'payment': 'sum'}
)
temp_df

from datetime import datetime

last_day = datetime(2021, 12, 31)
temp_df['orderTime'] = (last_day - temp_df['orderTime']).dt.days
temp_df

temp_df.rename(columns={'orderTime': 'R', 'orderID': 'F', 'payment': 'M'}, inplace=True)
temp_df = temp_df.reindex(columns=['R', 'F', 'M'])
temp_df

3.2 获取RFM等级分


temp_df['R_score'] = pd.cut(
    temp_df['R'],
    bins=[-1, 8, 31, 61, 121, 201, 365],
    right=False,
    labels=[6, 5, 4, 3, 2, 1]
).astype(np.int64)

temp_df['F_score'] = pd.cut(
    temp_df['F'],
    bins=[1, 2, 3, 4, 5, 6, 8],
    right=False,
    labels=[1, 2, 3, 4, 5, 6]
).astype(np.int64)

temp_df['M_score'] = pd.cut(
    temp_df['M'],
    bins=[0, 200, 500, 1000, 2000, 5000, 99999],
    right=False,
    labels=[1, 2, 3, 4, 5, 6]
).astype(np.int64)

3.3 用户分群


temp_df = (temp_df >= temp_df.mean()).applymap(lambda x: '1' if x else '0')
temp_df['tag'] = temp_df['R_score'] + temp_df['F_score'] + temp_df['M_score']
temp_df

def customer_type(tag):
    if tag == '111':
        return '重要价值客户'
    elif tag == '101':
        return '重要发展客户'
    elif tag == '011':
        return '重要保持用户'
    elif tag == '001':
        return '重要挽留用户'
    elif tag == '110':
        return '一般价值客户'
    elif tag == '100':
        return '一般发展客户'
    elif tag == '010':
        return '一般保持客户'
    return '一般挽留客户'

temp_df['type'] = temp_df['tag'].map(customer_type)
temp_df

data = pd.pivot_table(temp_df, index='type', values='tag', aggfunc='count')
data = data.reset_index().values.tolist()

from pyecharts.charts import Pie

rfm_pie = Pie()

rfm_pie.add(
    "",
    data,
    radius=["50%", "75%"]
)

rfm_pie.set_global_opts(
    title_opts=opts.TitleOpts(title="RFM模型"),
    legend_opts=opts.LegendOpts(orient="vertical", pos_top="15%", pos_left="2%"),
)

rfm_pie.set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {d}%"))

rfm_pie.render_notebook()

【笔记】电商订单数据分析实战

Original: https://blog.csdn.net/SpriteNym/article/details/125547727
Author: Sprite.Nym
Title: 【笔记】电商订单数据分析实战

原创文章受到原创版权保护。转载请注明出处:https://www.johngo689.com/675431/

转载文章受原作者版权保护。转载请注明原作者出处!

(0)

大家都在看

亲爱的 Coder【最近整理,可免费获取】👉 最新必读书单  | 👏 面试题下载  | 🌎 免费的AI知识星球