利用 Python 读取数据合并对比同时写入Excel生成报告

文章目录

*
前言
1. 数据格式
2. 程序

+ 1. 生成随机名字
+ 2. 生成随机整数组
+ 3. 通过字典合并数据
+ 4. 通过 Pandas 的 merge 合并数据
+ 5. 将 Pandas 的 DataFrame 数据写入 Excel 生成报告
+ 6. 随机生成数据并测试
+ 7. 完整代码
3. 程序结果

+ 1. Terminal 输出
+ 2. Excel 内容
4. 总结

前言

学习工作中经常需要对比数据,比如上一版本代码和目前代码跑的数据精度或者时间性能对比。如下程序就给出了两种方法,推荐第二种,同时方便生成数据报告,也可以方便利用 Python 将数据直接写进 Excel 中,生成报告。具体代码可以去我的GitHub下载。

1. 数据格式

id name height
0 ubhyms 207
1 HXZXey 176
2 WqSpPM 192
3 MKWzzI 188
4 kGGQXy 182
5 weFUul 156
6 vdORms 174

id name weight
0 CnVBzn 171
1 xGZqvG 121
2 HNYven 222
3 FLCyed 112
4 ObOfpY 148
5 NlvyTc 234
6 OrxPhQ 158
7 ORoavJ 212

2. 程序

1. 生成随机名字

def generate_name(number, max_length_of_name):
  names = []
  ascii_letters = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
  for i in range(number):
    name = ''
    for j in range(max_length_of_name):
      name = name + random.choice(ascii_letters)
    names.append(name)
  return names

2. 生成随机整数组


def generate_number(quantity, min_number, max_number):
  datas = []
  for i in range(quantity):
    datas.append(random.randint(min_number, max_number))
  return datas

3. 通过字典合并数据

def merge_data_by_map(data1, data2, head1, head2, data1_same_id, data2_same_id):
  datas = []
  dict_map = {}
  for i in range(len(data1)) :
    data = []
    for j in range(len(data1[i])):
      if j != data1_same_id :
        data.append(data1[i][j])
    dict_map[str(data1[i][data1_same_id])] = data

  for i in range(len(data2)):
    data = data2[i].copy()
    key = data[data2_same_id]
    if dict_map.get( key ) is not None:
      val = dict_map[key]
      for j in range(len(val)) :
        data.append(val[j])
      datas.append(data)
  return datas

4. 通过 Pandas 的 merge 合并数据

def merge_data_by_pandas(data1, data2, head1, head2, data1_same_id, data2_same_id):
  df1 = pd.DataFrame(data1, columns=head1)
  df2 = pd.DataFrame(data2, columns=head2)

  key = head1[data1_same_id]
  df3 = pd.merge(df1, df2, how='inner', on=key)
  return df3

5. 将 Pandas 的 DataFrame 数据写入 Excel 生成报告


def write_excel(file_name, datas):
  workbook = xlsxwriter.Workbook(file_name)
  style = workbook.add_format({
      "fg_color": "yellow",
      "bold": 1,
      "align": "center",
      "valign": "vcenter",
      "font_color": "red"
    })
  style_cen = workbook.add_format({
      "align": "center",
      "valign": "vcenter",
  })

  sheetname = "data"
  worksheet = workbook.add_worksheet(sheetname)

  row, col = len(datas), datas.shape[1]

  head = [column for column in datas]
  worksheet.write_row('A1', head, style)
  worksheet.freeze_panes(1, 1)

  for i in range(row):
    for j in range(col):
      worksheet.write(i+1, j, datas.iloc[[i], [j]].values[0][0], style_cen)

  worksheet.set_column(0, col, 16)
  workbook.close()

注:此处可以参考我之前的一篇博客:
Python利用xlsxwriter读写Excel文件(持续补充)

6. 随机生成数据并测试

def test_of_merge_data():
  Number = 1000
  names = generate_name(Number, 6)
  height = generate_number(Number, 150, 210)
  weight = generate_number(Number, 80, 250)

  Number_of_data1 = 100
  Number_of_data2 = 80
  data1 = []
  for i in range(Number_of_data1):
    id = random.randint(0, Number-1)
    data = [names[id], height[id]]
    data1.append(data)

  data2 = []
  for i in range(Number_of_data2):
    id = random.randint(0, Number-1)
    data = [names[id], weight[id]]
    data2.append(data)

  datas = merge_data_by_map(data1, data2, ["name", "height"], ["name", "weight"], 0, 0)
  print("datas")
  print(datas)

  df = merge_data_by_pandas(data1, data2, ["name", "height"], ["name", "weight"], 0, 0)
  print("df")
  print(df)

  write_excel("data.xlsx", df)

7. 完整代码

import os
import re
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import xlrd
import xlwt
import xlsxwriter

from matplotlib.font_manager import FontProperties

def generate_name(number, max_length_of_name):
  names = []
  ascii_letters = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
  for i in range(number):
    name = ''
    for j in range(max_length_of_name):
      name = name + random.choice(ascii_letters)
    names.append(name)
  return names

def generate_number(quantity, min_number, max_number):
  datas = []
  for i in range(quantity):
    datas.append(random.randint(min_number, max_number))
  return datas

def merge_data_by_map(data1, data2, head1, head2, data1_same_id, data2_same_id):
  datas = []
  dict_map = {}
  for i in range(len(data1)) :
    data = []
    for j in range(len(data1[i])):
      if j != data1_same_id :
        data.append(data1[i][j])
    dict_map[str(data1[i][data1_same_id])] = data

  for i in range(len(data2)):
    data = data2[i].copy()
    key = data[data2_same_id]
    if dict_map.get( key ) is not None:
      val = dict_map[key]
      for j in range(len(val)) :
        data.append(val[j])
      datas.append(data)
  return datas

def merge_data_by_pandas(data1, data2, head1, head2, data1_same_id, data2_same_id):
  df1 = pd.DataFrame(data1, columns=head1)
  df2 = pd.DataFrame(data2, columns=head2)

  key = head1[data1_same_id]
  df3 = pd.merge(df1, df2, how='inner', on=key)
  return df3

def write_excel(file_name, datas):
  workbook = xlsxwriter.Workbook(file_name)
  style = workbook.add_format({
      "fg_color": "yellow",
      "bold": 1,
      "align": "center",
      "valign": "vcenter",
      "font_color": "red"
    })
  style_cen = workbook.add_format({
      "align": "center",
      "valign": "vcenter",
  })

  sheetname = "data"
  worksheet = workbook.add_worksheet(sheetname)

  row, col = len(datas), datas.shape[1]

  head = [column for column in datas]
  worksheet.write_row('A1', head, style)
  worksheet.freeze_panes(1, 1)

  for i in range(row):
    for j in range(col):
      worksheet.write(i+1, j, datas.iloc[[i], [j]].values[0][0], style_cen)

  worksheet.set_column(0, col, 16)
  workbook.close()

def test_of_merge_data():
  Number = 1000
  names = generate_name(Number, 6)
  height = generate_number(Number, 150, 210)
  weight = generate_number(Number, 80, 250)

  Number_of_data1 = 100
  Number_of_data2 = 80
  data1 = []
  for i in range(Number_of_data1):
    id = random.randint(0, Number-1)
    data = [names[id], height[id]]
    data1.append(data)

  data2 = []
  for i in range(Number_of_data2):
    id = random.randint(0, Number-1)
    data = [names[id], weight[id]]
    data2.append(data)

  datas = merge_data_by_map(data1, data2, ["name", "height"], ["name", "weight"], 0, 0)
  print("datas")
  print(datas)

  df = merge_data_by_pandas(data1, data2, ["name", "height"], ["name", "weight"], 0, 0)
  print("df")
  print(df)

  write_excel("data.xlsx", df)

if __name__ == "__main__":
  test_of_merge_data()

3. 程序结果

1. Terminal 输出

利用 Python 读取数据合并对比同时写入Excel生成报告

; 2. Excel 内容

利用 Python 读取数据合并对比同时写入Excel生成报告

4. 总结

最好是用第二种方法,其 merge 函数有更多选择方式,具体可以参考:
Pandas 官方文档

这样可以直接将数据传递给 write_excel 函数,将 DataFrame 数据直接写进 Excel 生成报告。

Original: https://blog.csdn.net/guihunkun/article/details/120167436
Author: guihunkun
Title: 利用 Python 读取数据合并对比同时写入Excel生成报告

原创文章受到原创版权保护。转载请注明出处:https://www.johngo689.com/755708/

转载文章受原作者版权保护。转载请注明原作者出处!

(0)

大家都在看

亲爱的 Coder【最近整理,可免费获取】👉 最新必读书单  | 👏 面试题下载  | 🌎 免费的AI知识星球