Python爬虫实战+数据分析+数据可视化(前程无忧招聘信息)

一、爬虫部分

爬虫说明:
1、本爬虫是以面向对象的方式进行代码架构的
2、本爬虫是通过将前程无忧网页转换成移动端来进行求职信息爬取的
3、本爬虫爬取的数据存入到MongoDB数据库中
4、爬虫代码中有详细注释

代码展示

import time
from pymongo import MongoClient
import requests
from lxml import html

class JobSpider():
    def __init__(self):

        self.headers = {
            "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1"
        }

        self.start_url = 'https://msearch.51job.com/job_list.php?keyword=python&jobarea=000000&pageno=1'

        self.url_list = 'https://msearch.51job.com/job_list.php?keyword=python&jobarea=000000&pageno={}'

        self.client = MongoClient()
        self.collection = self.client['test']['wuyou_job']

    def get_url_list(self,total_num):
        total_num = int(total_num)
        page = total_num//50+1 if total_num%50!=0 else total_num//50
        return [self.url_list.format(i) for i in range(1,page+1)]

    def parse_url(self,url):

        time.sleep(0.5)
        rest = requests.get(url,headers=self.headers)
        return rest.content.decode()

    def get_content_list(self,str_html):
        str_html = html.etree.HTML(str_html)

        job_list = str_html.xpath('//div[@id="pageContent"]/div[@class="list"]/a')

        for i in job_list:
            detail_url = i.xpath('./@href')[0]
            self.parse_detail(detail_url)

    def parse_detail(self,detail_url):
        time.sleep(0.1)
        rest = requests.get(detail_url,headers = self.headers)
        str_html = html.etree.HTML(rest.content.decode())
        item = {}

        item['职位'] = str_html.xpath('//p[@class="jname"]/text()')
        item['职位'] = item['职位'][0] if len(item['职位'])>0 else None
        item['公司名称'] = str_html.xpath('//div[@class="info"]/h3/text()')
        item['公司名称'] = item['公司名称'][0] if len(item['公司名称'])>0 else None
        item['公司地点'] = str_html.xpath('//div[@class="jbox"]/div[@class="m_bre"]/span[2]/text()')
        item['公司地点'] = item['公司地点'][0] if len(item['公司地点'])>0 else None
        item['公司性质'] = str_html.xpath('//div[@class="info"]/div[@class="m_bre"]/span[1]/text()')
        item['公司性质'] = item['公司性质'][0] if len(item['公司性质'])>0 else None
        item['薪资'] = str_html.xpath('//p[@class="sal"]/text()')
        item['薪资'] = item['薪资'][0] if len(item['薪资'])>0 else None
        item['学历要求'] = str_html.xpath('//div[@class="jbox"]/div[@class="m_bre"]/span[3]/text()')
        item['学历要求'] = item['学历要求'][0] if len(item['学历要求'])>0 else None
        item['工作经验'] = str_html.xpath('//div[@class="jbox"]/div[@class="m_bre"]/span[4]/text()')
        item['工作经验'] = item['工作经验'][0] if len(item['工作经验'])>0 else None
        item['公司规模'] = str_html.xpath('//div[@class="info"]/div[@class="m_bre"]/span[2]/text()')
        item['公司规模'] = item['公司规模'][0] if len(item['公司规模'])>0 else None
        item['公司类型'] = str_html.xpath('//div[@class="info"]/div[@class="m_bre"]/span[3]/text()')
        item['公司类型'] = item['公司类型'][0] if len(item['公司类型'])>0 else None
        item['公司福利'] = str_html.xpath('//div[@class="tbox"]/span/text()')
        item['公司福利'] = '-'.join(item['公司福利'])
        item['发布时间'] = str_html.xpath('//span[@class="date"]/text()')
        item['发布时间'] = item['发布时间'][0] if len(item['发布时间'])>0 else None
        print(item)
        self.save(item)

    def save(self,item):
        self.collection.insert(item)

    def run(self):

        rest = requests.get(self.start_url,headers = self.headers)
        str_html = html.etree.HTML(rest.content.decode())
        total_num = str_html.xpath('//p[@class="result"]/span/text()')[0]

        url_list = self.get_url_list(total_num)

        for i in url_list:
            str_html = self.parse_url(i)
            self.get_content_list(str_html)

if __name__ == '__main__':
    job = JobSpider()
    job.run()

二、数据分析和数据可视化部分

数据分析和数据可视化说明:
1、本博客通过Flask框架来进行数据分析和数据可视化
2、项目的架构图为

Python爬虫实战+数据分析+数据可视化(前程无忧招聘信息)

代码展示

  • 数据分析代码展示(analysis.py)
import re
import pandas as pd
import numpy as np
from pymongo import MongoClient
import pymysql

def salary_process(df):

    df['薪资'] = df['薪资'].apply(
        lambda x: str(round(float(re.findall('(.*)元', x)[0]) / 1000 * 30, 1)) if x.endswith('元/天') else x)

    df['薪资'] = df['薪资'].apply(lambda x: str(
        round((float(re.findall(r'(.*)千', x)[0].split('-')[0]) + float(re.findall(r'(.*)千', x)[0].split('-')[1])) / 2,
              1)) if x.endswith('千/月') else x)

    df['薪资'] = df['薪资'].apply(lambda x: str(round(
        (float(re.findall(r'(.*)万', x)[0].split('-')[0]) + float(re.findall(r'(.*)万', x)[0].split('-')[1])) / 2 * 10,
        1)) if len(re.findall(r'万', x)) > 0 and len(re.findall(r'-', x)) > 0 else x)

    df['薪资'] = df['薪资'].apply(lambda x: re.findall('(.*)千以下', x)[0] if x.endswith('千以下/月') else x)

    df['薪资'] = df['薪资'].apply(lambda x: x if len(re.findall('\d\.\d', x)) > 0 else np.nan)
    df['薪资'].dropna(how='any', inplace=True)

    df['薪资'] = df['薪资'].astype(np.float)
    df.reset_index(drop=True, inplace=True)
    return df

def experience_salary(df):

    grouped = df.groupby('工作经验')['薪资'].mean().reset_index()

    grouped = grouped[(grouped['工作经验'].str.contains('经验'))|(grouped['工作经验'].str.contains('在校生'))]

    grouped['薪资'] = grouped['薪资'].apply(lambda x:round(x/10,1))

    data = [[i['工作经验'],i['薪资']] for i in grouped.to_dict(orient='records')]
    print(data)
    return data

def education_salary(df):
    grouped = df.groupby('学历要求').mean().reset_index()
    grouped['薪资'] = grouped['薪资'].apply(lambda x:round(x/10,1))

    grouped = grouped[grouped['学历要求'].str.contains('经验')==False]

    data = [[i['学历要求'],i['薪资']] for i in grouped.to_dict(orient='records')]
    return data

def city_need(df):

    geoCoordMap = {
        '海门': [121.15, 31.89],
        '鄂尔多斯': [109.781327, 39.608266],
        '招远': [120.38, 37.35],
        '舟山': [122.207216, 29.985295],
        '齐齐哈尔': [123.97, 47.33],
        '盐城': [120.13, 33.38],
        '赤峰': [118.87, 42.28],
        '青岛': [120.33, 36.07],
        '乳山': [121.52, 36.89],
        '金昌': [102.188043, 38.520089],
        '泉州': [118.58, 24.93],
        '莱西': [120.53, 36.86],
        '日照': [119.46, 35.42],
        '胶南': [119.97, 35.88],
        '南通': [121.05, 32.08],
        '拉萨': [91.11, 29.97],
        '云浮': [112.02, 22.93],
        '梅州': [116.1, 24.55],
        '文登': [122.05, 37.2],
        '上海': [121.48, 31.22],
        '攀枝花': [101.718637, 26.582347],
        '威海': [122.1, 37.5],
        '承德': [117.93, 40.97],
        '厦门': [118.1, 24.46],
        '汕尾': [115.375279, 22.786211],
        '潮州': [116.63, 23.68],
        '丹东': [124.37, 40.13],
        '太仓': [121.1, 31.45],
        '曲靖': [103.79, 25.51],
        '烟台': [121.39, 37.52],
        '福州': [119.3, 26.08],
        '瓦房店': [121.979603, 39.627114],
        '即墨': [120.45, 36.38],
        '抚顺': [123.97, 41.97],
        '玉溪': [102.52, 24.35],
        '张家口': [114.87, 40.82],
        '阳泉': [113.57, 37.85],
        '莱州': [119.942327, 37.177017],
        '湖州': [120.1, 30.86],
        '汕头': [116.69, 23.39],
        '昆山': [120.95, 31.39],
        '宁波': [121.56, 29.86],
        '湛江': [110.359377, 21.270708],
        '揭阳': [116.35, 23.55],
        '荣成': [122.41, 37.16],
        '连云港': [119.16, 34.59],
        '葫芦岛': [120.836932, 40.711052],
        '常熟': [120.74, 31.64],
        '东莞': [113.75, 23.04],
        '河源': [114.68, 23.73],
        '淮安': [119.15, 33.5],
        '泰州': [119.9, 32.49],
        '南宁': [108.33, 22.84],
        '营口': [122.18, 40.65],
        '惠州': [114.4, 23.09],
        '江阴': [120.26, 31.91],
        '蓬莱': [120.75, 37.8],
        '韶关': [113.62, 24.84],
        '嘉峪关': [98.289152, 39.77313],
        '广州': [113.23, 23.16],
        '延安': [109.47, 36.6],
        '太原': [112.53, 37.87],
        '清远': [113.01, 23.7],
        '中山': [113.38, 22.52],
        '昆明': [102.73, 25.04],
        '寿光': [118.73, 36.86],
        '盘锦': [122.070714, 41.119997],
        '长治': [113.08, 36.18],
        '深圳': [114.07, 22.62],
        '珠海': [113.52, 22.3],
        '宿迁': [118.3, 33.96],
        '咸阳': [108.72, 34.36],
        '铜川': [109.11, 35.09],
        '平度': [119.97, 36.77],
        '佛山': [113.11, 23.05],
        '海口': [110.35, 20.02],
        '江门': [113.06, 22.61],
        '章丘': [117.53, 36.72],
        '肇庆': [112.44, 23.05],
        '大连': [121.62, 38.92],
        '临汾': [111.5, 36.08],
        '吴江': [120.63, 31.16],
        '石嘴山': [106.39, 39.04],
        '沈阳': [123.38, 41.8],
        '苏州': [120.62, 31.32],
        '茂名': [110.88, 21.68],
        '嘉兴': [120.76, 30.77],
        '长春': [125.35, 43.88],
        '胶州': [120.03336, 36.264622],
        '银川': [106.27, 38.47],
        '张家港': [120.555821, 31.875428],
        '三门峡': [111.19, 34.76],
        '锦州': [121.15, 41.13],
        '南昌': [115.89, 28.68],
        '柳州': [109.4, 24.33],
        '三亚': [109.511909, 18.252847],
        '自贡': [104.778442, 29.33903],
        '吉林': [126.57, 43.87],
        '阳江': [111.95, 21.85],
        '泸州': [105.39, 28.91],
        '西宁': [101.74, 36.56],
        '宜宾': [104.56, 29.77],
        '呼和浩特': [111.65, 40.82],
        '成都': [104.06, 30.67],
        '大同': [113.3, 40.12],
        '镇江': [119.44, 32.2],
        '桂林': [110.28, 25.29],
        '张家界': [110.479191, 29.117096],
        '宜兴': [119.82, 31.36],
        '北海': [109.12, 21.49],
        '西安': [108.95, 34.27],
        '金坛': [119.56, 31.74],
        '东营': [118.49, 37.46],
        '牡丹江': [129.58, 44.6],
        '遵义': [106.9, 27.7],
        '绍兴': [120.58, 30.01],
        '扬州': [119.42, 32.39],
        '常州': [119.95, 31.79],
        '潍坊': [119.1, 36.62],
        '重庆': [106.54, 29.59],
        '台州': [121.420757, 28.656386],
        '南京': [118.78, 32.04],
        '滨州': [118.03, 37.36],
        '贵阳': [106.71, 26.57],
        '无锡': [120.29, 31.59],
        '本溪': [123.73, 41.3],
        '克拉玛依': [84.77, 45.59],
        '渭南': [109.5, 34.52],
        '马鞍山': [118.48, 31.56],
        '宝鸡': [107.15, 34.38],
        '焦作': [113.21, 35.24],
        '句容': [119.16, 31.95],
        '北京': [116.46, 39.92],
        '徐州': [117.2, 34.26],
        '衡水': [115.72, 37.72],
        '包头': [110, 40.58],
        '绵阳': [104.73, 31.48],
        '乌鲁木齐': [87.68, 43.77],
        '枣庄': [117.57, 34.86],
        '杭州': [120.19, 30.26],
        '淄博': [118.05, 36.78],
        '鞍山': [122.85, 41.12],
        '溧阳': [119.48, 31.43],
        '库尔勒': [86.06, 41.68],
        '安阳': [114.35, 36.1],
        '开封': [114.35, 34.79],
        '济南': [117, 36.65],
        '德阳': [104.37, 31.13],
        '温州': [120.65, 28.01],
        '九江': [115.97, 29.71],
        '邯郸': [114.47, 36.6],
        '临安': [119.72, 30.23],
        '兰州': [103.73, 36.03],
        '沧州': [116.83, 38.33],
        '临沂': [118.35, 35.05],
        '南充': [106.110698, 30.837793],
        '天津': [117.2, 39.13],
        '富阳': [119.95, 30.07],
        '泰安': [117.13, 36.18],
        '诸暨': [120.23, 29.71],
        '郑州': [113.65, 34.76],
        '哈尔滨': [126.63, 45.75],
        '聊城': [115.97, 36.45],
        '芜湖': [118.38, 31.33],
        '唐山': [118.02, 39.63],
        '平顶山': [113.29, 33.75],
        '邢台': [114.48, 37.05],
        '德州': [116.29, 37.45],
        '济宁': [116.59, 35.38],
        '荆州': [112.239741, 30.335165],
        '宜昌': [111.3, 30.7],
        '义乌': [120.06, 29.32],
        '丽水': [119.92, 28.45],
        '洛阳': [112.44, 34.7],
        '秦皇岛': [119.57, 39.95],
        '株洲': [113.16, 27.83],
        '石家庄': [114.48, 38.03],
        '莱芜': [117.67, 36.19],
        '常德': [111.69, 29.05],
        '保定': [115.48, 38.85],
        '湘潭': [112.91, 27.87],
        '金华': [119.64, 29.12],
        '岳阳': [113.09, 29.37],
        '长沙': [113, 28.21],
        '衢州': [118.88, 28.97],
        '廊坊': [116.7, 39.53],
        '菏泽': [115.480656, 35.23375],
        '合肥': [117.27, 31.86],
        '武汉': [114.31, 30.52],
        '大庆': [125.03, 46.58]
    };
    geoCoordMap = list(geoCoordMap.keys())

    df['公司地点'] = df['公司地点'].apply(lambda x:x.split('-')[0] if len(re.findall(r'-',x))>0 else x)

    grouped = df.groupby('公司地点')['职位'].count().reset_index()
    grouped = grouped[grouped['公司地点'].isin(geoCoordMap)].sort_values('职位',ascending=False)

    data = [[i['公司地点'], i['职位']] for i in grouped.to_dict(orient='records')]
    return data

def experience_count(df):
    grouped = df.groupby('工作经验')['职位'].count().reset_index()

    grouped = grouped[(grouped['工作经验'].str.contains('经验'))|(grouped['工作经验'].str.contains('在校生'))]

    data = [[i['工作经验'], i['职位']] for i in grouped.to_dict(orient='records')]
    return data

def eduction_count(df):
    grouped = df.groupby('学历要求')['职位'].count().reset_index()

    grouped = grouped[grouped['学历要求'].str.contains('经验')==False]

    data = [[i['学历要求'],i['职位']] for i in grouped.to_dict(orient='records')]
    print(data)
    return data

def welfare_count(df):

    welfare = df['公司福利'].str.split('-').tolist()

    welfare_list = [j for i in welfare for j in i]

    welfare_list = list(set(welfare_list))

    zero_list = pd.DataFrame(np.zeros((len(welfare),len(welfare_list))),columns=welfare_list)

    for i in welfare_list:
        zero_list[i][df['公司福利'].str.contains(i)] = 1

    welfare_nums = zero_list.sum().astype(np.int).sort_values(ascending=False)

    welfare_nums = welfare_nums.reset_index()

    welfare_nums = welfare_nums.replace(['五险','年终奖','绩效奖','体检'],['五险一金','年终奖金','绩效奖金','定期体检'])[:25]
    welfare_nums = welfare_nums.replace(['.*补贴','.*年假','.*旅游','.*双休','.*医疗.*','.*奖金'],['补贴','带薪年假','员工旅游','周末双休','医疗保险','奖金'],regex=True)

    welfare_nums.columns = ['员工福利', '数量']

    grouped = welfare_nums.groupby('员工福利')['数量'].sum().reset_index()
    grouped = grouped.sort_values(by='数量',ascending=False)[:10]
    print(grouped)
    data = [[i['员工福利'],i['数量']] for i in grouped.to_dict(orient='records')]
    print(data)
    return data

if __name__ == '__main__':
    client = MongoClient()
    collection = client['test']['wuyou_job']

    jobs = collection.find({}, {'_id': 0})

    df = pd.DataFrame(jobs)

    df.drop_duplicates(subset=['职位', '公司名称', '公司地点',
                               '公司性质', '薪资', '学历要求', '工作经验', '公司规模', '公司类型', '公司福利', '发布时间'], keep='first')

    df['公司福利'] = df['公司福利'].apply(lambda x: x if len(x) > 0 else np.nan)

    df.dropna(how='any', inplace=True)

    df = salary_process(df)
    print(df.head(1))
    print(df.info())

    data = welfare_count(df)

    conn = pymysql.connect(host='localhost',port=3306,user='root',password='123456',database='wuyou',charset='utf8')

    with conn.cursor() as cursor:
        try:

            sql = 'insert into db_welfare_count(welfare,count) values(%s,%s)'
            result = cursor.executemany(sql,data)
            print(result)
            conn.commit()
        except pymysql.MySQLError as error:
            print(error)
            conn.rollback()
        finally:
            conn.close()
  • 数据库模型文件展示(models.py)
from api_1_0 import db

class ExperienceSalary(db.Model):
    __tablename__ = 'db_experience_salary'
    id = db.Column(db.Integer,primary_key=True,autoincrement=True)
    exp_date = db.Column(db.String(64),nullable=False)
    salary = db.Column(db.Float,nullable=False)

class EducationSalary(db.Model):
    __tablename__ = 'db_education_salary'
    id = db.Column(db.Integer,primary_key=True,autoincrement=True)
    edu_level = db.Column(db.String(64),nullable=False)
    salary = db.Column(db.Float,nullable=False)

class CityJobs(db.Model):
    __tablename__  = 'db_city_jobs'
    id = db.Column(db.Integer,primary_key=True,autoincrement=True)
    city = db.Column(db.String(64),nullable=False)
    count = db.Column(db.Integer,nullable=False)

class ExperienceCount(db.Model):
    __tablename__ = 'db_experience_count'
    id = db.Column(db.Integer,primary_key=True,autoincrement=True)
    exp_date = db.Column(db.String(64),nullable=False)
    count = db.Column(db.Integer,nullable=False)

class EducationCount(db.Model):
    __tablename__ = 'db_education_count'
    id = db.Column(db.Integer,primary_key=True,autoincrement=True)
    edu_level = db.Column(db.String(64),nullable=False)
    count = db.Column(db.Integer,nullable=False)

class WelfareCount(db.Model):
    __tablename__ = 'db_welfare_count'
    id = db.Column(db.Integer,primary_key=True,autoincrement=True)
    welfare = db.Column(db.String(64),nullable=True)
    count = db.Column(db.Integer,nullable=False)
  • 配置文件代码展示(config.py)

class Config(object):

    SECRET_KEY = 'ma5211314'

    SQLALCHEMY_DATABASE_URI = 'mysql://root:123456@localhost:3306/wuyou'
    SQLALCHEMY_TRACK_MODIFICATIONS = True

class DevelopmentConfig(Config):
    DEBUG = True

class ProjectConfig(Config):
    pass

config_map = {'develop':DevelopmentConfig,'project':ProjectConfig}

  • 主工程目录代码展示(api_1_0/_ _init__.py)
from flask import Flask
from config import config_map
from flask_sqlalchemy import SQLAlchemy
import pymysql

pymysql.install_as_MySQLdb()

db = SQLAlchemy()

def create_app(config_name='develop'):
    app = Flask(__name__)

    config = config_map[config_name]
    app.config.from_object(config)

    db.init_app(app)

    from . import view
    app.register_blueprint(view.blue,url_prefix='/show')

    return app

  • 主程序文件代码展示(manager.py)
import flask
from flask_script import Manager
from api_1_0 import create_app,db
from flask_migrate import Migrate,MigrateCommand
from flask import render_template
app = create_app()
manger = Manager(app)

Migrate(app,db)
manger.add_command('db',MigrateCommand)

@app.route('/')
def index():
    return render_template('index.html')

if __name__ == '__main__':
    manger.run()

  • 视图文件代码展示(api_1_0/views/_ _init__.py,show.py)

_ _init__.py

from flask import Blueprint

from api_1_0 import db,models
blue = Blueprint('job',__name__)

from . import show

show.py

from api_1_0.view import blue
from ..models import EducationCount,EducationSalary,ExperienceCount,ExperienceSalary,WelfareCount,CityJobs
from flask import render_template

'''
将数据转换成列表的形式或者是列表嵌套字典的形式
locals()方法能够以字典的形式返回函数内所有声明的变量
'''

@blue.route('/drawLine')
def drawLine():
    exp_data = ExperienceSalary.query.all()
    exp_date = [i.exp_date for i in exp_data]
    exp_sal = [i.salary for i in exp_data]
    edu_data = EducationSalary.query.all()
    edu_level = [i.edu_level for i in edu_data]
    edu_sal = [i.salary for i in edu_data]
    return render_template('showLine.html',**locals())

@blue.route('/drawGeo')
def drawGeo():
    city_jobs_data = CityJobs.query.all()
    city_job = [{'name':i.city,'value':i.count} for i in city_jobs_data]
    return render_template('showGeo.html',**locals())

@blue.route('/drawBar')
def drawBar():
    exp_count_data = ExperienceCount.query.all()
    exp_date = [i.exp_date for i in exp_count_data]
    count = [i.count for i in exp_count_data]
    return render_template('showBar.html',**locals())

@blue.route('/drawPie')
def drawPie():
    wel_count_data = WelfareCount.query.all()
    wel_count = [{'name':i.welfare,'value':i.count} for i in wel_count_data]
    edu_count_date = EducationCount.query.all()
    edu_count = [{'name':i.edu_level,'value':i.count} for i in edu_count_date]
    return render_template('showPie.html', **locals())

  • 首页(index.html)

主页简单创建了四个超链接指向对应的图表

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>首页说明</title>
    <style>
        .container{
            width: 100%;
            height: 600px;
            padding: 40px;
            line-height: 60px;
        }
        ul{
            margin: auto;
            width: 60%;
        }
    </style>
</head>
<body>
    <div class="container">
        <ul>
            <li><a href="http://127.0.0.1:5000/show/drawBar" target="_blank"><h3>工作经验与人数分布图</h3></a></li>
            <li><a href="http://127.0.0.1:5000/show/drawGeo" target="_blank"><h3>python城市需求地理位置分布图</h3></a></li>
            <li><a href="http://127.0.0.1:5000/show/drawLine" target="_blank"><h3>工作经验与薪资关系图&学历与薪资关系图</h3></a></li>
            <li><a href="http://127.0.0.1:5000/show/drawPie" target="_blank"><h3>福利类型数量前十占比图&不同学历与求职人数占比图</h3></a></li>
        </ul>
    </div>
</body>
</html>

Python爬虫实战+数据分析+数据可视化(前程无忧招聘信息)
  • 模板文件代码展示(showBar.html,showLine.html,showGeo.html,showPie.html)

showGeo.html

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>python城市需求地理位置分布图</title>
    <script src="../static/js/echarts.min.js"></script>
    <script src="../static/js/jquery.min.js"></script>
    <script src="../static/theme/vintage.js"></script>
</head>
<body>
    <div class="chart" style="width: 800px;height: 600px;margin: auto"></div>
<script>
    // 初始化echarts
    var MyCharts = echarts.init(document.querySelector('.chart'),'vintage')
    // 获取视图函数传入数据并转换成json格式
    var city_job = {{ city_job|tojson }}
    var geoCoordMap = {
    '海门':[121.15,31.89],
    '鄂尔多斯':[109.781327,39.608266],
    '招远':[120.38,37.35],
    '舟山':[122.207216,29.985295],
    '齐齐哈尔':[123.97,47.33],
    '盐城':[120.13,33.38],
    '赤峰':[118.87,42.28],
    '青岛':[120.33,36.07],
    '乳山':[121.52,36.89],
    '金昌':[102.188043,38.520089],
    '泉州':[118.58,24.93],
    '莱西':[120.53,36.86],
    '日照':[119.46,35.42],
    '胶南':[119.97,35.88],
    '南通':[121.05,32.08],
    '拉萨':[91.11,29.97],
    '云浮':[112.02,22.93],
    '梅州':[116.1,24.55],
    '文登':[122.05,37.2],
    '上海':[121.48,31.22],
    '攀枝花':[101.718637,26.582347],
    '威海':[122.1,37.5],
    '承德':[117.93,40.97],
    '厦门':[118.1,24.46],
    '汕尾':[115.375279,22.786211],
    '潮州':[116.63,23.68],
    '丹东':[124.37,40.13],
    '太仓':[121.1,31.45],
    '曲靖':[103.79,25.51],
    '烟台':[121.39,37.52],
    '福州':[119.3,26.08],
    '瓦房店':[121.979603,39.627114],
    '即墨':[120.45,36.38],
    '抚顺':[123.97,41.97],
    '玉溪':[102.52,24.35],
    '张家口':[114.87,40.82],
    '阳泉':[113.57,37.85],
    '莱州':[119.942327,37.177017],
    '湖州':[120.1,30.86],
    '汕头':[116.69,23.39],
    '昆山':[120.95,31.39],
    '宁波':[121.56,29.86],
    '湛江':[110.359377,21.270708],
    '揭阳':[116.35,23.55],
    '荣成':[122.41,37.16],
    '连云港':[119.16,34.59],
    '葫芦岛':[120.836932,40.711052],
    '常熟':[120.74,31.64],
    '东莞':[113.75,23.04],
    '河源':[114.68,23.73],
    '淮安':[119.15,33.5],
    '泰州':[119.9,32.49],
    '南宁':[108.33,22.84],
    '营口':[122.18,40.65],
    '惠州':[114.4,23.09],
    '江阴':[120.26,31.91],
    '蓬莱':[120.75,37.8],
    '韶关':[113.62,24.84],
    '嘉峪关':[98.289152,39.77313],
    '广州':[113.23,23.16],
    '延安':[109.47,36.6],
    '太原':[112.53,37.87],
    '清远':[113.01,23.7],
    '中山':[113.38,22.52],
    '昆明':[102.73,25.04],
    '寿光':[118.73,36.86],
    '盘锦':[122.070714,41.119997],
    '长治':[113.08,36.18],
    '深圳':[114.07,22.62],
    '珠海':[113.52,22.3],
    '宿迁':[118.3,33.96],
    '咸阳':[108.72,34.36],
    '铜川':[109.11,35.09],
    '平度':[119.97,36.77],
    '佛山':[113.11,23.05],
    '海口':[110.35,20.02],
    '江门':[113.06,22.61],
    '章丘':[117.53,36.72],
    '肇庆':[112.44,23.05],
    '大连':[121.62,38.92],
    '临汾':[111.5,36.08],
    '吴江':[120.63,31.16],
    '石嘴山':[106.39,39.04],
    '沈阳':[123.38,41.8],
    '苏州':[120.62,31.32],
    '茂名':[110.88,21.68],
    '嘉兴':[120.76,30.77],
    '长春':[125.35,43.88],
    '胶州':[120.03336,36.264622],
    '银川':[106.27,38.47],
    '张家港':[120.555821,31.875428],
    '三门峡':[111.19,34.76],
    '锦州':[121.15,41.13],
    '南昌':[115.89,28.68],
    '柳州':[109.4,24.33],
    '三亚':[109.511909,18.252847],
    '自贡':[104.778442,29.33903],
    '吉林':[126.57,43.87],
    '阳江':[111.95,21.85],
    '泸州':[105.39,28.91],
    '西宁':[101.74,36.56],
    '宜宾':[104.56,29.77],
    '呼和浩特':[111.65,40.82],
    '成都':[104.06,30.67],
    '大同':[113.3,40.12],
    '镇江':[119.44,32.2],
    '桂林':[110.28,25.29],
    '张家界':[110.479191,29.117096],
    '宜兴':[119.82,31.36],
    '北海':[109.12,21.49],
    '西安':[108.95,34.27],
    '金坛':[119.56,31.74],
    '东营':[118.49,37.46],
    '牡丹江':[129.58,44.6],
    '遵义':[106.9,27.7],
    '绍兴':[120.58,30.01],
    '扬州':[119.42,32.39],
    '常州':[119.95,31.79],
    '潍坊':[119.1,36.62],
    '重庆':[106.54,29.59],
    '台州':[121.420757,28.656386],
    '南京':[118.78,32.04],
    '滨州':[118.03,37.36],
    '贵阳':[106.71,26.57],
    '无锡':[120.29,31.59],
    '本溪':[123.73,41.3],
    '克拉玛依':[84.77,45.59],
    '渭南':[109.5,34.52],
    '马鞍山':[118.48,31.56],
    '宝鸡':[107.15,34.38],
    '焦作':[113.21,35.24],
    '句容':[119.16,31.95],
    '北京':[116.46,39.92],
    '徐州':[117.2,34.26],
    '衡水':[115.72,37.72],
    '包头':[110,40.58],
    '绵阳':[104.73,31.48],
    '乌鲁木齐':[87.68,43.77],
    '枣庄':[117.57,34.86],
    '杭州':[120.19,30.26],
    '淄博':[118.05,36.78],
    '鞍山':[122.85,41.12],
    '溧阳':[119.48,31.43],
    '库尔勒':[86.06,41.68],
    '安阳':[114.35,36.1],
    '开封':[114.35,34.79],
    '济南':[117,36.65],
    '德阳':[104.37,31.13],
    '温州':[120.65,28.01],
    '九江':[115.97,29.71],
    '邯郸':[114.47,36.6],
    '临安':[119.72,30.23],
    '兰州':[103.73,36.03],
    '沧州':[116.83,38.33],
    '临沂':[118.35,35.05],
    '南充':[106.110698,30.837793],
    '天津':[117.2,39.13],
    '富阳':[119.95,30.07],
    '泰安':[117.13,36.18],
    '诸暨':[120.23,29.71],
    '郑州':[113.65,34.76],
    '哈尔滨':[126.63,45.75],
    '聊城':[115.97,36.45],
    '芜湖':[118.38,31.33],
    '唐山':[118.02,39.63],
    '平顶山':[113.29,33.75],
    '邢台':[114.48,37.05],
    '德州':[116.29,37.45],
    '济宁':[116.59,35.38],
    '荆州':[112.239741,30.335165],
    '宜昌':[111.3,30.7],
    '义乌':[120.06,29.32],
    '丽水':[119.92,28.45],
    '洛阳':[112.44,34.7],
    '秦皇岛':[119.57,39.95],
    '株洲':[113.16,27.83],
    '石家庄':[114.48,38.03],
    '莱芜':[117.67,36.19],
    '常德':[111.69,29.05],
    '保定':[115.48,38.85],
    '湘潭':[112.91,27.87],
    '金华':[119.64,29.12],
    '岳阳':[113.09,29.37],
    '长沙':[113,28.21],
    '衢州':[118.88,28.97],
    '廊坊':[116.7,39.53],
    '菏泽':[115.480656,35.23375],
    '合肥':[117.27,31.86],
    '武汉':[114.31,30.52],
    '大庆':[125.03,46.58]
};
    var convertData = function (data) {
        var res = [];
        for (var i = 0; i < data.length; i++) {
            var geoCoord = geoCoordMap[data[i].name];
            if (geoCoord) {
                res.push({
                    name: data[i].name,
                    value: geoCoord.concat(data[i].value)
                });
            }
    }
    return res;
}
    $.get('../static/json/map/my_china.json',function (ret){
        echarts.registerMap('chinaMap',ret)
        var option = {
            geo:{
                type:'map',
                map:'chinaMap',
                roam:true,
                label:{
                    show:true,
                    fontSize:10,
                },
                itemStyle: {
                    color: 'Moccasin'
                },
                zoom:2
            },
            series:[
                {
                    data: convertData(city_job),
                    type: 'effectScatter',
                    rippleEffect: {
                        scale: 2 // 设置涟漪动画的缩放比例
                    },
                    itemStyle:{
                        color:'red'
                    },
                    coordinateSystem:'geo',
                    label:{
                        show: true,
                        position:'bottom',
                        formatter:function (rst)
                        {
                            return '城市:'+rst.name+'\n所需岗位:'+rst.value[2]
                        }
                    }

                }
            ],
        }
        MyCharts.setOption(option)
    })

</script>
</body>
</html>

图表可以进行缩放进行详细查看

Python爬虫实战+数据分析+数据可视化(前程无忧招聘信息)

结论:
中国东部以及东南部对Python人才的需求量大,其中以北京、上海、广州、深圳最为突出。

showBar.html

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>工作经验的人数分布图</title>
    <script src="../static/js/echarts.min.js"></script>
    <script src="../static/theme/vintage.js"></script>
</head>
<body>
<div class="chart" style="width: 800px;height: 600px;margin: auto"></div>
<script>
    // 初始化echarts
    var myCharts = echarts.init(document.querySelector('.chart'),'vintage')
    // 获取视图函数传入数据并转换成json格式
    var exp_date = {{ exp_date|tojson }}
    var count = {{ count|tojson }}
    var option = {
        title:{
          text:'工作经验的人数分布图',
          textStyle:{
              fontSize:21,
              fontFamily:'楷体'
          }
        },
        xAxis:{
           type:'category',
           data:exp_date,
           axisLabel:{
               interval:0,
               rotate:40,
               margin:10
           }
        },
        yAxis:{
            type:'value',
            scale:true
        },
        tooltip:{
          trigger:'item',
          triggerOn:'mousemove',
          formatter:function (rst){
              return '工作经验:'+rst.name+''+'所需岗位:'+rst.value+'个'
          }
        },
        legend:{
            name: ['人数']
        },
        series:[
            {
                name:'人数',
                type:'bar',
                data:count,
                label:{
                    show:true,
                    rotate: 40,
                    position:'top',
                    distance:10
                },

            }
        ]
    }
    myCharts.setOption(option)
</script>
</body>
</html>

Python爬虫实战+数据分析+数据可视化(前程无忧招聘信息)
结论:
Python人才需求主要集中在3-4年工作经验,市场对Python工作经验较高的人才需求较少。

showLine.html

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>工作经验与薪资关系图&学历与薪资关系图</title>
    <script src="../static/js/echarts.min.js"></script>
    <script src="../static/theme/dark.js"></script>
    <script src="../static/theme/my_theme.js"></script>
    <script src="../static/theme/vintage.js"></script>
    <style>
        .chart-group{
            width: 100%;
            height: 1000px;
            padding: 50px;
            display: flex;
            justify-content: space-evenly;
            margin: auto;
        }
        .chart1{
            width: 600px;
            height: 400px;
        }
        .chart2{
            width: 600px;
            height: 400px;
        }
    </style>
</head>
<body>
<div class="chart-group">
    <div class="chart1"></div>
    <div class="chart2"></div>
</div>
<script>
    // 初始化echarts
    var myCharts1 =  echarts.init(document.querySelector('.chart1'),'vintage')
    var myCharts2 =  echarts.init(document.querySelector('.chart2'),'vintage')
    // 获取视图函数传入数据并转换成json格式
    var exp_date = {{ exp_date|tojson }}
    var exp_sal = {{ exp_sal|tojson }}
    var edu_level = {{ edu_level|tojson }}
    var edu_sal = {{ edu_sal|tojson }}
    function getOption(category,data,desc){
        var option = {
        title:{
          text : desc,
          textStyle:{
              fontFamily:'楷体',
              fontSize:21
          }
        },
        xAxis:{
            type:'category',
            data:category,
            axisLabel:{
                interval:0,
                rotate:40,
                margin:10
            }
        },
        yAxis:{
            type:'value',
            scale:true
        },
        legend:{
          name:['工资水平']
        },
        tooltip:{
            trigger:'axis',
            triggerOn:'mousemove',
            formatter: function (rst){
                if(desc=="工作经验与薪资关系图"){
                    return '工作经验:'+rst[0].name+''+'每月薪资:'+rst[0].value+'k/月'
                }else{
                    return '学历:'+rst[0].name+''+'每月薪资:'+rst[0].value+'k/月'
                }
            }
        },
        series:[
            {
                name:'工资水平',
                type:'line',
                data:data,
                label:{
                    show:true
                },
                smooth:true,
                markPoint:{
                    data:[
                        {
                            name:'最大值',
                            type:"max",
                            symbolSize:[40,40],//容器大小
                            symbolOffset:[0,-20],//位置偏移,
                            label:{
                                show:true,
                                formatter:function (rst){
                                    return rst.name
                                }
                            }
                        },
                        {
                            name:'最小值',
                            type:'min',
                            symbolSize:[40,40],//容器大小
                            symbolOffset:[0,-20],//位置偏移
                            label:{
                                show:true,
                                formatter:function (rst){
                                    return rst.name
                                }
                            }
                        }
                    ]
                },
                markLine:{
                    data:[
                        {
                            name:'平均值',
                            type:'average',
                            label:{
                                formatter:function (rst)
                                {
                                    return rst.name+":\n"+rst.value
                                }
                            }
                        }
                    ]
                }
            }
        ]
    }
        return option
    }
    var option1 = getOption(exp_date,exp_sal,'工作经验与薪资关系图')
    var option2 = getOption(edu_level,edu_sal,'学历与薪资关系图')
    myCharts1.setOption(option1)
    myCharts2.setOption(option2)
</script>
</body>
</html>

Python爬虫实战+数据分析+数据可视化(前程无忧招聘信息)

结论:
工作经验和学历与薪资呈正相关,工作经验越丰富工资越高,学历越高工资越高。

showPie.html

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>福利类型数量前十占比&不同学历与求职人数占比</title>
    <script src="../static/js/echarts.min.js"></script>
    <script src="../static/theme/dark.js"></script>
    <script src="../static/theme/my_theme.js"></script>
    <script src="../static/theme/vintage.js"></script>
    <style>
        .chart-group{
            width: 100%;
            height: 1000px;
            padding: 50px;
            display: flex;
            justify-content: space-evenly;
            margin: auto;
        }
        .chart1{
            width: 600px;
            height: 400px;
        }
        .chart2{
            width: 600px;
            height: 400px;
        }
    </style>
</head>
<body>
<div class="chart-group">
    <div class="chart1"></div>
    <div class="chart2"></div>
</div>
<script>
    // 初始化echarts
    var myCharts1 =  echarts.init(document.querySelector('.chart1'),'vintage')
    var myCharts2 =  echarts.init(document.querySelector('.chart2'),'vintage')
    // 获取视图函数传入数据并转换成json格式
    var wel_count = {{ wel_count|tojson }}
    var edu_count = {{ edu_count|tojson }}
    var value = []
    function getOption(data,desc){
        var option = {
        title:{
          text : desc,
          textStyle:{
              fontFamily:'楷体',
              fontSize:21
          },

        },
        tooltip:{
          trigger:'item',
          triggerOn:'mousemove',
          formatter:function(rst){
              if(desc=="工作经验的人数分布图"){
                    return '福利类型:'+rst.name+''+'企业数量:'+rst.value+'个'+''+'占比:'+rst.percent+"%"
                }else{
                    return '学历:'+rst.name+''+'所需岗位:'+rst.value+'人'+''+'占比:'+rst.percent+"%"
                }
          }
        },
        legend:{
            name:['人数占比'],
            orient: 'vertical',
            bottom:10,
            right:10,

        },
        series:[
            {
                name:'人数占比',
                type:'pie',
                data:data,
                label:{
                    show:true
                },
                roseType:'radius',
                selectedMode:'multiple',
                selectedOffset: 10
            }
        ]
    }
        return option
    }
    var option1 = getOption(wel_count,'福利类型数量前十占比')
    var option2 = getOption(edu_count,'学历人数占比图')
    myCharts1.setOption(option1)
    myCharts2.setOption(option2)
</script>
</body>
</html>

Python爬虫实战+数据分析+数据可视化(前程无忧招聘信息)
结论:
福利类型前十中,奖金的占比超过了总占比的四分之一;学历人数中Python岗位对本科人数的需求占据总需求半数以上,对高学历和低学历的需求相对较少。

以下项目源码,希望能够帮助你们,如有疑问,下方评论
flask项目代码链接

Original: https://blog.csdn.net/qq_45821420/article/details/112663825
Author: 专业的小马哥
Title: Python爬虫实战+数据分析+数据可视化(前程无忧招聘信息)

原创文章受到原创版权保护。转载请注明出处:https://www.johngo689.com/749586/

转载文章受原作者版权保护。转载请注明原作者出处!

(0)

大家都在看

亲爱的 Coder【最近整理,可免费获取】👉 最新必读书单  | 👏 面试题下载  | 🌎 免费的AI知识星球