python 爬取媒体文件(无防火墙)

# -*- coding: utf-8 -*-
import requests
import pandas as pd
import os,time

# Directory under which one sub-folder per customer id is created.
root_path = './根目录/'
# Excel workbook read by Auto_down.read_excel.
input_file = '码表.xlsx'
# Download URL template; the voice id is substituted for %s.
# NOTE(review): the spaces in "id = %s" look unintended for a query
# string -- confirm against the real endpoint.
url = 'http://api.map.baidu.com/geocoder/v2/?id = %s&local=1'
# Voice ids whose download failed are appended to this file, one per line.
fail_file = root_path +'fail.csv'

class Auto_down:
    """Download one media file per (customer id, voice id) row of a workbook.

    The workbook path, URL template, output root and failure-log path are
    taken from the module-level ``input_file``, ``url``, ``root_path`` and
    ``fail_file`` settings.
    """

    # Seconds to wait for the server; without a timeout a stalled
    # connection would block the whole run indefinitely.
    REQUEST_TIMEOUT = 30

    def __init__(self):
        print("--start--")

    def read_excel(self):
        """Read the id columns from the workbook and process every row.

        The ``cust_id`` and ``void_id`` columns are read as ``str`` so that
        leading zeros are preserved and the ids can be used directly in
        paths and URLs.
        """
        sheet = pd.read_excel(
            input_file,
            converters={u'列名': str, 'cust_id': str, 'void_id': str},
            # ``sheetname`` was renamed to ``sheet_name`` in pandas 0.21 and
            # the old spelling is rejected by current releases.
            sheet_name='子表名',
        )

        for custid, voiceid in zip(sheet['cust_id'], sheet['void_id']):
            self.create_file(custid, voiceid)

    def download_voice(self, custid_filename, voiceid):
        """Fetch one voice file and store it as ``<custid_filename>/<voiceid>.mp3``.

        Any non-200 response, request error or file-system error is treated
        as a failure: the voice id is appended to ``fail_file`` and the
        method returns normally so the remaining rows are still processed.

        Args:
            custid_filename: Existing directory that receives the file.
            voiceid: Id substituted into the URL template and used as the
                file name.
        """
        print(voiceid)
        try:
            r = requests.get(url % voiceid, timeout=self.REQUEST_TIMEOUT)
            succeeded = r.status_code == 200
            if succeeded:
                voice_filename = '%s/%s.mp3' % (custid_filename, voiceid)
                with open(voice_filename, 'wb') as fd:
                    fd.write(r.content)
        except (requests.RequestException, OSError) as exc:
            # Only expected I/O failures are swallowed; programming errors
            # and KeyboardInterrupt still propagate.
            print('request url is fail!!', exc)
            succeeded = False

        # Logged outside the try block so an error while writing the log
        # itself is not mistaken for a download failure.
        if not succeeded:
            self._record_failure(voiceid)

    def _record_failure(self, voiceid):
        """Append *voiceid* on its own line to the failure log."""
        with open(fail_file, 'a') as ff:
            # %-formatting tolerates non-string ids, unlike ``voiceid + '\n'``.
            ff.write('%s\n' % voiceid)

    def create_file(self, custid, voiceid):
        """Ensure the customer's directory exists, then download the voice file.

        Args:
            custid: Customer id; names the sub-directory of ``root_path``.
            voiceid: Voice id to download into that directory.
        """
        custid_filename = root_path + str(custid)
        os.makedirs(custid_filename, exist_ok=True)
        # Download unconditionally: previously the download only happened
        # when the directory already existed, so the first voice file of
        # every new customer was silently skipped.
        self.download_voice(custid_filename, voiceid)

if __name__ == '__main__':
    # time.clock() was removed in Python 3.8; time.perf_counter() is the
    # documented replacement for measuring elapsed time.
    started = time.perf_counter()

    AD = Auto_down()
    AD.read_excel()

    # Report the total run time in seconds.
    print("%s s" % (time.perf_counter() - started))
# -*- coding: utf-8 -*-
import requests

# Folder that receives the downloaded file and the failure log.
root_path = "./下载/"

url = ""
fail_file = root_path + 'fail.csv'
voiceid = '11111'

# Create the output folder up front; otherwise both the download and the
# failure log would fail to open their files.
os.makedirs(root_path, exist_ok=True)

# Try the download up to three times and stop at the first success.
for _ in range(3):
    try:
        r = requests.get(url, timeout=30)
        if r.status_code == 200:
            voice_filename = root_path + 'dada.fdf'
            with open(voice_filename, 'wb') as fd:
                fd.write(r.content)
            break
    except (requests.RequestException, OSError):
        # The original called the undefined name ``prin`` here, which
        # raised NameError and aborted the script on the first failure.
        print("fail")
else:
    # Reached only when no attempt succeeded: log the id once.
    with open(fail_file, 'a+') as ff:
        ff.write(voiceid + '\n')

r = requests.get(url)
r.status_code 获取响应状态码
r.text 获取响应内容
r.headers 获取响应头
r.encoding 获取响应编码
r.content 获取二进制响应内容
r.json() 获取JSON响应内容

Original: https://www.cnblogs.com/smuxiaolei/p/10847369.html
Author: 懵懂的菜鸟
Title: python 爬取媒体文件(无防火墙)

原创文章受到原创版权保护。转载请注明出处:https://www.johngo689.com/10746/

转载文章受原作者版权保护。转载请注明原作者出处!

(0)

大家都在看

免费咨询
免费咨询
扫码关注
扫码关注
联系站长

站长Johngo!

大数据和算法重度研究者!

持续产出大数据、算法、LeetCode干货,以及业界好资源!

2022012703491714

微信来撩,免费咨询:xiaozhu_tec

分享本页
返回顶部