您现在的位置是：首页 > 后端

当前栏目

python支持断点续传的多线程下载示例

Python 多线程下载示例支持断点续传

2023-06-13 09:15:16 时间

代码如下：

#!/usr/bin/env python
#coding=utf-8

from __future__ import unicode_literals

import cPickle
import os
import sys
import threading
import time
import urllib2
from collections import namedtuple
from multiprocessing.dummy import Pool as ThreadPool
from urlparse import urlsplit

# Global lock: serialises writes to the shared output file and keeps the
# per-block offsets consistent while the monitor thread snapshots them.
lock = threading.Lock()

# Default parameters (thread count, read buffer in bytes, block size in bytes).
defaults = dict(
    thread_count=10,
    buffer_size=10 * 1024,
    block_size=1000 * 1024,
)

def progress(percent, width=50):
    """Draw a one-line text progress bar on stdout.

    The bar is ``width`` characters wide, left-aligned and filled with ``=``
    in proportion to ``percent``, followed by a space and the percentage.
    The line ends with a carriage return so the next call overwrites it; a
    newline is emitted once ``percent`` reaches 100.

    :param percent: completion percentage (values outside 0..100 are clamped
        for the bar only; the printed number is left as given).
    :param width: bar width in characters.
    """
    # Floor division keeps the fill count an integer on Python 2 and 3 alike.
    filled = max(0, min(width, int(width * percent // 100)))
    bar = ("%%-%ds" % width) % (filled * "=")
    sys.stdout.write("%s %d%%\r" % (bar, percent))
    if percent >= 100:
        sys.stdout.write("\n")
    # Flush on every call: a "\r"-terminated line would otherwise sit in the
    # stdout buffer and the bar would not update until the very end.
    sys.stdout.flush()

def write_data(filepath, data):
    """Pickle ``data`` into ``filepath``, replacing any existing file.

    :param filepath: destination path; opened in binary write mode.
    :param data: any picklable object.
    """
    with open(filepath, "wb") as output:
        cPickle.dump(data, output)

def read_data(filepath):
    """Load and return the object pickled in ``filepath``.

    :param filepath: path of a file containing a single pickled object.
    :returns: the unpickled object.
    """
    # NOTE(security): unpickling executes arbitrary code embedded in the
    # file. Only load state files this program wrote itself; never point
    # this at a file obtained from an untrusted source.
    with open(filepath, "rb") as source:
        return cPickle.load(source)

# Metadata of the remote file: source URL, file name to save as, size in
# bytes (0 when the server sent no Content-Length) and the Last-Modified
# header value, which is used to detect that the remote file has changed.
FileInfo = namedtuple("FileInfo", "url name size lastmodified")

def get_file_info(url):
    """Issue a HEAD request for ``url`` and describe the remote file.

    :param url: address of the file to inspect.
    :returns: a ``FileInfo`` with the URL, a file name (taken from the
        Content-Disposition header when it carries ``filename=``, otherwise
        from the last path segment of the URL), the Content-Length as an int
        (0 if absent) and the Last-Modified header ("" if absent).
    :raises: whatever ``urllib2.urlopen`` raises on network/HTTP errors, and
        ``ValueError`` if Content-Length is not an integer.
    """
    class HeadRequest(urllib2.Request):
        # urllib2 picks the HTTP verb through get_method(); override it so
        # only the headers are transferred.
        def get_method(self):
            return "HEAD"

    res = urllib2.urlopen(HeadRequest(url))
    try:
        res.read()
        # Normalise header names so lookups do not depend on the case the
        # server (or the HTTP library) happened to use.
        headers = dict((key.lower(), value)
                       for key, value in res.headers.items())
    finally:
        res.close()

    size = int(headers.get("content-length", 0))
    lastmodified = headers.get("last-modified", "")

    name = None
    disposition = headers.get("content-disposition", "")
    if "filename=" in disposition:
        # Keep only the filename parameter, drop surrounding quotes.
        name = disposition.split("filename=", 1)[1].split(";", 1)[0]
        name = name.strip().strip("\"'")
        # The header is server-controlled: strip any directory part so a
        # value such as "../../x" cannot write outside the working directory.
        name = os.path.basename(name)
    if not name:
        name = os.path.basename(urlsplit(url)[2])

    return FileInfo(url, name, size, lastmodified)

def download(url, output,
             thread_count=defaults["thread_count"],
             buffer_size=defaults["buffer_size"],
             block_size=defaults["block_size"]):
    """Download ``url`` with several threads, resuming a previous attempt.

    The file is fetched into ``<output>.ing`` while ``<output>.inf`` holds
    the pickled ``(FileInfo, blocks)`` state. When every block is complete
    the work file is renamed to ``output`` and the state file is removed.

    :param url: address of the file to download.
    :param output: destination path, or ``None`` to use the name reported by
        the server / URL.
    :param thread_count: maximum number of worker threads.
    :param buffer_size: bytes read from the network per ``read()`` call.
    :param block_size: size in bytes of each independently fetched block.
    :raises ValueError: if ``block_size`` is not positive or the server did
        not report a Content-Length (the file cannot be split then).
    :raises IOError: if some block is still incomplete after all workers
        returned; the state is saved so a later call resumes.
    """
    if block_size <= 0:
        raise ValueError("block_size must be a positive number of bytes")

    # get latest file info
    file_info = get_file_info(url)
    if file_info.size <= 0:
        raise ValueError(
            "cannot split download of %s: no Content-Length reported" % url)
    last_byte = file_info.size - 1

    # init path
    if output is None:
        output = file_info.name
    workpath = "%s.ing" % output
    infopath = "%s.inf" % output

    # split file to blocks. every block is a list [start, offset, end] where
    # end is the INCLUSIVE index of the block's last byte (it is used as-is
    # in the HTTP Range header). each worker downloads the part described by
    # one block and advances that block's offset; a block is complete once
    # offset > end.
    blocks = []

    # Resuming needs both the saved state and the partial data file.
    if os.path.exists(infopath) and os.path.exists(workpath):
        try:
            saved_info, blocks = read_data(infopath)
            stale = (saved_info.url != url or
                     saved_info.name != file_info.name or
                     saved_info.size != file_info.size or
                     saved_info.lastmodified != file_info.lastmodified)
        except Exception:
            # A truncated or otherwise unreadable state file (e.g. the
            # process was killed mid-write) must not abort the download;
            # unpickling can fail in many ways, so start over instead.
            stale = True
        if stale:
            blocks = []
        else:
            # State written with an exclusive end (end == size) would never
            # satisfy offset > end; clamp to the last valid byte index.
            for block in blocks:
                block[2] = min(block[2], last_byte)

    if not blocks:
        # set blocks
        block_count, remain = divmod(file_info.size, block_size)
        if block_count == 0:
            blocks = [[0, 0, last_byte]]
        else:
            blocks = [[i * block_size, i * block_size, (i + 1) * block_size - 1]
                      for i in range(block_count)]
            # The last block absorbs the remainder.
            blocks[-1][-1] += remain
        # create new blank workpath
        with open(workpath, "wb"):
            pass

    print("Downloading %s" % url)
    # start monitor. It is a daemon so a failed download cannot keep the
    # process alive forever waiting for a 100% that never comes.
    monitor = threading.Thread(target=_monitor,
                               args=(infopath, file_info, blocks))
    monitor.daemon = True
    monitor.start()

    try:
        # start downloading
        with open(workpath, "rb+") as fobj:
            # "<=" because end is inclusive: a block with exactly one byte
            # left has offset == end and still needs a worker.
            pending = [(url, block, fobj, buffer_size)
                       for block in blocks if block[1] <= block[2]]
            if pending:
                # ThreadPool rejects a size of 0, so never go below 1.
                pool = ThreadPool(max(1, min(thread_count, len(pending))))
                try:
                    pool.map(_worker, pending)
                finally:
                    # Wait for every worker before the file is closed, even
                    # when one of them failed.
                    pool.close()
                    pool.join()

        # A worker returns quietly if the server closes the connection
        # early, so verify completeness before touching the destination.
        if any(block[1] <= block[2] for block in blocks):
            raise IOError(
                "download of %s is incomplete; run again to resume" % url)
    except Exception:
        # Persist the latest offsets so the next run resumes from here
        # instead of from the monitor's last periodic snapshot.
        with lock:
            write_data(infopath, (file_info, blocks))
        raise

    # Let the monitor print the final 100% line and exit; after this it can
    # no longer rewrite the state file.
    monitor.join()

    # rename workpath to output
    if os.path.exists(output):
        os.remove(output)
    os.rename(workpath, output)

    # delete infopath
    if os.path.exists(infopath):
        os.remove(infopath)

def _worker(task):
    """Download one block of the file into the shared work file.

    :param task: a ``(url, block, fobj, buffer_size)`` tuple. ``block`` is a
        mutable ``[start, offset, end]`` list with an inclusive ``end``; its
        offset is advanced in place as data is written. ``fobj`` is the work
        file opened for binary update and shared by all workers.
    :raises IOError: if the server ignored the Range header for a block that
        does not start at byte 0 (writing that response would corrupt the
        file).
    """
    url, block, fobj, buffer_size = task

    req = urllib2.Request(url)
    req.add_header("Range", "bytes=%s-%s" % (block[1], block[2]))
    res = urllib2.urlopen(req)
    try:
        # 206 means the server honoured the range. A plain 200 carries the
        # whole file from byte 0, which is only usable when this block's
        # offset is 0 (the read below is capped at the block's end).
        status = res.getcode()
        if status != 206 and block[1] != 0:
            raise IOError("server ignored the Range request for %s (HTTP %s)"
                          % (url, status))

        while block[1] <= block[2]:
            # Never read past this block's last byte.
            chunk = res.read(min(buffer_size, block[2] - block[1] + 1))
            if not chunk:
                break
            # seek+write+offset update must be atomic: the file object is
            # shared between threads and the monitor reads the offsets.
            with lock:
                fobj.seek(block[1])
                fobj.write(chunk)
                block[1] += len(chunk)
    finally:
        res.close()

def _monitor(infopath, file_info, blocks):
    """Report progress and checkpoint the block state every two seconds.

    Runs until the downloaded byte count reaches 100% of ``file_info.size``.

    :param infopath: path of the state file to (re)write on each pass.
    :param file_info: ``FileInfo`` of the file being downloaded.
    :param blocks: the live list of ``[start, offset, end]`` blocks that the
        workers update in place.
    """
    while True:
        # Hold the lock so the offsets form a consistent snapshot while they
        # are summed and pickled.
        with lock:
            done = sum(block[1] - block[0] for block in blocks)
            # Floor division keeps the percentage an int on Python 3 too; an
            # unknown (zero) size is reported as finished rather than
            # dividing by zero.
            if file_info.size:
                percent = done * 100 // file_info.size
            else:
                percent = 100
            progress(percent)
            if percent >= 100:
                break
            write_data(infopath, (file_info, blocks))
        time.sleep(2)

if __name__ == "__main__":
    # Command-line entry point: parse options, run the download and report
    # the elapsed wall-clock time.
    import argparse

    parser = argparse.ArgumentParser(description="Download file by multi-threads.")
    parser.add_argument("url", type=str, help="url of the download file")
    parser.add_argument("-o", type=str, default=None, dest="output", help="output file")
    parser.add_argument("-t", type=int, default=defaults["thread_count"], dest="thread_count", help="thread counts to downloading")
    parser.add_argument("-b", type=int, default=defaults["buffer_size"], dest="buffer_size", help="buffer size")
    parser.add_argument("-s", type=int, default=defaults["block_size"], dest="block_size", help="block size")

    argv = sys.argv[1:]

    # With no arguments at all, fall back to a built-in demo URL.
    if len(argv) == 0:
        argv = ["https://eyes.nasa.gov/eyesproduct/EYES/os/win"]

    args = parser.parse_args(argv)

    start_time = time.time()
    download(args.url, args.output, args.thread_count, args.buffer_size, args.block_size)
    print("times: %ds" % int(time.time() - start_time))

猜你喜欢

IBM发布“比盐小”的微型电脑，未来将结合区块链实现物品溯源
快速、高效、稳定——惠普安装linux体验分享（惠普安装linux）
Surfing the Oracle IP Ocean With Confidence（oracleip）
Oracle主表更新子表的技巧分享（oracle主表更新子表）
Oracle数据库中的表锁机制（oracle中表锁）
病例分享|骨髓移植治愈先天遗传病WAS
国网企标B接口记录（附件）：视频监控系统地址编码
MySQL优化：构建高性能内存数据库（mysql内存数据库）
ORA-28373: missing ENCRYPTION clause for encrypted tablespace ORACLE 报错故障修复远程处理
解决加载不到模块mssql的问题（加载不到模块mssql）
Sugarhosts服务器搭ChatGPT代理环境（图文教程）
Linux稳定性持久，一路稳当前行（linux稳定版本）
Windows10配置PowerShell
MySQL查询：从用户中解密真相。（mysql查询用户）
感谢大家支持，SRS成为20k Star开源项目
现在的荣耀，像极了曾经的华为

zl程序教程

当前栏目

python支持断点续传的多线程下载示例

相关文章