zl程序教程

您现在的位置是:首页 >  后端

当前栏目

python支持断点续传的多线程下载示例

Python多线程下载 示例 支持 断点续传
2023-06-13 09:15:16 时间

复制代码，代码如下：


#!/usr/bin/env python
# coding=utf-8

from __future__ import print_function, unicode_literals

import os
import sys
import threading
import time
from collections import namedtuple
from multiprocessing.dummy import Pool as ThreadPool

# These stdlib modules were renamed in Python 3; alias them so the rest
# of the file runs unchanged on both major versions.
try:  # Python 2
    import cPickle
    import urllib2
    from urlparse import urlsplit
except ImportError:  # Python 3
    import pickle as cPickle
    import urllib.request as urllib2
    from urllib.parse import urlsplit


# Global lock: serializes seek+write on the shared file object and
# protects the block table shared with the monitor thread.
lock = threading.Lock()

# Default download parameters.
defaults = {
    "thread_count": 10,        # concurrent download workers
    "buffer_size": 10 * 1024,  # bytes per read() from the response
    "block_size": 1000 * 1024, # bytes covered by one resumable block
}


def progress(percent, width=50):
    """Print a one-line, carriage-return progress bar.

    :param percent: completion percentage, 0-100.
    :param width: total character width of the bar.
    """
    # // keeps the repeat count an int on Python 3 (true division would
    # yield a float, and float * str raises TypeError).
    bar = ("%%-%ds" % width) % ("=" * (width * percent // 100))
    # end="" keeps the cursor on the same line so "\r" can overwrite it.
    print("%s %d%%\r" % (bar, percent), end="")
    if percent >= 100:
        print()
        sys.stdout.flush()


def write_data(filepath, data):
    """Pickle *data* to *filepath* (binary mode), overwriting it."""
    with open(filepath, "wb") as output:
        cPickle.dump(data, output)


def read_data(filepath):
    """Load and return the pickled object stored at *filepath*."""
    with open(filepath, "rb") as fobj:  # renamed: it is an input handle
        return cPickle.load(fobj)


# Metadata of the remote file, used both for naming the output and for
# validating that saved resume info still matches the remote file.
FileInfo = namedtuple("FileInfo", "url name size lastmodified")


def get_file_info(url):
    """HEAD *url* and return a FileInfo(url, name, size, lastmodified).

    The name is taken from the Content-Disposition header when present,
    otherwise from the last path segment of the URL.
    """
    class HeadRequest(urllib2.Request):
        def get_method(self):
            return "HEAD"

    res = urllib2.urlopen(HeadRequest(url))
    res.read()
    # Normalize header names: Python 3 preserves the sent casing while
    # Python 2 lowercases them.
    headers = dict((k.lower(), v) for k, v in dict(res.headers).items())
    size = int(headers.get("content-length", 0))
    lastmodified = headers.get("last-modified", "")
    if "content-disposition" in headers:
        name = headers["content-disposition"].split("filename=")[1]
        # Strip one level of surrounding quotes, e.g. filename="a.zip".
        if name[0] == '"' or name[0] == "'":
            name = name[1:-1]
    else:
        name = os.path.basename(urlsplit(url)[2])

    return FileInfo(url, name, size, lastmodified)


def download(url, output,
             thread_count=defaults["thread_count"],
             buffer_size=defaults["buffer_size"],
             block_size=defaults["block_size"]):
    """Download *url* to *output* with multiple threads, resumably.

    :param url: source URL; the server must honor Range requests.
    :param output: destination path; None means use the remote name.
    :param thread_count: maximum number of concurrent workers.
    :param buffer_size: bytes per read() inside a worker.
    :param block_size: size of each independently resumable block.
    """
    # Always fetch fresh metadata so stale resume info can be detected.
    file_info = get_file_info(url)

    # Derive output and the two temporary paths.
    if output is None:
        output = file_info.name
    workpath = "%s.ing" % output   # partially downloaded payload
    infopath = "%s.inf" % output   # persisted resume bookkeeping

    # Split the file into blocks. Every block is a list [start, offset,
    # end]; each worker downloads one block and advances its offset, so
    # the persisted block table doubles as resume state.
    blocks = []

    if os.path.exists(infopath):
        # Reuse saved blocks only when they describe this same remote
        # file; otherwise restart from scratch.
        _x, blocks = read_data(infopath)
        if (_x.url != url or
                _x.name != file_info.name or
                _x.lastmodified != file_info.lastmodified):
            blocks = []

    if len(blocks) == 0:
        # Fresh download: build the block table.
        if block_size > file_info.size:
            blocks = [[0, 0, file_info.size]]
        else:
            block_count, remain = divmod(file_info.size, block_size)
            blocks = [[i * block_size, i * block_size, (i + 1) * block_size - 1]
                      for i in range(block_count)]
            blocks[-1][-1] += remain
        # Create a blank work file to write into ("wb" truncates; no
        # bytes need to be written, which also keeps Python 3 happy).
        with open(workpath, "wb"):
            pass

    print("Downloading %s" % url)
    # Monitor thread: reports progress and saves resume info periodically.
    threading.Thread(target=_monitor, args=(infopath, file_info, blocks)).start()

    # Download the unfinished blocks concurrently.
    with open(workpath, "rb+") as fobj:
        args = [(url, block, fobj, buffer_size)
                for block in blocks if block[1] < block[2]]

        # Guard: ThreadPool(0) raises when every block is already done.
        if args:
            thread_count = min(thread_count, len(args))
            pool = ThreadPool(thread_count)
            pool.map(_worker, args)
            pool.close()
            pool.join()

    # Move the finished work file into place.
    if os.path.exists(output):
        os.remove(output)
    os.rename(workpath, output)

    # Resume info is no longer needed after a successful download.
    if os.path.exists(infopath):
        os.remove(infopath)

    # Every block's offset must have reached its end.
    assert all(block[1] >= block[2] for block in blocks)


def _worker(args):
    """Download one block's remaining byte range into the shared file.

    :param args: tuple (url, block, fobj, buffer_size) where block is a
        mutable [start, offset, end] list shared with the monitor.
    """
    # Unpack explicitly: tuple-unpacking parameters are a Python 3
    # syntax error (removed by PEP 3113).
    url, block, fobj, buffer_size = args
    req = urllib2.Request(url)
    # Request only the still-missing part of this block.
    req.headers["Range"] = "bytes=%s-%s" % (block[1], block[2])
    res = urllib2.urlopen(req)

    while 1:
        chunk = res.read(buffer_size)
        if not chunk:
            break
        with lock:
            # The file object is shared by all workers, so seek+write
            # must be atomic; the offset update under the same lock
            # keeps the monitor's view consistent.
            fobj.seek(block[1])
            fobj.write(chunk)
            block[1] += len(chunk)


def _monitor(infopath, file_info, blocks):
    """Report progress every 2s and persist resume info until done.

    Runs in its own thread; reads the shared block table under the
    global lock.
    """
    while 1:
        with lock:
            done = sum(block[1] - block[0] for block in blocks)
            # // keeps percent an int on Python 3 (progress() multiplies
            # a string by it); guard against a zero-length file.
            percent = done * 100 // file_info.size if file_info.size else 100
            progress(percent)
            if percent >= 100:
                break
            write_data(infopath, (file_info, blocks))
        time.sleep(2)


if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser(description="Download file by multi-threads.")
    parser.add_argument("url", type=str, help="url of the download file")
    parser.add_argument("-o", type=str, default=None, dest="output", help="output file")
    parser.add_argument("-t", type=int, default=defaults["thread_count"], dest="thread_count", help="thread counts to downloading")
    parser.add_argument("-b", type=int, default=defaults["buffer_size"], dest="buffer_size", help="buffer size")
    parser.add_argument("-s", type=int, default=defaults["block_size"], dest="block_size", help="block size")

    argv = sys.argv[1:]

    # Fall back to a demo URL when invoked with no arguments.
    if len(argv) == 0:
        argv = ["https://eyes.nasa.gov/eyesproduct/EYES/os/win"]

    args = parser.parse_args(argv)

    start_time = time.time()
    download(args.url, args.output, args.thread_count, args.buffer_size, args.block_size)
    print("times: %ds" % int(time.time() - start_time))