python支持断点续传的多线程下载示例
#!/usr/bin/env python
#coding=utf-8
from __future__ import unicode_literals

import cPickle
import os
import sys
import threading
import time
import urllib2
from collections import namedtuple
from multiprocessing.dummy import Pool as ThreadPool
from urlparse import urlsplit
# Module-wide mutex: worker threads hold it while writing to the shared work
# file, and the progress monitor holds it while reading the block table.
lock = threading.Lock()

# Fallback tuning values for download() and the command line (sizes in bytes).
defaults = dict(
    thread_count=10,
    buffer_size=10 * 1024,
    block_size=1000 * 1024,
)
def progress(percent, width=50):
    """Redraw a one-line text progress bar on standard output.

    The bar occupies ``width`` columns and is followed by the integer
    percentage.  The line ends with a carriage return so the next call
    overwrites it; once ``percent`` reaches 100 a newline is written so
    later output starts on a fresh line.

    Args:
        percent: Completion percentage (int or float).  Values outside
            0-100 are tolerated; only the bar is clamped, the number is
            printed as given.
        width: Width of the bar in characters.
    """
    # Floor division keeps the fill count integral for float input and under
    # true division; clamping stops the bar from outgrowing its field when
    # the percentage overshoots.
    filled = max(0, min(width, int(width * percent // 100)))
    sys.stdout.write("%-*s %d%%\r" % (width, "=" * filled, percent))
    if percent >= 100:
        sys.stdout.write("\n")
    sys.stdout.flush()
def write_data(filepath, data):
    """Pickle ``data`` and store it in the file at ``filepath``.

    The file is opened in binary write mode, so any previous content is
    replaced.
    """
    with open(filepath, "wb") as sink:
        sink.write(cPickle.dumps(data))
def read_data(filepath):
    """Return the object unpickled from the file at ``filepath``.

    NOTE(review): unpickling can execute arbitrary code, so this must only
    be pointed at files this program wrote itself.
    """
    with open(filepath, "rb") as source:
        payload = source.read()
    return cPickle.loads(payload)
# Metadata describing the remote file.  It is stored alongside the block
# table in the checkpoint file so a later run can tell whether the partial
# download still matches what the server offers.
# The field names must be whitespace-separated: the constructor is called
# with four positional values (url, name, size, lastmodified).
FileInfo = namedtuple("FileInfo", "url name size lastmodified")
def get_file_info(url):
    """Fetch metadata for ``url`` with an HTTP HEAD request.

    Args:
        url: Address of the remote file.

    Returns:
        FileInfo: ``(url, name, size, lastmodified)``.  ``size`` is the
        ``Content-Length`` value (0 when the header is absent or not a
        number) and ``lastmodified`` is the raw ``Last-Modified`` header
        (empty string when absent).  ``name`` is taken from the
        ``filename=`` parameter of ``Content-Disposition`` when present,
        otherwise from the last path segment of the URL.

    Errors raised by ``urllib2.urlopen`` propagate unchanged.
    """
    class HeadRequest(urllib2.Request):
        # Overriding get_method makes urlopen send HEAD, so only the headers
        # are transferred.
        def get_method(self):
            return "HEAD"

    res = urllib2.urlopen(HeadRequest(url))
    try:
        res.read()
        # Query the message object directly: its lookups are
        # case-insensitive, unlike a plain dict copy of the headers.
        headers = res.headers
        try:
            size = int(headers.get("content-length", 0))
        except (TypeError, ValueError):
            size = 0
        lastmodified = headers.get("last-modified", "")
        disposition = headers.get("content-disposition", "")
    finally:
        res.close()

    name = None
    if "filename=" in disposition:
        # Keep only the filename value: drop any parameters that follow it
        # and the optional surrounding quotes.
        name = disposition.split("filename=", 1)[1].split(";", 1)[0].strip()
        if name[:1] in ('"', "'"):
            name = name[1:-1]
        # The header is server-controlled; never let it pick a directory.
        name = os.path.basename(name)
    if not name:
        name = os.path.basename(urlsplit(url)[2])
    return FileInfo(url, name, size, lastmodified)
def download(url, output,
             thread_count=defaults["thread_count"],
             buffer_size=defaults["buffer_size"],
             block_size=defaults["block_size"]):
    """Download ``url`` with several threads, resuming an earlier attempt if possible.

    Data is written to ``<output>.ing`` while per-block progress is
    checkpointed to ``<output>.inf``.  On success the work file is renamed
    to ``output`` (replacing an existing file) and the checkpoint is removed.

    Args:
        url: Address of the file to fetch.
        output: Destination path, or None to use the name reported by
            ``get_file_info``.
        thread_count: Maximum number of concurrent download threads.
        buffer_size: Number of bytes each worker requests per read.
        block_size: Nominal size in bytes of the block handed to one worker.

    Raises:
        ValueError: if a tuning parameter is below 1, or the server did not
            report a positive Content-Length (the file cannot be split).
        IOError: if a block is still incomplete once all workers have
            finished.  The work and checkpoint files are kept so that a
            later call can resume.
    """
    if min(thread_count, buffer_size, block_size) < 1:
        raise ValueError("thread_count, buffer_size and block_size must all be >= 1")

    # get latest file info
    file_info = get_file_info(url)
    if file_info.size <= 0:
        # Without a size there is nothing to split into ranges (and the
        # thread pool would be asked for zero workers).
        raise ValueError("server reported no usable Content-Length for %s" % url)

    # init path
    if output is None:
        output = file_info.name
    workpath = "%s.ing" % output
    infopath = "%s.inf" % output

    # The file is split into blocks.  Every block is a mutable list
    # [start, offset, end] with an INCLUSIVE end (it is used verbatim in the
    # HTTP Range header).  Each worker thread downloads one block and
    # advances that block's offset, so a block is complete once offset > end.
    last_byte = file_info.size - 1
    blocks = []
    if os.path.exists(infopath) and os.path.exists(workpath):
        # load blocks -- the checkpoint is a pickle, so it must only ever be
        # a file this program wrote.
        saved_info, blocks = read_data(infopath)
        if tuple(saved_info) != tuple(file_info):
            # The remote file changed (url, name, size or modification
            # time), so the partial data is worthless.
            blocks = []
        for block in blocks:
            # Older checkpoints may store an exclusive end for a
            # single-block download; clamp so completion is detected.
            block[2] = min(block[2], last_byte)
    if not blocks:
        # set blocks: full-size blocks, the last one absorbing the remainder
        block_count = max(1, file_info.size // block_size)
        blocks = [[i * block_size, i * block_size, (i + 1) * block_size - 1]
                  for i in range(block_count)]
        blocks[-1][2] = last_byte
        # create new blank workpath
        with open(workpath, "wb"):
            pass

    print("Downloading %s" % url)

    # start monitor
    # _monitor only returns once progress reaches 100%, so it runs as a
    # daemon: after a failed download it must not keep the process alive.
    monitor = threading.Thread(target=_monitor, args=(infopath, file_info, blocks))
    monitor.daemon = True
    monitor.start()

    # start downloading
    finished = False
    try:
        with open(workpath, "rb+") as fobj:
            pending = [(url, block, fobj, buffer_size)
                       for block in blocks if block[1] <= block[2]]
            if pending:  # a fully downloaded resume has nothing left to fetch
                pool = ThreadPool(min(thread_count, len(pending)))
                try:
                    pool.map(_worker, pending)
                finally:
                    # Wait for every worker before the file is closed, even
                    # when one of them raised.
                    pool.close()
                    pool.join()
        finished = all(block[1] > block[2] for block in blocks)
    finally:
        if not finished:
            # Persist the latest offsets so the next run resumes from here.
            with lock:
                write_data(infopath, (file_info, blocks))
    if not finished:
        raise IOError("download of %s is incomplete; run again to resume" % url)

    # Let the monitor draw the final 100% line (it polls every 2 seconds).
    monitor.join()

    # rename workpath to output
    if os.path.exists(output):
        os.remove(output)
    os.rename(workpath, output)

    # delete infopath
    if os.path.exists(infopath):
        os.remove(infopath)
def _worker(task):
    """Download one block and write it into the shared work file.

    Args:
        task: ``(url, block, fobj, buffer_size)`` tuple.  ``block`` is the
            mutable ``[start, offset, end]`` list for this part; its offset
            (index 1) is advanced in place as bytes are written.  ``fobj``
            is the work file shared by all workers, so every seek+write
            pair happens under the module-level ``lock``.

    Raises:
        IOError: if the server answers a mid-file range request with a
            plain 200 response, i.e. it ignored the Range header.
    """
    url, block, fobj, buffer_size = task
    req = urllib2.Request(url)
    req.add_header("Range", "bytes=%s-%s" % (block[1], block[2]))
    res = urllib2.urlopen(req)
    try:
        if block[1] > 0 and res.getcode() == 200:
            # A 200 body starts at byte 0; writing it at this block's
            # offset would corrupt the file.
            raise IOError("server ignored the Range header for %s" % url)
        # block[2] is the inclusive last byte requested in the Range header,
        # so stop there even if the server sends more.
        while block[1] <= block[2]:
            chunk = res.read(min(buffer_size, block[2] - block[1] + 1))
            if not chunk:
                break
            with lock:
                fobj.seek(block[1])
                fobj.write(chunk)
                block[1] += len(chunk)
    finally:
        res.close()
def _monitor(infopath, file_info, blocks):
    """Report progress and checkpoint the block table until the download is done.

    Every two seconds the downloaded byte count is derived from the blocks
    (``offset - start`` of each), shown through ``progress`` and, while below
    100%, saved together with ``file_info`` to ``infopath``.  The loop ends
    once the percentage reaches 100.

    Args:
        infopath: Path of the checkpoint file to write.
        file_info: Metadata whose ``size`` is the total byte count.
        blocks: Shared list of ``[start, offset, end]`` lists that the
            worker threads update in place.
    """
    while True:
        # Hold the lock so the snapshot is not taken in the middle of a
        # worker's offset update.
        with lock:
            done = sum(block[1] - block[0] for block in blocks)
            if file_info.size:
                percent = done * 100 // file_info.size
            else:
                # A zero size would divide by zero; there is nothing to wait for.
                percent = 100
            if percent < 100:
                write_data(infopath, (file_info, blocks))
        progress(percent)
        if percent >= 100:
            break
        time.sleep(2)
if __name__ == "__main__":
    # Command-line entry point: parse the options, run the download and
    # report the elapsed wall-clock time.
    import argparse

    parser = argparse.ArgumentParser(description="Download file by multi-threads.")
    parser.add_argument("url", type=str, help="url of the download file")
    parser.add_argument("-o", type=str, default=None, dest="output",
                        help="output file")
    parser.add_argument("-t", type=int, default=defaults["thread_count"],
                        dest="thread_count", help="thread counts to downloading")
    parser.add_argument("-b", type=int, default=defaults["buffer_size"],
                        dest="buffer_size", help="buffer size")
    parser.add_argument("-s", type=int, default=defaults["block_size"],
                        dest="block_size", help="block size")

    argv = sys.argv[1:]
    if not argv:
        # With no arguments at all, fall back to a built-in sample URL.
        argv = ["https://eyes.nasa.gov/eyesproduct/EYES/os/win"]
    args = parser.parse_args(argv)

    start_time = time.time()
    download(args.url, args.output, args.thread_count, args.buffer_size, args.block_size)
    print("times: %ds" % int(time.time() - start_time))
相关文章
- python写入txt文件中文乱码_python中怎么输入文件
- 苹果电脑python官网下载步骤-Python下载和安装图文教程[超详细]
- python常用面试题_Python+Selenium 常见面试题整理[通俗易懂]
- python对文件的操作
- python–threading多线程总结[通俗易懂]
- Python&R语言-python和r相遇
- Python 基础语法(一)「建议收藏」
- java和python哪个值得学-学java好还是Python好?
- 【说站】python双向链表的概念介绍
- 9个应知应会的单行Python代码
- python分析人口出生率代码_国家统计局居然也能用的上Python?人口数据Python脚本了解一下?…[通俗易懂]
- python中copy.deepcopy_Python eval
- 软件测试|Python的流程控制,你真的会了吗?(一)
- Python多线程结合队列下载百度音乐代码详解编程语言
- python下载大文件代码详解编程语言
- python之多线程队列详解编程语言
- Python结合MySQL实现信息交互(python与mysql交互)
- 用一个开源工具实现多线程 Python 程序的可视化
- Python轻松连接SQL Server数据库(python连接sqlserver)
- y r使用 SQLServer 全面支持 Python、R,实现数据分析的极致体验(sqlserver p)
- 用Python仿写MSSQL 编程体验更有趣(python仿mssql)
- Python连接MySQL数据库:初学者指南(python入mysql)
- Python操作MySQL数据库的必备模块mysqlpython(mysql_python)
- python多线程编程方式分析示例详解
- python使用urllib模块开发的多线程豆瓣小站mp3下载器
- Python入门篇之正则表达式