Following a ticket on the Adopy bug tracker, I decided to rewrite the method in charge of downloading. The goal is simple: be able to resume a download where it stopped when something goes wrong (and, later on, be able to resume it after pausing it on purpose).
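For context, the resume itself rests on the HTTP Range header: the client asks the server only for the bytes it is still missing and appends them to the partial file. Here is a minimal, standalone sketch of that idea (the URL, file name and offset are made up for illustration, and a server that ignores Range would simply send the whole file again, which this sketch does not check for):

import os
from urllib import URLopener

# hypothetical values, for illustration only
url = 'http://example.com/some/big/file.iso'
partial = 'file.iso.part'

already = os.path.getsize(partial)                 # bytes we already have on disk
opener = URLopener()
opener.addheader('Range', 'bytes=%d-' % already)   # ask only for the rest

fp = opener.open(url)
out = open(partial, 'ab')                          # append after the existing bytes
try:
    while 1:
        block = fp.read(8192)
        if not block:
            break
        out.write(block)
finally:
    out.close()
    fp.close()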
Writing clean, fully working and efficient code is not easy in this case, so I am going for an alternative solution: modifying the retrieve() method of the URLopener class (from urllib).
And, to avoid modifying the whole library, here is how I proceed:
import os

from urllib import quote_plus, FancyURLopener, URLopener, unwrap, toBytes,\
     splittype, splithost, splitquery, splitattr, url2pathname,\
     ContentTooShortError


def retrieve(self, url, filename=None, reporthook=None, data=None,
             maxtries=5, r_range=None):
    """retrieve(url) returns (filename, headers) for a local object
    or (tempfilename, headers) for a remote object.

    If the download fails, it relaunches itself until it is complete
    or maxtries == 0 (maxtries == -1 for unlimited tries).
    r_range is a tuple (start, end) giving the byte range of the remote
    object to retrieve (ignored for local files)."""
    if maxtries < -1:
        raise ValueError, 'maxtries must be at least -1'
    url = unwrap(toBytes(url))
    if self.tempcache and url in self.tempcache:
        return self.tempcache[url]
    type, url1 = splittype(url)
    if filename is None and (not type or type == 'file'):
        try:
            fp = self.open_local_file(url1)
            hdrs = fp.info()
            fp.close()
            return url2pathname(splithost(url1)[1]), hdrs
        except IOError, msg:
            pass
    if r_range is not None:
        try:
            # only ask the server for the bytes we are still missing
            self.addheader('Range', 'bytes=%d-%d' % r_range)
        except TypeError:
            raise ValueError, \
                  'r_range argument must be a tuple of two ints: (start, end)'
    fp = self.open(url, data)
    try:
        headers = fp.info()
        if filename:
            # append mode, so a resumed download continues the existing file
            tfp = open(filename, 'ab')
        else:
            import tempfile
            garbage, path = splittype(url)
            garbage, path = splithost(path or "")
            path, garbage = splitquery(path or "")
            path, garbage = splitattr(path or "")
            suffix = os.path.splitext(path)[1]
            (fd, filename) = tempfile.mkstemp(suffix)
            # name-mangled attribute: this function lives outside the
            # URLopener class body, so __tempfiles must be spelled out
            self._URLopener__tempfiles.append(filename)
            tfp = os.fdopen(fd, 'ab')
        try:
            result = filename, headers
            if self.tempcache is not None:
                self.tempcache[url] = result
            bs = 1024*8
            size = -1
            read = 0
            blocknum = 0
            if "content-length" in headers:
                size = int(headers["Content-Length"])
            elif r_range is not None:
                size = r_range[1]
            if reporthook:
                reporthook(blocknum, bs, size)
            while 1:
                block = fp.read(bs)
                if block == "":
                    break
                read += len(block)
                tfp.write(block)
                blocknum += 1
                if reporthook:
                    reporthook(blocknum, bs, size)
        finally:
            tfp.close()
    finally:
        fp.close()

    # relaunch the download if the actual size does not match the
    # content-length header; raise once maxtries is exhausted
    if size >= 0 and read < size:
        if maxtries > 0 or maxtries == -1:
            result = self.retrieve(url, filename, reporthook, data,
                                   maxtries if maxtries == -1 else maxtries-1,
                                   r_range=(read, size))
        else:
            raise ContentTooShortError("retrieval incomplete: got only %i out "
                                       "of %i bytes" % (read, size), result)

    return result

# to use our function in the opener
URLopener.retrieve = retrieve
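Once the module-level function has been assigned to URLopener.retrieve, every opener instance (including FancyURLopener) picks it up. A quick usage sketch, assuming the snippet above has been imported; the URL and destination file are placeholders:

def progress(blocknum, bs, size):
    # simple console progress hook, called with the same arguments
    # as the reporthook of the patched retrieve()
    if size > 0:
        print '%d / %d bytes' % (min(blocknum * bs, size), size)

opener = FancyURLopener()
# retries up to 5 times by default, resuming with a Range header each time
filename, headers = opener.retrieve('http://example.com/some/file.zip',
                                    'file.zip', reporthook=progress)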
Feel free to give me your opinion =)