我有大量数据(几个GB),需要用Python把它写入一个zip文件.我无法一次将它全部加载到内存中再传给ZipFile的.writestr方法,也实在不想先用临时文件把它全部写到磁盘,然后再读回来.
有没有办法把生成器或类文件对象(file-like object)交给ZipFile库?或者,这一功能看起来并不受支持,是否有什么原因?
我所说的zip文件,指的就是zip格式的文件,也就是Python zipfile包所支持的那种.
唯一的解决方案是重写它用来压缩文件的方法,使其改为从缓冲区读取.把这个功能加入标准库本来轻而易举; 我有点惊讶至今还没有人这样做.据我了解,很多人都认为整个接口需要彻底重构,而这似乎阻碍了任何渐进式的改进.
import zipfile, zlib, binascii, struct class BufferedZipFile(zipfile.ZipFile): def writebuffered(self, zipinfo, buffer): zinfo = zipinfo zinfo.file_size = file_size = 0 zinfo.flag_bits = 0x00 zinfo.header_offset = self.fp.tell() self._writecheck(zinfo) self._didModify = True zinfo.CRC = CRC = 0 zinfo.compress_size = compress_size = 0 self.fp.write(zinfo.FileHeader()) if zinfo.compress_type == zipfile.ZIP_DEFLATED: cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, zlib.DEFLATED, -15) else: cmpr = None while True: buf = buffer.read(1024 * 8) if not buf: break file_size = file_size + len(buf) CRC = binascii.crc32(buf, CRC) & 0xffffffff if cmpr: buf = cmpr.compress(buf) compress_size = compress_size + len(buf) self.fp.write(buf) if cmpr: buf = cmpr.flush() compress_size = compress_size + len(buf) self.fp.write(buf) zinfo.compress_size = compress_size else: zinfo.compress_size = file_size zinfo.CRC = CRC zinfo.file_size = file_size position = self.fp.tell() self.fp.seek(zinfo.header_offset + 14, 0) self.fp.write(struct.pack("
2> haridsv..:我以Chris B.的回答为基础,写出了一个完整的解决方案.贴在这里,以防其他人也感兴趣:
import os import threading from zipfile import * import zlib, binascii, struct class ZipEntryWriter(threading.Thread): def __init__(self, zf, zinfo, fileobj): self.zf = zf self.zinfo = zinfo self.fileobj = fileobj zinfo.file_size = 0 zinfo.flag_bits = 0x00 zinfo.header_offset = zf.fp.tell() zf._writecheck(zinfo) zf._didModify = True zinfo.CRC = 0 zinfo.compress_size = compress_size = 0 zf.fp.write(zinfo.FileHeader()) super(ZipEntryWriter, self).__init__() def run(self): zinfo = self.zinfo zf = self.zf file_size = 0 CRC = 0 if zinfo.compress_type == ZIP_DEFLATED: cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, zlib.DEFLATED, -15) else: cmpr = None while True: buf = self.fileobj.read(1024 * 8) if not buf: self.fileobj.close() break file_size = file_size + len(buf) CRC = binascii.crc32(buf, CRC) if cmpr: buf = cmpr.compress(buf) compress_size = compress_size + len(buf) zf.fp.write(buf) if cmpr: buf = cmpr.flush() compress_size = compress_size + len(buf) zf.fp.write(buf) zinfo.compress_size = compress_size else: zinfo.compress_size = file_size zinfo.CRC = CRC zinfo.file_size = file_size position = zf.fp.tell() zf.fp.seek(zinfo.header_offset + 14, 0) zf.fp.write(struct.pack("