############################################################################## # # Copyright (c) 2003 Zope Corporation and Contributors. # All Rights Reserved. # # This software is subject to the provisions of the Zope Public License, # Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution. # THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED # WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED # WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS # FOR A PARTICULAR PURPOSE # ############################################################################## # # File-based ZODB storage # # Files are arranged as follows. # # - The first 4 bytes are a file identifier. # # - The rest of the file consists of a sequence of transaction # "records". # # A transaction record consists of: # # - 8-byte transaction id, which is also a time stamp. # # - 8-byte transaction record length - 8. # # - 1-byte status code # ' ' (a blank) completed transaction that hasn't been packed # 'p' completed transaction that has been packed # 'c' checkpoint -- a transaction in progress, at the end of the file; # it's been thru vote() but not finish(); if finish() completes # normally, it will be overwritten with a blank; if finish() dies # (e.g., out of disk space), cleanup code will try to truncate # the file to chop off this incomplete transaction # 'u' uncertain; no longer used; was previously used to record something # about non-transactional undo # # - 2-byte length of user name # # - 2-byte length of description # # - 2-byte length of extension attributes # # - user name # # - description # # - extension attributes # # * A sequence of data records # # - 8-byte redundant transaction length -8 # # A data record consists of # # - 8-byte oid. # # - 8-byte tid, which matches the transaction id in the transaction record. # # - 8-byte previous-record file-position. # # - 8-byte beginning of transaction record file position. # # - 2-byte version length # # - 8-byte data length # # ? 8-byte position of non-version data record # (if version length > 0) # # ? 8-byte position of previous record in this version # (if version length > 0) # # ? version string # (if version length > 0) # # ? data # (data length > 0) # # ? 8-byte position of data record containing data # (data length == 0) # # Note that the lengths and positions are all big-endian. # Also, the object ids time stamps are big-endian, so comparisons # are meaningful. # # Version handling # # There isn't a separate store for versions. Each record has a # version field, indicating what version it is in. The records in a # version form a linked list. Each record that has a non-empty # version string has a pointer to the previous record in the version. # Version back pointers are retained *even* when versions are # committed or aborted or when transactions are undone. # # There is a notion of "current" version records, which are the # records in a version that are the current records for their # respective objects. When a version is comitted, the current records # are committed to the destination version. When a version is # aborted, the current records are aborted. # # When committing or aborting, we search backward through the linked # list until we find a record for an object that does not have a # current record in the version. If we find a record for which the # non-version pointer is the same as the previous pointer, then we # forget that the corresponding object had a current record in the # version. This strategy allows us to avoid searching backward through # previously committed or aborted version records. # # Of course, we ignore records in undone transactions when committing # or aborting. # # Backpointers # # When we commit or abort a version, we don't copy (or delete) # and data. Instead, we write records with back pointers. # # A version record *never* has a back pointer to a non-version # record, because we never abort to a version. A non-version record # may have a back pointer to a version record or to a non-version # record. import struct import logging from ZODB.POSException import POSKeyError from ZODB.utils import u64, oid_repr class CorruptedError(Exception): pass class CorruptedDataError(CorruptedError): def __init__(self, oid=None, buf=None, pos=None): self.oid = oid self.buf = buf self.pos = pos def __str__(self): if self.oid: msg = "Error reading oid %s. Found %r" % (oid_repr(self.oid), self.buf) else: msg = "Error reading unknown oid. Found %r" % self.buf if self.pos: msg += " at %d" % self.pos return msg # the struct formats for the headers TRANS_HDR = ">8sQcHHH" DATA_HDR = ">8s8sQQHQ" # constants to support various header sizes TRANS_HDR_LEN = 23 DATA_HDR_LEN = 42 DATA_VERSION_HDR_LEN = 58 assert struct.calcsize(TRANS_HDR) == TRANS_HDR_LEN assert struct.calcsize(DATA_HDR) == DATA_HDR_LEN logger = logging.getLogger('ZODB.FileStorage.format') class FileStorageFormatter(object): """Mixin class that can read and write the low-level format.""" # subclasses must provide _file _metadata_size = 4L _format_version = "21" def _read_num(self, pos): """Read an 8-byte number.""" self._file.seek(pos) return u64(self._file.read(8)) def _read_data_header(self, pos, oid=None): """Return a DataHeader object for data record at pos. If ois is not None, raise CorruptedDataError if oid passed does not match oid in file. If there is version data, reads the version part of the header. If there is no pickle data, reads the back pointer. """ self._file.seek(pos) s = self._file.read(DATA_HDR_LEN) if len(s) != DATA_HDR_LEN: raise CorruptedDataError(oid, s, pos) h = DataHeaderFromString(s) if oid is not None and oid != h.oid: raise CorruptedDataError(oid, s, pos) if h.vlen: s = self._file.read(16 + h.vlen) h.parseVersion(s) if not h.plen: h.back = u64(self._file.read(8)) return h def _write_version_header(self, file, pnv, vprev, version): s = struct.pack(">8s8s", pnv, vprev) file.write(s + version) def _read_txn_header(self, pos, tid=None): self._file.seek(pos) s = self._file.read(TRANS_HDR_LEN) if len(s) != TRANS_HDR_LEN: raise CorruptedDataError(tid, s, pos) h = TxnHeaderFromString(s) if tid is not None and tid != h.tid: raise CorruptedDataError(tid, s, pos) h.user = self._file.read(h.ulen) h.descr = self._file.read(h.dlen) h.ext = self._file.read(h.elen) return h def _loadBack_impl(self, oid, back, fail=True): # shared implementation used by various _loadBack methods # # If the backpointer ultimately resolves to 0: # If fail is True, raise KeyError for zero backpointer. # If fail is False, return the empty data from the record # with no backpointer. while 1: if not back: # If backpointer is 0, object does not currently exist. raise POSKeyError(oid) h = self._read_data_header(back) if h.plen: return self._file.read(h.plen), h.tid, back, h.tloc if h.back == 0 and not fail: return None, h.tid, back, h.tloc back = h.back def _loadBackTxn(self, oid, back, fail=True): """Return data and txn id for backpointer.""" return self._loadBack_impl(oid, back, fail)[:2] def _loadBackPOS(self, oid, back): return self._loadBack_impl(oid, back)[2] def getTxnFromData(self, oid, back): """Return transaction id for data at back.""" h = self._read_data_header(back, oid) return h.tid def fail(self, pos, msg, *args): s = ("%s:%s:" + msg) % ((self._name, pos) + args) logger.error(s) raise CorruptedError(s) def checkTxn(self, th, pos): if th.tid <= self.ltid: self.fail(pos, "time-stamp reduction: %s <= %s", oid_repr(th.tid), oid_repr(self.ltid)) self.ltid = th.tid if th.status == "c": self.fail(pos, "transaction with checkpoint flag set") if not th.status in " pu": # recognize " ", "p", and "u" as valid self.fail(pos, "invalid transaction status: %r", th.status) if th.tlen < th.headerlen(): self.fail(pos, "invalid transaction header: " "txnlen (%d) < headerlen(%d)", th.tlen, th.headerlen()) def checkData(self, th, tpos, dh, pos): if dh.tloc != tpos: self.fail(pos, "data record does not point to transaction header" ": %d != %d", dh.tloc, tpos) if pos + dh.recordlen() > tpos + th.tlen: self.fail(pos, "data record size exceeds transaction size: " "%d > %d", pos + dh.recordlen(), tpos + th.tlen) if dh.prev >= pos: self.fail(pos, "invalid previous pointer: %d", dh.prev) if dh.back: if dh.back >= pos: self.fail(pos, "invalid back pointer: %d", dh.prev) if dh.plen: self.fail(pos, "data record has back pointer and data") def DataHeaderFromString(s): return DataHeader(*struct.unpack(DATA_HDR, s)) class DataHeader(object): """Header for a data record.""" __slots__ = ( "oid", "tid", "prev", "tloc", "vlen", "plen", "back", # These three attributes are only defined when vlen > 0 "pnv", "vprev", "version") def __init__(self, oid, tid, prev, tloc, vlen, plen): self.back = 0 # default self.version = "" # default self.oid = oid self.tid = tid self.prev = prev self.tloc = tloc self.vlen = vlen self.plen = plen def asString(self): s = struct.pack(DATA_HDR, self.oid, self.tid, self.prev, self.tloc, self.vlen, self.plen) if self.version: v = struct.pack(">QQ", self.pnv, self.vprev) return s + v + self.version else: return s def setVersion(self, version, pnv, vprev): self.version = version self.vlen = len(version) self.pnv = pnv self.vprev = vprev def parseVersion(self, buf): pnv, vprev = struct.unpack(">QQ", buf[:16]) self.pnv = pnv self.vprev = vprev self.version = buf[16:] def recordlen(self): rlen = DATA_HDR_LEN + (self.plen or 8) if self.version: rlen += 16 + self.vlen return rlen def TxnHeaderFromString(s): return TxnHeader(*struct.unpack(TRANS_HDR, s)) class TxnHeader(object): """Header for a transaction record.""" __slots__ = ("tid", "tlen", "status", "user", "descr", "ext", "ulen", "dlen", "elen") def __init__(self, tid, tlen, status, ulen, dlen, elen): self.tid = tid self.tlen = tlen self.status = status self.ulen = ulen self.dlen = dlen self.elen = elen assert elen >= 0 def asString(self): s = struct.pack(TRANS_HDR, self.tid, self.tlen, self.status, self.ulen, self.dlen, self.elen) return "".join(map(str, [s, self.user, self.descr, self.ext])) def headerlen(self): return TRANS_HDR_LEN + self.ulen + self.dlen + self.elen