# This class (OE) reads a .dbx file from Outlook Express 5|6
# and builds a simple structure with message positions.
#
# Many many thanks to Holden Web LLC ( http://www.holdenweb.com/Python/ )
# for their "dbxread.py" work.
#
# Usage:
#
#   from dbx import *
#   obj = OE('articles.dbx')
#   print(len(obj.messagelist))              # number of emails
#   for i in obj.messagelist:
#       obj.MailHeaders(i.Position)          # debug
#       print(obj.ReadMessage(i.position))
#
# Useful links:
#
#   http://www.python.org/
#   http://oedbx.aroh.de/
#   http://sourceforge.net/projects/ol2mbox
#   http://www.wotsit.org/
#   http://search.cpan.org/~vparseval/Mail-Transport-Dbx-0.04/
#
# by Olavo Santos (nbk@cyberdude.com)
#

# imports
import struct

# constants
WORDSIZE = 2
DWORDSIZE = 4
INTEGERSIZE = 2
LONGINTSIZE = 4

HdrDecode = {
    0x02: "Sent Date",
    0x03: "Filename",
    0x05: "Subject",
    0x04: "Message Position 04",
    0x06: "???? 06",
    0x07: "Message-Id",
    0x08: "Subject",
    0x09: "From",
    0x0a: "References",
    0x0b: "newsgroup",
    0x0d: "From",
    0x0e: "Reply-To",
    0x12: "Received Date",
    0x13: "To",
    0x14: "???? 14",
    0x1A: "Account",
    0x1b: "Account-Id",
    0x80: "Message Number",
    0x81: "Message Status",
    0x84: "Message Position 84",
    0x91: "Message Size",
}

# Explicit little-endian, standard-size formats.  The bare "l"/"h" codes use
# the platform's native size, so "l" is 8 bytes wherever C long is 64-bit and
# unpacking a 4-byte read fails there.
_LONG = struct.Struct("<l")    # signed 32-bit
_SHORT = struct.Struct("<h")   # signed 16-bit
_DWORD = struct.Struct("<L")   # unsigned 32-bit (flag words)

# On-disk sizes implied by the readers below: an index header is six longs,
# an index item is three longs.
_INDEX_HEADER_SIZE = 6 * LONGINTSIZE
_INDEX_ITEM_SIZE = 3 * LONGINTSIZE


# global read methods
def _read_value(f, fmt, size, pos):
    """Optionally seek *f* to *pos*, read *size* bytes and unpack with *fmt*.

    ``pos=None`` means "read at the current position"; offset 0 is a valid
    seek target.  Raises struct.error on a short read.
    """
    if pos is not None:
        f.seek(pos)
    return fmt.unpack(f.read(size))[0]


def readLongInt(f, pos=None):
    """Read a signed 32-bit little-endian integer from *f* (at *pos* if given)."""
    return _read_value(f, _LONG, LONGINTSIZE, pos)


def readWord(f, pos=None):
    """Read a signed 16-bit little-endian integer from *f* (at *pos* if given)."""
    return _read_value(f, _SHORT, WORDSIZE, pos)


def readDWord(f, pos=None):
    """Read an unsigned 32-bit little-endian integer from *f* (at *pos* if given).

    Unsigned so that shifting a flag word right never sign-extends.
    """
    return _read_value(f, _DWORD, DWORDSIZE, pos)


def readInteger(f, pos=None):
    """Read a signed 16-bit little-endian integer from *f* (at *pos* if given)."""
    return _read_value(f, _SHORT, INTEGERSIZE, pos)


def _extend_unique(dst, src):
    """Append to *dst* every element of *src* that is not already in it."""
    for item in src:
        if item not in dst:
            dst.append(item)


# oe classes
class TOE5_IndexHeader(object):
    """Six consecutive longs read from an index node at *pos*."""

    def __init__(self, f, pos):
        self.FilePos = readLongInt(f, pos)
        self.Unknown1 = readLongInt(f)
        self.PrevIndex = readLongInt(f)
        self.NextIndex = readLongInt(f)
        self.Count = readLongInt(f)
        self.Unknown2 = readLongInt(f)


class TOE5_IndexItem(object):
    """Three consecutive longs describing one entry of an index node."""

    def __init__(self, f, pos):
        self.HeaderPos = readLongInt(f, pos)
        self.ChildIndex = readLongInt(f)
        self.Unknown = readLongInt(f)


class THeaderData(object):
    """Fixed 12-byte prefix of a message-info record (two longs, two words)."""

    def __init__(self, f, pos):
        self.Position = readLongInt(f, pos)
        self.DataLength = readLongInt(f)
        self.HeaderLength = readWord(f)
        self.FlagCount = readWord(f)


class TOE5_MessageInfo(object):
    """Plain attribute holder filled in by OE.ReadMessageInfo.

    ``Position`` is the file offset of the message-info record; ``position``
    (lower case) is the offset of the message body and is only set when the
    record carries a 0x84 or 0x04 flag.
    """
    pass


class TOE5_MsgItem(object):
    """One chunk of a message: four longs followed by 512 bytes of content."""

    def __init__(self, f, pos):
        self.FilePos = readLongInt(f, pos)
        self.Unknown = readLongInt(f)
        self.ItemSize = readLongInt(f)
        self.NextItem = readLongInt(f)
        self.MsgContent = f.read(512)


class OE(object):
    """Reader for an Outlook Express .dbx file.

    After construction ``messagelist`` holds one TOE5_MessageInfo per message
    found by walking the index starting at the offset stored at 0x30.
    """

    # init class
    def __init__(self, fname):
        """Open *fname* in binary mode and build ``messagelist``."""
        self.f = open(fname, "rb")
        self.messagelist = []
        # seek start
        pos = readLongInt(self.f, 0x30)
        nodelst = []
        self.messagelist = self.messagelist + self.ReadIndex(pos, nodelst, 0)

    # read index
    def ReadIndex(self, pos, tl, folders):
        """Walk the index node at *pos* and return the message infos below it.

        *tl* is the list of index positions already visited; it is extended
        in place and used to avoid walking a node twice.  A true *folders*
        value is not supported.

        Raises ValueError if the node's stored position differs from *pos*
        or if *folders* is true and an item with a header is found.
        """
        ml = []
        iheader = TOE5_IndexHeader(self.f, pos)
        if pos != iheader.FilePos:
            raise ValueError("Index header has incorrect position")
        tl.append(pos)

        # Next is walked before Prev; the "not in tl" test is evaluated
        # after the previous recursion so already-visited nodes are skipped.
        for linked in (iheader.NextIndex, iheader.PrevIndex):
            if linked and linked not in tl:
                _extend_unique(ml, self.ReadIndex(linked, tl, folders))

        icount = iheader.Count >> 8
        lpos = iheader.FilePos + _INDEX_HEADER_SIZE
        for _ in range(icount):
            indexItem = TOE5_IndexItem(self.f, lpos)
            if indexItem.HeaderPos:
                if folders:
                    raise ValueError("cannot handle folders yet")
                mpos = self.ReadMessageInfo(indexItem.HeaderPos, tl)
                if mpos and mpos not in ml:
                    ml.append(mpos)
            if indexItem.ChildIndex and indexItem.ChildIndex not in tl:
                _extend_unique(
                    ml, self.ReadIndex(indexItem.ChildIndex, tl, folders))
            lpos += _INDEX_ITEM_SIZE
        return ml

    # read message info
    def ReadMessageInfo(self, pos, tl):
        """Parse the message-info record at *pos* into a TOE5_MessageInfo.

        Only the message position flags are decoded: type 0x84 carries the
        position in the upper 24 bits of the flag word, type 0x04 carries an
        offset into the data area where a 32-bit position is stored.  *tl* is
        accepted for interface compatibility and is not used.

        Raises ValueError if the record's stored position differs from *pos*.
        """
        HeaderData = THeaderData(self.f, pos)
        if HeaderData.Position != pos:
            raise ValueError(
                "Message at %d in file %s has incorrect Position %d"
                % (pos, self.f.name, HeaderData.Position))
        oe5_MessageInfo = TOE5_MessageInfo()
        oe5_MessageInfo.Position = pos

        Flags = HeaderData.FlagCount & 0xff
        FlagSize = Flags * DWORDSIZE
        DataSize = HeaderData.DataLength - FlagSize
        FlagBuf = self.f.read(FlagSize)
        DataBuf = self.f.read(DataSize)

        for i in range(Flags):
            # Unsigned, so a 24-bit value with its top bit set stays positive.
            FlagDWord, = _DWORD.unpack_from(FlagBuf, i * DWORDSIZE)
            FlagType = FlagDWord & 0xff
            FlagOffset = FlagDWord >> 8
            if FlagType == 0x84:
                # position stored directly in the flag word (24-bit)
                oe5_MessageInfo.position = FlagOffset
            elif FlagType == 0x4:
                # position stored in the data area (32-bit)
                oe5_MessageInfo.position, = _LONG.unpack_from(
                    DataBuf, FlagOffset)
            # other flag types are not processed
        return oe5_MessageInfo

    # read message headers (useful for debug)
    def MailHeaders(self, pos):
        """Print a dump of the flag table of the message-info record at *pos*.

        Raises ValueError if the record's stored position differs from *pos*.
        """
        HeaderData = THeaderData(self.f, pos)
        if pos != HeaderData.Position:
            raise ValueError(
                "Incorrect Header Data Position at %d in %s"
                % (pos, self.f.name))
        Flags = HeaderData.FlagCount & 0xff
        print("%s flags" % (Flags,))
        Size = Flags * DWORDSIZE
        DataSize = HeaderData.DataLength - Size
        print("Size: %s DataSize %s" % (Size, DataSize))
        lines = ["*** Message Header Dump *** %d **\n\n*" % pos]
        for i in range(Flags):
            flag = readDWord(self.f)
            ftype = flag & 0xff
            fstring = HdrDecode.get(ftype, "::: Unknown :::")
            fval = flag >> 8
            lines.append(
                "%2d = %02x: %08x ==> %s\n" % (i, ftype, fval, fstring))
        print("".join(lines))

    # read message
    def ReadMessage(self, pos):
        """Return the raw bytes of the message whose first chunk is at *pos*.

        Chunks are followed through their NextItem links until a link of 0
        or a chunk whose stored position does not match is reached.

        Raises ValueError if the 16-bit count at 0xc4 is not positive.
        """
        IndexItemsCount = readInteger(self.f, 0xc4)
        if not IndexItemsCount > 0:
            raise ValueError(
                "File %s has invalid IndexItemsCount %d at 0xc4"
                % (self.f.name, IndexItemsCount))
        chunks = []
        while 1:
            oe5MsgItem = TOE5_MsgItem(self.f, pos)
            if oe5MsgItem.FilePos != pos:
                break
            chunks.append(oe5MsgItem.MsgContent[:oe5MsgItem.ItemSize])
            pos = oe5MsgItem.NextItem
            if pos == 0:
                break
        return b"".join(chunks)

    # aux method
    def stringFrom(self, s, offset):
        """Return the NUL-terminated string in *s* starting at *offset*.

        Works for bytes and text.  If no terminator follows *offset*, the
        remainder of *s* is returned.
        """
        terminator = b"\0" if isinstance(s, bytes) else u"\0"
        fin = s.find(terminator, offset)
        if fin < 0:
            return s[offset:]
        return s[offset:fin]

    # del class
    def __del__(self):
        """Close the underlying file if it was opened."""
        f = getattr(self, "f", None)
        if f is not None:
            f.close()