Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

282

283

284

285

286

287

288

289

290

291

292

293

294

295

296

297

298

299

300

301

302

303

304

305

306

307

308

309

310

311

312

313

314

315

316

317

318

319

320

321

322

323

324

325

326

327

328

329

330

331

332

333

334

335

336

337

338

339

340

341

342

343

344

345

346

347

348

349

350

351

352

353

354

355

356

357

358

359

360

361

362

363

364

365

366

367

368

369

370

371

372

# Copyright (C) 2001-2010 Python Software Foundation 

# Contact: email-sig@python.org 

 

"""Classes to generate plain text from a message object tree.""" 

 

# Names exported by a star-import of this module.
__all__ = ['Generator', 'DecodedGenerator']

 

import re 

import sys 

import time 

import random 

import warnings 

 

from cStringIO import StringIO 

from email.header import Header 

 

# Glue used to join the MIME main type and subtype into a handler method name.
UNDERSCORE = '_'
# Separator used when joining rendered message texts and header blocks.
NL = '\n'

# Matches "From " at the start of any line; the generator substitutes
# ">From " for each match when From_ mangling is enabled.
fcre = re.compile(r'^From ', re.MULTILINE)

 

def _is8bitstring(s): 

    if isinstance(s, str): 

        try: 

            unicode(s, 'us-ascii') 

        except UnicodeError: 

            return True 

    return False 

 

 

 

class Generator:
    """Flatten a Message object tree into plain text.

    The text is written to the file-like object handed to the constructor.
    """

    #
    # Public interface
    #

    def __init__(self, outfp, mangle_from_=True, maxheaderlen=78):
        """Set up a generator that flattens messages into outfp.

        outfp is any object with a write() method; all output goes there.

        mangle_from_ (default True) controls whether body lines that start
        with "From " get a ">" put in front of them.

        maxheaderlen is the longest a header line may be before it is
        wrapped by the Header class.  Zero turns header wrapping off.  The
        default of 78 follows the (non-binding) RFC 2822 recommendation.
        """
        self._fp = outfp
        self._mangle_from_ = mangle_from_
        self._maxheaderlen = maxheaderlen

    def write(self, s):
        """Write s straight through to the underlying file object."""
        self._fp.write(s)

    def flatten(self, msg, unixfrom=False):
        """Write the message tree rooted at msg to the output file.

        When unixfrom is true, a Unix From_ delimiter line is printed before
        the message: the one stored on msg if there is one, otherwise a
        synthesized "From nobody <current time>" line.  By default no From_
        line is printed.  Subparts never get a From_ line.
        """
        if unixfrom:
            from_line = msg.get_unixfrom() or (
                'From nobody ' + time.ctime(time.time()))
            print >> self._fp, from_line
        self._write(msg)

    def clone(self, fp):
        """Return a new generator of the same class and options, writing to fp."""
        return self.__class__(fp, self._mangle_from_, self._maxheaderlen)

    #
    # Protected interface - undocumented ;/
    #

    def _write(self, msg):
        # The body has to be produced before the headers: a multipart
        # handler may need to pick a new boundary, which changes the
        # Content-Type: header.  So the body is rendered into a scratch
        # buffer first, then the headers are written, then the buffered body
        # is appended.  This keeps the _handle_*() methods simple while still
        # letting them modify headers.
        saved_fp = self._fp
        try:
            body_buffer = StringIO()
            self._fp = body_buffer
            self._dispatch(msg)
        finally:
            self._fp = saved_fp
        # Let the message write its own headers if it knows how; otherwise
        # fall back to the generic implementation.
        header_writer = getattr(msg, '_write_headers', None)
        if header_writer is not None:
            header_writer(self)
        else:
            self._write_headers(msg)
        self._fp.write(body_buffer.getvalue())

    def _dispatch(self, msg):
        # Handler lookup order:
        #   1. self._handle_<maintype>_<subtype>()
        #   2. self._handle_<maintype>()
        #   3. self._writeBody()
        # Dashes in the type names are mapped to underscores.
        maintype = msg.get_content_maintype()
        subtype = msg.get_content_subtype()
        full_name = UNDERSCORE.join((maintype, subtype)).replace('-', '_')
        handler = getattr(self, '_handle_' + full_name, None)
        if handler is None:
            short_name = maintype.replace('-', '_')
            handler = getattr(self, '_handle_' + short_name, None)
        if handler is None:
            handler = self._writeBody
        handler(msg)

    #
    # Default handlers
    #

    def _write_headers(self, msg):
        for name, value in msg.items():
            # Trailing comma: the value printed next continues this line.
            print >> self._fp, '%s:' % name,
            if self._maxheaderlen == 0:
                # Wrapping explicitly disabled: emit the value untouched.
                rendered = value
            elif isinstance(value, Header):
                # Header instances already know how to encode themselves.
                rendered = value.encode()
            elif _is8bitstring(value):
                # Raw 8-bit bytes of unknown encoding cannot be split safely
                # (a multibyte sequence might be cut in half), so the value
                # is emitted unsplit even if that makes the line too long.
                rendered = value
            else:
                # Delegate wrapping to Header.  This is not idempotent for
                # headers that were continued with tabs: they come back
                # continued with spaces.
                rendered = Header(
                    value, maxlinelen=self._maxheaderlen,
                    header_name=name).encode()
            print >> self._fp, rendered
        # Headers and body are always separated by one blank line.
        print >> self._fp

    #
    # Handlers for writing types and subtypes
    #

    def _handle_text(self, msg):
        body = msg.get_payload()
        if body is None:
            return
        if isinstance(body, basestring):
            if self._mangle_from_:
                body = fcre.sub('>From ', body)
            self._fp.write(body)
        else:
            raise TypeError('string payload expected: %s' % type(body))

    # Default body handler
    _writeBody = _handle_text

    def _handle_multipart(self, msg):
        # Strategy: render every subpart on its own, then (if the message has
        # no boundary yet) choose a boundary that does not occur in any of
        # the rendered texts, and finally stitch everything together.
        payload = msg.get_payload()
        if payload is None:
            parts = []
        elif isinstance(payload, basestring):
            # e.g. a non-strict parse of a message with no starting boundary.
            self._fp.write(payload)
            return
        elif isinstance(payload, list):
            parts = payload
        else:
            # Scalar payload
            parts = [payload]
        rendered = []
        for subpart in parts:
            buf = StringIO()
            self.clone(buf).flatten(subpart, unixfrom=False)
            rendered.append(buf.getvalue())
        # BAW: What about boundaries that are wrapped in double-quotes?
        boundary = msg.get_boundary()
        if not boundary:
            # No boundary set: make one that is absent from all the texts
            # and record it on the message.
            boundary = _make_boundary(NL.join(rendered))
            msg.set_boundary(boundary)
        # The preamble, when present, is written followed by a newline.
        preamble = msg.preamble
        if preamble is not None:
            if self._mangle_from_:
                preamble = fcre.sub('>From ', preamble)
            print >> self._fp, preamble
        # dash-boundary transport-padding CRLF
        print >> self._fp, '--' + boundary
        # First body-part follows the opening boundary directly; each later
        # one is preceded by: delimiter transport-padding CRLF
        for position, text in enumerate(rendered):
            if position:
                print >> self._fp, '\n--' + boundary
            self._fp.write(text)
        # close-delimiter transport-padding
        self._fp.write('\n--' + boundary + '--')
        epilogue = msg.epilogue
        if epilogue is not None:
            print >> self._fp
            if self._mangle_from_:
                epilogue = fcre.sub('>From ', epilogue)
            self._fp.write(epilogue)

    def _handle_multipart_signed(self, msg):
        # Signed parts must stay byte-for-byte intact for the signature to
        # verify (RFC1847 2.1), so header wrapping is switched off while the
        # multipart is written.
        # RDM: This isn't enough to completely preserve the part, but it helps.
        saved_limit = self._maxheaderlen
        try:
            self._maxheaderlen = 0
            self._handle_multipart(msg)
        finally:
            self._maxheaderlen = saved_limit

    def _handle_message_delivery_status(self, msg):
        # Writing each header block directly to our own file object would
        # leave an extra newline between the last block and the boundary, so
        # each block is rendered separately and trimmed first.
        blocks = []
        for subpart in msg.get_payload():
            buf = StringIO()
            self.clone(buf).flatten(subpart, unixfrom=False)
            text = buf.getvalue()
            # Drop the single trailing newline, if any.
            if text.endswith('\n'):
                text = text[:-1]
            blocks.append(text)
        # Joining with a newline puts an empty line between blocks without
        # adding one after the last block.
        self._fp.write(NL.join(blocks))

    def _handle_message(self, msg):
        buf = StringIO()
        sub_generator = self.clone(buf)
        # A message/rfc822 payload is normally a one-element list whose only
        # item is the Message object of the enclosed message; that object is
        # flattened and its text written out.
        # The payload can instead be a plain string, which happens when
        # HeaderParser is used on a message/rfc822 message (such messages
        # are produced by, for example, Groupwise when forwarding unadorned
        # messages -- Issue 7970).  In that case the string is written as is.
        content = msg.get_payload()
        if isinstance(content, list):
            sub_generator.flatten(msg.get_payload(0), unixfrom=False)
            content = buf.getvalue()
        self._fp.write(content)

 

 

 

# Default placeholder text DecodedGenerator uses when no fmt argument is
# given; it is expanded with a %(keyword)s mapping for each non-text part.
_FMT = '[Non-text (%(type)s) part of message omitted, filename %(filename)s]'

 

class DecodedGenerator(Generator):
    """Produce a textual rendering of a message.

    Works like Generator, but each non-text part is replaced by a line built
    from a format string describing that part.
    """

    def __init__(self, outfp, mangle_from_=True, maxheaderlen=78, fmt=None):
        """Same as Generator.__init__() plus one extra optional argument.

        Every subpart of the message is visited.  Subparts whose main type
        is `text' have their decoded payload printed.  Multipart containers
        are skipped.

        For any other subpart, fmt is printed in place of the payload after
        being expanded with these keywords (in %(keyword)s format):

        type       : Full MIME type of the non-text part
        maintype   : Main MIME type of the non-text part
        subtype    : Sub-MIME type of the non-text part
        filename   : Filename of the non-text part
        description: Description associated with the non-text part
        encoding   : Content transfer encoding of the non-text part

        When fmt is None (the default), this format is used:

        [Non-text (%(type)s) part of message omitted, filename %(filename)s]
        """
        Generator.__init__(self, outfp, mangle_from_, maxheaderlen)
        self._fmt = _FMT if fmt is None else fmt

    def _dispatch(self, msg):
        for subpart in msg.walk():
            kind = subpart.get_content_maintype()
            if kind == 'multipart':
                # Containers produce no output of their own.
                continue
            if kind == 'text':
                print >> self, subpart.get_payload(decode=True)
                continue
            # Anything else is replaced by the expanded format string.
            print >> self, self._fmt % {
                'type': subpart.get_content_type(),
                'maintype': subpart.get_content_maintype(),
                'subtype': subpart.get_content_subtype(),
                'filename': subpart.get_filename('[no filename]'),
                'description': subpart.get('Content-Description',
                                           '[no description]'),
                'encoding': subpart.get('Content-Transfer-Encoding',
                                        '[no encoding]'),
                }

 

 

 

# Helper
# _width is the number of characters in repr(sys.maxint-1); _fmt is a
# zero-padded decimal format of that width, used by _make_boundary() to
# render its random token.
_width = len(repr(sys.maxint-1))
_fmt = '%%0%dd' % _width

 

def _make_boundary(text=None):
    """Return a randomly generated MIME boundary string.

    When text is given, the result is guaranteed not to occur in it as a
    delimiter line ("--<boundary>" or "--<boundary>--" on a line of its
    own).  A ".<n>" suffix is appended to the random base, with n counting
    up from 0, until no such line is found.
    """
    base = ('=' * 15) + (_fmt % random.randrange(sys.maxint)) + '=='
    if text is None:
        return base
    candidate = base
    suffix = 0
    while re.search('^--' + re.escape(candidate) + '(--)?$', text,
                    re.MULTILINE):
        candidate = base + '.' + str(suffix)
        suffix += 1
    return candidate