Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

# {{{ http://code.activestate.com/recipes/496882/ (r8) 

''' 

http://code.activestate.com/recipes/496882/ 

Author: Michael Palmer 13 Jul 2006 

a regex-based JavaScript code compression kludge 

''' 

from __future__ import division 

from __future__ import print_function 

from builtins import range 

from builtins import object 

from past.utils import old_div 

import re 

 

 

class JSCompressor(object):
    '''
    Regex-based JavaScript compressor.

    String and regex literals are first swapped for numbered placeholders so
    that the comment- and whitespace-stripping regexes cannot touch them;
    once the remaining code has been deflated the literals are put back
    verbatim.

    This is a heuristic, not a parser. Known limitations visible in the
    patterns below: any "/.../" span on one line is treated as a regex
    literal (so "a / b / c" is protected as a whole), and level 2 removes
    whitespace around every non-word character (so "a + +b" becomes "a++b").
    '''

    def __init__(self, compressionLevel=2, measureCompression=False):
        '''
        compressionLevel:
        0 - no compression, script returned unchanged. For debugging only -
            try if you suspect that compression compromises your script
        1 - Strip comments and empty lines, collapse runs of non-leading
            spaces/tabs into one space; line breaks and indentation are kept
            (code remains readable)
        2 - Additionally strip insignificant whitespace (code will become
            quite unreadable)
        Any other value strips comments and empty lines only.

        measureCompression: append a comment stating the extent of compression
        '''
        self.compressionLevel = compressionLevel
        self.measureCompression = measureCompression

    # a bunch of regexes used in compression
    # first, exempt string and regex literals from compression by transient
    # substitution

    # A string body is a run of "ordinary character" or "backslash + any
    # character". Consuming escapes pairwise is what lets a literal ending in
    # an escaped backslash ('\\') close at the right quote.
    findLiterals = re.compile(r'''
        ('(?:[^'\\\n]|\\.)*')           |       # single-quoted strings
        ("(?:[^"\\\n]|\\.)*")           |       # double-quoted strings
        ((?<![\*\/])\/(?![\/\*]).*?(?<![\\])\/) # JS regexes, trying hard not to be tripped up by comments
        ''', re.VERBOSE)

    # literals are temporarily replaced by numbered placeholders
    literalMarker = '@_@%d@_@'                  # temporary replacement
    # put the string literals back in
    backSubst = re.compile(r'@_@(\d+)@_@')

    # /* ... */ comments on single line
    mlc1 = re.compile(r'(\/\*.*?\*\/)')
    mlc = re.compile(r'(\/\*.*?\*\/)', re.DOTALL)  # real multiline comments
    slc = re.compile(r'\/\/.*')                 # remove single line comments

    # collapse successive non-leading white space characters into one
    collapseWs = re.compile(r'(?<=\S)[ \t]+')

    squeeze = re.compile(r'''
        \s+(?=[\}\]\)\:\&\|\=\;\,\.\+])   |     # remove whitespace preceding control characters
        (?<=[\{\[\(\:\&\|\=\;\,\.\+])\s+  |     # ... or following such
        [ \t]+(?=\W)                      |     # remove spaces or tabs preceding non-word characters
        (?<=\W)[ \t]+                           # ... or following such
        ''', re.VERBOSE | re.DOTALL)

    def compress(self, script):
        '''
        perform compression and return compressed script

        script: JavaScript source text.

        Returns the compressed text. With compressionLevel 0 the input is
        returned unchanged (and no measurement comment is appended). With
        measureCompression set, a trailing "// squeezed out N%" comment is
        appended; N is 0 for an empty script.
        '''
        if self.compressionLevel == 0:
            return script

        lengthBefore = len(script)

        # first, substitute string literals by placeholders to prevent the
        # regexes messing with them
        literals = []

        def insertMarker(mo):
            literals.append(mo.group())
            return self.literalMarker % (len(literals) - 1)

        script = self.findLiterals.sub(insertMarker, script)

        # now, to the literal-stripped carcass, apply some kludgy regexes for
        # deflation...
        script = self.slc.sub('', script)       # strip single line comments
        # replace /* .. */ comments on single lines by space
        script = self.mlc1.sub(' ', script)
        # replace real multiline comments by newlines
        script = self.mlc.sub('\n', script)

        # remove empty lines and trailing whitespace
        script = '\n'.join(line.rstrip()
                           for line in script.splitlines() if line.strip())

        # Collapse runs of spaces/tabs for both level 1 and level 2. Level 2
        # needs this before squeezing: fed a run of two or more blanks
        # between two identifiers, the squeeze alternatives match one after
        # the other and delete the whole run ("var   x" -> "varx").
        if self.compressionLevel in (1, 2):
            script = self.collapseWs.sub(' ', script)

        # squeeze out any dispensable whitespace
        if self.compressionLevel == 2:
            script = self.squeeze.sub('', script)

        # now back-substitute the string and regex literals
        def backsub(mo):
            return literals[int(mo.group(1))]

        script = self.backSubst.sub(backsub, script)

        if self.measureCompression:
            if lengthBefore:
                ratio = float(len(script)) / lengthBefore
                squeezedBy = int(100 * (1 - ratio))
            else:
                # nothing to compress; avoid dividing by zero
                squeezedBy = 0
            script += '\n// squeezed out %s%%\n' % squeezedBy

        return script

 

 

if __name__ == '__main__':
    # Demo: run a small JavaScript sample through compression levels 1 and 2
    # and print each result together with its length.
    script = ''' 

 

 

    /* this is a totally useless multiline comment, containing a silly "quoted string", 

       surrounded by several superfluous line breaks 

     */ 

 

 

    // and this is an equally important single line comment 

 

    sth = "this string contains 'quotes', a /regex/ and a // comment yet it will survive compression"; 

 

    function wurst(){           // this is a great function 

        var hans = 33; 

    } 

 

    sthelse = 'and another useless string'; 

 

    function hans(){            // another function 

        var   bill   =   66;    // successive spaces will be collapsed into one; 

        var bob = 77            // this line break will be preserved b/c of lacking semicolon 

        var george = 88; 

    } 

    '''

    for level in (1, 2):
        print('\ncompression level', level, ':\n--------------')
        compressor = JSCompressor(compressionLevel=level,
                                  measureCompression=True)
        compressed = compressor.compress(script)
        print(compressed)
        print('length', len(compressed))

# end of http://code.activestate.com/recipes/496882/ }}}