Compression, part 5: Finished program Author: Ole Friis (ole_f@post3.tele.dk) Date: Jul, 2001 Part: [url /articles/compression/]1[/url] | [url /articles/compression/2/]2[/url] | [url /articles/compression/3/]3[/url] | [url /articles/compression/4/]4[/url] | 5 | [url /articles/compression/6/]6[/url] ===The main program I decided to put all the functions into an object, and then have the COMPRESS and DECOMPRESS functions inside this object do all the work. Here's how it looks - hopefully you'll recognize most of the functions! compress-bw: context [ ; Burrows-Wheeler encoding encode-bw: func [ "Encodes a string using the Burrows-Wheeler transform" str [string!] "The string to encode" /local str-length permutations transformed-string transformed-info i ][ str-length: length? str ; Build a representation of the permutations permutations: make block! str-length for i 1 str-length 1 [append permutations i] ; Sort the permutations in lexicographic order sort/compare permutations func [n1 n2 /local c1 c2] [ loop str-length [ if n1 < 1 [n1: str-length] if n2 < 1 [n2: str-length] c1: pick str either n1 = 1 [1][str-length - n1 + 2] c2: pick str either n2 = 1 [1][str-length - n2 + 2] if c1 < c2 [return true] if c1 > c2 [return false] n1: n1 - 1 n2: n2 - 1 ] return true ] ; Now, let's create the new string transformed-string: make string! str-length transformed-info: (index? find permutations 1) foreach permutation permutations [ append transformed-string pick str (str-length - permutation + 1) ] reduce [transformed-string transformed-info] ] decode-bw: func [ "Decodes a string using the Burrows-Wheeler transform" str [string!] "The encoded version of the string" index [integer!] "The index to the original text" /local str-length first-row found-table t letter-index old-offset new-offset decoded-string ][ str-length: length? str ; Find t, which is the relation between str and the permutations first-row: sort/case copy str insert/dup (found-table: make block! 256) 0 256 t: make block! str-length foreach letter str [ letter-index: (to-integer letter) + 1 old-offset: pick found-table letter-index new-offset: index? find/case (skip first-row old-offset) letter append t new-offset poke found-table letter-index new-offset ] ; Now the rest is easy decoded-string: make string! str-length insert decoded-string pick str index loop str-length - 1 [ insert decoded-string pick str (index: pick t index) ] decoded-string ] ; Move-to-Front encoding encode-mtf: func [ "Encodes a string using Move-to-Front" str [string!] "The string to encode" table [block!] "The initial table to use" /local result index ][ table: copy table result: make block! (length? str) foreach letter str [ index: find/case table letter append result (index? index) - 1 remove index insert table letter ] result ] decode-mtf: func [ "Decodes a string using Move-to-Front" indices [block!] "The encoded version of the string" table [block!] "The initial table to use" /local result letter ][ table: copy table result: make string! (length? indices) foreach index indices [ letter: pick table (index + 1) append result letter remove skip table index insert table letter ] result ] block-to-string: func [ "Converts output from encode-mtf to a string" b [block!] "The encode-mtf output" /local res ][ res: copy "" foreach i b [ append res to-char i ] res ] string-to-block: func [ "Converts a string to input to decode-mtf" s [string!] "The string to convert" /local res ][ res: copy [] foreach c s [ append res to-integer c ] res ] ; Huffman encoding construct-huffman: func [ "Constructs a Huffman tree, using the given statistics" stats [block!] "The statistics" /local probs symbol-table node1 node2 new-node temp-list ][ ; First, make a flat list of all the characters probs: make block! 256 for i 1 256 1 [ append/only probs reduce [pick stats i (i - 1) none] ] ; Then construct the tree, joining two nodes each time symbol-table: copy probs sort probs while [1 < length? probs][ ; Pick the two nodes with least probability node1: first probs node2: second probs remove/part probs 2 ; Construct a father to node1 and node2 new-node: reduce [(first node1) + (first node2) node1 node2 none] change/only (back tail node1) new-node change/only (back tail node2) new-node ; Insert the new node correctly in the "probs" list temp-list: probs while [all [not tail? temp-list (first temp-list) < new-node]][ temp-list: next temp-list ] insert/only temp-list new-node ] ; Return the top element of the Huffman tree and the original flattened tree. reduce [first probs symbol-table] ] encode-huffman-char: func [ "Huffman-encodes a character" symbol [char!] "Character to encode" tree [block!] "The Huffman tree to use" /local node code parent ][ node: pick (second tree) (to-integer symbol) + 1 code: copy "" while [found? parent: last node][ insert code either node == third parent ["0"] ["1"] node: parent ] code ] decode-huffman-char: func [ "Huffman-decodes a character" code [string!] "The Huffman code to decipher (will be altered)" tree [block!] "The Huffman tree to use" /local node ][ node: first tree until [ either (first code) = #"1" [node: second node] [node: third node] remove code (length? node) = 3 ] to-char second node ] encode-huffman: func [ "Huffman-encodes a string" str [string!] "String to encode" tree [block!] "The Huffman tree to use" /local result ][ result: copy "" while [not tail? str][ append result encode-huffman-char first str tree str: next str ] result ] decode-huffman: func [ "Huffman-decodes a string" code [string!] "The Huffman code to decipher" tree [block!] "The Huffman tree to use" count [integer!] "Number of characters to decipher" ][ code: copy code result: copy "" loop count [ append result decode-huffman-char code tree ] result ] ; Bit-fiddling encode-integer: func [ "Converts a REBOL integer to a 4-character string" i [integer!] "The integer to encode" ][ join "" reduce [ to-char (i / 16777216) ; 16777216 = power 2 24 to-char (i / 65536) // 256 ; 65536 = power 2 16 to-char (i / 256) // 256 to-char i // 256 ] ] decode-integer: func [ "Converts a 4-character string to a REBOL integer" s [string!] "The 4-character string to decode" ][ to-integer ((to-integer first s) * 16777216) + ; 16777216 = power 2 24 ((to-integer second s) * 65536) + ; 65536 = power 2 16 ((to-integer third s) * 256) + to-integer fourth s ] encode-bitstream: func [ "Encodes a string of 1's and 0's to a binary string" s [string!] "The string of 1's and 0's" /local res byte add-this ][ res: copy "" forever [ byte: 0 add-this: 128 while [(add-this <> 0) and (not tail? s)] [ if #"1" = first s [byte: byte + add-this] add-this: to-integer (add-this / 2) s: next s ] append res to-char byte if tail? s [return res] ] ] decode-bitstream: func [ "Decodes a binary string into a string of 1's and 0's" s [string!] "The binary string" /local res next-bit ][ res: copy "" next-bit: 128 while [not tail? s] [ append res either 0 = and~ next-bit to-integer first s ["0"]["1"] next-bit: to-integer (next-bit / 2) if next-bit = 0 [next-bit: 128 s: next s] ] res ] ; Initialization prin "Creating table for Move-to-Front functions... " mtf-table: copy used-letters: [ #"." #"s" #"r" #"g" #"m" #"w" #"a" #"l" #"y" #"," #"e" #"o" #"t" #" " #"n" #"k" #"W" #"d" #"h" #"i" #"f" #"x" ] for i 0 255 1 [ letter: to-char i if not found? find/case used-letters letter [ append mtf-table letter ] ] print "Done." prin "Constructing Huffman tree... " huffman-tree: construct-huffman load %probabilities.r print "Done." ; Main functions compress: func [ "Compresses a string" str [string!] "String to compress" /local bw-string bw-index mtf-string huffman-string res ][ prin "Performing Burrows-Wheeler transformation (might take a while)... " set [bw-string bw-index] encode-bw str print "Done." prin "Performing Move-to-Front encoding... " mtf-string: block-to-string encode-mtf bw-string mtf-table print "Done." prin "Huffman-encoding... " huffman-string: encode-huffman mtf-string huffman-tree print "Done." prin "Creating final representation... " res: rejoin reduce [ encode-integer length? str encode-integer bw-index encode-bitstream huffman-string ] print "Done." res ] decompress: func [ "Decompresses a string" str [string!] "String to decompress" /local bw-string bw-index mtf-string huffman-string res ][ prin "Analysing representation... " str-length: decode-integer str bw-index: decode-integer skip str 4 huffman-string: decode-bitstream skip str 8 print "Done." prin "Huffman-decoding... " mtf-string: decode-huffman huffman-string huffman-tree str-length print "Done." prin "Performing Move-to-Front decoding... " bw-string: decode-mtf string-to-block mtf-string mtf-table print "Done." prin "Performing Burrows-Wheeler inverse transformation... " res: decode-bw bw-string bw-index print "Done." res ] ] ===The Probabilities.r file Copy the following and put it in a file called Probabilities.r, situated in the same directory as where you run the file above: 3269 933 503 467 324 300 250 237 187 166 126 148 102 118 95 66 100 113 71 37 79 55 34 36 26 38 26 38 8 15 12 10 11 13 7 12 8 10 6 2 3 9 2 4 3 1 2 2 2 1 7 1 0 0 0 0 0 0 0 1 1 1 0 1 2 0 0 2 18 0 0 0 0 1 1 3 1 0 0 0 0 1 1 2 0 2 2 1 1 2 1 0 1 0 1 1 0 16 5 3 0 2 0 0 2 0 0 3 3 0 1 2 2 2 3 0 1 0 2 2 2 33 21 4 21 5 2 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 3 2 2 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 3 0 0 0 As noted earlier, you could do your own table too, but this is a good starting point. ###