-
Notifications
You must be signed in to change notification settings - Fork 87
/
Copy pathRtfDecompressor.cs
224 lines (191 loc) · 11.9 KB
/
RtfDecompressor.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
// Copyright (c) 2016, Dijji, and released under Ms-PL. This can be found in the root of this distribution.
using System;
using System.Collections;
using System.IO;
using System.Text;
namespace XstReader
{
// Implementation of the RTF decompression algorithm specified in [MS-OXRTFCP]
// This is a port of the VB code at http://www.vbforums.com/showthread.php?669883-NET-3-5-RtfDecompressor-Decompress-RTF-From-Outlook-And-Exchange-Server
class RtfDecompressor
{
//fields
// Preloaded dictionary content. It is assigned once in the constructor and copied to the start
// of the circular dictionary before a compressed payload is decoded.
private byte[] InitialDictionary;
// 256-entry lookup table used by CalculateCrc; it is indexed by the low byte of (crc ^ input byte).
// Values are written without leading zeros (e.g. 0x76DC419 is 0x076DC419).
// NOTE(review): the entries look like the standard reflected CRC-32 table (entry 128 is 0xEDB88320) -
// confirm against [MS-OXRTFCP] 2.1.3.2 before relying on that.
private static UInt32[] CrcDictionary = new UInt32[]
{
0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, 0x76DC419, 0x706AF48F, 0xE963A535, 0x9E6495A3, 0xEDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988,
0x9B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91, 0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE, 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7,
0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, 0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5, 0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172,
0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B, 0x35B5A8FA, 0x42B2986C, 0xDBBBC9D6, 0xACBCF940, 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59,
0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116, 0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F, 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924,
0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D, 0x76DC4190, 0x1DB7106, 0x98D220BC, 0xEFD5102A, 0x71B18589, 0x6B6B51F, 0x9FBFE4A5, 0xE8B8D433,
0x7807C9A2, 0xF00F934, 0x9609A88E, 0xE10E9818, 0x7F6A0DBB, 0x86D3D2D, 0x91646C97, 0xE6635C01, 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E,
0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457, 0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, 0xFCB9887C, 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65,
0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, 0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB, 0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0,
0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9, 0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086, 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F,
0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, 0x59B33D17, 0x2EB40D81, 0xB7BD5C3B, 0xC0BA6CAD, 0xEDB88320, 0x9ABFB3B6, 0x3B6E20C, 0x74B1D29A,
0xEAD54739, 0x9DD277AF, 0x4DB2615, 0x73DC1683, 0xE3630B12, 0x94643B84, 0xD6D6A3E, 0x7A6A5AA8, 0xE40ECF0B, 0x9309FF9D, 0xA00AE27, 0x7D079EB1,
0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, 0xF762575D, 0x806567CB, 0x196C3671, 0x6E6B06E7, 0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC,
0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5, 0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252, 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B,
0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, 0xDF60EFC3, 0xA867DF55, 0x316E8EEF, 0x4669BE79, 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236,
0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F, 0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04, 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D,
0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x26D930A, 0x9C0906A9, 0xEB0E363F, 0x72076785, 0x5005713, 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0xCB61B38,
0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0xBDBDF21, 0x86D3D2D4, 0xF1D4E242, 0x68DDB3F8, 0x1FDA836E, 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777,
0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, 0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45, 0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2,
0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB, 0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, 0x37D83BF0, 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9,
0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, 0xBAD03605, 0xCDD70693, 0x54DE5729, 0x23D967BF, 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94,
0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D
};
//constants
// Number of bytes occupied by the header at the start of the input; payload decoding and the
// CRC calculation both begin at this offset.
private const int HeaderLength = 0x10;
// Capacity of the circular dictionary (0x1000 = 4096 bytes); dictionary read and write offsets
// wrap modulo this value.
private const int CircularDictionaryMaxLength = 0x1000;
//constructors
/// <summary>
/// Creates a decompressor and prepares the preloaded dictionary bytes ([MS-OXRTFCP] 2.1.2.1).
/// </summary>
public RtfDecompressor()
{
    // The preload text must match byte-for-byte: compressed streams reference positions inside
    // it, so the segments below are joined without any separators and then ASCII-encoded.
    string preloadText = string.Concat(
        @"{\rtf1\ansi\mac\deff0\deftab720{\fonttbl;}",
        @"{\f0\fnil \froman \fswiss \fmodern \fscript ",
        @"\fdecor MS Sans SerifSymbolArialTimes New RomanCourier{\colortbl\red0\green0\blue0",
        "\r\n",
        @"\par \pard\plain\f0\fs20\b\i\u\tab\tx");

    InitialDictionary = Encoding.ASCII.GetBytes(preloadText);
}
//methods
/// <summary>
/// Reads the remaining content of a <see cref="Stream">Stream</see> holding compressed RTF and
/// returns the decompressed data as a <see cref="MemoryStream">MemoryStream</see>.
/// </summary>
/// <param name="stream">The <see cref="Stream">Stream</see> to decompress; it is read from its current position to its end.</param>
/// <param name="enforceCrc">True to enforce a CRC check; otherwise, false to ignore CRC checking.</param>
/// <exception cref="System.ArgumentNullException">Thrown when <paramref name="stream"/> is null.</exception>
/// <exception cref="XstException">Thrown by the byte-array overload when the data is corrupt.</exception>
/// <returns>The decompressed data, or null when the stream cannot be read.</returns>
public MemoryStream Decompress(Stream stream, bool enforceCrc = false)
{
    if (stream == null)
        throw new ArgumentNullException("stream");

    // Unreadable streams have always produced null rather than an exception; keep that contract.
    if (!stream.CanRead)
        return null;

    // Copy through a MemoryStream instead of a single Read call: the buffer ends up exactly
    // as large as the content, partial reads are handled by CopyTo, and non-seekable streams
    // (which cannot report Length) work too.
    using (var buffered = new MemoryStream())
    {
        stream.CopyTo(buffered);
        return Decompress(buffered.ToArray(), enforceCrc);
    }
}
/// <summary>
/// Decompresses RTF data in the compressed RTF format of [MS-OXRTFCP] and returns the result
/// as a <see cref="MemoryStream">MemoryStream</see>.
/// </summary>
/// <param name="data">The complete input: the header followed by the payload.</param>
/// <param name="enforceCrc">True to enforce a CRC check (compressed data only); otherwise, false to ignore CRC checking.</param>
/// <exception cref="System.ArgumentNullException">Thrown when <paramref name="data"/> is null.</exception>
/// <exception cref="XstException">
/// Thrown when the data is too short to hold a header, the compression type is unknown, the
/// CRC check fails, a dictionary reference is invalid, or the payload ends in the middle of a run.
/// </exception>
/// <returns>
/// For uncompressed data, a stream over the bytes after the header, positioned at its start.
/// For compressed data, a stream holding the decompressed bytes; its Position is left at the
/// end of the written data, so callers should reset Position before reading from it.
/// Null when a compressed payload ends exactly on a run boundary without an end-of-stream reference.
/// </returns>
public MemoryStream Decompress(byte[] data, bool enforceCrc = false)
{
    if (data == null)
        throw new ArgumentNullException("data");
    // Never map a header out of a buffer that is too small to contain one.
    if (data.Length < HeaderLength)
        throw new XstException("Input stream is corrupt: header is incomplete");

    //2.2.3.1.2
    var header = Map.MapType<RtfHeader>(data);
    switch (header.compType)
    {
        case (UInt32)CompressionTypes.UnCompressed:
            //data is uncompressed, this is very rare; the header is not part of the result
            return new MemoryStream(data, HeaderLength, data.Length - HeaderLength);

        case (UInt32)CompressionTypes.Compressed:
            //2.2.3
            if (enforceCrc && CalculateCrc(data, HeaderLength) != header.crc)
                throw new XstException("Input stream is corrupt: CRC did not match");
            return DecompressCompressedBody(data, header.rawSize);

        default:
            throw new XstException("Input stream is corrupt: unknown compression type");
    }
}

// Expands the run/token payload that follows the header, using the preloaded circular dictionary.
private MemoryStream DecompressCompressedBody(byte[] data, uint rawSize)
{
    // One run is a control byte plus up to eight 2-byte references of at most 17 bytes each,
    // so the output can never exceed 8x the payload size.
    const int MaxExpansionFactor = 8;
    const int TokensPerRun = 8;

    // Initialise the dictionary
    byte[] dictionary = new byte[CircularDictionaryMaxLength];
    int initialLength = InitialDictionary.Length;
    Array.Copy(InitialDictionary, 0, dictionary, 0, initialLength);
    int dictionaryWrite = initialLength;
    int dictionaryEnd = initialLength;

    // rawSize comes from the (untrusted) header and is only a capacity hint. Clamp it so a
    // corrupt value cannot request a negative or absurdly large allocation; the stream still
    // grows on demand if the hint turns out to be too small.
    long payloadBound = (long)(data.Length - HeaderLength) * MaxExpansionFactor;
    int capacity = (int)Math.Min(Math.Min((long)rawSize, payloadBound), (long)int.MaxValue);
    var destination = new MemoryStream(capacity);

    int i = HeaderLength;
    while (i < data.Length)
    {
        // Each run starts with a control byte; bit j (least significant first) describes token j.
        int control = data[i];
        int offset = 1;
        for (int j = 0; j < TokensPerRun; j++)
        {
            if ((control & (1 << j)) == 0)
            {
                //literal bit
                if (i + offset >= data.Length)
                    throw new XstException("Input stream is corrupt: index out of range");

                byte literal = data[i + offset];
                destination.WriteByte(literal);
                AppendToDictionary(dictionary, literal, ref dictionaryWrite, ref dictionaryEnd);
                offset++;
            }
            else
            {
                //reference bit, create word from two bytes - note big-Endian ordering
                if (i + offset + 1 >= data.Length)
                    throw new XstException("Input stream is corrupt: index out of range");

                int word = (data[i + offset] << 8) | data[i + offset + 1];
                //upper 12 bits: offset into the dictionary
                int upper = (word & 0xFFF0) >> 4;
                //lower 4 bits: number of bytes to copy, stored minus 2
                int lower = (word & 0xF) + 2;

                if (upper > dictionaryEnd)
                    throw new XstException("Input stream is corrupt: invalid dictionary reference");
                if (upper == dictionaryWrite)
                    //special dictionary reference means that decompression is complete
                    return destination;

                //cannot just copy the bytes over because the dictionary is a
                //circular array so it must properly wrap to beginning
                for (int k = 0; k < lower; k++)
                {
                    int correctedOffset = (upper + k) % CircularDictionaryMaxLength; //2.1.3.1.4
                    if (destination.Position == rawSize)
                        //this is the last token, the rest is just padding
                        return destination;

                    byte copied = dictionary[correctedOffset];
                    destination.WriteByte(copied);
                    AppendToDictionary(dictionary, copied, ref dictionaryWrite, ref dictionaryEnd);
                }
                offset += 2;
            }
        }
        //run is processed
        i += offset;
    }

    // The payload ended on a run boundary without the end-of-stream reference. Callers have
    // always received null in this situation, so that contract is kept.
    return null;
}

// Stores one byte at the dictionary write offset, extends the used length and wraps the offset.
private static void AppendToDictionary(byte[] dictionary, byte value, ref int writeOffset, ref int endOffset)
{
    dictionary[writeOffset++] = value;
    if (writeOffset > endOffset)
        endOffset = Math.Min(writeOffset, CircularDictionaryMaxLength);
    writeOffset %= CircularDictionaryMaxLength; //2.1.3.1.4
}
/// <summary>
/// Computes the table-driven CRC of <paramref name="buffer"/> from <paramref name="offset"/>
/// to the end of the array ([MS-OXRTFCP] 2.1.3.2).
/// </summary>
/// <param name="buffer">The bytes to checksum.</param>
/// <param name="offset">Index of the first byte to include.</param>
/// <returns>The CRC value; 0 when no bytes lie at or after <paramref name="offset"/>.</returns>
private UInt32 CalculateCrc(byte[] buffer, int offset)
{
    // The running value starts at zero and is returned as-is (no final inversion).
    UInt32 checksum = 0;
    int position = offset;
    while (position < buffer.Length)
    {
        // Low byte of (checksum XOR input byte) selects the table entry.
        UInt32 tableIndex = (checksum ^ buffer[position]) & 0xFF;
        checksum = CrcDictionary[tableIndex] ^ (checksum >> 8);
        position++;
    }
    return checksum;
}
//enumerations
// Values of the header's compType field that Decompress recognises; any other value is
// rejected as corrupt.
private enum CompressionTypes : UInt32
{
//2.1.3.1.1
// Payload is compressed; the bytes of this value in little-endian order spell ASCII "LZFu".
Compressed = 0x75465A4C,
// Payload is stored as-is after the header; little-endian bytes spell ASCII "MELA".
UnCompressed = 0x414C454D,
}
//nested types
// Header found at the start of the input, obtained via Map.MapType<RtfHeader>(data).
// Four 32-bit fields = 16 bytes, matching HeaderLength.
// NOTE(review): the field order presumably mirrors the on-disk layout that Map.MapType relies on -
// do not reorder without checking that helper.
private struct RtfHeader
{
//fields
// Not read anywhere in this class. NOTE(review): presumably the compressed size - confirm against the spec.
public uint compSize;
// Expected size of the decompressed output; used as the output capacity and to stop copying
// once that many bytes have been produced.
public uint rawSize;
// Compression type marker; compared against the CompressionTypes values.
public uint compType;
// Expected CRC of the bytes after the header; checked only when enforceCrc is true.
public uint crc;
}
}
}