-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathHtmlFile.cpp
More file actions
364 lines (252 loc) · 9.45 KB
/
HtmlFile.cpp
File metadata and controls
364 lines (252 loc) · 9.45 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
// HtmlFile.cpp
#include "HtmlFile.h"
// Global variables
// Text of the loaded file, held as a null-terminated string.
// Allocated with new[] in HtmlFileLoad, released with delete[] in
// HtmlFileFreeMemory, and read by HtmlFileDisplayText and HtmlFileProcessTags.
LPTSTR g_lpszFileText;
// Shows the loaded file text (g_lpszFileText) in an information message box.
//
// lpszTitle  - caption for the message box
// hWndParent - owner window passed through to MessageBox
//
// Returns whatever MessageBox returns.
int HtmlFileDisplayText( LPCTSTR lpszTitle, HWND hWndParent )
{
	// OK button with the information icon
	const unsigned int uBoxStyle = ( MB_OK | MB_ICONINFORMATION );

	return MessageBox( hWndParent, g_lpszFileText, lpszTitle, uBoxStyle );

} // End of function HtmlFileDisplayText
void HtmlFileFreeMemory()
{
// Free string memory
delete [] g_lpszFileText;
} // End of function HtmlFileFreeMemory
// Builds a url from the value of an attribute in a tag.
//
// lpszParentUrl     - url that non-absolute attribute values are appended to
// lpszTag           - text of the tag to examine
// lpszAttributeName - name of the attribute whose value is wanted
// lpszAttributeUrl  - receives the resulting url; the caller supplies the
//                     buffer and no length check is made here
//
// Anything from the first question mark onwards is dropped from the value.
// A value containing HTML_FILE_ABSOLUTE_URL_IDENTIFIER is copied as-is;
// otherwise the value (minus one leading forward slash, if present) is
// appended to the parent url.
//
// Returns TRUE if the attribute was found and its value (after trimming)
// was not empty, FALSE otherwise.
BOOL HtmlFileGetAttributeUrl( LPCTSTR lpszParentUrl, LPCTSTR lpszTag, LPCTSTR lpszAttributeName, LPTSTR lpszAttributeUrl )
{
	BOOL bHaveUrl = FALSE;

	// Scratch buffer for the raw attribute value
	LPTSTR lpszValue = new char[ STRING_LENGTH + sizeof( char ) ];

	if( HtmlFileGetAttributeValue( lpszTag, lpszAttributeName, lpszValue ) )
	{
		// Cut the value off at the first question mark, if there is one
		LPTSTR lpszQuery = strchr( lpszValue, ASCII_QUESTION_MARK_CHARACTER );

		if( lpszQuery != NULL )
		{
			*lpszQuery = '\0';
		}

		// Only a non-empty value produces a url
		if( *lpszValue != '\0' )
		{
			if( strstr( lpszValue, HTML_FILE_ABSOLUTE_URL_IDENTIFIER ) != NULL )
			{
				// Absolute url: use the value unchanged
				lstrcpy( lpszAttributeUrl, lpszValue );
			}
			else
			{
				// Non-absolute url: skip a single leading forward slash, then
				// append what remains to the parent url
				LPCTSTR lpszRelativePart = ( ( *lpszValue == ASCII_FORWARD_SLASH_CHARACTER ) ? ( lpszValue + sizeof( char ) ) : lpszValue );

				lstrcpy( lpszAttributeUrl, lpszParentUrl );
				lstrcat( lpszAttributeUrl, lpszRelativePart );
			}

			bHaveUrl = TRUE;
		}
	}

	// Release scratch buffer
	delete [] lpszValue;

	return bHaveUrl;

} // End of function HtmlFileGetAttributeUrl
// Extracts the quoted value of an attribute from a tag.
//
// lpszTag            - text of the tag to examine
// lpszAttributeName  - name of the attribute to look for
// lpszAttributeValue - receives the value (without its quotes) on success;
//                      left untouched on failure
//
// The attribute name must be followed (spaces allowed) by an equals sign and
// then (spaces allowed) by a value enclosed in a matching pair of
// apostrophes or inverted commas.
//
// Returns TRUE if such a value was found and stored, FALSE otherwise.
//
// Only the characters between the delimiters are written to the output, so
// the caller's buffer needs room for the value plus a terminator rather than
// for the whole remainder of the tag.
// NOTE(review): the buffer length is still not passed in, so a value longer
// than the caller's buffer would overrun it - confirm buffer sizes at callers.
BOOL HtmlFileGetAttributeValue( LPCTSTR lpszTag, LPCTSTR lpszAttributeName, LPTSTR lpszAttributeValue )
{
	BOOL bResult = FALSE;

	int nOffset;

	// Find attribute name in tag (a negative offset is treated as not found)
	nOffset = FindTextInString( lpszTag, lpszAttributeName );

	// Ensure that attribute name was found in tag
	if( nOffset >= 0 )
	{
		// Find first non-space character after attribute name
		nOffset = GetNextNonSpace( lpszTag, ( nOffset + lstrlen( lpszAttributeName ) ) );

		// Ensure that first non-space character after attribute name is an equals sign
		if( lpszTag[ nOffset ] == ASCII_EQUALS_CHARACTER )
		{
			// Find first non-space character after equals sign
			nOffset = GetNextNonSpace( lpszTag, ( nOffset + sizeof( char ) ) );

			// Ensure that first non-space character after equals sign is an apostrophe or inverted comma
			if( ( lpszTag[ nOffset ] == ASCII_APOSTROPHE_CHARACTER ) || ( lpszTag[ nOffset ] == ASCII_INVERTED_COMMA_CHARACTER ) )
			{
				// The closing delimiter must match the opening one
				char cAttributeDelimiter = lpszTag[ nOffset ];

				// Value starts immediately after the opening delimiter
				LPCTSTR lpszStartOfValue = ( lpszTag + nOffset + sizeof( char ) );

				// Find the closing delimiter in the tag itself, before copying anything
				LPCTSTR lpszEndOfValue = strchr( lpszStartOfValue, cAttributeDelimiter );

				// Ensure that end of attribute was found
				if( lpszEndOfValue )
				{
					// lstrcpyn copies at most ( count - 1 ) characters and always
					// terminates, so passing length + 1 stores exactly the value
					lstrcpyn( lpszAttributeValue, lpszStartOfValue, ( ( int )( lpszEndOfValue - lpszStartOfValue ) + 1 ) );

					// Update return value
					bResult = TRUE;

				} // End of successfully found end of attribute

			} // End of first non-space character after equals sign is an apostrophe or inverted comma

		} // End of first non-space character after attribute name is an equals sign

	} // End of successfully found attribute name in tag

	return bResult;

} // End of function HtmlFileGetAttributeValue
// Copies the name of a tag into lpszTagName.
//
// lpszTag     - text of the tag; the name is taken to start after the first
//               sizeof( HTML_FILE_START_OF_TAG_CHARACTER ) characters
// lpszTagName - receives the null-terminated tag name; the caller supplies
//               the buffer and no length check is made here
//
// The name ends at the first character of lpszTag that appears in
// HTML_FILE_END_OF_TAG_NAME_CHARACTERS, or at the end of the string if none
// of them occurs.
//
// Returns FALSE when lpszTag is empty or starts with one of those
// characters, TRUE otherwise.
BOOL HtmlFileGetTagName( LPCTSTR lpszTag, LPTSTR lpszTagName )
{
	// Count of leading characters that are not end-of-tag-name characters
	int nNameEnd = strcspn( lpszTag, HTML_FILE_END_OF_TAG_NAME_CHARACTERS );

	// Nothing before the first end-of-tag-name character
	if( nNameEnd == 0 )
	{
		return FALSE;
	}

	// <script>
	// 01234567
	//        |

	// Skip the start-of-tag character; everything up to nNameEnd is the name
	LPCTSTR lpszStartOfName = ( lpszTag + sizeof( HTML_FILE_START_OF_TAG_CHARACTER ) );
	size_t nNameLength = ( nNameEnd - sizeof( HTML_FILE_START_OF_TAG_CHARACTER ) );

	// Store tag name
	strncpy( lpszTagName, lpszStartOfName, nNameLength );

	// strncpy does not terminate when it copies the full count
	lpszTagName[ nNameLength ] = '\0';

	return TRUE;

} // End of function HtmlFileGetTagName
// Tests whether a tag has a given name, ignoring case.
//
// lpszTag             - text of the tag to examine
// lpszRequiredTagName - name to compare the tag's name against
//
// Returns TRUE if HtmlFileGetTagName succeeds and the extracted name equals
// lpszRequiredTagName under lstrcmpi, FALSE otherwise.
BOOL HtmlFileIsTagName( LPCTSTR lpszTag, LPCTSTR lpszRequiredTagName )
{
	BOOL bResult = FALSE;

	// Allocate string memory
	LPTSTR lpszTagName = new char[ STRING_LENGTH + sizeof( char ) ];

	// Get tag name
	if( HtmlFileGetTagName( lpszTag, lpszTagName ) )
	{
		// Successfully got tag name

		// See if tag has the required name
		if( lstrcmpi( lpszTagName, lpszRequiredTagName ) == 0 )
		{
			// Tag has the required name

			// Update return value
			bResult = TRUE;

		} // End of tag has the required name

	} // End of successfully got tag name

	// Free string memory (this runs once per call, so it must not be skipped)
	delete [] lpszTagName;

	return bResult;

} // End of function HtmlFileIsTagName
// Reads a whole file into g_lpszFileText as a null-terminated string.
//
// lpszFileName - path of the file to open for reading
//
// Returns TRUE if the file was opened, its size obtained and its contents
// read; FALSE otherwise. After a failed read g_lpszFileText is NULL.
//
// Any buffer already held in g_lpszFileText is overwritten without being
// freed, so HtmlFileFreeMemory should be called before loading again.
BOOL HtmlFileLoad( LPCTSTR lpszFileName )
{
	BOOL bResult = FALSE;

	HANDLE hFile;

	// Open file
	hFile = CreateFile( lpszFileName, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, 0, NULL );

	// Ensure that file was opened
	if( hFile != INVALID_HANDLE_VALUE )
	{
		// Successfully opened file
		DWORD dwFileSize;

		// Get file size
		dwFileSize = GetFileSize( hFile, NULL );

		// Ensure that file size was got
		if( dwFileSize != INVALID_FILE_SIZE )
		{
			// Successfully got file size
			DWORD dwBytesRead = 0;

			// Allocate string memory (one extra character for the terminator)
			g_lpszFileText = new char[ dwFileSize + sizeof( char ) ];

			// Read file text
			// The bytes-read pointer may only be NULL for overlapped reads, and
			// this read is not overlapped, so a real counter must be supplied
			if( ReadFile( hFile, g_lpszFileText, dwFileSize, &dwBytesRead, NULL ) )
			{
				// Successfully read file text

				// Terminate file text after the bytes actually read
				// (never more than dwFileSize, so this stays inside the buffer)
				g_lpszFileText[ dwBytesRead ] = '\0';

				// Update return value
				bResult = TRUE;

			} // End of successfully read file text
			else
			{
				// Unable to read file text

				// Free string memory
				delete [] g_lpszFileText;

				// Do not leave the global pointing at freed memory
				g_lpszFileText = NULL;

			} // End of unable to read file text

		} // End of successfully got file size

		// Close file
		CloseHandle( hFile );

	} // End of successfully opened file

	return bResult;

} // End of function HtmlFileLoad
// Walks every tag in the loaded file text and passes each one to a callback.
//
// lpszRequiredTagName       - passed through unchanged to the callback
// lpszRequiredAttributeName - passed through unchanged to the callback
// lpszAttributeMustContain  - passed through unchanged to the callback
// lpTagFunction             - called once per tag with a private,
//                             null-terminated copy of the tag text
//
// A tag runs from HTML_FILE_START_OF_TAG_CHARACTER up to and including the
// next HTML_FILE_END_OF_TAG_CHARACTER. Scanning stops at the first start
// character that has no matching end character.
//
// Returns the number of tags for which the callback returned a value >= 0.
// Returns 0 if no file text is loaded (g_lpszFileText is NULL).
int HtmlFileProcessTags( LPCTSTR lpszRequiredTagName, LPCTSTR lpszRequiredAttributeName, LPCTSTR lpszAttributeMustContain, int( *lpTagFunction )( LPCTSTR lpszTag, LPCTSTR lpszRequiredTagName, LPCTSTR lpszRequiredAttributeName, LPCTSTR lpszAttributeMustContain ) )
{
	int nResult = 0;

	LPTSTR lpszStartOfTag;
	LPTSTR lpszEndOfTag;
	DWORD dwTagLength;
	DWORD dwMaximumTagLength = STRING_LENGTH;

	// Nothing to scan if no file text has been loaded
	if( g_lpszFileText == NULL )
	{
		return nResult;
	}

	// Allocate string memory
	LPTSTR lpszTag = new char[ dwMaximumTagLength + sizeof( char ) ];

	// Find start of first tag
	lpszStartOfTag = strchr( g_lpszFileText, HTML_FILE_START_OF_TAG_CHARACTER );

	// Loop through all tags
	while( lpszStartOfTag )
	{
		// Find end of tag
		lpszEndOfTag = strchr( lpszStartOfTag, HTML_FILE_END_OF_TAG_CHARACTER );

		// Ensure that end of tag was found
		if( lpszEndOfTag )
		{
			// Successfully found end of tag

			// Calculate tag length: the characters from start to end inclusive,
			// plus one for the terminator that lstrcpyn counts in its limit
			dwTagLength = ( ( lpszEndOfTag - lpszStartOfTag ) + ( sizeof( char ) + sizeof( char ) ) );

			// Ensure that tag length is not greater than maximum
			if( dwTagLength > dwMaximumTagLength )
			{
				// Tag length is greater than maximum

				// Free string memory
				delete [] lpszTag;

				// Update maximum tag length
				dwMaximumTagLength = dwTagLength;

				// Re-allocate string memory
				lpszTag = new char[ dwMaximumTagLength + sizeof( char ) ];

			} // End of tag length is greater than maximum

			// Store tag (lstrcpyn copies dwTagLength - 1 characters and terminates)
			lstrcpyn( lpszTag, lpszStartOfTag, dwTagLength );

			// Call tag function
			if( ( *lpTagFunction )( lpszTag, lpszRequiredTagName, lpszRequiredAttributeName, lpszAttributeMustContain ) >= 0 )
			{
				// Successfully called tag function

				// Update return value
				nResult ++;

			} // End of successfully called tag function

			// Find start of next tag, searching onwards from the end of this one
			lpszStartOfTag = strchr( lpszEndOfTag, HTML_FILE_START_OF_TAG_CHARACTER );

		} // End of successfully found end of tag
		else
		{
			// Unable to find end of tag

			// Force exit from loop
			lpszStartOfTag = NULL;

		} // End of unable to find end of tag

	} // End of loop through all tags

	// Free string memory
	delete [] lpszTag;

	return nResult;

} // End of function HtmlFileProcessTags