mirror of
https://github.com/simon987/antiword.git
synced 2025-04-10 13:06:41 +00:00
360 lines
7.7 KiB
C
360 lines
7.7 KiB
C
/*
 * wordlib.c
 * Copyright (C) 1998-2004 A.J. van Os; Released under GNU GPL
 *
 * Description:
 * Deal with the internals of a MS Word file
 */
|
|
|
|
#include "antiword.h"
|
|
|
|
/*
 * Written by iGetVersionNumber and read back through bIsOldMacFile:
 * reset to FALSE at every classification and set to TRUE when the header
 * is classified as Word 4, 5 or 6 for Macintosh.
 */
static BOOL bOldMacFile = FALSE;
|
|
|
|
|
|
/*
|
|
* Common part of the file checking functions
|
|
*/
|
|
static BOOL
|
|
bCheckBytes(FILE *pFile, const UCHAR *aucBytes, size_t tBytes)
|
|
{
|
|
int iIndex, iChar;
|
|
|
|
fail(pFile == NULL || aucBytes == NULL || tBytes == 0);
|
|
|
|
rewind(pFile);
|
|
|
|
for (iIndex = 0; iIndex < (int)tBytes; iIndex++) {
|
|
iChar = getc(pFile);
|
|
if (iChar == EOF || iChar != (int)aucBytes[iIndex]) {
|
|
NO_DBG_HEX(iChar);
|
|
NO_DBG_HEX(aucBytes[iIndex]);
|
|
return FALSE;
|
|
}
|
|
}
|
|
return TRUE;
|
|
} /* end of bCheckBytes */
|
|
|
|
/*
|
|
* This function checks whether the given file is or is not a "Word for DOS"
|
|
* document
|
|
*/
|
|
BOOL
|
|
bIsWordForDosFile(FILE *pFile, long lFilesize)
|
|
{
|
|
static UCHAR aucBytes[] =
|
|
{ 0x31, 0xbe, 0x00, 0x00, 0x00, 0xab }; /* Word for DOS */
|
|
|
|
DBG_MSG("bIsWordForDosFile");
|
|
|
|
if (pFile == NULL || lFilesize < 0) {
|
|
DBG_MSG("No proper file given");
|
|
return FALSE;
|
|
}
|
|
if (lFilesize < 128) {
|
|
DBG_MSG("File too small to be a Word document");
|
|
return FALSE;
|
|
}
|
|
return bCheckBytes(pFile, aucBytes, elementsof(aucBytes));
|
|
} /* end of bIsWordForDosFile */
|
|
|
|
/*
|
|
* This function checks whether the given file is or is not a file with an
|
|
* OLE envelope (That is a document made by Word 6 or later)
|
|
*/
|
|
static BOOL
|
|
bIsWordFileWithOLE(FILE *pFile, long lFilesize)
|
|
{
|
|
static UCHAR aucBytes[] =
|
|
{ 0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1 };
|
|
int iTailLen;
|
|
|
|
if (pFile == NULL || lFilesize < 0) {
|
|
DBG_MSG("No proper file given");
|
|
return FALSE;
|
|
}
|
|
if (lFilesize < (long)BIG_BLOCK_SIZE * 3) {
|
|
DBG_MSG("This file is too small to be a Word document");
|
|
return FALSE;
|
|
}
|
|
|
|
iTailLen = (int)(lFilesize % BIG_BLOCK_SIZE);
|
|
switch (iTailLen) {
|
|
case 0: /* No tail, as it should be */
|
|
break;
|
|
case 1:
|
|
case 2: /* Filesize mismatch or a buggy email program */
|
|
if ((int)(lFilesize % 3) == iTailLen) {
|
|
DBG_DEC(lFilesize);
|
|
return FALSE;
|
|
}
|
|
/*
|
|
* Ignore extra bytes caused by buggy email programs.
|
|
* They have bugs in their base64 encoding or decoding.
|
|
* 3 bytes -> 4 ascii chars -> 3 bytes
|
|
*/
|
|
DBG_MSG("Document with extra bytes");
|
|
break;
|
|
default: /* Wrong filesize for a Word document */
|
|
DBG_DEC(lFilesize);
|
|
DBG_DEC(iTailLen);
|
|
return FALSE;
|
|
}
|
|
return bCheckBytes(pFile, aucBytes, elementsof(aucBytes));
|
|
} /* end of bIsWordFileWithOLE */
|
|
|
|
/*
|
|
* This function checks whether the given file is or is not a RTF document
|
|
*/
|
|
BOOL
|
|
bIsRtfFile(FILE *pFile)
|
|
{
|
|
static UCHAR aucBytes[] =
|
|
{ '{', '\\', 'r', 't', 'f', '1' };
|
|
|
|
DBG_MSG("bIsRtfFile");
|
|
|
|
return bCheckBytes(pFile, aucBytes, elementsof(aucBytes));
|
|
} /* end of bIsRtfFile */
|
|
|
|
/*
|
|
* This function checks whether the given file is or is not a WP document
|
|
*/
|
|
BOOL
|
|
bIsWordPerfectFile(FILE *pFile)
|
|
{
|
|
static UCHAR aucBytes[] =
|
|
{ 0xff, 'W', 'P', 'C' };
|
|
|
|
DBG_MSG("bIsWordPerfectFile");
|
|
|
|
return bCheckBytes(pFile, aucBytes, elementsof(aucBytes));
|
|
} /* end of bIsWordPerfectFile */
|
|
|
|
/*
|
|
* This function checks whether the given file is or is not a "Win Word 1 or 2"
|
|
* document
|
|
*/
|
|
BOOL
|
|
bIsWinWord12File(FILE *pFile, long lFilesize)
|
|
{
|
|
static UCHAR aucBytes[2][4] = {
|
|
{ 0x9b, 0xa5, 0x21, 0x00 }, /* Win Word 1.x */
|
|
{ 0xdb, 0xa5, 0x2d, 0x00 }, /* Win Word 2.0 */
|
|
};
|
|
int iIndex;
|
|
|
|
DBG_MSG("bIsWinWord12File");
|
|
|
|
if (pFile == NULL || lFilesize < 0) {
|
|
DBG_MSG("No proper file given");
|
|
return FALSE;
|
|
}
|
|
if (lFilesize < 384) {
|
|
DBG_MSG("This file is too small to be a Word document");
|
|
return FALSE;
|
|
}
|
|
|
|
for (iIndex = 0; iIndex < (int)elementsof(aucBytes); iIndex++) {
|
|
if (bCheckBytes(pFile,
|
|
aucBytes[iIndex],
|
|
elementsof(aucBytes[iIndex]))) {
|
|
return TRUE;
|
|
}
|
|
}
|
|
return FALSE;
|
|
} /* end of bIsWinWord12File */
|
|
|
|
/*
|
|
* This function checks whether the given file is or is not a "Mac Word 4 or 5"
|
|
* document
|
|
*/
|
|
BOOL
|
|
bIsMacWord45File(FILE *pFile)
|
|
{
|
|
static UCHAR aucBytes[2][6] = {
|
|
{ 0xfe, 0x37, 0x00, 0x1c, 0x00, 0x00 }, /* Mac Word 4 */
|
|
{ 0xfe, 0x37, 0x00, 0x23, 0x00, 0x00 }, /* Mac Word 5 */
|
|
};
|
|
int iIndex;
|
|
|
|
DBG_MSG("bIsMacWord45File");
|
|
|
|
for (iIndex = 0; iIndex < (int)elementsof(aucBytes); iIndex++) {
|
|
if (bCheckBytes(pFile,
|
|
aucBytes[iIndex],
|
|
elementsof(aucBytes[iIndex]))) {
|
|
return TRUE;
|
|
}
|
|
}
|
|
return FALSE;
|
|
} /* end of bIsMacWord45File */
|
|
|
|
/*
 * iGuessVersionNumber - guess the Word version number from first few bytes
 *
 * pFile	the open file to inspect
 * lFilesize	the size of the file in bytes
 *
 * The checks are tried in a fixed order and the first one that succeeds
 * decides the answer:
 *	0	Word for DOS
 *	2	Win Word 1 or 2
 *	5	Mac Word 4 or 5
 *	6	a file with an OLE envelope
 *
 * Returns the guessed version number or -1 when no guess is possible
 */
int
iGuessVersionNumber(FILE *pFile, long lFilesize)
{
	int	iGuess;

	iGuess = -1;
	if (bIsWordForDosFile(pFile, lFilesize)) {
		iGuess = 0;
	} else if (bIsWinWord12File(pFile, lFilesize)) {
		iGuess = 2;
	} else if (bIsMacWord45File(pFile)) {
		iGuess = 5;
	} else if (bIsWordFileWithOLE(pFile, lFilesize)) {
		iGuess = 6;
	}
	return iGuess;
} /* end of iGuessVersionNumber */
|
|
|
|
/*
|
|
* iGetVersionNumber - get the Word version number from the header
|
|
*
|
|
* Returns the version number or -1 when unknown
|
|
*/
|
|
int
|
|
iGetVersionNumber(const UCHAR *aucHeader)
|
|
{
|
|
USHORT usFib, usChse;
|
|
|
|
usFib = usGetWord(0x02, aucHeader);
|
|
if (usFib >= 0x1000) {
|
|
/* To big: must be MacWord using Big Endian */
|
|
DBG_HEX(usFib);
|
|
usFib = usGetWordBE(0x02, aucHeader);
|
|
}
|
|
DBG_DEC(usFib);
|
|
bOldMacFile = FALSE;
|
|
switch (usFib) {
|
|
case 0:
|
|
DBG_MSG("Word for DOS");
|
|
return 0;
|
|
case 28:
|
|
DBG_MSG("Word 4 for Macintosh");
|
|
bOldMacFile = TRUE;
|
|
return 4;
|
|
case 33:
|
|
DBG_MSG("Word 1.x for Windows");
|
|
return 1;
|
|
case 35:
|
|
DBG_MSG("Word 5 for Macintosh");
|
|
bOldMacFile = TRUE;
|
|
return 5;
|
|
case 45:
|
|
DBG_MSG("Word 2 for Windows");
|
|
return 2;
|
|
case 101:
|
|
case 102:
|
|
DBG_MSG("Word 6 for Windows");
|
|
return 6;
|
|
case 103:
|
|
case 104:
|
|
usChse = usGetWord(0x14, aucHeader);
|
|
DBG_DEC(usChse);
|
|
switch (usChse) {
|
|
case 0:
|
|
DBG_MSG("Word 7 for Win95");
|
|
return 7;
|
|
case 256:
|
|
DBG_MSG("Word 6 for Macintosh");
|
|
bOldMacFile = TRUE;
|
|
return 6;
|
|
default:
|
|
DBG_FIXME();
|
|
if ((int)ucGetByte(0x05, aucHeader) == 0xe0) {
|
|
DBG_MSG("Word 7 for Win95");
|
|
return 7;
|
|
}
|
|
DBG_MSG("Word 6 for Macintosh");
|
|
bOldMacFile = TRUE;
|
|
return 6;
|
|
}
|
|
default:
|
|
usChse = usGetWord(0x14, aucHeader);
|
|
DBG_DEC(usChse);
|
|
if (usFib < 192) {
|
|
/* Unknown or unsupported version of Word */
|
|
DBG_DEC(usFib);
|
|
return -1;
|
|
}
|
|
DBG_MSG_C(usChse != 256, "Word97 for Win95/98/NT");
|
|
DBG_MSG_C(usChse == 256, "Word98 for Macintosh");
|
|
return 8;
|
|
}
|
|
} /* end of iGetVersionNumber */
|
|
|
|
/*
|
|
* TRUE if the current file was made by Word version 6 or older on an
|
|
* Apple Macintosh, otherwise FALSE.
|
|
* This function hides the methode of how to find out from the rest of the
|
|
* program.
|
|
*/
|
|
BOOL
|
|
bIsOldMacFile(void)
|
|
{
|
|
return bOldMacFile;
|
|
} /* end of bIsOldMacFile */
|
|
|
|
/*
 * iInitDocument - initialize a document
 *
 * pFile	the open file holding the document
 * lFilesize	the size of the file in bytes
 *
 * The guess made by iGuessVersionNumber selects the initializer:
 *	0 -> iInitDocumentDOS
 *	2 -> iInitDocumentWIN
 *	5 -> iInitDocumentMAC
 *	6 -> iInitDocumentOLE
 *
 * Returns the version of Word that made the document or -1
 */
int
iInitDocument(FILE *pFile, long lFilesize)
{
	int	iGuess;

	iGuess = iGuessVersionNumber(pFile, lFilesize);
	if (iGuess == 0) {
		return iInitDocumentDOS(pFile, lFilesize);
	}
	if (iGuess == 2) {
		return iInitDocumentWIN(pFile, lFilesize);
	}
	if (iGuess == 5) {
		return iInitDocumentMAC(pFile, lFilesize);
	}
	if (iGuess == 6) {
		return iInitDocumentOLE(pFile, lFilesize);
	}
	/* No initializer for this guess */
	DBG_DEC(iGuess);
	return -1;
} /* end of iInitDocument */
|
|
|
|
/*
 * vFreeDocument - free a document by free-ing its parts
 *
 * Calls the destroy function of every per-document list and table, one
 * after the other in a fixed order.
 */
void
vFreeDocument(void)
{
	DBG_MSG("vFreeDocument");

	/* Free the memory: block lists */
	vDestroyTextBlockList();
	vDestroyDataBlockList();

	/* Free the memory: property, style and font lists */
	vDestroyListInfoList();
	vDestroyRowInfoList();
	vDestroyStyleInfoList();
	vDestroyFontInfoList();
	vDestroyStylesheetList();
	vDestroyPictInfoList();

	/* Free the memory: document wide information */
	vDestroyDocumentInfoList();
	vDestroySectionInfoList();
	vDestroyHdrFtrInfoList();
	vDestroyPropModList();
	vDestroyNotesInfoLists();
	vDestroyFontTable();
	vDestroySummaryInfo();
} /* end of vFreeDocument */
|