mirror of
https://github.com/simon987/antiword.git
synced 2025-04-10 13:06:41 +00:00
210 lines
5.5 KiB
C
210 lines
5.5 KiB
C
/*
 * wordwin.c
 * Copyright (C) 2002-2005 A.J. van Os; Released under GPL
 *
 * Description:
 * Deal with the WIN internals of a MS Word file
 */
|
|
|
|
#include "antiword.h"
|
|
|
|
|
|
/*
|
|
* bGetDocumentText - make a list of the text blocks of a Word document
|
|
*
|
|
* Return TRUE when succesful, otherwise FALSE
|
|
*/
|
|
static BOOL
|
|
bGetDocumentText(FILE *pFile, const UCHAR *aucHeader)
|
|
{
|
|
text_block_type tTextBlock;
|
|
ULONG ulBeginOfText;
|
|
ULONG ulTextLen, ulFootnoteLen;
|
|
ULONG ulHdrFtrLen, ulMacroLen, ulAnnotationLen;
|
|
UINT uiQuickSaves;
|
|
USHORT usDocStatus;
|
|
BOOL bTemplate, bFastSaved, bEncrypted, bSuccess;
|
|
|
|
fail(pFile == NULL);
|
|
fail(aucHeader == NULL);
|
|
|
|
DBG_MSG("bGetDocumentText");
|
|
|
|
/* Get the status flags from the header */
|
|
usDocStatus = usGetWord(0x0a, aucHeader);
|
|
DBG_HEX(usDocStatus);
|
|
bTemplate = (usDocStatus & BIT(0)) != 0;
|
|
DBG_MSG_C(bTemplate, "This document is a Template");
|
|
bFastSaved = (usDocStatus & BIT(2)) != 0;
|
|
uiQuickSaves = (UINT)(usDocStatus & 0x00f0) >> 4;
|
|
DBG_MSG_C(bFastSaved, "This document is Fast Saved");
|
|
DBG_DEC_C(bFastSaved, uiQuickSaves);
|
|
if (bFastSaved) {
|
|
werr(0, "Word2: fast saved documents are not supported yet");
|
|
return FALSE;
|
|
}
|
|
bEncrypted = (usDocStatus & BIT(8)) != 0;
|
|
if (bEncrypted) {
|
|
werr(0, "Encrypted documents are not supported");
|
|
return FALSE;
|
|
}
|
|
|
|
/* Get length information */
|
|
ulBeginOfText = ulGetLong(0x18, aucHeader);
|
|
DBG_HEX(ulBeginOfText);
|
|
ulTextLen = ulGetLong(0x34, aucHeader);
|
|
ulFootnoteLen = ulGetLong(0x38, aucHeader);
|
|
ulHdrFtrLen = ulGetLong(0x3c, aucHeader);
|
|
ulMacroLen = ulGetLong(0x40, aucHeader);
|
|
ulAnnotationLen = ulGetLong(0x44, aucHeader);
|
|
DBG_DEC(ulTextLen);
|
|
DBG_DEC(ulFootnoteLen);
|
|
DBG_DEC(ulHdrFtrLen);
|
|
DBG_DEC(ulMacroLen);
|
|
DBG_DEC(ulAnnotationLen);
|
|
if (bFastSaved) {
|
|
bSuccess = FALSE;
|
|
} else {
|
|
tTextBlock.ulFileOffset = ulBeginOfText;
|
|
tTextBlock.ulCharPos = ulBeginOfText;
|
|
tTextBlock.ulLength = ulTextLen +
|
|
ulFootnoteLen +
|
|
ulHdrFtrLen + ulMacroLen + ulAnnotationLen;
|
|
tTextBlock.bUsesUnicode = FALSE;
|
|
tTextBlock.usPropMod = IGNORE_PROPMOD;
|
|
bSuccess = bAdd2TextBlockList(&tTextBlock);
|
|
DBG_HEX_C(!bSuccess, tTextBlock.ulFileOffset);
|
|
DBG_HEX_C(!bSuccess, tTextBlock.ulCharPos);
|
|
DBG_DEC_C(!bSuccess, tTextBlock.ulLength);
|
|
DBG_DEC_C(!bSuccess, tTextBlock.bUsesUnicode);
|
|
DBG_DEC_C(!bSuccess, tTextBlock.usPropMod);
|
|
}
|
|
|
|
if (bSuccess) {
|
|
vSplitBlockList(pFile,
|
|
ulTextLen,
|
|
ulFootnoteLen,
|
|
ulHdrFtrLen,
|
|
ulMacroLen,
|
|
ulAnnotationLen,
|
|
0,
|
|
0,
|
|
0,
|
|
FALSE);
|
|
} else {
|
|
vDestroyTextBlockList();
|
|
werr(0, "I can't find the text of this document");
|
|
}
|
|
return bSuccess;
|
|
} /* end of bGetDocumentText */
|
|
|
|
/*
|
|
* vGetDocumentData - make a list of the data blocks of a Word document
|
|
*/
|
|
static void
|
|
vGetDocumentData(FILE *pFile, const UCHAR *aucHeader)
|
|
{
|
|
data_block_type tDataBlock;
|
|
options_type tOptions;
|
|
ULONG ulEndOfText, ulBeginCharInfo;
|
|
BOOL bFastSaved, bHasImages, bSuccess;
|
|
USHORT usDocStatus;
|
|
|
|
/* Get the options */
|
|
vGetOptions(&tOptions);
|
|
|
|
/* Get the status flags from the header */
|
|
usDocStatus = usGetWord(0x0a, aucHeader);
|
|
DBG_HEX(usDocStatus);
|
|
bFastSaved = (usDocStatus & BIT(2)) != 0;
|
|
bHasImages = (usDocStatus & BIT(3)) != 0;
|
|
|
|
if (!bHasImages ||
|
|
tOptions.eConversionType == conversion_text ||
|
|
tOptions.eConversionType == conversion_fmt_text ||
|
|
tOptions.eConversionType == conversion_xml ||
|
|
tOptions.eImageLevel == level_no_images) {
|
|
/*
|
|
* No images in the document or text-only output or
|
|
* no images wanted, so no data blocks will be needed
|
|
*/
|
|
vDestroyDataBlockList();
|
|
return;
|
|
}
|
|
|
|
if (bFastSaved) {
|
|
bSuccess = FALSE;
|
|
} else {
|
|
/* This datablock is too big, but it contains all images */
|
|
ulEndOfText = ulGetLong(0x1c, aucHeader);
|
|
DBG_HEX(ulEndOfText);
|
|
ulBeginCharInfo = ulGetLong(0xa0, aucHeader);
|
|
DBG_HEX(ulBeginCharInfo);
|
|
if (ulBeginCharInfo > ulEndOfText) {
|
|
tDataBlock.ulFileOffset = ulEndOfText;
|
|
tDataBlock.ulDataPos = ulEndOfText;
|
|
tDataBlock.ulLength = ulBeginCharInfo - ulEndOfText;
|
|
bSuccess = bAdd2DataBlockList(&tDataBlock);
|
|
DBG_HEX_C(!bSuccess, tDataBlock.ulFileOffset);
|
|
DBG_HEX_C(!bSuccess, tDataBlock.ulDataPos);
|
|
DBG_DEC_C(!bSuccess, tDataBlock.ulLength);
|
|
} else {
|
|
bSuccess = ulBeginCharInfo == ulEndOfText;
|
|
}
|
|
}
|
|
|
|
if (!bSuccess) {
|
|
vDestroyDataBlockList();
|
|
werr(0, "I can't find the data of this document");
|
|
}
|
|
} /* end of vGetDocumentData */
|
|
|
|
/*
|
|
* iInitDocumentWIN - initialize an WIN document
|
|
*
|
|
* Returns the version of Word that made the document or -1
|
|
*/
|
|
int
|
|
iInitDocumentWIN(FILE *pFile, long lFilesize)
|
|
{
|
|
int iWordVersion;
|
|
BOOL bSuccess;
|
|
USHORT usIdent;
|
|
UCHAR aucHeader[384];
|
|
|
|
fail(pFile == NULL);
|
|
|
|
if (lFilesize < 384) {
|
|
return -1;
|
|
}
|
|
|
|
/* Read the headerblock */
|
|
if (!bReadBytes(aucHeader, 384, 0x00, pFile)) {
|
|
return -1;
|
|
}
|
|
/* Get the "magic number" from the header */
|
|
usIdent = usGetWord(0x00, aucHeader);
|
|
DBG_HEX(usIdent);
|
|
fail(usIdent != 0xa59b && /* WinWord 1.x */
|
|
usIdent != 0xa5db); /* WinWord 2.0 */
|
|
iWordVersion = iGetVersionNumber(aucHeader);
|
|
if (iWordVersion != 1 && iWordVersion != 2) {
|
|
werr(0, "This file is not from ''Win Word 1 or 2'.");
|
|
return -1;
|
|
}
|
|
bSuccess = bGetDocumentText(pFile, aucHeader);
|
|
if (bSuccess) {
|
|
vGetDocumentData(pFile, aucHeader);
|
|
vGetPropertyInfo(pFile, NULL,
|
|
NULL, 0, NULL, 0,
|
|
aucHeader, iWordVersion);
|
|
vSetDefaultTabWidth(pFile, NULL,
|
|
NULL, 0, NULL, 0,
|
|
aucHeader, iWordVersion);
|
|
vGetNotesInfo(pFile, NULL,
|
|
NULL, 0, NULL, 0,
|
|
aucHeader, iWordVersion);
|
|
}
|
|
return bSuccess ? iWordVersion : -1;
|
|
} /* end of iInitDocumentWIN */
|