/*
 * Mirror of https://github.com/simon987/antiword.git
 * (synced 2025-04-10 13:06:41 +00:00; 488 lines, 12 KiB, C)
 */
/*
 * stylelist.c
 * Copyright (C) 1998-2005 A.J. van Os; Released under GNU GPL
 *
 * Description:
 * Build, read and destroy a list of Word style information
 */
|
|
|
|
#include <stdlib.h>
|
|
#include <stddef.h>
|
|
#include <ctype.h>
|
|
#include "antiword.h"
|
|
|
|
|
|
/*
|
|
* Private structure to hide the way the information
|
|
* is stored from the rest of the program
|
|
*/
|
|
typedef struct style_mem_tag {
|
|
style_block_type tInfo;
|
|
ULONG ulSequenceNumber;
|
|
struct style_mem_tag *pNext;
|
|
} style_mem_type;
|
|
|
|
/* Variables needed to write the Style Information List */
|
|
static style_mem_type *pAnchor = NULL;
|
|
static style_mem_type *pStyleLast = NULL;
|
|
/* The type of conversion */
|
|
static conversion_type eConversionType = conversion_unknown;
|
|
/* The character set encoding */
|
|
static encoding_type eEncoding = encoding_neutral;
|
|
/* Values for efficiency reasons */
|
|
static const style_mem_type *pMidPtr = NULL;
|
|
static BOOL bMoveMidPtr = FALSE;
|
|
static BOOL bInSequence = TRUE;
|
|
|
|
|
|
/*
|
|
* vDestroyStyleInfoList - destroy the Style Information List
|
|
*/
|
|
void
|
|
vDestroyStyleInfoList(void)
|
|
{
|
|
style_mem_type *pCurr, *pNext;
|
|
|
|
DBG_MSG("vDestroyStyleInfoList");
|
|
|
|
/* Free the Style Information List */
|
|
pCurr = pAnchor;
|
|
while (pCurr != NULL) {
|
|
pNext = pCurr->pNext;
|
|
pCurr = xfree(pCurr);
|
|
pCurr = pNext;
|
|
}
|
|
pAnchor = NULL;
|
|
/* Reset all control variables */
|
|
pStyleLast = NULL;
|
|
pMidPtr = NULL;
|
|
bMoveMidPtr = FALSE;
|
|
bInSequence = TRUE;
|
|
} /* end of vDestroyStyleInfoList */
|
|
|
|
/*
|
|
* vConvertListCharacter - convert the list character
|
|
*/
|
|
static void
|
|
vConvertListCharacter(UCHAR ucNFC, USHORT usListChar, char *szListChar)
|
|
{
|
|
options_type tOptions;
|
|
size_t tLen;
|
|
|
|
fail(szListChar == NULL);
|
|
fail(szListChar[0] != '\0');
|
|
|
|
if (usListChar < 0x80 && isprint((int)usListChar)) {
|
|
DBG_CHR_C(isalnum((int)usListChar), usListChar);
|
|
szListChar[0] = (char)usListChar;
|
|
szListChar[1] = '\0';
|
|
return;
|
|
}
|
|
|
|
if (ucNFC != LIST_SPECIAL &&
|
|
ucNFC != LIST_SPECIAL2 &&
|
|
ucNFC != LIST_BULLETS) {
|
|
szListChar[0] = '.';
|
|
szListChar[1] = '\0';
|
|
return;
|
|
}
|
|
|
|
if (eConversionType == conversion_unknown ||
|
|
eEncoding == encoding_neutral) {
|
|
vGetOptions(&tOptions);
|
|
eConversionType = tOptions.eConversionType;
|
|
eEncoding = tOptions.eEncoding;
|
|
}
|
|
|
|
switch (usListChar) {
|
|
case 0x0000: case 0x00b7: case 0x00fe: case 0xf021: case 0xf043:
|
|
case 0xf06c: case 0xf093: case 0xf0b7:
|
|
usListChar = 0x2022; /* BULLET */
|
|
break;
|
|
case 0x0096: case 0xf02d:
|
|
usListChar = 0x2013; /* EN DASH */
|
|
break;
|
|
case 0x00a8:
|
|
usListChar = 0x2666; /* BLACK DIAMOND SUIT */
|
|
break;
|
|
case 0x00de:
|
|
usListChar = 0x21d2; /* RIGHTWARDS DOUBLE ARROW */
|
|
break;
|
|
case 0x00e0: case 0xf074:
|
|
usListChar = 0x25ca; /* LOZENGE */
|
|
break;
|
|
case 0x00e1:
|
|
usListChar = 0x2329; /* LEFT ANGLE BRACKET */
|
|
break;
|
|
case 0xf020:
|
|
usListChar = 0x0020; /* SPACE */
|
|
break;
|
|
case 0xf041:
|
|
usListChar = 0x270c; /* VICTORY HAND */
|
|
break;
|
|
case 0xf066:
|
|
usListChar = 0x03d5; /* GREEK PHI SYMBOL */
|
|
break;
|
|
case 0xf06e:
|
|
usListChar = 0x25a0; /* BLACK SQUARE */
|
|
break;
|
|
case 0xf06f: case 0xf070: case 0xf0a8:
|
|
usListChar = 0x25a1; /* WHITE SQUARE */
|
|
break;
|
|
case 0xf071:
|
|
usListChar = 0x2751; /* LOWER RIGHT SHADOWED WHITE SQUARE */
|
|
break;
|
|
case 0xf075: case 0xf077:
|
|
usListChar = 0x25c6; /* BLACK DIAMOND */
|
|
break;
|
|
case 0xf076:
|
|
usListChar = 0x2756; /* BLACK DIAMOND MINUS WHITE X */
|
|
break;
|
|
case 0xf0a7:
|
|
usListChar = 0x25aa; /* BLACK SMALL SQUARE */
|
|
break;
|
|
case 0xf0d8:
|
|
usListChar = 0x27a2; /* RIGHTWARDS ARROWHEAD */
|
|
break;
|
|
case 0xf0e5:
|
|
usListChar = 0x2199; /* SOUTH WEST ARROW */
|
|
break;
|
|
case 0xf0f0:
|
|
usListChar = 0x21e8; /* RIGHTWARDS WHITE ARROW */
|
|
break;
|
|
case 0xf0fc:
|
|
usListChar = 0x2713; /* CHECK MARK */
|
|
break;
|
|
default:
|
|
if ((usListChar >= 0xe000 && usListChar < 0xf900) ||
|
|
(usListChar < 0x80 && !isprint((int)usListChar))) {
|
|
/*
|
|
* All remaining private area characters and all
|
|
* remaining non-printable ASCII characters to their
|
|
* default bullet character
|
|
*/
|
|
DBG_HEX(usListChar);
|
|
DBG_FIXME();
|
|
if (ucNFC == LIST_SPECIAL || ucNFC == LIST_SPECIAL2) {
|
|
usListChar = 0x2190; /* LEFTWARDS ARROW */
|
|
} else {
|
|
usListChar = 0x2022; /* BULLET */
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
|
|
if (eEncoding == encoding_utf_8) {
|
|
tLen = tUcs2Utf8(usListChar, szListChar, 4);
|
|
szListChar[tLen] = '\0';
|
|
} else {
|
|
switch (usListChar) {
|
|
case 0x03d5: case 0x25a1: case 0x25c6: case 0x25ca:
|
|
case 0x2751:
|
|
szListChar[0] = 'o';
|
|
break;
|
|
case 0x2013: case 0x2500:
|
|
szListChar[0] = '-';
|
|
break;
|
|
case 0x2190: case 0x2199: case 0x2329:
|
|
szListChar[0] = '<';
|
|
break;
|
|
case 0x21d2:
|
|
szListChar[0] = '=';
|
|
break;
|
|
case 0x21e8: case 0x27a2:
|
|
szListChar[0] = '>';
|
|
break;
|
|
case 0x25a0: case 0x25aa:
|
|
szListChar[0] = '.';
|
|
break;
|
|
case 0x2666:
|
|
szListChar[0] = OUR_DIAMOND;
|
|
break;
|
|
case 0x270c:
|
|
szListChar[0] = 'x';
|
|
break;
|
|
case 0x2713:
|
|
szListChar[0] = 'V';
|
|
break;
|
|
case 0x2756:
|
|
szListChar[0] = '*';
|
|
break;
|
|
case 0x2022:
|
|
default:
|
|
vGetBulletValue(eConversionType, eEncoding,
|
|
szListChar, 2);
|
|
break;
|
|
}
|
|
tLen = 1;
|
|
}
|
|
szListChar[tLen] = '\0';
|
|
} /* end of vConvertListCharacter */
|
|
|
|
/*
|
|
* eGetNumType - get the level type from the given level number
|
|
*
|
|
* Returns the level type
|
|
*/
|
|
level_type_enum
|
|
eGetNumType(UCHAR ucNumLevel)
|
|
{
|
|
switch (ucNumLevel) {
|
|
case 1: case 2: case 3: case 4: case 5:
|
|
case 6: case 7: case 8: case 9:
|
|
return level_type_outline;
|
|
case 10:
|
|
return level_type_numbering;
|
|
case 11:
|
|
return level_type_sequence;
|
|
case 12:
|
|
return level_type_pause;
|
|
default:
|
|
return level_type_none;
|
|
}
|
|
} /* end of eGetNumType */
|
|
|
|
/*
|
|
* vCorrectStyleValues - correct style values that Antiword can't use
|
|
*/
|
|
void
|
|
vCorrectStyleValues(style_block_type *pStyleBlock)
|
|
{
|
|
if (pStyleBlock->usBeforeIndent > 0x7fff) {
|
|
pStyleBlock->usBeforeIndent = 0;
|
|
} else if (pStyleBlock->usBeforeIndent > 2160) {
|
|
/* 2160 twips = 1.5 inches or 38.1 mm */
|
|
DBG_DEC(pStyleBlock->usBeforeIndent);
|
|
pStyleBlock->usBeforeIndent = 2160;
|
|
}
|
|
if (pStyleBlock->usIstd >= 1 &&
|
|
pStyleBlock->usIstd <= 9 &&
|
|
pStyleBlock->usBeforeIndent < HEADING_GAP) {
|
|
NO_DBG_DEC(pStyleBlock->usBeforeIndent);
|
|
pStyleBlock->usBeforeIndent = HEADING_GAP;
|
|
}
|
|
|
|
if (pStyleBlock->usAfterIndent > 0x7fff) {
|
|
pStyleBlock->usAfterIndent = 0;
|
|
} else if (pStyleBlock->usAfterIndent > 2160) {
|
|
/* 2160 twips = 1.5 inches or 38.1 mm */
|
|
DBG_DEC(pStyleBlock->usAfterIndent);
|
|
pStyleBlock->usAfterIndent = 2160;
|
|
}
|
|
if (pStyleBlock->usIstd >= 1 &&
|
|
pStyleBlock->usIstd <= 9 &&
|
|
pStyleBlock->usAfterIndent < HEADING_GAP) {
|
|
NO_DBG_DEC(pStyleBlock->usAfterIndent);
|
|
pStyleBlock->usAfterIndent = HEADING_GAP;
|
|
}
|
|
|
|
if (pStyleBlock->sLeftIndent < 0) {
|
|
pStyleBlock->sLeftIndent = 0;
|
|
}
|
|
if (pStyleBlock->sRightIndent > 0) {
|
|
pStyleBlock->sRightIndent = 0;
|
|
}
|
|
vConvertListCharacter(pStyleBlock->ucNFC,
|
|
pStyleBlock->usListChar,
|
|
pStyleBlock->szListChar);
|
|
} /* end of vCorrectStyleValues */
|
|
|
|
/*
|
|
* vAdd2StyleInfoList - Add an element to the Style Information List
|
|
*/
|
|
void
|
|
vAdd2StyleInfoList(const style_block_type *pStyleBlock)
|
|
{
|
|
style_mem_type *pListMember;
|
|
|
|
fail(pStyleBlock == NULL);
|
|
|
|
NO_DBG_MSG("bAdd2StyleInfoList");
|
|
|
|
if (pStyleBlock->ulFileOffset == FC_INVALID) {
|
|
NO_DBG_DEC(pStyleBlock->usIstd);
|
|
return;
|
|
}
|
|
|
|
NO_DBG_HEX(pStyleBlock->ulFileOffset);
|
|
NO_DBG_DEC_C(pStyleBlock->sLeftIndent != 0,
|
|
pStyleBlock->sLeftIndent);
|
|
NO_DBG_DEC_C(pStyleBlock->sRightIndent != 0,
|
|
pStyleBlock->sRightIndent);
|
|
NO_DBG_DEC_C(pStyleBlock->bNumPause, pStyleBlock->bNumPause);
|
|
NO_DBG_DEC_C(pStyleBlock->usIstd != 0, pStyleBlock->usIstd);
|
|
NO_DBG_DEC_C(pStyleBlock->usStartAt != 1, pStyleBlock->usStartAt);
|
|
NO_DBG_DEC_C(pStyleBlock->usAfterIndent != 0,
|
|
pStyleBlock->usAfterIndent);
|
|
NO_DBG_DEC_C(pStyleBlock->ucAlignment != 0, pStyleBlock->ucAlignment);
|
|
NO_DBG_DEC(pStyleBlock->ucNFC);
|
|
NO_DBG_HEX(pStyleBlock->usListChar);
|
|
|
|
if (pStyleLast != NULL &&
|
|
pStyleLast->tInfo.ulFileOffset == pStyleBlock->ulFileOffset) {
|
|
/*
|
|
* If two consecutive styles share the same
|
|
* offset, remember only the last style
|
|
*/
|
|
fail(pStyleLast->pNext != NULL);
|
|
pStyleLast->tInfo = *pStyleBlock;
|
|
/* Correct the values where needed */
|
|
vCorrectStyleValues(&pStyleLast->tInfo);
|
|
return;
|
|
}
|
|
|
|
/* Create list member */
|
|
pListMember = xmalloc(sizeof(style_mem_type));
|
|
/* Fill the list member */
|
|
pListMember->tInfo = *pStyleBlock;
|
|
pListMember->pNext = NULL;
|
|
/* Add the sequence number */
|
|
pListMember->ulSequenceNumber =
|
|
ulGetSeqNumber(pListMember->tInfo.ulFileOffset);
|
|
/* Correct the values where needed */
|
|
vCorrectStyleValues(&pListMember->tInfo);
|
|
/* Add the new member to the list */
|
|
if (pAnchor == NULL) {
|
|
pAnchor = pListMember;
|
|
/* For efficiency */
|
|
pMidPtr = pAnchor;
|
|
bMoveMidPtr = FALSE;
|
|
bInSequence = TRUE;
|
|
} else {
|
|
fail(pStyleLast == NULL);
|
|
pStyleLast->pNext = pListMember;
|
|
/* For efficiency */
|
|
if (bMoveMidPtr) {
|
|
pMidPtr = pMidPtr->pNext;
|
|
bMoveMidPtr = FALSE;
|
|
} else {
|
|
bMoveMidPtr = TRUE;
|
|
}
|
|
if (bInSequence) {
|
|
bInSequence = pListMember->ulSequenceNumber >
|
|
pStyleLast->ulSequenceNumber;
|
|
}
|
|
}
|
|
pStyleLast = pListMember;
|
|
} /* end of vAdd2StyleInfoList */
|
|
|
|
/*
|
|
* Get the record that follows the given recored in the Style Information List
|
|
*/
|
|
const style_block_type *
|
|
pGetNextStyleInfoListItem(const style_block_type *pCurr)
|
|
{
|
|
const style_mem_type *pRecord;
|
|
size_t tOffset;
|
|
|
|
if (pCurr == NULL) {
|
|
if (pAnchor == NULL) {
|
|
/* There are no records */
|
|
return NULL;
|
|
}
|
|
/* The first record is the only one without a predecessor */
|
|
return &pAnchor->tInfo;
|
|
}
|
|
tOffset = offsetof(style_mem_type, tInfo);
|
|
/* Many casts to prevent alignment warnings */
|
|
pRecord = (style_mem_type *)(void *)((char *)pCurr - tOffset);
|
|
fail(pCurr != &pRecord->tInfo);
|
|
if (pRecord->pNext == NULL) {
|
|
/* The last record has no successor */
|
|
return NULL;
|
|
}
|
|
return &pRecord->pNext->tInfo;
|
|
} /* end of pGetNextStyleInfoListItem */
|
|
|
|
/*
|
|
* Get the next text style
|
|
*/
|
|
const style_block_type *
|
|
pGetNextTextStyle(const style_block_type *pCurr)
|
|
{
|
|
const style_block_type *pRecord;
|
|
|
|
pRecord = pCurr;
|
|
do {
|
|
pRecord = pGetNextStyleInfoListItem(pRecord);
|
|
} while (pRecord != NULL &&
|
|
(pRecord->eListID == hdrftr_list ||
|
|
pRecord->eListID == macro_list ||
|
|
pRecord->eListID == annotation_list));
|
|
return pRecord;
|
|
} /* end of pGetNextTextStyle */
|
|
|
|
/*
|
|
* usGetIstd - get the istd that belongs to the given file offset
|
|
*/
|
|
USHORT
|
|
usGetIstd(ULONG ulFileOffset)
|
|
{
|
|
const style_mem_type *pCurr, *pBest, *pStart;
|
|
ULONG ulSeq, ulBest;
|
|
|
|
ulSeq = ulGetSeqNumber(ulFileOffset);
|
|
if (ulSeq == FC_INVALID) {
|
|
return ISTD_NORMAL;
|
|
}
|
|
NO_DBG_HEX(ulFileOffset);
|
|
NO_DBG_DEC(ulSeq);
|
|
|
|
if (bInSequence &&
|
|
pMidPtr != NULL &&
|
|
ulSeq > pMidPtr->ulSequenceNumber) {
|
|
/* The istd is in the second half of the chained list */
|
|
pStart = pMidPtr;
|
|
} else {
|
|
pStart = pAnchor;
|
|
}
|
|
|
|
pBest = NULL;
|
|
ulBest = 0;
|
|
for (pCurr = pStart; pCurr != NULL; pCurr = pCurr->pNext) {
|
|
if (pCurr->ulSequenceNumber != FC_INVALID &&
|
|
(pBest == NULL || pCurr->ulSequenceNumber > ulBest) &&
|
|
pCurr->ulSequenceNumber <= ulSeq) {
|
|
pBest = pCurr;
|
|
ulBest = pCurr->ulSequenceNumber;
|
|
}
|
|
if (bInSequence && pCurr->ulSequenceNumber > ulSeq) {
|
|
break;
|
|
}
|
|
}
|
|
NO_DBG_DEC(ulBest);
|
|
|
|
if (pBest == NULL) {
|
|
return ISTD_NORMAL;
|
|
}
|
|
|
|
NO_DBG_DEC(pBest->tInfo.usIstd);
|
|
return pBest->tInfo.usIstd;
|
|
} /* end of usGetIstd */
|
|
|
|
/*
|
|
* bStyleImpliesList - does style info implies being part of a list
|
|
*
|
|
* Decide whether the style information implies that the given paragraph is
|
|
* part of a list
|
|
*
|
|
* Returns TRUE when the paragraph is part of a list, otherwise FALSE
|
|
*/
|
|
BOOL
|
|
bStyleImpliesList(const style_block_type *pStyle, int iWordVersion)
|
|
{
|
|
fail(pStyle == NULL);
|
|
fail(iWordVersion < 0);
|
|
|
|
if (pStyle->usIstd >= 1 && pStyle->usIstd <= 9) {
|
|
/* These are heading levels */
|
|
return FALSE;
|
|
}
|
|
if (iWordVersion < 8) {
|
|
/* Check for old style lists */
|
|
return pStyle->ucNumLevel != 0;
|
|
}
|
|
/* Check for new style lists */
|
|
return pStyle->usListIndex != 0;
|
|
} /* end of bStyleImpliesList */
|