Search in sources:

Example 1 with SprmOperation

Use of org.apache.poi.hwpf.sprm.SprmOperation in the Apache POI project.

Method rebuild of the class CHPBinTable.

/**
 * Rebuilds {@code _textRuns} into a list of CHPX entries that are split at
 * every start/end position occurring in the current runs, then merges
 * neighbouring entries whose grpprl bytes are equal.
 * <p>
 * The work is done in these phases, each followed by a DEBUG timing message:
 * <ol>
 * <li>If {@code complexFileTable} is not {@code null}, a CHPX is added for
 * every text piece whose PRM is complex, references a valid grpprl index and
 * whose grpprl contains at least one {@code SprmOperation.TYPE_CHP}
 * operation.</li>
 * <li>A copy of the runs is sorted by start position, and the position of
 * each run in {@code _textRuns} (its "file order") is recorded.</li>
 * <li>All distinct start and end positions except 0 are collected and
 * sorted.</li>
 * <li>For each interval between consecutive boundaries the overlapping old
 * CHPXs are gathered: none - an empty CHPX is created (with a warning);
 * exactly one with identical bounds - it is reused; otherwise their grpprls
 * are concatenated in file order into a new CHPX.</li>
 * <li>Adjacent CHPXs with equal grpprl bytes are merged in place.</li>
 * </ol>
 *
 * @param complexFileTable
 *            source of additional CHPX entries; may be {@code null}, in
 *            which case the first phase is skipped
 */
public void rebuild(ComplexFileTable complexFileTable) {
    long start = System.currentTimeMillis();
    if (complexFileTable != null) {
        SprmBuffer[] sprmBuffers = complexFileTable.getGrpprls();
        // adding CHPX from fast-saved SPRMs
        for (TextPiece textPiece : complexFileTable.getTextPieceTable().getTextPieces()) {
            PropertyModifier prm = textPiece.getPieceDescriptor().getPrm();
            // only complex PRMs are used here; others are skipped
            if (!prm.isComplex())
                continue;
            int igrpprl = prm.getIgrpprl();
            // guard against an index outside the grpprl array
            if (igrpprl < 0 || igrpprl >= sprmBuffers.length) {
                logger.log(POILogger.WARN, textPiece + "'s PRM references to unknown grpprl");
                continue;
            }
            // scan the referenced grpprl for at least one character-property operation
            boolean hasChp = false;
            SprmBuffer sprmBuffer = sprmBuffers[igrpprl];
            for (SprmIterator iterator = sprmBuffer.iterator(); iterator.hasNext(); ) {
                SprmOperation sprmOperation = iterator.next();
                if (sprmOperation.getType() == SprmOperation.TYPE_CHP) {
                    hasChp = true;
                    break;
                }
            }
            if (hasChp) {
                // the CHPX gets its own clone of the buffer, covering the text piece's range
                SprmBuffer newSprmBuffer = sprmBuffer.clone();
                CHPX chpx = new CHPX(textPiece.getStart(), textPiece.getEnd(), newSprmBuffer);
                _textRuns.add(chpx);
            }
        }
        logger.log(POILogger.DEBUG, "Merged with CHPX from complex file table in ", Long.valueOf(System.currentTimeMillis() - start), " ms (", Integer.valueOf(_textRuns.size()), " elements in total)");
        start = System.currentTimeMillis();
    }
    // sorted copy; _textRuns itself keeps its current order
    List<CHPX> oldChpxSortedByStartPos = new ArrayList<CHPX>(_textRuns);
    Collections.sort(oldChpxSortedByStartPos, PropertyNode.StartComparator.instance);
    logger.log(POILogger.DEBUG, "CHPX sorted by start position in ", Long.valueOf(System.currentTimeMillis() - start), " ms");
    start = System.currentTimeMillis();
    // maps each CHPX instance (by reference) to its index in _textRuns
    final Map<CHPX, Integer> chpxToFileOrder = new IdentityHashMap<CHPX, Integer>();
    {
        int counter = 0;
        for (CHPX chpx : _textRuns) {
            chpxToFileOrder.put(chpx, Integer.valueOf(counter++));
        }
    }
    // orders CHPXs by the index recorded above
    final Comparator<CHPX> chpxFileOrderComparator = new Comparator<CHPX>() {

        public int compare(CHPX o1, CHPX o2) {
            Integer i1 = chpxToFileOrder.get(o1);
            Integer i2 = chpxToFileOrder.get(o2);
            return i1.compareTo(i2);
        }
    };
    logger.log(POILogger.DEBUG, "CHPX's order map created in ", Long.valueOf(System.currentTimeMillis() - start), " ms");
    start = System.currentTimeMillis();
    List<Integer> textRunsBoundariesList;
    {
        // every distinct start and end position of the current runs
        Set<Integer> textRunsBoundariesSet = new HashSet<Integer>();
        for (CHPX chpx : _textRuns) {
            textRunsBoundariesSet.add(Integer.valueOf(chpx.getStart()));
            textRunsBoundariesSet.add(Integer.valueOf(chpx.getEnd()));
        }
        // 0 is dropped: the first interval below already starts at 0, so a
        // boundary at 0 would only produce the empty interval [0; 0)
        textRunsBoundariesSet.remove(Integer.valueOf(0));
        textRunsBoundariesList = new ArrayList<Integer>(textRunsBoundariesSet);
        Collections.sort(textRunsBoundariesList);
    }
    logger.log(POILogger.DEBUG, "Texts CHPX boundaries collected in ", Long.valueOf(System.currentTimeMillis() - start), " ms");
    start = System.currentTimeMillis();
    List<CHPX> newChpxs = new LinkedList<CHPX>();
    int lastTextRunStart = 0;
    for (Integer objBoundary : textRunsBoundariesList) {
        // current interval is [startInclusive; endExclusive), ending at this boundary
        final int boundary = objBoundary.intValue();
        final int startInclusive = lastTextRunStart;
        final int endExclusive = boundary;
        lastTextRunStart = endExclusive;
        // NOTE(review): binarySearch is defined outside this excerpt; presumably it
        // returns a negative value when no exact match exists, hence Math.abs - confirm
        int startPosition = binarySearch(oldChpxSortedByStartPos, boundary);
        startPosition = Math.abs(startPosition);
        // clamp the index to the last element of the sorted list
        while (startPosition >= oldChpxSortedByStartPos.size()) startPosition--;
        // step back to an entry that starts before the boundary (or to index 0)
        while (startPosition > 0 && oldChpxSortedByStartPos.get(startPosition).getStart() >= boundary) startPosition--;
        // collect the old CHPXs that overlap the current interval
        List<CHPX> chpxs = new LinkedList<CHPX>();
        for (int c = startPosition; c < oldChpxSortedByStartPos.size(); c++) {
            CHPX chpx = oldChpxSortedByStartPos.get(c);
            // list is sorted by start, so nothing further can overlap
            if (boundary < chpx.getStart())
                break;
            // non-empty intersection of [startInclusive; endExclusive) with the CHPX range
            int left = Math.max(startInclusive, chpx.getStart());
            int right = Math.min(endExclusive, chpx.getEnd());
            if (left < right) {
                chpxs.add(chpx);
            }
        }
        if (chpxs.size() == 0) {
            logger.log(POILogger.WARN, "Text piece [", Integer.valueOf(startInclusive), "; ", Integer.valueOf(endExclusive), ") has no CHPX. Creating new one.");
            // create it manually
            CHPX chpx = new CHPX(startInclusive, endExclusive, new SprmBuffer(0));
            newChpxs.add(chpx);
            continue;
        }
        if (chpxs.size() == 1) {
            // can we reuse existing?
            CHPX existing = chpxs.get(0);
            if (existing.getStart() == startInclusive && existing.getEnd() == endExclusive) {
                newChpxs.add(existing);
                continue;
            }
        }
        // several CHPXs (or one with different bounds): concatenate their
        // grpprls in file order into a fresh buffer
        Collections.sort(chpxs, chpxFileOrderComparator);
        SprmBuffer sprmBuffer = new SprmBuffer(0);
        for (CHPX chpx : chpxs) {
            sprmBuffer.append(chpx.getGrpprl(), 0);
        }
        CHPX newChpx = new CHPX(startInclusive, endExclusive, sprmBuffer);
        newChpxs.add(newChpx);
        continue;
    }
    this._textRuns = new ArrayList<CHPX>(newChpxs);
    logger.log(POILogger.DEBUG, "CHPX rebuilded in ", Long.valueOf(System.currentTimeMillis() - start), " ms (", Integer.valueOf(_textRuns.size()), " elements)");
    start = System.currentTimeMillis();
    // compaction: extend the previous CHPX over the current one and drop the
    // current one when they are contiguous and carry equal grpprl bytes
    CHPX previous = null;
    for (Iterator<CHPX> iterator = _textRuns.iterator(); iterator.hasNext(); ) {
        CHPX current = iterator.next();
        if (previous == null) {
            previous = current;
            continue;
        }
        if (previous.getEnd() == current.getStart() && Arrays.equals(previous.getGrpprl(), current.getGrpprl())) {
            previous.setEnd(current.getEnd());
            iterator.remove();
            continue;
        }
        previous = current;
    }
    logger.log(POILogger.DEBUG, "CHPX compacted in ", Long.valueOf(System.currentTimeMillis() - start), " ms (", Integer.valueOf(_textRuns.size()), " elements)");
}
Also used : Set(java.util.Set) HashSet(java.util.HashSet) IdentityHashMap(java.util.IdentityHashMap) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) Comparator(java.util.Comparator) SprmIterator(org.apache.poi.hwpf.sprm.SprmIterator) SprmOperation(org.apache.poi.hwpf.sprm.SprmOperation) SprmBuffer(org.apache.poi.hwpf.sprm.SprmBuffer)

Example 2 with SprmOperation

Use of org.apache.poi.hwpf.sprm.SprmOperation in the Apache POI project.

Method rebuild of the class PAPBinTable.

/**
 * Rebuilds the given PAPX list so that it contains one PAPX per paragraph
 * found in {@code docText}. The list is modified in place: entries may be
 * added from the complex file table first, and at the end the list is
 * cleared and refilled with the rebuilt entries.
 * <p>
 * A paragraph ends at every character whose code is 13, 7 or 12. For each
 * paragraph the old PAPXs (sorted by end position) that end at or before
 * the paragraph's last character are collected, starting from a cursor that
 * is carried over between paragraphs. None - an empty PAPX is created (with
 * a warning); exactly one with identical bounds - it is reused; otherwise
 * the collected grpprls are combined in file order into a new PAPX.
 *
 * @param docText
 *            document text that is scanned for paragraph-ending characters
 * @param complexFileTable
 *            source of additional PAPX entries; may be {@code null}, in
 *            which case that step is skipped
 * @param paragraphs
 *            PAPX list to rebuild; modified in place
 */
static void rebuild(final StringBuilder docText, ComplexFileTable complexFileTable, List<PAPX> paragraphs) {
    long start = System.currentTimeMillis();
    if (complexFileTable != null) {
        SprmBuffer[] sprmBuffers = complexFileTable.getGrpprls();
        // adding PAPX from fast-saved SPRMs
        for (TextPiece textPiece : complexFileTable.getTextPieceTable().getTextPieces()) {
            PropertyModifier prm = textPiece.getPieceDescriptor().getPrm();
            // only complex PRMs are used here; others are skipped
            if (!prm.isComplex())
                continue;
            int igrpprl = prm.getIgrpprl();
            // guard against an index outside the grpprl array
            if (igrpprl < 0 || igrpprl >= sprmBuffers.length) {
                logger.log(POILogger.WARN, textPiece + "'s PRM references to unknown grpprl");
                continue;
            }
            // scan the referenced grpprl for at least one paragraph-property operation
            boolean hasPap = false;
            SprmBuffer sprmBuffer = sprmBuffers[igrpprl];
            for (SprmIterator iterator = sprmBuffer.iterator(); iterator.hasNext(); ) {
                SprmOperation sprmOperation = iterator.next();
                if (sprmOperation.getType() == SprmOperation.TYPE_PAP) {
                    hasPap = true;
                    break;
                }
            }
            if (hasPap) {
                // NOTE(review): SprmBuffer(2) presumably reserves a 2-byte prefix
                // ahead of the appended SPRM bytes - confirm against SprmBuffer
                SprmBuffer newSprmBuffer = new SprmBuffer(2);
                newSprmBuffer.append(sprmBuffer.toByteArray());
                PAPX papx = new PAPX(textPiece.getStart(), textPiece.getEnd(), newSprmBuffer);
                paragraphs.add(papx);
            }
        }
        logger.log(POILogger.DEBUG, "Merged (?) with PAPX from complex file table in ", Long.valueOf(System.currentTimeMillis() - start), " ms (", Integer.valueOf(paragraphs.size()), " elements in total)");
        start = System.currentTimeMillis();
    }
    // sorted copy; 'paragraphs' itself keeps its current order
    List<PAPX> oldPapxSortedByEndPos = new ArrayList<PAPX>(paragraphs);
    Collections.sort(oldPapxSortedByEndPos, PropertyNode.EndComparator.instance);
    logger.log(POILogger.DEBUG, "PAPX sorted by end position in ", Long.valueOf(System.currentTimeMillis() - start), " ms");
    start = System.currentTimeMillis();
    // maps each PAPX instance (by reference) to its index in 'paragraphs'
    final Map<PAPX, Integer> papxToFileOrder = new IdentityHashMap<PAPX, Integer>();
    {
        int counter = 0;
        for (PAPX papx : paragraphs) {
            papxToFileOrder.put(papx, Integer.valueOf(counter++));
        }
    }
    // orders PAPXs by the index recorded above
    final Comparator<PAPX> papxFileOrderComparator = new Comparator<PAPX>() {

        public int compare(PAPX o1, PAPX o2) {
            Integer i1 = papxToFileOrder.get(o1);
            Integer i2 = papxToFileOrder.get(o2);
            return i1.compareTo(i2);
        }
    };
    logger.log(POILogger.DEBUG, "PAPX's order map created in ", Long.valueOf(System.currentTimeMillis() - start), " ms");
    start = System.currentTimeMillis();
    List<PAPX> newPapxs = new LinkedList<PAPX>();
    // start of the paragraph currently being assembled
    int lastParStart = 0;
    // cursor into oldPapxSortedByEndPos, carried over between paragraphs
    int lastPapxIndex = 0;
    for (int charIndex = 0; charIndex < docText.length(); charIndex++) {
        final char c = docText.charAt(charIndex);
        // only characters 13, 7 and 12 terminate a paragraph
        // (presumably paragraph mark, table cell mark and page/section break - confirm)
        if (c != 13 && c != 7 && c != 12)
            continue;
        final int startInclusive = lastParStart;
        final int endExclusive = charIndex + 1;
        // 'broken' records whether the scan below stopped at a PAPX that ends
        // after the current character
        boolean broken = false;
        List<PAPX> papxs = new LinkedList<PAPX>();
        for (int papxIndex = lastPapxIndex; papxIndex < oldPapxSortedByEndPos.size(); papxIndex++) {
            broken = false;
            PAPX papx = oldPapxSortedByEndPos.get(papxIndex);
            assert startInclusive == 0 || papxIndex + 1 == oldPapxSortedByEndPos.size() || papx.getEnd() > startInclusive;
            // this PAPX ends beyond the current paragraph: remember it as the
            // starting point for the next paragraph and stop collecting
            if (papx.getEnd() - 1 > charIndex) {
                lastPapxIndex = papxIndex;
                broken = true;
                break;
            }
            papxs.add(papx);
        }
        if (!broken) {
            // scan ran off the end of the list: park the cursor on the last element
            lastPapxIndex = oldPapxSortedByEndPos.size() - 1;
        }
        if (papxs.size() == 0) {
            logger.log(POILogger.WARN, "Paragraph [", Integer.valueOf(startInclusive), "; ", Integer.valueOf(endExclusive), ") has no PAPX. Creating new one.");
            // create it manually
            PAPX papx = new PAPX(startInclusive, endExclusive, new SprmBuffer(2));
            newPapxs.add(papx);
            lastParStart = endExclusive;
            continue;
        }
        if (papxs.size() == 1) {
            // can we reuse existing?
            PAPX existing = papxs.get(0);
            if (existing.getStart() == startInclusive && existing.getEnd() == endExclusive) {
                newPapxs.add(existing);
                lastParStart = endExclusive;
                continue;
            }
        }
        // restore file order of PAPX
        Collections.sort(papxs, papxFileOrderComparator);
        // the first PAPX with more than 2 grpprl bytes supplies the base buffer
        // (cloned); later ones are appended from offset 2
        // (presumably skipping a 2-byte prefix - confirm)
        SprmBuffer sprmBuffer = null;
        for (PAPX papx : papxs) {
            if (papx.getGrpprl() == null || papx.getGrpprl().length <= 2)
                continue;
            if (sprmBuffer == null) {
                sprmBuffer = papx.getSprmBuf().clone();
            } else {
                sprmBuffer.append(papx.getGrpprl(), 2);
            }
        }
        // NOTE(review): sprmBuffer is still null here when every collected PAPX
        // was skipped above; the PAPX is then built with a null buffer - confirm
        // that PAPX tolerates this
        PAPX newPapx = new PAPX(startInclusive, endExclusive, sprmBuffer);
        newPapxs.add(newPapx);
        lastParStart = endExclusive;
        continue;
    }
    // replace the caller's list contents with the rebuilt PAPXs
    paragraphs.clear();
    paragraphs.addAll(newPapxs);
    logger.log(POILogger.DEBUG, "PAPX rebuilded from document text in ", Long.valueOf(System.currentTimeMillis() - start), " ms (", Integer.valueOf(paragraphs.size()), " elements)");
    // NOTE(review): this value is not read again within this method
    start = System.currentTimeMillis();
}
Also used : IdentityHashMap(java.util.IdentityHashMap) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) Comparator(java.util.Comparator) SprmIterator(org.apache.poi.hwpf.sprm.SprmIterator) SprmOperation(org.apache.poi.hwpf.sprm.SprmOperation) SprmBuffer(org.apache.poi.hwpf.sprm.SprmBuffer)

Example 3 with SprmOperation

Use of org.apache.poi.hwpf.sprm.SprmOperation in the Apache POI project.

Method findHuge of the class PAPX.

/**
 * Looks for a "huge" grpprl referenced by {@code buf} and, if one is found,
 * loads it from {@code datastream}.
 * <p>
 * A reference is recognised when the grpprl of {@code buf} is exactly 8
 * bytes long and the SPRM at offset 2 has operation 0x45 or 0x46 with size
 * code 3 (sprmPHugePapx). Its operand is used as an offset into
 * {@code datastream}; the 16-bit value stored there is the size of the huge
 * grpprl, whose bytes follow immediately after it.
 * <p>
 * Every offset and size read from the data is validated before use, so a
 * corrupt reference yields {@code null} instead of an
 * {@code ArrayIndexOutOfBoundsException} or
 * {@code NegativeArraySizeException}.
 *
 * @param buf
 *            the buffer that may contain the huge-grpprl reference
 * @param datastream
 *            the stream holding the huge grpprl; may be {@code null}
 * @return a new buffer made of the first two bytes of the original grpprl
 *         followed by the huge grpprl, or {@code null} if {@code buf} holds
 *         no usable reference
 */
private SprmBuffer findHuge(SprmBuffer buf, byte[] datastream) {
    byte[] grpprl = buf.toByteArray();
    // then check for sprmPHugePapx
    if (grpprl.length != 8 || datastream == null) {
        return null;
    }
    SprmOperation sprm = new SprmOperation(grpprl, 2);
    boolean hugeOperation = sprm.getOperation() == 0x45 || sprm.getOperation() == 0x46;
    if (!hugeOperation || sprm.getSizeCode() != 3) {
        return null;
    }
    int hugeGrpprlOffset = sprm.getOperand();
    // the two size bytes must lie inside the stream; written without "+ 1" so
    // that neither a negative nor a very large operand can slip through
    if (hugeGrpprlOffset < 0 || hugeGrpprlOffset >= datastream.length - 1) {
        return null;
    }
    int grpprlSize = LittleEndian.getShort(datastream, hugeGrpprlOffset);
    // bytes that remain in the stream after the 2-byte size field
    int available = datastream.length - hugeGrpprlOffset - 2;
    // the size is read as a signed short: reject negative values and sizes
    // that would run past the end of the stream
    if (grpprlSize < 0 || grpprlSize > available) {
        return null;
    }
    byte[] hugeGrpprl = new byte[grpprlSize + 2];
    // copy original istd into huge Grpprl
    hugeGrpprl[0] = grpprl[0];
    hugeGrpprl[1] = grpprl[1];
    // copy Grpprl from dataStream
    System.arraycopy(datastream, hugeGrpprlOffset + 2, hugeGrpprl, 2, grpprlSize);
    return new SprmBuffer(hugeGrpprl, 2);
}
Also used : SprmOperation(org.apache.poi.hwpf.sprm.SprmOperation) SprmBuffer(org.apache.poi.hwpf.sprm.SprmBuffer)

Example 4 with SprmOperation

Use of org.apache.poi.hwpf.sprm.SprmOperation in the Apache POI project.

Method dumpChpx of the class HWPFLister.

/**
 * Prints every CHPX of the document's character table to standard output.
 * <p>
 * For each CHPX the entry itself is printed, optionally followed by its
 * character properties and by one tab-indented line per SPRM in its grpprl.
 * Finally the text covered by the CHPX is printed, with every character
 * whose code is below 30 replaced by {@code \0x} plus its hexadecimal code.
 *
 * @param withProperties
 *            whether to print the character properties of each CHPX
 * @param withSprms
 *            whether to print the individual SPRMs of each CHPX
 */
public void dumpChpx(boolean withProperties, boolean withSprms) {
    for (CHPX run : _doc.getCharacterTable().getTextRuns()) {
        System.out.println(run);
        if (withProperties) {
            System.out.println(run.getCharacterProperties(_doc.getStyleSheet(), (short) StyleSheet.NIL_STYLE));
        }
        if (withSprms) {
            for (SprmIterator sprms = new SprmIterator(run.getGrpprl(), 0); sprms.hasNext(); ) {
                System.out.println("\t" + sprms.next());
            }
        }
        // anonymous subclass only changes how the range describes itself
        Range runRange = new Range(run.getStart(), run.getEnd(), _doc.getOverallRange()) {

            @Override
            public String toString() {
                return "CHPX range (" + super.toString() + ")";
            }
        };
        String runText = runRange.text();
        StringBuilder escaped = new StringBuilder();
        for (int i = 0; i < runText.length(); i++) {
            char ch = runText.charAt(i);
            if (ch >= 30) {
                escaped.append(ch);
            } else {
                // low character codes are shown as an escaped hex code
                escaped.append("\\0x").append(Integer.toHexString(ch));
            }
        }
        System.out.println(escaped);
    }
}
Also used : CHPX(org.apache.poi.hwpf.model.CHPX) SprmIterator(org.apache.poi.hwpf.sprm.SprmIterator) SprmOperation(org.apache.poi.hwpf.sprm.SprmOperation) Range(org.apache.poi.hwpf.usermodel.Range)

Example 5 with SprmOperation

Use of org.apache.poi.hwpf.sprm.SprmOperation in the Apache POI project.

Method dumpSprms of the class HWPFLister.

/**
 * Prints each remaining SPRM of the iterator on its own line of standard
 * output, prefixed with {@code linePrefix}. The iterator is consumed.
 *
 * @param sprmIt
 *            iterator over the SPRMs to print
 * @param linePrefix
 *            text put in front of every printed SPRM
 */
protected void dumpSprms(SprmIterator sprmIt, String linePrefix) {
    while (sprmIt.hasNext()) {
        System.out.println(linePrefix + sprmIt.next());
    }
}
Also used : SprmOperation(org.apache.poi.hwpf.sprm.SprmOperation)

Aggregations

SprmOperation (org.apache.poi.hwpf.sprm.SprmOperation)5 SprmBuffer (org.apache.poi.hwpf.sprm.SprmBuffer)3 SprmIterator (org.apache.poi.hwpf.sprm.SprmIterator)3 ArrayList (java.util.ArrayList)2 Comparator (java.util.Comparator)2 IdentityHashMap (java.util.IdentityHashMap)2 LinkedList (java.util.LinkedList)2 HashSet (java.util.HashSet)1 Set (java.util.Set)1 CHPX (org.apache.poi.hwpf.model.CHPX)1 Range (org.apache.poi.hwpf.usermodel.Range)1