Search in sources :

Example 1 with SprmIterator

use of org.apache.poi.hwpf.sprm.SprmIterator in project poi by apache.

the class HWPFLister method dumpPapx.

/**
 * Prints the paragraph property exceptions (PAPX) of the current document to standard output.
 * <p>
 * When the document is an {@link HWPFDocument}, every PAP formatted disk page referenced by
 * the bin table is printed together with its PAPX entries, and the collected entries are then
 * printed a second time after being sorted. Afterwards, for any document type, each PAPX of
 * the paragraph table is printed.
 *
 * @param withProperties if {@code true}, the properties of the paragraph built from each
 *        paragraph-table PAPX are printed as well
 * @param withSprms if {@code true}, the SPRMs of the PAPX entries read from the disk pages are
 *        dumped; the SPRMs of the paragraph-table entries are dumped regardless of this flag
 * @throws Exception declared by the signature; propagated from the calls made here
 */
public void dumpPapx(boolean withProperties, boolean withSprms) throws Exception {
    if (_doc instanceof HWPFDocument) {
        System.out.println("binary PAP pages ");
        final HWPFDocument wordDocument = (HWPFDocument) _doc;
        final byte[] mainStream = _doc.getMainStream();
        final PlexOfCps binTable = new PlexOfCps(
                wordDocument.getTableStream(),
                wordDocument.getFileInformationBlock().getFcPlcfbtePapx(),
                wordDocument.getFileInformationBlock().getLcbPlcfbtePapx(),
                4);
        final List<PAPX> collected = new ArrayList<PAPX>();
        final int entryCount = binTable.length();
        int entry = 0;
        while (entry < entryCount) {
            final GenericPropertyNode binEntry = binTable.getProperty(entry);
            // Each bin table entry holds a page number; pages are fixed-size blocks.
            final int pageNumber = LittleEndian.getInt(binEntry.getBytes());
            final int offsetOfPage = POIFSConstants.SMALLER_BIG_BLOCK_SIZE * pageNumber;
            final PAPFormattedDiskPage diskPage = new PAPFormattedDiskPage(mainStream, wordDocument.getDataStream(), offsetOfPage, wordDocument.getTextTable());
            System.out.println("* PFKP: " + diskPage);
            for (PAPX pagePapx : diskPage.getPAPXs()) {
                System.out.println("** " + pagePapx);
                collected.add(pagePapx);
                if (withSprms && pagePapx != null) {
                    dumpSprms(new SprmIterator(pagePapx.getGrpprl(), 2), "*** ");
                }
            }
            entry++;
        }
        Collections.sort(collected);
        System.out.println("* Sorted by END");
        for (PAPX sortedPapx : collected) {
            System.out.println("** " + sortedPapx);
            if (withSprms && sortedPapx != null) {
                dumpSprms(new SprmIterator(sortedPapx.getGrpprl(), 2), "*** ");
            }
        }
    }
    // The paragraph table is available for every document type handled here.
    for (PAPX tablePapx : _doc.getParagraphTable().getParagraphs()) {
        System.out.println(tablePapx);
        if (withProperties) {
            final Paragraph built = Paragraph.newParagraph(_doc.getOverallRange(), tablePapx);
            System.out.println(built.getProps());
        }
        dumpSprms(new SprmIterator(tablePapx.getGrpprl(), 2), "\t");
    }
}
Also used : HWPFDocument(org.apache.poi.hwpf.HWPFDocument) SprmIterator(org.apache.poi.hwpf.sprm.SprmIterator) PlexOfCps(org.apache.poi.hwpf.model.PlexOfCps) ArrayList(java.util.ArrayList) PAPFormattedDiskPage(org.apache.poi.hwpf.model.PAPFormattedDiskPage) PAPX(org.apache.poi.hwpf.model.PAPX) GenericPropertyNode(org.apache.poi.hwpf.model.GenericPropertyNode) Paragraph(org.apache.poi.hwpf.usermodel.Paragraph)

Example 2 with SprmIterator

use of org.apache.poi.hwpf.sprm.SprmIterator in project poi by apache.

the class HWPFLister method dumpParagraphLevels.

/**
 * Prints the list-formatting information attached to a paragraph: its LFO, its LFOData and,
 * when the LFO exists, the matching list level together with that level's PAPX and CHPX SPRMs.
 * <p>
 * Nothing is printed when the paragraph's {@code ilfo} is 0.
 *
 * @param listTables the list tables used to look up the LFO, LFOData and list level
 * @param paragraph the paragraph properties whose {@code ilfo}/{@code ilvl} select the entries
 */
protected void dumpParagraphLevels(ListTables listTables, ParagraphProperties paragraph) {
    if (paragraph.getIlfo() == 0) {
        return;
    }
    final LFO lfo = listTables.getLfo(paragraph.getIlfo());
    System.out.println("PAP's LFO: " + lfo);
    final LFOData lfoData = listTables.getLfoData(paragraph.getIlfo());
    System.out.println("PAP's LFOData: " + lfoData);
    if (lfo == null) {
        return;
    }
    final ListLevel listLevel = listTables.getLevel(lfo.getLsid(), paragraph.getIlvl());
    System.out.println("PAP's ListLevel: " + listLevel);
    if (listLevel == null) {
        // The line above already reported the missing level; there is nothing to dereference.
        return;
    }
    if (listLevel.getGrpprlPapx() != null) {
        System.out.println("PAP's ListLevel PAPX:");
        dumpSprms(new SprmIterator(listLevel.getGrpprlPapx(), 0), "* ");
    }
    // Guard on the CHPX buffer itself: it is the one handed to the iterator below.
    if (listLevel.getGrpprlChpx() != null) {
        System.out.println("PAP's ListLevel CHPX:");
        dumpSprms(new SprmIterator(listLevel.getGrpprlChpx(), 0), "* ");
    }
}
Also used : SprmIterator(org.apache.poi.hwpf.sprm.SprmIterator) LFO(org.apache.poi.hwpf.model.LFO) LFOData(org.apache.poi.hwpf.model.LFOData) ListLevel(org.apache.poi.hwpf.model.ListLevel)

Example 3 with SprmIterator

use of org.apache.poi.hwpf.sprm.SprmIterator in project poi by apache.

the class CHPBinTable method rebuild.

/**
 * Rebuilds {@code _textRuns} so that the resulting CHPX list is made of consecutive,
 * non-overlapping runs.
 * <p>
 * Steps, in order:
 * <ol>
 * <li>if a complex file table is given, add a CHPX for every text piece whose PRM is complex
 * and whose referenced grpprl contains at least one CHP-type SPRM;</li>
 * <li>sort a copy of the runs by start position and remember each run's original index;</li>
 * <li>collect every distinct start/end position (except 0) as a boundary;</li>
 * <li>for each interval between consecutive boundaries, reuse, merge or create a CHPX;</li>
 * <li>merge neighbouring runs that touch and carry equal grpprl bytes.</li>
 * </ol>
 * Timing of each step is logged at DEBUG level.
 *
 * @param complexFileTable source of fast-saved SPRMs; may be {@code null}, in which case the
 *        first step is skipped
 */
public void rebuild(ComplexFileTable complexFileTable) {
    long start = System.currentTimeMillis();
    if (complexFileTable != null) {
        SprmBuffer[] sprmBuffers = complexFileTable.getGrpprls();
        // adding CHPX from fast-saved SPRMs
        for (TextPiece textPiece : complexFileTable.getTextPieceTable().getTextPieces()) {
            PropertyModifier prm = textPiece.getPieceDescriptor().getPrm();
            if (!prm.isComplex())
                continue;
            int igrpprl = prm.getIgrpprl();
            // Skip (with a warning) any index that falls outside the grpprl array.
            if (igrpprl < 0 || igrpprl >= sprmBuffers.length) {
                logger.log(POILogger.WARN, textPiece + "'s PRM references to unknown grpprl");
                continue;
            }
            boolean hasChp = false;
            SprmBuffer sprmBuffer = sprmBuffers[igrpprl];
            // Only grpprls that contain at least one CHP-type SPRM produce a CHPX.
            for (SprmIterator iterator = sprmBuffer.iterator(); iterator.hasNext(); ) {
                SprmOperation sprmOperation = iterator.next();
                if (sprmOperation.getType() == SprmOperation.TYPE_CHP) {
                    hasChp = true;
                    break;
                }
            }
            if (hasChp) {
                // Clone so the new CHPX does not share the buffer held by the complex file table.
                SprmBuffer newSprmBuffer = sprmBuffer.clone();
                CHPX chpx = new CHPX(textPiece.getStart(), textPiece.getEnd(), newSprmBuffer);
                _textRuns.add(chpx);
            }
        }
        logger.log(POILogger.DEBUG, "Merged with CHPX from complex file table in ", Long.valueOf(System.currentTimeMillis() - start), " ms (", Integer.valueOf(_textRuns.size()), " elements in total)");
        start = System.currentTimeMillis();
    }
    // Work on a sorted copy; _textRuns itself keeps its current order for the index map below.
    List<CHPX> oldChpxSortedByStartPos = new ArrayList<CHPX>(_textRuns);
    Collections.sort(oldChpxSortedByStartPos, PropertyNode.StartComparator.instance);
    logger.log(POILogger.DEBUG, "CHPX sorted by start position in ", Long.valueOf(System.currentTimeMillis() - start), " ms");
    start = System.currentTimeMillis();
    // Identity-based map from each CHPX instance to its position in _textRuns.
    final Map<CHPX, Integer> chpxToFileOrder = new IdentityHashMap<CHPX, Integer>();
    {
        int counter = 0;
        for (CHPX chpx : _textRuns) {
            chpxToFileOrder.put(chpx, Integer.valueOf(counter++));
        }
    }
    // Orders CHPX by the index recorded above; both arguments must be keys of the map.
    final Comparator<CHPX> chpxFileOrderComparator = new Comparator<CHPX>() {

        public int compare(CHPX o1, CHPX o2) {
            Integer i1 = chpxToFileOrder.get(o1);
            Integer i2 = chpxToFileOrder.get(o2);
            return i1.compareTo(i2);
        }
    };
    logger.log(POILogger.DEBUG, "CHPX's order map created in ", Long.valueOf(System.currentTimeMillis() - start), " ms");
    start = System.currentTimeMillis();
    List<Integer> textRunsBoundariesList;
    {
        // Every distinct start and end position is a boundary; the set removes duplicates.
        Set<Integer> textRunsBoundariesSet = new HashSet<Integer>();
        for (CHPX chpx : _textRuns) {
            textRunsBoundariesSet.add(Integer.valueOf(chpx.getStart()));
            textRunsBoundariesSet.add(Integer.valueOf(chpx.getEnd()));
        }
        // 0 is dropped because the first interval below already starts at 0.
        textRunsBoundariesSet.remove(Integer.valueOf(0));
        textRunsBoundariesList = new ArrayList<Integer>(textRunsBoundariesSet);
        Collections.sort(textRunsBoundariesList);
    }
    logger.log(POILogger.DEBUG, "Texts CHPX boundaries collected in ", Long.valueOf(System.currentTimeMillis() - start), " ms");
    start = System.currentTimeMillis();
    List<CHPX> newChpxs = new LinkedList<CHPX>();
    int lastTextRunStart = 0;
    for (Integer objBoundary : textRunsBoundariesList) {
        final int boundary = objBoundary.intValue();
        // The interval handled in this iteration is [previous boundary, this boundary).
        final int startInclusive = lastTextRunStart;
        final int endExclusive = boundary;
        lastTextRunStart = endExclusive;
        // NOTE(review): binarySearch is defined outside this view; presumably it can return a
        // negative value when no exact match exists. The abs() and the two back-off loops below
        // turn whatever it returns into a valid index whose CHPX starts before the boundary
        // (or index 0) - confirm against the helper.
        int startPosition = binarySearch(oldChpxSortedByStartPos, boundary);
        startPosition = Math.abs(startPosition);
        while (startPosition >= oldChpxSortedByStartPos.size()) startPosition--;
        while (startPosition > 0 && oldChpxSortedByStartPos.get(startPosition).getStart() >= boundary) startPosition--;
        // Collect every old CHPX that overlaps the current interval.
        List<CHPX> chpxs = new LinkedList<CHPX>();
        for (int c = startPosition; c < oldChpxSortedByStartPos.size(); c++) {
            CHPX chpx = oldChpxSortedByStartPos.get(c);
            // The list is sorted by start, so no later entry can overlap either.
            if (boundary < chpx.getStart())
                break;
            int left = Math.max(startInclusive, chpx.getStart());
            int right = Math.min(endExclusive, chpx.getEnd());
            if (left < right) {
                chpxs.add(chpx);
            }
        }
        if (chpxs.size() == 0) {
            logger.log(POILogger.WARN, "Text piece [", Integer.valueOf(startInclusive), "; ", Integer.valueOf(endExclusive), ") has no CHPX. Creating new one.");
            // create it manually
            CHPX chpx = new CHPX(startInclusive, endExclusive, new SprmBuffer(0));
            newChpxs.add(chpx);
            continue;
        }
        if (chpxs.size() == 1) {
            // can we reuse existing?
            CHPX existing = chpxs.get(0);
            if (existing.getStart() == startInclusive && existing.getEnd() == endExclusive) {
                newChpxs.add(existing);
                continue;
            }
        }
        // Several (or one partially matching) CHPX: concatenate their SPRMs in original order.
        Collections.sort(chpxs, chpxFileOrderComparator);
        SprmBuffer sprmBuffer = new SprmBuffer(0);
        for (CHPX chpx : chpxs) {
            sprmBuffer.append(chpx.getGrpprl(), 0);
        }
        CHPX newChpx = new CHPX(startInclusive, endExclusive, sprmBuffer);
        newChpxs.add(newChpx);
        continue;
    }
    this._textRuns = new ArrayList<CHPX>(newChpxs);
    logger.log(POILogger.DEBUG, "CHPX rebuilded in ", Long.valueOf(System.currentTimeMillis() - start), " ms (", Integer.valueOf(_textRuns.size()), " elements)");
    start = System.currentTimeMillis();
    // Compaction: fold a run into its predecessor when they touch and have equal grpprl bytes.
    CHPX previous = null;
    for (Iterator<CHPX> iterator = _textRuns.iterator(); iterator.hasNext(); ) {
        CHPX current = iterator.next();
        if (previous == null) {
            previous = current;
            continue;
        }
        if (previous.getEnd() == current.getStart() && Arrays.equals(previous.getGrpprl(), current.getGrpprl())) {
            // 'previous' is deliberately kept so that a following run can be folded into it too.
            previous.setEnd(current.getEnd());
            iterator.remove();
            continue;
        }
        previous = current;
    }
    logger.log(POILogger.DEBUG, "CHPX compacted in ", Long.valueOf(System.currentTimeMillis() - start), " ms (", Integer.valueOf(_textRuns.size()), " elements)");
}
Also used : Set(java.util.Set) HashSet(java.util.HashSet) IdentityHashMap(java.util.IdentityHashMap) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) Comparator(java.util.Comparator) SprmIterator(org.apache.poi.hwpf.sprm.SprmIterator) SprmOperation(org.apache.poi.hwpf.sprm.SprmOperation) SprmBuffer(org.apache.poi.hwpf.sprm.SprmBuffer)

Example 4 with SprmIterator

use of org.apache.poi.hwpf.sprm.SprmIterator in project poi by apache.

the class PAPBinTable method rebuild.

/**
 * Rebuilds the given PAPX list in place so that it holds exactly one PAPX per paragraph found
 * in the document text.
 * <p>
 * Steps, in order:
 * <ol>
 * <li>if a complex file table is given, add a PAPX for every text piece whose PRM is complex
 * and whose referenced grpprl contains at least one PAP-type SPRM;</li>
 * <li>sort a copy of the list by end position and remember each PAPX's original index;</li>
 * <li>walk the text; at every character with code 13, 7 or 12 close a paragraph and reuse,
 * merge or create the PAPX that covers it;</li>
 * <li>replace the contents of {@code paragraphs} with the new list.</li>
 * </ol>
 * Timing of each step is logged at DEBUG level.
 *
 * @param docText the document text that is scanned for paragraph-ending characters
 * @param complexFileTable source of fast-saved SPRMs; may be {@code null}, in which case the
 *        first step is skipped
 * @param paragraphs the PAPX list to rebuild; it is cleared and refilled by this method
 */
static void rebuild(final StringBuilder docText, ComplexFileTable complexFileTable, List<PAPX> paragraphs) {
    long start = System.currentTimeMillis();
    if (complexFileTable != null) {
        SprmBuffer[] sprmBuffers = complexFileTable.getGrpprls();
        // adding PAPX from fast-saved SPRMs
        for (TextPiece textPiece : complexFileTable.getTextPieceTable().getTextPieces()) {
            PropertyModifier prm = textPiece.getPieceDescriptor().getPrm();
            if (!prm.isComplex())
                continue;
            int igrpprl = prm.getIgrpprl();
            // Skip (with a warning) any index that falls outside the grpprl array.
            if (igrpprl < 0 || igrpprl >= sprmBuffers.length) {
                logger.log(POILogger.WARN, textPiece + "'s PRM references to unknown grpprl");
                continue;
            }
            boolean hasPap = false;
            SprmBuffer sprmBuffer = sprmBuffers[igrpprl];
            // Only grpprls that contain at least one PAP-type SPRM produce a PAPX.
            for (SprmIterator iterator = sprmBuffer.iterator(); iterator.hasNext(); ) {
                SprmOperation sprmOperation = iterator.next();
                if (sprmOperation.getType() == SprmOperation.TYPE_PAP) {
                    hasPap = true;
                    break;
                }
            }
            if (hasPap) {
                // Copy the SPRM bytes into a fresh buffer created with the same argument (2)
                // that the other PAPX buffers in this method use.
                SprmBuffer newSprmBuffer = new SprmBuffer(2);
                newSprmBuffer.append(sprmBuffer.toByteArray());
                PAPX papx = new PAPX(textPiece.getStart(), textPiece.getEnd(), newSprmBuffer);
                paragraphs.add(papx);
            }
        }
        logger.log(POILogger.DEBUG, "Merged (?) with PAPX from complex file table in ", Long.valueOf(System.currentTimeMillis() - start), " ms (", Integer.valueOf(paragraphs.size()), " elements in total)");
        start = System.currentTimeMillis();
    }
    // Work on a sorted copy; 'paragraphs' keeps its current order for the index map below.
    List<PAPX> oldPapxSortedByEndPos = new ArrayList<PAPX>(paragraphs);
    Collections.sort(oldPapxSortedByEndPos, PropertyNode.EndComparator.instance);
    logger.log(POILogger.DEBUG, "PAPX sorted by end position in ", Long.valueOf(System.currentTimeMillis() - start), " ms");
    start = System.currentTimeMillis();
    // Identity-based map from each PAPX instance to its position in 'paragraphs'.
    final Map<PAPX, Integer> papxToFileOrder = new IdentityHashMap<PAPX, Integer>();
    {
        int counter = 0;
        for (PAPX papx : paragraphs) {
            papxToFileOrder.put(papx, Integer.valueOf(counter++));
        }
    }
    // Orders PAPX by the index recorded above; both arguments must be keys of the map.
    final Comparator<PAPX> papxFileOrderComparator = new Comparator<PAPX>() {

        public int compare(PAPX o1, PAPX o2) {
            Integer i1 = papxToFileOrder.get(o1);
            Integer i2 = papxToFileOrder.get(o2);
            return i1.compareTo(i2);
        }
    };
    logger.log(POILogger.DEBUG, "PAPX's order map created in ", Long.valueOf(System.currentTimeMillis() - start), " ms");
    start = System.currentTimeMillis();
    List<PAPX> newPapxs = new LinkedList<PAPX>();
    int lastParStart = 0;
    int lastPapxIndex = 0;
    for (int charIndex = 0; charIndex < docText.length(); charIndex++) {
        final char c = docText.charAt(charIndex);
        // Only character codes 13 (CR), 7 (BEL) and 12 (FF) end a paragraph here.
        // NOTE(review): presumably the paragraph, table-cell and page/section marks of the
        // Word format - confirm against the format specification.
        if (c != 13 && c != 7 && c != 12)
            continue;
        // The paragraph spans [start of previous paragraph end, this character inclusive].
        final int startInclusive = lastParStart;
        final int endExclusive = charIndex + 1;
        boolean broken = false;
        // Collect, starting where the previous paragraph stopped, every old PAPX that ends
        // at or before the current character.
        List<PAPX> papxs = new LinkedList<PAPX>();
        for (int papxIndex = lastPapxIndex; papxIndex < oldPapxSortedByEndPos.size(); papxIndex++) {
            broken = false;
            PAPX papx = oldPapxSortedByEndPos.get(papxIndex);
            assert startInclusive == 0 || papxIndex + 1 == oldPapxSortedByEndPos.size() || papx.getEnd() > startInclusive;
            if (papx.getEnd() - 1 > charIndex) {
                // This PAPX ends after the current paragraph; resume from it next time.
                lastPapxIndex = papxIndex;
                broken = true;
                break;
            }
            papxs.add(papx);
        }
        if (!broken) {
            // The scan ran off the end of the list: park the index on the last element.
            lastPapxIndex = oldPapxSortedByEndPos.size() - 1;
        }
        if (papxs.size() == 0) {
            logger.log(POILogger.WARN, "Paragraph [", Integer.valueOf(startInclusive), "; ", Integer.valueOf(endExclusive), ") has no PAPX. Creating new one.");
            // create it manually
            PAPX papx = new PAPX(startInclusive, endExclusive, new SprmBuffer(2));
            newPapxs.add(papx);
            lastParStart = endExclusive;
            continue;
        }
        if (papxs.size() == 1) {
            // can we reuse existing?
            PAPX existing = papxs.get(0);
            if (existing.getStart() == startInclusive && existing.getEnd() == endExclusive) {
                newPapxs.add(existing);
                lastParStart = endExclusive;
                continue;
            }
        }
        // restore file order of PAPX
        Collections.sort(papxs, papxFileOrderComparator);
        // Merge: clone the first non-trivial buffer, then append the SPRMs of the others.
        SprmBuffer sprmBuffer = null;
        for (PAPX papx : papxs) {
            // A grpprl of at most 2 bytes is skipped as carrying nothing to merge.
            if (papx.getGrpprl() == null || papx.getGrpprl().length <= 2)
                continue;
            if (sprmBuffer == null) {
                sprmBuffer = papx.getSprmBuf().clone();
            } else {
                sprmBuffer.append(papx.getGrpprl(), 2);
            }
        }
        // NOTE(review): sprmBuffer is still null here when every collected PAPX was skipped
        // above; the PAPX is then built with a null buffer - confirm PAPX tolerates that.
        PAPX newPapx = new PAPX(startInclusive, endExclusive, sprmBuffer);
        newPapxs.add(newPapx);
        lastParStart = endExclusive;
        continue;
    }
    paragraphs.clear();
    paragraphs.addAll(newPapxs);
    logger.log(POILogger.DEBUG, "PAPX rebuilded from document text in ", Long.valueOf(System.currentTimeMillis() - start), " ms (", Integer.valueOf(paragraphs.size()), " elements)");
}
Also used : IdentityHashMap(java.util.IdentityHashMap) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) Comparator(java.util.Comparator) SprmIterator(org.apache.poi.hwpf.sprm.SprmIterator) SprmOperation(org.apache.poi.hwpf.sprm.SprmOperation) SprmBuffer(org.apache.poi.hwpf.sprm.SprmBuffer)

Example 5 with SprmIterator

use of org.apache.poi.hwpf.sprm.SprmIterator in project poi by apache.

the class HWPFLister method dumpStyles.

/**
 * Prints every style of the current document's style sheet to standard output, followed by
 * the style's PAP and CHP SPRMs when the style carries them.
 * <p>
 * For an {@link HWPFOldDocument} only a "not supported" message is printed. Style slots for
 * which the style sheet returns {@code null} are skipped.
 */
private void dumpStyles() {
    if (_doc instanceof HWPFOldDocument) {
        System.out.println("Word 95 not supported so far");
        return;
    }
    final HWPFDocument wordDocument = (HWPFDocument) _doc;
    int styleIndex = 0;
    while (styleIndex < wordDocument.getStyleSheet().numStyles()) {
        final StyleDescription description = wordDocument.getStyleSheet().getStyleDescription(styleIndex);
        if (description != null) {
            System.out.println("=== Style #" + styleIndex + " '" + description.getName() + "' ===");
            System.out.println(description);
            // PAP SPRMs are iterated from offset 2, CHP SPRMs from offset 0.
            if (description.getPAPX() != null) {
                dumpSprms(new SprmIterator(description.getPAPX(), 2), "Style's PAP SPRM: ");
            }
            if (description.getCHPX() != null) {
                dumpSprms(new SprmIterator(description.getCHPX(), 0), "Style's CHP SPRM: ");
            }
        }
        styleIndex++;
    }
}
Also used : HWPFDocument(org.apache.poi.hwpf.HWPFDocument) HWPFOldDocument(org.apache.poi.hwpf.HWPFOldDocument) SprmIterator(org.apache.poi.hwpf.sprm.SprmIterator) StyleDescription(org.apache.poi.hwpf.model.StyleDescription)

Aggregations

SprmIterator (org.apache.poi.hwpf.sprm.SprmIterator)7 ArrayList (java.util.ArrayList)3 SprmOperation (org.apache.poi.hwpf.sprm.SprmOperation)3 Comparator (java.util.Comparator)2 IdentityHashMap (java.util.IdentityHashMap)2 LinkedList (java.util.LinkedList)2 HWPFDocument (org.apache.poi.hwpf.HWPFDocument)2 PAPX (org.apache.poi.hwpf.model.PAPX)2 SprmBuffer (org.apache.poi.hwpf.sprm.SprmBuffer)2 HashSet (java.util.HashSet)1 LinkedHashMap (java.util.LinkedHashMap)1 Map (java.util.Map)1 Set (java.util.Set)1 HWPFOldDocument (org.apache.poi.hwpf.HWPFOldDocument)1 CHPX (org.apache.poi.hwpf.model.CHPX)1 GenericPropertyNode (org.apache.poi.hwpf.model.GenericPropertyNode)1 LFO (org.apache.poi.hwpf.model.LFO)1 LFOData (org.apache.poi.hwpf.model.LFOData)1 ListLevel (org.apache.poi.hwpf.model.ListLevel)1 PAPFormattedDiskPage (org.apache.poi.hwpf.model.PAPFormattedDiskPage)1