Use of org.apache.poi.hwpf.sprm.SprmOperation in the Apache POI project.
Class CHPBinTable, method rebuild.
/**
 * Rebuilds {@code _textRuns} into a list of CHPX whose ranges start at 0 and
 * follow one another without gaps or overlaps.
 * <p>
 * The work is done in four phases, each timed and reported at DEBUG level:
 * <ol>
 * <li>If {@code complexFileTable} is not {@code null}, a CHPX is appended to
 * {@code _textRuns} for every text piece whose PRM is complex and whose
 * referenced grpprl contains at least one SPRM of type
 * {@code SprmOperation.TYPE_CHP}.</li>
 * <li>All CHPX start/end positions (except 0) are collected and sorted.</li>
 * <li>For each interval between consecutive boundaries a CHPX is produced:
 * a fresh one if no existing CHPX overlaps the interval, the existing one if
 * exactly one overlaps and has the same bounds, otherwise a new CHPX whose
 * grpprl is the concatenation of the overlapping ones, taken in the order
 * they had in {@code _textRuns}.</li>
 * <li>Neighbouring CHPX with byte-equal grpprls are merged.</li>
 * </ol>
 *
 * @param complexFileTable table providing grpprls and text pieces for phase 1;
 *            may be {@code null}, in which case phase 1 is skipped
 */
public void rebuild(ComplexFileTable complexFileTable) {
long start = System.currentTimeMillis();
if (complexFileTable != null) {
SprmBuffer[] sprmBuffers = complexFileTable.getGrpprls();
// adding CHPX from fast-saved SPRMs
for (TextPiece textPiece : complexFileTable.getTextPieceTable().getTextPieces()) {
PropertyModifier prm = textPiece.getPieceDescriptor().getPrm();
// only complex PRMs carry an index into the grpprl array
if (!prm.isComplex())
continue;
int igrpprl = prm.getIgrpprl();
// skip (with a warning) indices that do not address an entry of sprmBuffers
if (igrpprl < 0 || igrpprl >= sprmBuffers.length) {
logger.log(POILogger.WARN, textPiece + "'s PRM references to unknown grpprl");
continue;
}
// look for at least one character-property SPRM in the referenced buffer
boolean hasChp = false;
SprmBuffer sprmBuffer = sprmBuffers[igrpprl];
for (SprmIterator iterator = sprmBuffer.iterator(); iterator.hasNext(); ) {
SprmOperation sprmOperation = iterator.next();
if (sprmOperation.getType() == SprmOperation.TYPE_CHP) {
hasChp = true;
break;
}
}
if (hasChp) {
// the CHPX gets its own clone of the buffer and spans the whole text piece
SprmBuffer newSprmBuffer = sprmBuffer.clone();
CHPX chpx = new CHPX(textPiece.getStart(), textPiece.getEnd(), newSprmBuffer);
_textRuns.add(chpx);
}
}
logger.log(POILogger.DEBUG, "Merged with CHPX from complex file table in ", Long.valueOf(System.currentTimeMillis() - start), " ms (", Integer.valueOf(_textRuns.size()), " elements in total)");
start = System.currentTimeMillis();
}
// sorted copy used for lookups; _textRuns itself keeps its current order
List<CHPX> oldChpxSortedByStartPos = new ArrayList<CHPX>(_textRuns);
Collections.sort(oldChpxSortedByStartPos, PropertyNode.StartComparator.instance);
logger.log(POILogger.DEBUG, "CHPX sorted by start position in ", Long.valueOf(System.currentTimeMillis() - start), " ms");
start = System.currentTimeMillis();
// remember each CHPX's position in _textRuns, keyed by object identity,
// so that overlapping CHPX can later be put back into that order
final Map<CHPX, Integer> chpxToFileOrder = new IdentityHashMap<CHPX, Integer>();
{
int counter = 0;
for (CHPX chpx : _textRuns) {
chpxToFileOrder.put(chpx, Integer.valueOf(counter++));
}
}
// orders CHPX by the position recorded above; every CHPX compared later
// comes from _textRuns, so both lookups are expected to be non-null
final Comparator<CHPX> chpxFileOrderComparator = new Comparator<CHPX>() {
public int compare(CHPX o1, CHPX o2) {
Integer i1 = chpxToFileOrder.get(o1);
Integer i2 = chpxToFileOrder.get(o2);
return i1.compareTo(i2);
}
};
logger.log(POILogger.DEBUG, "CHPX's order map created in ", Long.valueOf(System.currentTimeMillis() - start), " ms");
start = System.currentTimeMillis();
// distinct start/end positions of all CHPX, ascending
List<Integer> textRunsBoundariesList;
{
Set<Integer> textRunsBoundariesSet = new HashSet<Integer>();
for (CHPX chpx : _textRuns) {
textRunsBoundariesSet.add(Integer.valueOf(chpx.getStart()));
textRunsBoundariesSet.add(Integer.valueOf(chpx.getEnd()));
}
// 0 is already the start of the first interval (lastTextRunStart below),
// keeping it as a boundary would yield an empty [0; 0) interval
textRunsBoundariesSet.remove(Integer.valueOf(0));
textRunsBoundariesList = new ArrayList<Integer>(textRunsBoundariesSet);
Collections.sort(textRunsBoundariesList);
}
logger.log(POILogger.DEBUG, "Texts CHPX boundaries collected in ", Long.valueOf(System.currentTimeMillis() - start), " ms");
start = System.currentTimeMillis();
List<CHPX> newChpxs = new LinkedList<CHPX>();
int lastTextRunStart = 0;
// one output CHPX per interval [previous boundary; boundary)
for (Integer objBoundary : textRunsBoundariesList) {
final int boundary = objBoundary.intValue();
final int startInclusive = lastTextRunStart;
final int endExclusive = boundary;
lastTextRunStart = endExclusive;
// binarySearch is defined outside this block; presumably it returns a
// negative value when nothing starts exactly at boundary - TODO confirm.
// Math.abs turns either result into a non-negative first guess.
int startPosition = binarySearch(oldChpxSortedByStartPos, boundary);
startPosition = Math.abs(startPosition);
// clamp the guess to the last valid index
while (startPosition >= oldChpxSortedByStartPos.size()) startPosition--;
// step back to an entry that starts before the boundary (or to index 0)
// NOTE(review): entries further left in the sorted list are never
// visited, even if they extend into this interval - confirm that CHPX
// nested inside a longer CHPX cannot occur here.
while (startPosition > 0 && oldChpxSortedByStartPos.get(startPosition).getStart() >= boundary) startPosition--;
// collect every CHPX from startPosition on that overlaps the interval
List<CHPX> chpxs = new LinkedList<CHPX>();
for (int c = startPosition; c < oldChpxSortedByStartPos.size(); c++) {
CHPX chpx = oldChpxSortedByStartPos.get(c);
// the list is sorted by start, so nothing later can overlap either
if (boundary < chpx.getStart())
break;
// non-empty intersection of [startInclusive; endExclusive) with the CHPX range
int left = Math.max(startInclusive, chpx.getStart());
int right = Math.min(endExclusive, chpx.getEnd());
if (left < right) {
chpxs.add(chpx);
}
}
if (chpxs.size() == 0) {
logger.log(POILogger.WARN, "Text piece [", Integer.valueOf(startInclusive), "; ", Integer.valueOf(endExclusive), ") has no CHPX. Creating new one.");
// create it manually
CHPX chpx = new CHPX(startInclusive, endExclusive, new SprmBuffer(0));
newChpxs.add(chpx);
continue;
}
if (chpxs.size() == 1) {
// can we reuse existing?
CHPX existing = chpxs.get(0);
if (existing.getStart() == startInclusive && existing.getEnd() == endExclusive) {
newChpxs.add(existing);
continue;
}
}
// several CHPX (or one with different bounds): concatenate their grpprls
// in the order the CHPX had in _textRuns
Collections.sort(chpxs, chpxFileOrderComparator);
SprmBuffer sprmBuffer = new SprmBuffer(0);
for (CHPX chpx : chpxs) {
// second argument is presumably the offset to copy from - TODO confirm
sprmBuffer.append(chpx.getGrpprl(), 0);
}
CHPX newChpx = new CHPX(startInclusive, endExclusive, sprmBuffer);
newChpxs.add(newChpx);
continue;
}
this._textRuns = new ArrayList<CHPX>(newChpxs);
logger.log(POILogger.DEBUG, "CHPX rebuilded in ", Long.valueOf(System.currentTimeMillis() - start), " ms (", Integer.valueOf(_textRuns.size()), " elements)");
start = System.currentTimeMillis();
// compaction: fold a CHPX into its predecessor when they touch and carry
// byte-equal grpprls
CHPX previous = null;
for (Iterator<CHPX> iterator = _textRuns.iterator(); iterator.hasNext(); ) {
CHPX current = iterator.next();
if (previous == null) {
previous = current;
continue;
}
if (previous.getEnd() == current.getStart() && Arrays.equals(previous.getGrpprl(), current.getGrpprl())) {
// extend the predecessor and drop the current entry; previous is kept
// so that a following equal CHPX is merged into it as well
previous.setEnd(current.getEnd());
iterator.remove();
continue;
}
previous = current;
}
logger.log(POILogger.DEBUG, "CHPX compacted in ", Long.valueOf(System.currentTimeMillis() - start), " ms (", Integer.valueOf(_textRuns.size()), " elements)");
}
Use of org.apache.poi.hwpf.sprm.SprmOperation in the Apache POI project.
Class PAPBinTable, method rebuild.
/**
 * Rebuilds {@code paragraphs} in place so that it holds exactly one PAPX per
 * paragraph found in {@code docText}.
 * <p>
 * Phases (each timed and reported at DEBUG level):
 * <ol>
 * <li>If {@code complexFileTable} is not {@code null}, a PAPX is appended to
 * {@code paragraphs} for every text piece whose PRM is complex and whose
 * referenced grpprl contains at least one SPRM of type
 * {@code SprmOperation.TYPE_PAP}.</li>
 * <li>The text is scanned for the characters 13 (CR), 7 (BEL) and 12 (FF);
 * each occurrence closes a paragraph {@code [previous end; index + 1)}.</li>
 * <li>For every paragraph the PAPX ending inside it are gathered: with none a
 * fresh PAPX is created, a single PAPX with identical bounds is reused,
 * otherwise their grpprls are merged in the order the PAPX had in
 * {@code paragraphs}.</li>
 * </ol>
 * Text after the last terminator character does not produce a PAPX.
 *
 * @param docText document text that is scanned for paragraph terminators
 * @param complexFileTable table providing grpprls and text pieces for phase
 *            1; may be {@code null}, in which case phase 1 is skipped
 * @param paragraphs existing PAPX; cleared and refilled with the rebuilt list
 */
static void rebuild(final StringBuilder docText, ComplexFileTable complexFileTable, List<PAPX> paragraphs) {
    long start = System.currentTimeMillis();

    if (complexFileTable != null) {
        SprmBuffer[] sprmBuffers = complexFileTable.getGrpprls();

        // adding PAPX from fast-saved SPRMs
        for (TextPiece textPiece : complexFileTable.getTextPieceTable().getTextPieces()) {
            PropertyModifier prm = textPiece.getPieceDescriptor().getPrm();
            // only complex PRMs carry an index into the grpprl array
            if (!prm.isComplex()) {
                continue;
            }

            int igrpprl = prm.getIgrpprl();
            if (igrpprl < 0 || igrpprl >= sprmBuffers.length) {
                logger.log(POILogger.WARN, textPiece + "'s PRM references to unknown grpprl");
                continue;
            }

            // look for at least one paragraph-property SPRM in the referenced buffer
            boolean hasPap = false;
            SprmBuffer sprmBuffer = sprmBuffers[igrpprl];
            for (SprmIterator iterator = sprmBuffer.iterator(); iterator.hasNext(); ) {
                SprmOperation sprmOperation = iterator.next();
                if (sprmOperation.getType() == SprmOperation.TYPE_PAP) {
                    hasPap = true;
                    break;
                }
            }

            if (hasPap) {
                // copy the SPRM bytes into a new buffer created with SprmBuffer(2),
                // as every other PAPX buffer built in this method is
                SprmBuffer newSprmBuffer = new SprmBuffer(2);
                newSprmBuffer.append(sprmBuffer.toByteArray());

                PAPX papx = new PAPX(textPiece.getStart(), textPiece.getEnd(), newSprmBuffer);
                paragraphs.add(papx);
            }
        }

        logger.log(POILogger.DEBUG, "Merged (?) with PAPX from complex file table in ", Long.valueOf(System.currentTimeMillis() - start), " ms (", Integer.valueOf(paragraphs.size()), " elements in total)");
        start = System.currentTimeMillis();
    }

    // sorted copy used for the sweep below; 'paragraphs' keeps its current order
    List<PAPX> oldPapxSortedByEndPos = new ArrayList<PAPX>(paragraphs);
    Collections.sort(oldPapxSortedByEndPos, PropertyNode.EndComparator.instance);

    logger.log(POILogger.DEBUG, "PAPX sorted by end position in ", Long.valueOf(System.currentTimeMillis() - start), " ms");
    start = System.currentTimeMillis();

    // remember each PAPX's position in 'paragraphs', keyed by object identity,
    // so that PAPX sharing a paragraph can be put back into that order
    final Map<PAPX, Integer> papxToFileOrder = new IdentityHashMap<PAPX, Integer>();
    {
        int counter = 0;
        for (PAPX papx : paragraphs) {
            papxToFileOrder.put(papx, Integer.valueOf(counter++));
        }
    }
    final Comparator<PAPX> papxFileOrderComparator = new Comparator<PAPX>() {
        public int compare(PAPX o1, PAPX o2) {
            Integer i1 = papxToFileOrder.get(o1);
            Integer i2 = papxToFileOrder.get(o2);
            return i1.compareTo(i2);
        }
    };

    logger.log(POILogger.DEBUG, "PAPX's order map created in ", Long.valueOf(System.currentTimeMillis() - start), " ms");
    start = System.currentTimeMillis();

    List<PAPX> newPapxs = new LinkedList<PAPX>();
    int lastParStart = 0;
    int lastPapxIndex = 0;

    for (int charIndex = 0; charIndex < docText.length(); charIndex++) {
        final char c = docText.charAt(charIndex);
        // only CR (13), BEL (7) and FF (12) close a paragraph
        if (c != 13 && c != 7 && c != 12) {
            continue;
        }

        final int startInclusive = lastParStart;
        final int endExclusive = charIndex + 1;

        // gather all PAPX, from the resume index on, that end within this paragraph
        boolean broken = false;
        List<PAPX> papxs = new LinkedList<PAPX>();
        for (int papxIndex = lastPapxIndex; papxIndex < oldPapxSortedByEndPos.size(); papxIndex++) {
            broken = false;
            PAPX papx = oldPapxSortedByEndPos.get(papxIndex);

            assert startInclusive == 0 || papxIndex + 1 == oldPapxSortedByEndPos.size() || papx.getEnd() > startInclusive;

            if (papx.getEnd() - 1 > charIndex) {
                // first PAPX ending beyond this paragraph: resume here next time
                lastPapxIndex = papxIndex;
                broken = true;
                break;
            }

            papxs.add(papx);
        }
        if (!broken) {
            // Every PAPX has been consumed: resume at the last one. Clamp at 0,
            // because with no PAPX at all "size() - 1" is -1 and the scan for
            // the next paragraph would call get(-1) and throw.
            lastPapxIndex = Math.max(0, oldPapxSortedByEndPos.size() - 1);
        }

        if (papxs.size() == 0) {
            logger.log(POILogger.WARN, "Paragraph [", Integer.valueOf(startInclusive), "; ", Integer.valueOf(endExclusive), ") has no PAPX. Creating new one.");
            // create it manually
            PAPX papx = new PAPX(startInclusive, endExclusive, new SprmBuffer(2));
            newPapxs.add(papx);

            lastParStart = endExclusive;
            continue;
        }

        if (papxs.size() == 1) {
            // can we reuse existing?
            PAPX existing = papxs.get(0);
            if (existing.getStart() == startInclusive && existing.getEnd() == endExclusive) {
                newPapxs.add(existing);
                lastParStart = endExclusive;
                continue;
            }
        }

        // restore file order of PAPX
        Collections.sort(papxs, papxFileOrderComparator);

        // The first PAPX with more than two grpprl bytes provides the base
        // buffer; later ones are appended from offset 2 on (the two leading
        // bytes are presumably the istd prefix - TODO confirm). The buffer
        // stays null when no PAPX qualifies.
        SprmBuffer sprmBuffer = null;
        for (PAPX papx : papxs) {
            if (papx.getGrpprl() == null || papx.getGrpprl().length <= 2) {
                continue;
            }

            if (sprmBuffer == null) {
                sprmBuffer = papx.getSprmBuf().clone();
            } else {
                sprmBuffer.append(papx.getGrpprl(), 2);
            }
        }
        PAPX newPapx = new PAPX(startInclusive, endExclusive, sprmBuffer);
        newPapxs.add(newPapx);

        lastParStart = endExclusive;
        continue;
    }

    paragraphs.clear();
    paragraphs.addAll(newPapxs);

    logger.log(POILogger.DEBUG, "PAPX rebuilded from document text in ", Long.valueOf(System.currentTimeMillis() - start), " ms (", Integer.valueOf(paragraphs.size()), " elements)");
}
Use of org.apache.poi.hwpf.sprm.SprmOperation in the Apache POI project.
Class PAPX, method findHuge.
/**
 * Resolves a grpprl that is stored out of line in the data stream
 * (sprmPHugePapx).
 * <p>
 * The lookup is attempted only when {@code buf} is exactly 8 bytes long and
 * the SPRM starting at byte 2 has operation 0x45 or 0x46 and size code 3. Its
 * operand is then used as an offset into {@code datastream}: a 16-bit
 * little-endian size is read there, followed by that many grpprl bytes. The
 * result consists of the first two bytes of {@code buf} (the istd) followed
 * by the bytes copied from the data stream.
 * <p>
 * An offset or size that does not fit inside {@code datastream} (including
 * negative values from malformed input) is treated as "not found" instead of
 * raising an exception from the array accesses.
 *
 * @param buf the SPRM buffer to inspect
 * @param datastream the data stream to read the huge grpprl from; may be
 *            {@code null}
 * @return a new buffer holding the huge grpprl, or {@code null} if
 *         {@code buf} does not reference one or the reference is invalid
 */
private SprmBuffer findHuge(SprmBuffer buf, byte[] datastream) {
    byte[] grpprl = buf.toByteArray();

    // then check for sprmPHugePapx
    if (grpprl.length != 8 || datastream == null) {
        return null;
    }

    SprmOperation sprm = new SprmOperation(grpprl, 2);
    boolean hugePapx = (sprm.getOperation() == 0x45 || sprm.getOperation() == 0x46) && sprm.getSizeCode() == 3;
    if (!hugePapx) {
        return null;
    }

    // the two size bytes must lie completely inside the stream; written
    // without "offset + n" so that extreme offsets cannot overflow
    int hugeGrpprlOffset = sprm.getOperand();
    if (hugeGrpprlOffset < 0 || hugeGrpprlOffset > datastream.length - 2) {
        return null;
    }

    // the payload starts after the two size bytes and must fit as well
    int grpprlSize = LittleEndian.getShort(datastream, hugeGrpprlOffset);
    if (grpprlSize < 0 || grpprlSize > datastream.length - 2 - hugeGrpprlOffset) {
        return null;
    }

    byte[] hugeGrpprl = new byte[grpprlSize + 2];
    // copy original istd into huge Grpprl
    hugeGrpprl[0] = grpprl[0];
    hugeGrpprl[1] = grpprl[1];
    // copy Grpprl from dataStream
    System.arraycopy(datastream, hugeGrpprlOffset + 2, hugeGrpprl, 2, grpprlSize);
    return new SprmBuffer(hugeGrpprl, 2);
}
Use of org.apache.poi.hwpf.sprm.SprmOperation in the Apache POI project.
Class HWPFLister, method dumpChpx.
/**
 * Writes every CHPX of the document's character table to standard output.
 * <p>
 * For each CHPX the CHPX itself is printed first, optionally followed by its
 * resolved character properties and by its SPRMs (one per line, prefixed with
 * a tab). The text covered by the CHPX is printed last; characters with a
 * code below 30 are written as {@code \0x} followed by their hexadecimal code.
 *
 * @param withProperties whether to print the character properties of each CHPX
 * @param withSprms whether to print the individual SPRMs of each CHPX
 */
public void dumpChpx(boolean withProperties, boolean withSprms) {
    for (CHPX run : _doc.getCharacterTable().getTextRuns()) {
        System.out.println(run);

        if (withProperties) {
            System.out.println(run.getCharacterProperties(_doc.getStyleSheet(), (short) StyleSheet.NIL_STYLE));
        }

        if (withSprms) {
            for (SprmIterator it = new SprmIterator(run.getGrpprl(), 0); it.hasNext(); ) {
                System.out.println("\t" + it.next());
            }
        }

        // range limited to this CHPX, used to fetch the covered text
        Range runRange = new Range(run.getStart(), run.getEnd(), _doc.getOverallRange()) {
            public String toString() {
                return "CHPX range (" + super.toString() + ")";
            }
        };
        String runText = runRange.text();

        // make low character codes visible instead of printing them raw
        StringBuilder escaped = new StringBuilder();
        for (int i = 0; i < runText.length(); i++) {
            char ch = runText.charAt(i);
            if (ch >= 30) {
                escaped.append(ch);
            } else {
                escaped.append("\\0x").append(Integer.toHexString(ch));
            }
        }
        System.out.println(escaped);
    }
}
Use of org.apache.poi.hwpf.sprm.SprmOperation in the Apache POI project.
Class HWPFLister, method dumpSprms.
/**
 * Prints each remaining SPRM of the iterator to standard output, one per
 * line, with {@code linePrefix} placed in front of it.
 *
 * @param sprmIt iterator supplying the SPRMs; it is consumed completely
 * @param linePrefix text written at the start of every line
 */
protected void dumpSprms(SprmIterator sprmIt, String linePrefix) {
    for (; sprmIt.hasNext(); ) {
        SprmOperation operation = sprmIt.next();
        String line = linePrefix + operation;
        System.out.println(line);
    }
}
Aggregations