use of org.apache.poi.hslf.record.TextBytesAtom in project poi by apache.
the class QuickButCruddyTextExtractor method findTextRecords.
/**
 * Examines the record whose 8 byte header starts at the given position.
 * If it is a text record (TextBytesAtom, TextCharsAtom or CString), its
 * text is appended to the supplied list; then the position of the next
 * record header is returned.
 * <p>
 * Containers are not skipped: for a container the position of its first
 * child (directly after the container header) is returned.
 *
 * @param startPos offset of the record header inside the document bytes
 * @param textV list that receives the text of any text record found
 * @return the offset of the next record header, or -1 if there is no room
 *         for another 8 byte header or the record length is unusable
 */
public int findTextRecords(int startPos, List<String> textV) {
    // Grab the length, and the first option byte
    // Note that the length doesn't include the 8 byte atom header
    int len = (int) LittleEndian.getUInt(pptContents, startPos + 4);
    byte opt = pptContents[startPos];

    // Last offset at which a complete 8 byte record header still fits
    final int lastHeaderPos = pptContents.length - 8;

    // If it's a container, step into it and return
    // (If it's a container, option byte 1 BINARY_AND 0x0f will be 0x0f)
    if ((opt & 0x0f) == 0x0f) {
        int firstChildPos = startPos + 8;
        // A container header sitting at the very end has no children to
        // step into; report "no more" instead of a position past the data
        return (firstChildPos > lastHeaderPos) ? -1 : firstChildPos;
    }

    // The length is an unsigned 32 bit value; anything that doesn't fit
    // into an int shows up negative here. Such a length can't be valid and
    // would make the walk go backwards or stand still, so stop.
    if (len < 0) {
        return -1;
    }

    // Otherwise, check the type to see if it's text
    // (the type ids are distinct, so at most one branch applies)
    int type = LittleEndian.getUShort(pptContents, startPos + 2);
    if (type == RecordTypes.TextBytesAtom.typeID) {
        TextBytesAtom tba = (TextBytesAtom) Record.createRecordForType(type, pptContents, startPos, len + 8);
        textV.add(HSLFTextParagraph.toExternalString(tba.getText(), -1));
    } else if (type == RecordTypes.TextCharsAtom.typeID) {
        TextCharsAtom tca = (TextCharsAtom) Record.createRecordForType(type, pptContents, startPos, len + 8);
        textV.add(HSLFTextParagraph.toExternalString(tca.getText(), -1));
    } else if (type == RecordTypes.CString.typeID) {
        // CString (doesn't go via a TextRun)
        CString cs = (CString) Record.createRecordForType(type, pptContents, startPos, len + 8);
        String text = cs.getText();
        // Ignore the ones we know to be rubbish
        if (!text.equals("___PPT10") && !text.equals("Default Design")) {
            textV.add(text);
        }
    }

    // Wind on by the atom length, and check we're not at the end.
    // Computed as long so a large length can't wrap around to a bogus offset.
    long nextPos = (long) startPos + 8 + len;
    if (nextPos > lastHeaderPos) {
        return -1;
    }
    return (int) nextPos;
}
use of org.apache.poi.hslf.record.TextBytesAtom in project poi by apache.
the class SlideShowRecordDumper method printEscherTextBox.
/**
 * Prints an EscherTextboxRecord heading followed by each child record of
 * the textbox, one per line, all prefixed with the requested indent.
 * <p>
 * Before a StyleTextPropAtom is printed, its parent text size is set from
 * the length of the text held by the directly preceding TextCharsAtom or
 * TextBytesAtom. A StyleTextPropAtom without such a predecessor triggers
 * an error line and is itself neither printed nor remembered as the
 * preceding record.
 *
 * @param tbRecord the textbox record to dump
 * @param indent number of leading characters of {@code tabs} to use as prefix
 */
private void printEscherTextBox(EscherTextboxRecord tbRecord, int indent) {
    final String prefix = tabs.substring(0, indent);
    ps.println(prefix + "EscherTextboxRecord:");

    final EscherTextboxWrapper wrapper = new EscherTextboxWrapper(tbRecord);
    Record previous = null;
    for (Record current : wrapper.getChildRecords()) {
        if (current instanceof StyleTextPropAtom) {
            // the style atom can only be initialised from the text atom
            // that comes right before it
            final String precedingText;
            if (previous instanceof TextCharsAtom) {
                precedingText = ((TextCharsAtom) previous).getText();
            } else if (previous instanceof TextBytesAtom) {
                precedingText = ((TextBytesAtom) previous).getText();
            } else {
                ps.println(prefix + "Error! Couldn't find preceding TextAtom for style");
                continue;
            }
            ((StyleTextPropAtom) current).setParentTextSize(precedingText.length());
        }

        ps.println(prefix + current);
        previous = current;
    }
}
use of org.apache.poi.hslf.record.TextBytesAtom in project poi by apache.
the class HSLFTextParagraph method findTextParagraphs.
/**
 * Walks the supplied records group by group. Each group is expected to
 * hold a TextHeaderAtom, optionally accompanied by a TextBytesAtom or
 * TextCharsAtom, a TextRulerAtom and a MasterTextPropAtom. For every group
 * with a header, the text is split into paragraphs (one per carriage
 * return, delimiter kept) and the character/paragraph styles and indents
 * are applied.
 * <p>
 * Scanning stops at the first group that has no TextHeaderAtom.
 *
 * @param records the records to build from
 * @return one paragraph list per text header found; empty if none
 */
protected static List<List<HSLFTextParagraph>> findTextParagraphs(Record[] records) {
    List<List<HSLFTextParagraph>> result = new ArrayList<List<HSLFTextParagraph>>();

    // position holder handed to getRecords for each group
    // NOTE(review): presumably getRecords advances cursor[0] - confirm
    int[] cursor = { 0 };
    int groupIndex = 0;

    while (cursor[0] < records.length) {
        TextHeaderAtom headerAtom = null;
        TextBytesAtom bytesAtom = null;
        TextCharsAtom charsAtom = null;
        TextRulerAtom rulerAtom = null;
        MasterTextPropAtom indentAtom = null;

        // sort the records of this group by their type
        // (StyleTextPropAtom is deliberately left to findStyleAtomPresent)
        for (Record rec : getRecords(records, cursor, null)) {
            long recType = rec.getRecordType();
            if (recType == RecordTypes.TextHeaderAtom.typeID) {
                headerAtom = (TextHeaderAtom) rec;
            } else if (recType == RecordTypes.TextBytesAtom.typeID) {
                bytesAtom = (TextBytesAtom) rec;
            } else if (recType == RecordTypes.TextCharsAtom.typeID) {
                charsAtom = (TextCharsAtom) rec;
            } else if (recType == RecordTypes.TextRulerAtom.typeID) {
                rulerAtom = (TextRulerAtom) rec;
            } else if (recType == RecordTypes.MasterTextPropAtom.typeID) {
                indentAtom = (MasterTextPropAtom) rec;
            }
        }

        // without a header there is nothing more to build
        if (headerAtom == null) {
            break;
        }

        // runs found in PPDrawing are not linked with SlideListWithTexts,
        // so only headers below a SlideListWithText receive the index
        if (headerAtom.getParentRecord() instanceof SlideListWithText) {
            headerAtom.setIndex(groupIndex);
        }

        if (bytesAtom == null && charsAtom == null) {
            // placeholder only - it is not added to the parent here,
            // that is left to storeText
            bytesAtom = new TextBytesAtom();
            logger.log(POILogger.INFO, "bytes nor chars atom doesn't exist. Creating dummy record for later saving.");
        }

        // the chars atom wins when both are present
        String rawText;
        if (charsAtom != null) {
            rawText = charsAtom.getText();
        } else {
            rawText = bytesAtom.getText();
        }

        StyleTextPropAtom styleAtom = findStyleAtomPresent(headerAtom, rawText.length());

        List<HSLFTextParagraph> groupParagraphs = new ArrayList<HSLFTextParagraph>();
        result.add(groupParagraphs);

        // split after each carriage return, keeping the delimiter
        String[] pieces = rawText.split("(?<=\r)");
        for (String piece : pieces) {
            HSLFTextParagraph paragraph = new HSLFTextParagraph(headerAtom, bytesAtom, charsAtom, groupParagraphs);
            groupParagraphs.add(paragraph);
            paragraph._ruler = rulerAtom;
            paragraph.getParagraphStyle().updateTextSize(piece.length());

            HSLFTextRun run = new HSLFTextRun(paragraph);
            paragraph.addTextRun(run);
            run.setText(piece);
        }

        applyCharacterStyles(groupParagraphs, styleAtom.getCharacterStyles());
        applyParagraphStyles(groupParagraphs, styleAtom.getParagraphStyles());
        if (indentAtom != null) {
            applyParagraphIndents(groupParagraphs, indentAtom.getIndents());
        }

        groupIndex++;
    }

    if (result.isEmpty()) {
        logger.log(POILogger.DEBUG, "No text records found.");
    }
    return result;
}
use of org.apache.poi.hslf.record.TextBytesAtom in project poi by apache.
the class HSLFTextParagraph method updateTextAtom.
/**
* Set the correct text atom depending on the multibyte usage.
* <p>
* The raw text of the given paragraphs is stored in a TextCharsAtom when
* StringUtil.hasMultibyte reports multibyte characters, and in a
* TextBytesAtom otherwise. The chosen atom takes the slot of the previous
* text atom among the children of the header atom's parent record, or is
* added after the header atom when no text atom is found there. Finally
* every paragraph is updated to reference only the chosen atom.
*
* @param paragraphs the paragraphs of one text block; header and text atoms
*        are read from the first entry, so the list must not be empty
*/
private static void updateTextAtom(List<HSLFTextParagraph> paragraphs) {
final String rawText = toInternalString(getRawText(paragraphs));
// Will it fit in a 8 bit atom?
boolean isUnicode = StringUtil.hasMultibyte(rawText);
// isUnicode = true;
// All paragraphs are treated as sharing the atoms of the first one
TextHeaderAtom headerAtom = paragraphs.get(0)._headerAtom;
TextBytesAtom byteAtom = paragraphs.get(0)._byteAtom;
TextCharsAtom charAtom = paragraphs.get(0)._charAtom;
// NOTE(review): judging by the handling of styleIdx == -1 below, this may
// return an atom that is not yet a child of the parent record - confirm
StyleTextPropAtom styleAtom = findStyleAtomPresent(headerAtom, rawText.length());
// Store in the appropriate record
Record oldRecord = null, newRecord = null;
if (isUnicode) {
// A fresh chars atom is created unless a chars atom is the only
// text atom currently referenced
if (byteAtom != null || charAtom == null) {
// may be null when no text atom existed at all
oldRecord = byteAtom;
charAtom = new TextCharsAtom();
}
newRecord = charAtom;
charAtom.setText(rawText);
} else {
// Mirror image of the branch above: a fresh bytes atom is created
// unless a bytes atom is the only text atom currently referenced
if (charAtom != null || byteAtom == null) {
// may be null when no text atom existed at all
oldRecord = charAtom;
byteAtom = new TextBytesAtom();
}
newRecord = byteAtom;
// one byte per character
byte[] byteText = new byte[rawText.length()];
StringUtil.putCompressedUnicode(rawText, byteText, 0);
byteAtom.setText(byteText);
}
assert (newRecord != null);
RecordContainer _txtbox = headerAtom.getParentRecord();
Record[] cr = _txtbox.getChildRecords();
int /* headerIdx = -1, */
textIdx = -1, styleIdx = -1;
// Locate the text atom (old or new) and the style atom among the
// parent's children; records are compared by identity
for (int i = 0; i < cr.length; i++) {
Record r = cr[i];
if (r == headerAtom) {
// headerIdx = i;
} else if (r == oldRecord || r == newRecord) {
textIdx = i;
} else if (r == styleAtom) {
styleIdx = i;
}
}
if (textIdx == -1) {
// the old record was never registered, ignore it
_txtbox.addChildAfter(newRecord, headerAtom);
// textIdx = headerIdx + 1;
} else {
// swap not appropriated records - noop if unchanged
// NOTE(review): this writes into the array returned by
// getChildRecords(); it only has an effect if that is the
// container's own array rather than a copy - confirm
cr[textIdx] = newRecord;
}
if (styleIdx == -1) {
// Add the new StyleTextPropAtom after the TextCharsAtom / TextBytesAtom
_txtbox.addChildAfter(styleAtom, newRecord);
}
// Point every paragraph at the chosen atom and clear the other reference;
// newRecord == byteAtom only holds when the non-unicode branch was taken
for (HSLFTextParagraph p : paragraphs) {
if (newRecord == byteAtom) {
p._byteAtom = byteAtom;
p._charAtom = null;
} else {
p._byteAtom = null;
p._charAtom = charAtom;
}
}
}
use of org.apache.poi.hslf.record.TextBytesAtom in project poi by apache.
the class TestTextRun method testAdvancedSetText.
/**
 * Test to ensure that changing non rich text between bytes and
 * chars works correctly.
 * <p>
 * After every call to {@code HSLFTextParagraph.setText} the records of the
 * first paragraph are inspected: text without multibyte characters must be
 * held by a TextBytesAtom only, text with multibyte characters by a
 * TextCharsAtom only.
 */
@Test
public void testAdvancedSetText() {
    HSLFSlide slideOne = ss.getSlides().get(0);
    List<HSLFTextParagraph> paras = slideOne.getTextParagraphs().get(0);
    HSLFTextParagraph para = paras.get(0);

    // Initial state: stored as bytes
    assertNull(findParagraphRecord(para, TextCharsAtom.class));
    assertNotNull(findParagraphRecord(para, TextBytesAtom.class));
    assertEquals("This is a test title", para.getTextRuns().get(0).getRawText());

    // Bytes -> Bytes
    String changeBytesOnly = "New Test Title";
    HSLFTextParagraph.setText(paras, changeBytesOnly);
    para = paras.get(0);
    assertEquals(changeBytesOnly, HSLFTextParagraph.getRawText(paras));
    assertNull(findParagraphRecord(para, TextCharsAtom.class));
    assertNotNull(findParagraphRecord(para, TextBytesAtom.class));

    // Bytes -> Chars
    String changeByteChar = "This is a test title with a 'ġ' g with a dot";
    HSLFTextParagraph.setText(paras, changeByteChar);
    para = paras.get(0);
    assertEquals(changeByteChar, HSLFTextParagraph.getRawText(paras));
    assertNotNull(findParagraphRecord(para, TextCharsAtom.class));
    assertNull(findParagraphRecord(para, TextBytesAtom.class));

    // Chars -> Chars
    String changeCharChar = "This is a test title with a 'Ň' N with a hat";
    HSLFTextParagraph.setText(paras, changeCharChar);
    para = paras.get(0);
    assertEquals(changeCharChar, HSLFTextParagraph.getRawText(paras));
    assertNotNull(findParagraphRecord(para, TextCharsAtom.class));
    assertNull(findParagraphRecord(para, TextBytesAtom.class));
}

/**
 * Returns the last record of the given type among the paragraph's records,
 * or null if the paragraph has no record of that type.
 */
private static <T extends Record> T findParagraphRecord(HSLFTextParagraph para, Class<T> type) {
    T found = null;
    for (Record r : para.getRecords()) {
        if (type.isInstance(r)) {
            found = type.cast(r);
        }
    }
    return found;
}
Aggregations