Search in sources :

Example 1 with TextBytesAtom

use of org.apache.poi.hslf.record.TextBytesAtom in project poi by apache.

the class QuickButCruddyTextExtractor method findTextRecords.

/**
 * Examines the record whose header begins at the given offset of
 *  pptContents, and works out where to look next.
 * A container record is stepped into: the offset just past its 8 byte
 *  header is returned. For any other record, the text of a TextBytesAtom,
 *  TextCharsAtom or CString is appended to the supplied list (two CString
 *  values known to be rubbish are skipped), and the record is wound past.
 *
 * @param startPos offset of the record header to examine
 * @param textV list that receives any text found
 * @return the offset of the next record to examine, or -1 when, after
 *  winding past a non-container record, fewer than 8 bytes remain
 */
public int findTextRecords(int startPos, List<String> textV) {
    // The length is stored 4 bytes into the header, and does not
    //  count the 8 byte header itself
    final int bodyLength = (int) LittleEndian.getUInt(pptContents, startPos + 4);
    final byte firstOption = pptContents[startPos];

    // A container has the low four bits of its first option byte all
    //  set; step into it rather than over it
    if ((firstOption & 0x0f) == 0x0f) {
        return startPos + 8;
    }

    // Not a container, so see if the type is one that carries text
    final int recordType = LittleEndian.getUShort(pptContents, startPos + 2);
    if (recordType == RecordTypes.TextBytesAtom.typeID) {
        TextBytesAtom bytesAtom = (TextBytesAtom) Record.createRecordForType(recordType, pptContents, startPos, bodyLength + 8);
        textV.add(HSLFTextParagraph.toExternalString(bytesAtom.getText(), -1));
    } else if (recordType == RecordTypes.TextCharsAtom.typeID) {
        TextCharsAtom charsAtom = (TextCharsAtom) Record.createRecordForType(recordType, pptContents, startPos, bodyLength + 8);
        textV.add(HSLFTextParagraph.toExternalString(charsAtom.getText(), -1));
    } else if (recordType == RecordTypes.CString.typeID) {
        // CString text is used as-is (doesn't go via a TextRun)
        CString cString = (CString) Record.createRecordForType(recordType, pptContents, startPos, bodyLength + 8);
        String value = cString.getText();
        // Skip the values we know to be rubbish
        boolean rubbish = value.equals("___PPT10") || value.equals("Default Design");
        if (!rubbish) {
            textV.add(value);
        }
    }

    // Wind on past header and body; report -1 if another 8 byte
    //  header could not fit in what is left
    final int nextPos = startPos + 8 + bodyLength;
    return (nextPos > (pptContents.length - 8)) ? -1 : nextPos;
}
Also used : TextCharsAtom(org.apache.poi.hslf.record.TextCharsAtom) CString(org.apache.poi.hslf.record.CString) TextBytesAtom(org.apache.poi.hslf.record.TextBytesAtom)

Example 2 with TextBytesAtom

use of org.apache.poi.hslf.record.TextBytesAtom in project poi by apache.

the class SlideShowRecordDumper method printEscherTextBox.

/**
 * Prints an Escher textbox record and each of its child records to ps,
 * every line prefixed with the first {@code indent} characters of tabs.
 * Before a StyleTextPropAtom is printed, it is told the length of the text
 * held by the child printed just before it.
 *
 * @param tbRecord the textbox record whose children are printed
 * @param indent how many characters of tabs to use as the line prefix
 */
private void printEscherTextBox(EscherTextboxRecord tbRecord, int indent) {
    final String prefix = tabs.substring(0, indent);
    ps.println(prefix + "EscherTextboxRecord:");

    Record previous = null;
    for (Record current : new EscherTextboxWrapper(tbRecord).getChildRecords()) {
        if (current instanceof StyleTextPropAtom) {
            // the style atom can only be initialised from the text of the
            // Text[Chars|Bytes]Atom that comes directly before it
            final String precedingText;
            if (previous instanceof TextCharsAtom) {
                precedingText = ((TextCharsAtom) previous).getText();
            } else if (previous instanceof TextBytesAtom) {
                precedingText = ((TextBytesAtom) previous).getText();
            } else {
                // no text atom to size against: report it and move on without
                // printing this child or remembering it as the previous one
                ps.println(prefix + "Error! Couldn't find preceding TextAtom for style");
                continue;
            }
            ((StyleTextPropAtom) current).setParentTextSize(precedingText.length());
        }
        ps.println(prefix + current);
        previous = current;
    }
}
Also used : EscherTextboxWrapper(org.apache.poi.hslf.record.EscherTextboxWrapper) Record(org.apache.poi.hslf.record.Record) EscherContainerRecord(org.apache.poi.ddf.EscherContainerRecord) EscherTextboxRecord(org.apache.poi.ddf.EscherTextboxRecord) EscherRecord(org.apache.poi.ddf.EscherRecord) TextCharsAtom(org.apache.poi.hslf.record.TextCharsAtom) StyleTextPropAtom(org.apache.poi.hslf.record.StyleTextPropAtom) TextBytesAtom(org.apache.poi.hslf.record.TextBytesAtom)

Example 3 with TextBytesAtom

use of org.apache.poi.hslf.record.TextBytesAtom in project poi by apache.

the class HSLFTextParagraph method findTextParagraphs.

/**
 * Walks the supplied record array group by group (as handed out by
 * getRecords), picking up the TextHeaderAtom together with any
 * TextBytesAtom, TextCharsAtom, TextRulerAtom and MasterTextPropAtom
 * in the group, and builds paragraphs from them. The text is split
 * after every carriage return, and each piece becomes one paragraph
 * holding a single text run.
 *
 * @param records the records to build from
 * @return one list of paragraphs per group that had a header, in the
 *         order found; empty if there were none
 */
protected static List<List<HSLFTextParagraph>> findTextParagraphs(Record[] records) {
    final List<List<HSLFTextParagraph>> allParagraphs = new ArrayList<List<HSLFTextParagraph>>();
    // read position, boxed in an array so it can be passed to getRecords
    final int[] cursor = { 0 };
    int slwtIndex = 0;

    while (cursor[0] < records.length) {
        TextHeaderAtom headerAtom = null;
        TextBytesAtom bytesAtom = null;
        TextCharsAtom charsAtom = null;
        TextRulerAtom rulerAtom = null;
        MasterTextPropAtom indentAtom = null;

        // StyleTextPropAtom is deliberately not collected here; it is
        // looked up through findStyleAtomPresent further down
        for (Record rec : getRecords(records, cursor, null)) {
            final long recType = rec.getRecordType();
            if (recType == RecordTypes.TextHeaderAtom.typeID) {
                headerAtom = (TextHeaderAtom) rec;
            } else if (recType == RecordTypes.TextBytesAtom.typeID) {
                bytesAtom = (TextBytesAtom) rec;
            } else if (recType == RecordTypes.TextCharsAtom.typeID) {
                charsAtom = (TextCharsAtom) rec;
            } else if (recType == RecordTypes.TextRulerAtom.typeID) {
                rulerAtom = (TextRulerAtom) rec;
            } else if (recType == RecordTypes.MasterTextPropAtom.typeID) {
                indentAtom = (MasterTextPropAtom) rec;
            }
        }

        // a group without a header ends the scan
        if (headerAtom == null) {
            break;
        }

        // runs found in PPDrawing are not linked with SlideListWithTexts,
        // so only headers sitting in a SlideListWithText get an index
        if (headerAtom.getParentRecord() instanceof SlideListWithText) {
            headerAtom.setIndex(slwtIndex);
        }

        if (bytesAtom == null && charsAtom == null) {
            // stand-in atom; it is not added as a record yet - that is
            // left to storeText
            bytesAtom = new TextBytesAtom();
            logger.log(POILogger.INFO, "bytes nor chars atom doesn't exist. Creating dummy record for later saving.");
        }

        // the chars atom wins when both are present
        final String rawText;
        if (charsAtom == null) {
            rawText = bytesAtom.getText();
        } else {
            rawText = charsAtom.getText();
        }
        final StyleTextPropAtom styleAtom = findStyleAtomPresent(headerAtom, rawText.length());

        final List<HSLFTextParagraph> paragraphs = new ArrayList<HSLFTextParagraph>();
        allParagraphs.add(paragraphs);

        // split after each \r, keeping the \r on the end of its piece
        for (String piece : rawText.split("(?<=\r)")) {
            final HSLFTextParagraph paragraph = new HSLFTextParagraph(headerAtom, bytesAtom, charsAtom, paragraphs);
            paragraphs.add(paragraph);
            paragraph._ruler = rulerAtom;
            paragraph.getParagraphStyle().updateTextSize(piece.length());
            final HSLFTextRun run = new HSLFTextRun(paragraph);
            paragraph.addTextRun(run);
            run.setText(piece);
        }

        applyCharacterStyles(paragraphs, styleAtom.getCharacterStyles());
        applyParagraphStyles(paragraphs, styleAtom.getParagraphStyles());
        if (indentAtom != null) {
            applyParagraphIndents(paragraphs, indentAtom.getIndents());
        }

        slwtIndex++;
    }

    if (allParagraphs.isEmpty()) {
        logger.log(POILogger.DEBUG, "No text records found.");
    }
    return allParagraphs;
}
Also used : SlideListWithText(org.apache.poi.hslf.record.SlideListWithText) ArrayList(java.util.ArrayList) TextCharsAtom(org.apache.poi.hslf.record.TextCharsAtom) TextHeaderAtom(org.apache.poi.hslf.record.TextHeaderAtom) TextBytesAtom(org.apache.poi.hslf.record.TextBytesAtom) DrawPaint(org.apache.poi.sl.draw.DrawPaint) SolidPaint(org.apache.poi.sl.usermodel.PaintStyle.SolidPaint) MasterTextPropAtom(org.apache.poi.hslf.record.MasterTextPropAtom) List(java.util.List) ArrayList(java.util.ArrayList) Record(org.apache.poi.hslf.record.Record) StyleTextPropAtom(org.apache.poi.hslf.record.StyleTextPropAtom) TextRulerAtom(org.apache.poi.hslf.record.TextRulerAtom)

Example 4 with TextBytesAtom

use of org.apache.poi.hslf.record.TextBytesAtom in project poi by apache.

the class HSLFTextParagraph method updateTextAtom.

/**
     * Set the correct text atom depending on the multibyte usage.
     * <p>
     * The raw text of the paragraphs is stored in a TextCharsAtom when
     * StringUtil.hasMultibyte reports true for it, and in a TextBytesAtom
     * otherwise. The atom used is then put in place among the children of
     * the header atom's parent record, the style atom returned by
     * findStyleAtomPresent is added after it if it is not already a child,
     * and every paragraph is pointed at the atom now in use.
     *
     * @param paragraphs the paragraphs to store; the header, bytes and chars
     *        atoms are all taken from the first entry (get(0) is called
     *        unconditionally, so presumably the list is never empty - TODO confirm)
     */
private static void updateTextAtom(List<HSLFTextParagraph> paragraphs) {
    final String rawText = toInternalString(getRawText(paragraphs));
    // Will it fit in a 8 bit atom?
    boolean isUnicode = StringUtil.hasMultibyte(rawText);
    // isUnicode = true;
    TextHeaderAtom headerAtom = paragraphs.get(0)._headerAtom;
    TextBytesAtom byteAtom = paragraphs.get(0)._byteAtom;
    TextCharsAtom charAtom = paragraphs.get(0)._charAtom;
    StyleTextPropAtom styleAtom = findStyleAtomPresent(headerAtom, rawText.length());
    // Store in the appropriate record
    // oldRecord: the atom being displaced (stays null when nothing is displaced)
    // newRecord: the atom that holds the text from now on
    Record oldRecord = null, newRecord = null;
    if (isUnicode) {
        // A bytes atom is present, or there is no chars atom yet: start a
        // fresh chars atom and note the bytes atom (may be null) as displaced
        if (byteAtom != null || charAtom == null) {
            oldRecord = byteAtom;
            charAtom = new TextCharsAtom();
        }
        newRecord = charAtom;
        charAtom.setText(rawText);
    } else {
        // Mirror image of the branch above: a chars atom is present, or there
        // is no bytes atom yet, so start a fresh bytes atom
        if (charAtom != null || byteAtom == null) {
            oldRecord = charAtom;
            byteAtom = new TextBytesAtom();
        }
        newRecord = byteAtom;
        // one byte per character of the text
        byte[] byteText = new byte[rawText.length()];
        StringUtil.putCompressedUnicode(rawText, byteText, 0);
        byteAtom.setText(byteText);
    }
    assert (newRecord != null);
    RecordContainer _txtbox = headerAtom.getParentRecord();
    Record[] cr = _txtbox.getChildRecords();
    // Find, by object identity, where the text atom (old or new) and the
    // style atom currently sit among the children; -1 means not present.
    // If both old and new are children, the later index wins.
    int /* headerIdx = -1, */
    textIdx = -1, styleIdx = -1;
    for (int i = 0; i < cr.length; i++) {
        Record r = cr[i];
        if (r == headerAtom) {
        // headerIdx = i;
        } else if (r == oldRecord || r == newRecord) {
            textIdx = i;
        } else if (r == styleAtom) {
            styleIdx = i;
        }
    }
    if (textIdx == -1) {
        // the old record was never registered, ignore it
        _txtbox.addChildAfter(newRecord, headerAtom);
    // textIdx = headerIdx + 1;
    } else {
        // swap not appropriated records - noop if unchanged
        // NOTE(review): this writes into the array returned by getChildRecords();
        // it only takes effect if that is the container's own array - confirm
        cr[textIdx] = newRecord;
    }
    if (styleIdx == -1) {
        // Add the new StyleTextPropAtom after the TextCharsAtom / TextBytesAtom
        _txtbox.addChildAfter(styleAtom, newRecord);
    }
    // Point every paragraph at the atom now in use and clear the other one,
    // so each paragraph references exactly one of the two
    for (HSLFTextParagraph p : paragraphs) {
        if (newRecord == byteAtom) {
            p._byteAtom = byteAtom;
            p._charAtom = null;
        } else {
            p._byteAtom = null;
            p._charAtom = charAtom;
        }
    }
}
Also used : RecordContainer(org.apache.poi.hslf.record.RecordContainer) TextCharsAtom(org.apache.poi.hslf.record.TextCharsAtom) Record(org.apache.poi.hslf.record.Record) TextHeaderAtom(org.apache.poi.hslf.record.TextHeaderAtom) TextBytesAtom(org.apache.poi.hslf.record.TextBytesAtom) StyleTextPropAtom(org.apache.poi.hslf.record.StyleTextPropAtom) DrawPaint(org.apache.poi.sl.draw.DrawPaint) SolidPaint(org.apache.poi.sl.usermodel.PaintStyle.SolidPaint)

Example 5 with TextBytesAtom

use of org.apache.poi.hslf.record.TextBytesAtom in project poi by apache.

the class TestTextRun method testAdvancedSetText.

/**
 * Test to ensure that changing non rich text between bytes and
 *  chars works correctly: after each call to setText, the paragraph
 *  must be backed by exactly one of a TextBytesAtom or a TextCharsAtom,
 *  whichever suits the new text.
 */
@Test
public void testAdvancedSetText() {
    HSLFSlide slideOne = ss.getSlides().get(0);
    List<HSLFTextParagraph> paras = slideOne.getTextParagraphs().get(0);
    HSLFTextParagraph para = paras.get(0);

    // Starting point: the text is held in a bytes atom
    assertNull(lastRecordOfType(para, TextCharsAtom.class));
    assertNotNull(lastRecordOfType(para, TextBytesAtom.class));
    assertEquals("This is a test title", para.getTextRuns().get(0).getRawText());

    // Bytes -> Bytes
    String changeBytesOnly = "New Test Title";
    HSLFTextParagraph.setText(paras, changeBytesOnly);
    para = paras.get(0);
    assertEquals(changeBytesOnly, HSLFTextParagraph.getRawText(paras));
    assertNull(lastRecordOfType(para, TextCharsAtom.class));
    assertNotNull(lastRecordOfType(para, TextBytesAtom.class));

    // Bytes -> Chars
    String changeByteChar = "This is a test title with a 'ġ' g with a dot";
    HSLFTextParagraph.setText(paras, changeByteChar);
    para = paras.get(0);
    assertEquals(changeByteChar, HSLFTextParagraph.getRawText(paras));
    assertNotNull(lastRecordOfType(para, TextCharsAtom.class));
    assertNull(lastRecordOfType(para, TextBytesAtom.class));

    // Chars -> Chars
    String changeCharChar = "This is a test title with a 'Ň' N with a hat";
    HSLFTextParagraph.setText(paras, changeCharChar);
    para = paras.get(0);
    assertEquals(changeCharChar, HSLFTextParagraph.getRawText(paras));
    assertNotNull(lastRecordOfType(para, TextCharsAtom.class));
    assertNull(lastRecordOfType(para, TextBytesAtom.class));
}

/**
 * Looks through the records of a paragraph for instances of the given type.
 *
 * @param para the paragraph whose records are searched
 * @param type the record class to look for
 * @return the last record that is an instance of {@code type},
 *  or null if the paragraph has none
 */
private static <T extends Record> T lastRecordOfType(HSLFTextParagraph para, Class<T> type) {
    T found = null;
    for (Record r : para.getRecords()) {
        if (type.isInstance(r)) {
            found = type.cast(r);
        }
    }
    return found;
}
Also used : TextCharsAtom(org.apache.poi.hslf.record.TextCharsAtom) Record(org.apache.poi.hslf.record.Record) TextHeaderAtom(org.apache.poi.hslf.record.TextHeaderAtom) TextBytesAtom(org.apache.poi.hslf.record.TextBytesAtom) Test(org.junit.Test)

Aggregations

TextBytesAtom (org.apache.poi.hslf.record.TextBytesAtom)9 TextCharsAtom (org.apache.poi.hslf.record.TextCharsAtom)9 Record (org.apache.poi.hslf.record.Record)7 StyleTextPropAtom (org.apache.poi.hslf.record.StyleTextPropAtom)5 TextHeaderAtom (org.apache.poi.hslf.record.TextHeaderAtom)4 SlideListWithText (org.apache.poi.hslf.record.SlideListWithText)3 HSLFSlideShowImpl (org.apache.poi.hslf.usermodel.HSLFSlideShowImpl)3 EscherTextboxWrapper (org.apache.poi.hslf.record.EscherTextboxWrapper)2 DrawPaint (org.apache.poi.sl.draw.DrawPaint)2 SolidPaint (org.apache.poi.sl.usermodel.PaintStyle.SolidPaint)2 ArrayList (java.util.ArrayList)1 List (java.util.List)1 EscherContainerRecord (org.apache.poi.ddf.EscherContainerRecord)1 EscherRecord (org.apache.poi.ddf.EscherRecord)1 EscherTextboxRecord (org.apache.poi.ddf.EscherTextboxRecord)1 TextPropCollection (org.apache.poi.hslf.model.textproperties.TextPropCollection)1 CString (org.apache.poi.hslf.record.CString)1 Document (org.apache.poi.hslf.record.Document)1 MasterTextPropAtom (org.apache.poi.hslf.record.MasterTextPropAtom)1 PPDrawing (org.apache.poi.hslf.record.PPDrawing)1