Search in sources :

Example 21 with Range

use of org.apache.poi.hwpf.usermodel.Range in project poi by apache.

the class TestBookmarksTables method testReplaceTextBefore.

/**
 * Replaces the text covered by the first bookmark of {@code pageref.doc},
 * passing {@code false} as the second argument of {@code replaceText}, and
 * then checks the name and the start/end offsets the bookmark reports.
 */
public void testReplaceTextBefore() {
    HWPFDocument document = HWPFTestDataSamples.openSampleFile("pageref.doc");

    Bookmark beforeEdit = document.getBookmarks().getBookmark(0);
    Range bookmarked = new Range(beforeEdit.getStart(), beforeEdit.getEnd(), document);
    bookmarked.replaceText("1destin2ation3", false);

    // Fetch the bookmark again so the assertions run against a fresh lookup
    // made after the replacement.
    Bookmark afterEdit = document.getBookmarks().getBookmark(0);
    assertEquals("userref", afterEdit.getName());
    assertEquals(27, afterEdit.getStart());
    assertEquals(41, afterEdit.getEnd());
}
Also used : HWPFDocument(org.apache.poi.hwpf.HWPFDocument) Bookmark(org.apache.poi.hwpf.usermodel.Bookmark) Range(org.apache.poi.hwpf.usermodel.Range)

Example 22 with Range

use of org.apache.poi.hwpf.usermodel.Range in project poi by apache.

the class TestSprms method testInnerTable.

/**
 * Walks every paragraph of the document and checks its table nesting based
 * on the first character of its lower-cased text:
 * <ul>
 *   <li>{@code '1'}..{@code '3'}: must be in a table at level 2;</li>
 *   <li>{@code 'a'}..{@code 'y'} ({@code 'z'} is excluded by the bound):
 *       must be in a table at level 1.</li>
 * </ul>
 * Paragraphs starting with any other character are not checked.
 *
 * @param hwpfDocument the document whose paragraphs are inspected
 */
private void testInnerTable(HWPFDocument hwpfDocument) {
    Range wholeDocument = hwpfDocument.getRange();
    for (int index = 0; index < wholeDocument.numParagraphs(); index++) {
        Paragraph current = wholeDocument.getParagraph(index);
        String lowered = current.text().toLowerCase(Locale.ROOT);
        char lead = lowered.charAt(0);

        boolean startsWithInnerMarker = lead >= '1' && lead < '4';
        if (startsWithInnerMarker) {
            assertTrue(current.isInTable());
            assertEquals(2, current.getTableLevel());
        }

        boolean startsWithOuterMarker = lead >= 'a' && lead < 'z';
        if (startsWithOuterMarker) {
            assertTrue(current.isInTable());
            assertEquals(1, current.getTableLevel());
        }
    }
}
Also used : Range(org.apache.poi.hwpf.usermodel.Range) Paragraph(org.apache.poi.hwpf.usermodel.Paragraph)

Example 23 with Range

use of org.apache.poi.hwpf.usermodel.Range in project poi by apache.

the class TestHWPFRangeParts method testContentsUnicode.

/**
 * Checks the text returned by three ranges of {@code docUnicode}: the main
 * range, the header story range, and the overall range.
 */
public void testContentsUnicode() {
    // Main range: both pages separated by the page break and a "\r".
    Range mainRange = docUnicode.getRange();
    assertEquals(
            u_page_1 + page_break + "\r" + u_page_2,
            mainRange.text());

    // Header story range: header and footer parts with their markers.
    Range headerStoryRange = docUnicode.getHeaderStoryRange();
    assertEquals(
            headerDef + u_header + footerDef + u_footer + endHeaderFooter,
            headerStoryRange.text());

    // Overall range: main text, then the header/footer parts, then a final "\r".
    Range overallRange = docUnicode.getOverallRange();
    assertEquals(
            u_page_1 + page_break + "\r" + u_page_2 + headerDef + u_header + footerDef + u_footer + endHeaderFooter + "\r",
            overallRange.text());
}
Also used : Range(org.apache.poi.hwpf.usermodel.Range)

Example 24 with Range

use of org.apache.poi.hwpf.usermodel.Range in project tika by apache.

the class WordExtractor method parse.

/**
 * Parses a Word document rooted at {@code root} and writes its content to
 * {@code xhtml} in this order: headers, main paragraphs, text boxes (only
 * when shape-based content is enabled in the parser config), footnotes,
 * comments, endnotes, footers, pictures not yet output, and finally
 * directory entries under "ObjectPool" whose names start with "_".
 *
 * <p>If the document is reported as an old Word format, parsing is handed to
 * {@code parseWord6} and this method returns.
 *
 * @param root  directory node the document is opened from
 * @param xhtml handler receiving the extracted content
 * @throws EncryptedDocumentException if POI reports the document as encrypted
 */
protected void parse(DirectoryNode root, XHTMLContentHandler xhtml) throws IOException, SAXException, TikaException {
    HWPFDocument document;
    try {
        document = new HWPFDocument(root);
    } catch (org.apache.poi.EncryptedDocumentException e) {
        // Re-throw as the Tika exception type, keeping the POI one as the cause.
        throw new EncryptedDocumentException(e);
    } catch (OldWordFileFormatException e) {
        parseWord6(root, xhtml);
        return;
    }

    extractSavedByMetadata(document);
    org.apache.poi.hwpf.extractor.WordExtractor wordExtractor = new org.apache.poi.hwpf.extractor.WordExtractor(document);
    HeaderStories headerFooter = new HeaderStories(document);

    // Grab the list of pictures. As far as we can tell, the pictures
    // should be in order, and may be directly placed or referenced
    // from an anchor.
    PicturesTable pictureTable = document.getPicturesTable();
    PicturesSource pictures = new PicturesSource(document);

    // Headers first, if present.
    Range[] headers = new Range[] {
        headerFooter.getFirstHeaderSubrange(),
        headerFooter.getEvenHeaderSubrange(),
        headerFooter.getOddHeaderSubrange()
    };
    handleHeaderFooter(headers, "header", document, pictures, pictureTable, xhtml);

    // Main paragraph text. The value returned by handleParagraph is added to
    // the index on top of the normal step of one.
    Range body = document.getRange();
    ListManager listManager = new ListManager(document);
    int index = 0;
    while (index < body.numParagraphs()) {
        Paragraph paragraph = body.getParagraph(index);
        int extra = handleParagraph(paragraph, 0, body, document, FieldsDocumentPart.MAIN, pictures, pictureTable, listManager, xhtml);
        index += extra + 1;
    }

    // Text boxes are only emitted when shape-based content is enabled.
    if (officeParserConfig.getIncludeShapeBasedContent()) {
        for (String text : wordExtractor.getMainTextboxText()) {
            xhtml.element("p", text);
        }
    }
    for (String text : wordExtractor.getFootnoteText()) {
        xhtml.element("p", text);
    }
    for (String text : wordExtractor.getCommentsText()) {
        xhtml.element("p", text);
    }
    for (String text : wordExtractor.getEndnoteText()) {
        xhtml.element("p", text);
    }

    // Footers, if present.
    Range[] footers = new Range[] {
        headerFooter.getFirstFooterSubrange(),
        headerFooter.getEvenFooterSubrange(),
        headerFooter.getOddFooterSubrange()
    };
    handleHeaderFooter(footers, "footer", document, pictures, pictureTable, xhtml);

    // Handle any pictures that we haven't output yet.
    Picture unclaimed = pictures.nextUnclaimed();
    while (unclaimed != null) {
        handlePictureCharacterRun(null, unclaimed, pictures, xhtml);
        unclaimed = pictures.nextUnclaimed();
    }

    // Handle any embedded office documents.
    try {
        DirectoryEntry objectPool = (DirectoryEntry) root.getEntry("ObjectPool");
        for (Entry candidate : objectPool) {
            if (!candidate.getName().startsWith("_") || !(candidate instanceof DirectoryEntry)) {
                continue;
            }
            handleEmbeddedOfficeDoc((DirectoryEntry) candidate, xhtml);
        }
    } catch (FileNotFoundException ignored) {
        // Deliberately ignored. NOTE(review): presumably raised when the
        // document has no "ObjectPool" entry - confirm against POI.
    }
}
Also used : EncryptedDocumentException(org.apache.tika.exception.EncryptedDocumentException) FileNotFoundException(java.io.FileNotFoundException) PicturesTable(org.apache.poi.hwpf.model.PicturesTable) Range(org.apache.poi.hwpf.usermodel.Range) DirectoryEntry(org.apache.poi.poifs.filesystem.DirectoryEntry) Paragraph(org.apache.poi.hwpf.usermodel.Paragraph) HWPFDocument(org.apache.poi.hwpf.HWPFDocument) HeaderStories(org.apache.poi.hwpf.usermodel.HeaderStories) Entry(org.apache.poi.poifs.filesystem.Entry) DirectoryEntry(org.apache.poi.poifs.filesystem.DirectoryEntry) SavedByEntry(org.apache.poi.hwpf.model.SavedByEntry) Picture(org.apache.poi.hwpf.usermodel.Picture) OldWordFileFormatException(org.apache.poi.hwpf.OldWordFileFormatException)

Aggregations

Range (org.apache.poi.hwpf.usermodel.Range): 24, HWPFDocument (org.apache.poi.hwpf.HWPFDocument): 9, Paragraph (org.apache.poi.hwpf.usermodel.Paragraph): 8, Bookmark (org.apache.poi.hwpf.usermodel.Bookmark): 4, CharacterRun (org.apache.poi.hwpf.usermodel.CharacterRun): 4, Picture (org.apache.poi.hwpf.usermodel.Picture): 3, FileInputStream (java.io.FileInputStream): 2, IOException (java.io.IOException): 2, ArrayList (java.util.ArrayList): 2, FileNotFoundException (java.io.FileNotFoundException): 1, FileOutputStream (java.io.FileOutputStream): 1, InputStream (java.io.InputStream): 1, LinkedList (java.util.LinkedList): 1, List (java.util.List): 1, Matcher (java.util.regex.Matcher): 1, SummaryInformation (org.apache.poi.hpsf.SummaryInformation): 1, OLEShape (org.apache.poi.hslf.model.OLEShape): 1, HSLFObjectData (org.apache.poi.hslf.usermodel.HSLFObjectData): 1, HSLFPictureData (org.apache.poi.hslf.usermodel.HSLFPictureData): 1, HSLFPictureShape (org.apache.poi.hslf.usermodel.HSLFPictureShape): 1