Search in sources :

Example 51 with XMLEvent

use of javax.xml.stream.events.XMLEvent in project sirix by sirixdb.

The class StAXSerializerTest, method testStAXSerializer.

/**
 * Renders the test resource twice, once through the XML serializer and once by replaying the
 * event stream of a {@code StAXSerializer}, and asserts that both renderings are equal.
 * Afterwards walks a second {@code StAXSerializer} event by event and checks the result of
 * {@code getElementText()} on the start elements, calling {@code checkForException} for the
 * remaining events.
 */
@Test
public void testStAXSerializer() {
    try {
        // Reference rendering produced by the XML serializer.
        final ByteArrayOutputStream expected = new ByteArrayOutputStream();
        new XMLSerializerBuilder(holder.getResourceManager(), expected).emitXMLDeclaration().build().call();

        final XdmNodeReadTrx rtx = holder.getResourceManager().beginNodeReadTrx();
        StAXSerializer serializer = new StAXSerializer(rtx);

        // Rendering rebuilt by hand from the StAX events.
        final StringBuilder actual = new StringBuilder();
        // Set when a start tag was already written as "<x/>", so the matching end tag is skipped.
        boolean skipNextEndTag = false;
        while (serializer.hasNext()) {
            final XMLEvent event = serializer.nextEvent();
            final int type = event.getEventType();
            if (type == XMLStreamConstants.START_DOCUMENT) {
                actual.append("<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>");
            } else if (type == XMLStreamConstants.START_ELEMENT) {
                emitElement(event, actual);
                if (serializer.peek().getEventType() == XMLStreamConstants.END_ELEMENT) {
                    actual.append("/>");
                    skipNextEndTag = true;
                } else {
                    actual.append('>');
                }
            } else if (type == XMLStreamConstants.END_ELEMENT) {
                if (skipNextEndTag) {
                    skipNextEndTag = false;
                } else {
                    emitQName(true, event, actual);
                    actual.append('>');
                }
            } else if (type == XMLStreamConstants.CHARACTERS) {
                actual.append(((Characters) event).getData());
            }
        }
        assertEquals(expected.toString(), actual.toString());

        // Check getElementText().
        // ========================================================
        holder.getReader().moveToDocumentRoot();
        holder.getReader().moveToFirstChild();
        serializer = new StAXSerializer(holder.getReader());

        // <p:a>
        String elemText = nextElementText(serializer, null);
        assertEquals("oops1foooops2baroops3", elemText);
        // oops1
        checkForException(serializer);
        // <b>
        elemText = nextElementText(serializer, elemText);
        assertEquals("foo", elemText);
        // foo
        checkForException(serializer);
        // <c>
        elemText = nextElementText(serializer, elemText);
        assertEquals("", elemText);
        // </c>
        checkForException(serializer);
        // </b>
        checkForException(serializer);
        // oops2
        checkForException(serializer);
        // <b p:x='y'>
        elemText = nextElementText(serializer, elemText);
        assertEquals("bar", elemText);
        // <c>
        elemText = nextElementText(serializer, elemText);
        assertEquals("", elemText);
        // </c>
        checkForException(serializer);
        // bar
        checkForException(serializer);
        // </b>
        checkForException(serializer);
        // oops3
        checkForException(serializer);
        // </p:a>
        checkForException(serializer);
        rtx.close();
    } catch (final XMLStreamException e) {
        fail("XML error while parsing: " + e.getMessage());
    } catch (final SirixException e) {
        fail("Sirix exception occured: " + e.getMessage());
    } catch (final Exception e) {
        fail("Any exception occured: " + e.getMessage());
    }
}

/**
 * Advances the serializer by one event and returns the element text at the new position.
 *
 * @param serializer
 *          serializer to advance
 * @param fallback
 *          value returned unchanged if the serializer has no further event
 * @return the element text, or {@code fallback} if nothing was left to read
 * @throws XMLStreamException
 *           if reading the element text fails
 */
private static String nextElementText(final StAXSerializer serializer, final String fallback) throws XMLStreamException {
    if (!serializer.hasNext()) {
        return fallback;
    }
    serializer.next();
    return serializer.getElementText();
}
Also used : XMLSerializerBuilder(org.sirix.service.xml.serialize.XMLSerializer.XMLSerializerBuilder) XdmNodeReadTrx(org.sirix.api.XdmNodeReadTrx) XMLStreamException(javax.xml.stream.XMLStreamException) XMLEvent(javax.xml.stream.events.XMLEvent) SirixException(org.sirix.exception.SirixException) ByteArrayOutputStream(java.io.ByteArrayOutputStream) SirixException(org.sirix.exception.SirixException) XMLStreamException(javax.xml.stream.XMLStreamException) Test(org.junit.Test)

Example 52 with XMLEvent

use of javax.xml.stream.events.XMLEvent in project sirix by sirixdb.

The class XMLShredderTest, method testShreddingLargeText.

/**
 * Shreds {@code XML3} into the test resource, concatenates the values of the node reached by two
 * {@code moveToFirstChild()} calls and of all its right siblings, and asserts that the result
 * equals the concatenation of all trimmed, non-empty character events of the source file.
 *
 * <p>All transactions and the resource manager are closed in {@code finally} blocks so that a
 * failing assertion or shredder run does not leave them open.
 *
 * @throws Exception
 *           if shredding, reading or closing fails
 */
@Test
public void testShreddingLargeText() throws Exception {
    final Database database = TestHelper.getDatabase(PATHS.PATH2.getFile());
    final ResourceManager manager = database.getResourceManager(new ResourceManagerConfiguration.Builder(TestHelper.RESOURCE).build());
    final String tnkString;
    try {
        final XdmNodeWriteTrx wtx = manager.beginNodeWriteTrx();
        try {
            final XMLShredder shredder = new XMLShredder.Builder(wtx, XMLShredder.createFileReader(XML3), Insert.ASFIRSTCHILD).commitAfterwards().build();
            shredder.call();
        } finally {
            wtx.close();
        }
        final XdmNodeReadTrx rtx = manager.beginNodeReadTrx();
        try {
            assertTrue(rtx.moveToFirstChild().hasMoved());
            assertTrue(rtx.moveToFirstChild().hasMoved());
            // Collect the value of the current node and of every right sibling.
            final StringBuilder tnkBuilder = new StringBuilder();
            do {
                tnkBuilder.append(rtx.getValue());
            } while (rtx.moveToRightSibling().hasMoved());
            tnkString = tnkBuilder.toString();
        } finally {
            rtx.close();
        }
    } finally {
        manager.close();
    }
    // Build the expected text straight from the source file.
    final XMLEventReader validater = XMLShredder.createFileReader(XML3);
    final StringBuilder xmlBuilder = new StringBuilder();
    while (validater.hasNext()) {
        final XMLEvent event = validater.nextEvent();
        if (event.getEventType() == XMLStreamConstants.CHARACTERS) {
            final String text = event.asCharacters().getData().trim();
            if (!text.isEmpty()) {
                xmlBuilder.append(text);
            }
        }
    }
    assertEquals(xmlBuilder.toString(), tnkString);
}
Also used : XdmNodeWriteTrx(org.sirix.api.XdmNodeWriteTrx) XdmNodeReadTrx(org.sirix.api.XdmNodeReadTrx) Database(org.sirix.api.Database) XMLEvent(javax.xml.stream.events.XMLEvent) XMLEventReader(javax.xml.stream.XMLEventReader) ResourceManager(org.sirix.api.ResourceManager) Test(org.junit.Test)

Example 53 with XMLEvent

use of javax.xml.stream.events.XMLEvent in project sirix by sirixdb.

The class XMLRecordReader, method moveToEvent.

/**
 * Move to beginning of record.
 *
 * <p>Consumes events from the reader until the next (peeked) event is accepted by the filter or
 * the reader is exhausted. Every consumed event increments {@code mCountEvents}. While inside a
 * record the consumed events are forwarded to {@code mEventWriter}, and the first
 * non-whitespace text following a start element named {@code mDate} is parsed as the record
 * timestamp and stored in {@code mKey}.
 *
 * <p>A timestamp that cannot be parsed (including text without a {@code T} separator) is logged
 * as a warning and skipped; it does not abort reading.
 *
 * @param paramReader
 *          XML Reader {@link XMLEventReader}.
 * @param paramFilter
 *          XML filter {@link EventFilter}.
 * @param paramIsRecord
 *          determines if the parser is inside a record or outside
 * @return false if event was not found and received end of file
 * @throws XMLStreamException
 *           if a parsing error occurs
 */
private boolean moveToEvent(final XMLEventReader paramReader, final EventFilter paramFilter, final boolean paramIsRecord) throws XMLStreamException {
    boolean isTimestamp = false;
    final DateFormat formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.ENGLISH);
    while (paramReader.hasNext() && !paramFilter.accept(paramReader.peek())) {
        final XMLEvent event = paramReader.nextEvent();
        mCountEvents++;
        if (isTimestamp && event.isCharacters() && !event.asCharacters().isWhiteSpace()) {
            isTimestamp = false;
            try {
                // Parse timestamp: "<date>T<time>" where the last character of <time> is dropped.
                final String text = event.asCharacters().getData();
                final String[] splitted = text.split("T");
                if (splitted.length < 2 || splitted[1].isEmpty()) {
                    // Without this guard the indexing below would throw an unchecked exception
                    // that bypasses the ParseException handling and aborts record reading.
                    throw new ParseException("Timestamp is not of the form <date>T<time>: " + text, 0);
                }
                final String time = splitted[1].substring(0, splitted[1].length() - 1);
                mKey.setTimestamp(formatter.parse(splitted[0] + " " + time));
            } catch (final ParseException e) {
                LOGWRAPPER.warn(e.getMessage(), e);
            }
        }
        if (paramIsRecord) {
            // Parser currently is located somewhere after the start of a record
            // (inside a record).
            mEventWriter.add(event);
            if (event.isStartElement() && mDate.equals(event.asStartElement().getName())) {
                // The next non-whitespace text event carries the timestamp.
                isTimestamp = true;
            }
        }
    }
    return paramReader.hasNext();
}
Also used : SimpleDateFormat(java.text.SimpleDateFormat) DateFormat(java.text.DateFormat) XMLEvent(javax.xml.stream.events.XMLEvent) ParseException(java.text.ParseException) SimpleDateFormat(java.text.SimpleDateFormat)

Example 54 with XMLEvent

use of javax.xml.stream.events.XMLEvent in project sirix by sirixdb.

The class XMLRecordReader, method nextKeyValue.

/**
 * Reads the next record into {@code mValue}.
 *
 * <p>If the reader is positioned on a start element named {@code mPage}, the events up to the
 * next record start are cached in {@code mPageEvents}. The cached events are written in front of
 * the record, then the record itself, then an end element for {@code mPage}: the one from the
 * input if it follows directly, otherwise a generated one. XML stream errors are logged and
 * reported as "no record".
 *
 * @return {@code true} if a complete record was written to {@code mValue}, {@code false}
 *         otherwise
 */
@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
    mValue.clear();
    boolean recordRead = false;
    try {
        // Skip whitespace.
        skipWhitespace();

        // A new page starts here: remember everything up to the first record of the page.
        if (mReader.hasNext()) {
            final XMLEvent head = mReader.peek();
            if (head.isStartElement() && head.asStartElement().getName().equals(mPage)) {
                mPageEvents = new ArrayList<>();
                while (mReader.hasNext() && !mBeginFilter.accept(mReader.peek())) {
                    mPageEvents.add(mReader.nextEvent());
                }
            }
        }

        // Replay the cached page events in front of the record.
        for (final XMLEvent pageEvent : mPageEvents) {
            mEventWriter.add(pageEvent);
        }

        // Move to the start of the record, then copy events up to its end.
        if (moveToEvent(mReader, mBeginFilter, false) && moveToEvent(mReader, mEndFilter, true)) {
            // Add last element to the writer.
            mEventWriter.add(mReader.nextEvent());
            skipWhitespace();

            final boolean pageEndsHere = mReader.hasNext() && mReader.peek().isEndElement() && mReader.peek().asEndElement().getName().equals(mPage);
            final XMLEvent pageEnd = pageEndsHere ? mReader.nextEvent() : mEventFactory.createEndElement(mPage, null);
            mEventWriter.add(pageEnd);

            recordRead = true;
            mWriter.flush();
            mValue.set(mWriter.toString());
            // Reset the buffer for the next record.
            mWriter.getBuffer().setLength(0);
        }
    } catch (final XMLStreamException e) {
        LOGWRAPPER.error(e.getMessage(), e);
    }
    return recordRead;
}
Also used : XMLStreamException(javax.xml.stream.XMLStreamException) XMLEvent(javax.xml.stream.events.XMLEvent)

Example 55 with XMLEvent

use of javax.xml.stream.events.XMLEvent in project sirix by sirixdb.

The class XMLRecordReader, method initialize.

/**
 * Prepares this reader for the given split: sets up the key/value holders and the XML event
 * writer, opens the split's file (through a compression codec if one matches the path), creates
 * the XML event reader, builds the record start/end filters from the configuration and finally
 * skips forward to the first start element named {@code mPage}.
 *
 * <p>Configuration keys read: {@code record_element_name} (mandatory), {@code namespace_prefix},
 * {@code namespace_URI}, {@code timestamp} and {@code page}.
 *
 * @param paramGenericSplit
 *          the split to read; cast to {@link FileSplit}
 * @param paramContext
 *          task context providing the configuration
 * @throws IOException
 *           if the file cannot be opened or the XML event reader/writer cannot be created
 * @throws IllegalStateException
 *           if {@code record_element_name} is not configured
 */
@Override
public void initialize(final InputSplit paramGenericSplit, final TaskAttemptContext paramContext) throws IOException {
    final FileSplit split = (FileSplit) paramGenericSplit;
    mConf = paramContext.getConfiguration();

    // Validate the mandatory setting before any stream is opened, so nothing leaks on failure.
    final String recordIdentifier = mConf.get("record_element_name");
    if (recordIdentifier == null) {
        throw new IllegalStateException("Record identifier must be specified (record_element_name)!");
    }
    final String configuredNsPrefix = mConf.get("namespace_prefix");
    final String recordNsPrefix = configuredNsPrefix == null ? "" : configuredNsPrefix;
    final String configuredNsURI = mConf.get("namespace_URI");
    final String recordNsURI = configuredNsURI == null ? "" : configuredNsURI;

    mEventFactory = XMLEventFactory.newInstance();
    mPageEvents = new ArrayList<>();
    mStart = split.getStart();
    mEnd = mStart + split.getLength();
    mValue = new Text();
    mKey = new DateWritable();
    mWriter = new StringWriter();
    try {
        mEventWriter = XMLOutputFactory.newInstance().createXMLEventWriter(mWriter);
    } catch (final XMLStreamException | FactoryConfigurationError e) {
        // Continuing without a writer would only fail later with a NullPointerException.
        throw new IOException("Could not create XML event writer: " + e.getMessage(), e);
    }
    final Path file = split.getPath();
    // Open the file and seek to the start of the split.
    final FileSystem fileSys = file.getFileSystem(mConf);
    final FSDataInputStream fileIn = fileSys.open(split.getPath());
    fileIn.seek(mStart);
    final CompressionCodecFactory comprCodecs = new CompressionCodecFactory(mConf);
    final CompressionCodec codec = comprCodecs.getCodec(file);
    InputStream input = fileIn;
    if (codec != null) {
        input = codec.createInputStream(fileIn);
        // NOTE(review): presumably the split end offset is meaningless for a compressed
        // stream, hence "read to the end" — confirm.
        mEnd = Long.MAX_VALUE;
    }
    input = new BufferedInputStream(input);
    // NOTE(review): the factory is used with default settings; if the input can be untrusted,
    // consider disabling DTD/external entity support.
    final XMLInputFactory xmlif = XMLInputFactory.newInstance();
    try {
        mReader = xmlif.createXMLEventReader(input);
    } catch (final XMLStreamException e) {
        // Without a reader the positioning loop below would fail with a NullPointerException.
        try {
            input.close();
        } catch (final IOException closeFailure) {
            e.addSuppressed(closeFailure);
        }
        throw new IOException("Could not create XML event reader for " + file + ": " + e.getMessage(), e);
    }
    // Create start/end record filters.
    if (recordNsPrefix.isEmpty() && recordNsURI.isEmpty()) {
        mRecordElem = mEventFactory.createStartElement(new QName(recordIdentifier), null, null);
    } else {
        mRecordElem = mEventFactory.createStartElement(new QName(recordNsURI, recordIdentifier, recordNsPrefix), null, null);
    }
    // Both filters match on local part and prefix only; the namespace URI is not compared.
    mBeginFilter = new EventFilter() {

        @Override
        public boolean accept(final XMLEvent paramEvent) {
            return paramEvent.isStartElement() && paramEvent.asStartElement().getName().getLocalPart().equals(mRecordElem.getName().getLocalPart()) && paramEvent.asStartElement().getName().getPrefix().equals(mRecordElem.getName().getPrefix());
        }
    };
    mEndFilter = new EventFilter() {

        @Override
        public boolean accept(final XMLEvent paramEvent) {
            return paramEvent.isEndElement() && paramEvent.asEndElement().getName().getLocalPart().equals(mRecordElem.getName().getLocalPart()) && paramEvent.asEndElement().getName().getPrefix().equals(mRecordElem.getName().getPrefix());
        }
    };
    mDate = new QName(recordNsURI, mConf.get("timestamp"), recordNsPrefix);
    mPage = new QName(recordNsURI, mConf.get("page"), recordNsPrefix);
    // Skip everything in front of the first page start element.
    try {
        while (mReader.hasNext() && !(mReader.peek().isStartElement() && mReader.peek().asStartElement().getName().equals(mPage))) {
            mReader.next();
        }
    } catch (final XMLStreamException e) {
        LOGWRAPPER.error(e.getMessage(), e);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) BufferedInputStream(java.io.BufferedInputStream) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) InputStream(java.io.InputStream) QName(javax.xml.namespace.QName) Text(org.apache.hadoop.io.Text) FileSplit(org.apache.hadoop.mapreduce.lib.input.FileSplit) EventFilter(javax.xml.stream.EventFilter) StringWriter(java.io.StringWriter) XMLStreamException(javax.xml.stream.XMLStreamException) CompressionCodecFactory(org.apache.hadoop.io.compress.CompressionCodecFactory) BufferedInputStream(java.io.BufferedInputStream) FileSystem(org.apache.hadoop.fs.FileSystem) XMLEvent(javax.xml.stream.events.XMLEvent) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec) FactoryConfigurationError(javax.xml.stream.FactoryConfigurationError) XMLInputFactory(javax.xml.stream.XMLInputFactory)

Aggregations

XMLEvent (javax.xml.stream.events.XMLEvent)242 XMLEventReader (javax.xml.stream.XMLEventReader)111 XMLStreamException (javax.xml.stream.XMLStreamException)91 StartElement (javax.xml.stream.events.StartElement)86 XMLInputFactory (javax.xml.stream.XMLInputFactory)63 QName (javax.xml.namespace.QName)47 IOException (java.io.IOException)40 Attribute (javax.xml.stream.events.Attribute)40 EndElement (javax.xml.stream.events.EndElement)33 ArrayList (java.util.ArrayList)31 InputStream (java.io.InputStream)28 XMLEventWriter (javax.xml.stream.XMLEventWriter)23 Characters (javax.xml.stream.events.Characters)21 Test (org.junit.Test)17 ByteArrayInputStream (java.io.ByteArrayInputStream)16 StringWriter (java.io.StringWriter)16 StringReader (java.io.StringReader)14 XMLOutputFactory (javax.xml.stream.XMLOutputFactory)14 HashMap (java.util.HashMap)13 XMLEventFactory (javax.xml.stream.XMLEventFactory)13