Search in sources:

Example 1 with StreamingParser

use of de.lmu.ifi.dbs.elki.datasource.parser.StreamingParser in project elki by elki-project.

The method loadData of the class ConcatenateFilesDatabaseConnection.

/**
 * Loads every configured file and concatenates the parsed objects into a
 * single bundle.
 *
 * Column 0 of the result is a string column holding the path of the file an
 * object was read from; column {@code i} of a file is stored in column
 * {@code i + 1} of the result. Once all files are read, the parser is cleaned
 * up and the bundle filters are applied.
 *
 * @return the concatenated (and filtered) objects of all files
 * @throws AbortException if a file cannot be read, if the column types of a
 *         file are not assignable to the columns already collected, or if a
 *         stream delivers an object before announcing its meta data
 */
@Override
public MultipleObjectsBundle loadData() {
    MultipleObjectsBundle objects = new MultipleObjectsBundle();
    // First column: the name of the file each object originates from.
    objects.appendColumn(TypeUtil.STRING, new ArrayList<>());
    for (File file : files) {
        String filestr = file.getPath();
        try (InputStream inputStream = FileUtil.tryGzipInput(new BufferedInputStream(new FileInputStream(file)))) {
            final BundleStreamSource source;
            if (parser instanceof StreamingParser) {
                // Streaming parsers can be consumed event by event directly.
                final StreamingParser streamParser = (StreamingParser) parser;
                streamParser.initStream(inputStream);
                source = streamParser;
            } else {
                // Otherwise parse the whole file first, then replay it as a stream.
                MultipleObjectsBundle parsingResult = parser.parse(inputStream);
                source = parsingResult.asStream();
            }
            // Meta data of the current file; unknown until the first META_CHANGED.
            BundleMeta meta = null;
            loop: for (Event e = source.nextEvent(); ; e = source.nextEvent()) {
                switch(e) {
                    case END_OF_STREAM:
                        break loop;
                    case META_CHANGED:
                        meta = source.getMeta();
                        for (int i = 0; i < meta.size(); i++) {
                            // Offset by one because of the leading file name column.
                            if (i + 1 >= objects.metaLength()) {
                                objects.appendColumn(meta.get(i), new ArrayList<>());
                            } else {
                                // Ensure compatibility:
                                if (!objects.meta(i + 1).isAssignableFromType(meta.get(i))) {
                                    throw new AbortException("Incompatible files loaded. Cannot concatenate with unaligned columns, please preprocess manually.");
                                }
                            }
                        }
                        break;
                    case NEXT_OBJECT:
                        // Fail with a clear message instead of a bare
                        // NullPointerException on streams that never sent meta data.
                        if (meta == null) {
                            throw new AbortException("Loading file " + filestr + " failed: stream produced an object before any meta data.");
                        }
                        // NOTE(review): the row is sized by the columns collected so far;
                        // if this file has fewer columns, the remaining slots stay null
                        // - confirm this is intended.
                        Object[] o = new Object[objects.metaLength()];
                        o[0] = filestr;
                        for (int i = 0; i < meta.size(); i++) {
                            o[i + 1] = source.data(i);
                        }
                        objects.appendSimple(o);
                        break;
                }
            }
        } catch (IOException e) {
            throw new AbortException("Loading file " + filestr + " failed: " + e.toString(), e);
        }
    }
    parser.cleanup();
    // Invoke filters
    if (LOG.isDebugging()) {
        LOG.debugFine("Invoking filters.");
    }
    return invokeBundleFilters(objects);
}
Also used : StreamingParser(de.lmu.ifi.dbs.elki.datasource.parser.StreamingParser) BundleMeta(de.lmu.ifi.dbs.elki.datasource.bundle.BundleMeta) BufferedInputStream(java.io.BufferedInputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) ArrayList(java.util.ArrayList) IOException(java.io.IOException) FileInputStream(java.io.FileInputStream) BufferedInputStream(java.io.BufferedInputStream) Event(de.lmu.ifi.dbs.elki.datasource.bundle.BundleStreamSource.Event) File(java.io.File) BundleStreamSource(de.lmu.ifi.dbs.elki.datasource.bundle.BundleStreamSource) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 2 with StreamingParser

use of de.lmu.ifi.dbs.elki.datasource.parser.StreamingParser in project elki by elki-project.

The method loadData of the class InputStreamDatabaseConnection.

/**
 * Reads the input stream with the configured parser and applies the filters.
 *
 * A non-streaming parser parses the complete input first, and the bundle
 * filters are applied to the result afterwards. A {@link StreamingParser} is
 * handed to the stream filters directly, and the filtered stream is collected
 * into a bundle. Timing statistics are logged when statistics logging is on.
 *
 * @return the parsed and filtered objects
 */
@Override
public MultipleObjectsBundle loadData() {
    if (LOG.isDebugging()) {
        LOG.debugFine("Invoking parsers.");
    }
    if (!(parser instanceof StreamingParser)) {
        // Non-streaming parsers: parse everything, then run the bundle filters.
        Duration parseTime = null;
        if (LOG.isStatistics()) {
            parseTime = LOG.newDuration(this.getClass().getName() + ".parse").begin();
        }
        final MultipleObjectsBundle parsed = parser.parse(in);
        parser.cleanup();
        if (parseTime != null) {
            LOG.statistics(parseTime.end());
        }
        if (LOG.isDebugging()) {
            LOG.debugFine("Invoking filters.");
        }
        Duration filterTime = null;
        if (LOG.isStatistics()) {
            filterTime = LOG.newDuration(this.getClass().getName() + ".filter").begin();
        }
        final MultipleObjectsBundle filtered = invokeBundleFilters(parsed);
        if (filterTime != null) {
            LOG.statistics(filterTime.end());
        }
        return filtered;
    }
    // Streaming parsers feed the stream filters directly.
    final StreamingParser streaming = (StreamingParser) parser;
    streaming.initStream(in);
    if (LOG.isDebugging()) {
        LOG.debugFine("Parsing as stream.");
    }
    Duration loadTime = null;
    if (LOG.isStatistics()) {
        loadTime = LOG.newDuration(this.getClass().getName() + ".load").begin();
    }
    final MultipleObjectsBundle result = invokeStreamFilters(streaming).asMultipleObjectsBundle();
    parser.cleanup();
    if (loadTime != null) {
        LOG.statistics(loadTime.end());
    }
    return result;
}
Also used : StreamingParser(de.lmu.ifi.dbs.elki.datasource.parser.StreamingParser) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) Duration(de.lmu.ifi.dbs.elki.logging.statistics.Duration)

Aggregations

MultipleObjectsBundle (de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle)2 StreamingParser (de.lmu.ifi.dbs.elki.datasource.parser.StreamingParser)2 BundleMeta (de.lmu.ifi.dbs.elki.datasource.bundle.BundleMeta)1 BundleStreamSource (de.lmu.ifi.dbs.elki.datasource.bundle.BundleStreamSource)1 Event (de.lmu.ifi.dbs.elki.datasource.bundle.BundleStreamSource.Event)1 Duration (de.lmu.ifi.dbs.elki.logging.statistics.Duration)1 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)1 BufferedInputStream (java.io.BufferedInputStream)1 File (java.io.File)1 FileInputStream (java.io.FileInputStream)1 IOException (java.io.IOException)1 InputStream (java.io.InputStream)1 ArrayList (java.util.ArrayList)1