Search in sources :

Example 1 with BundleStreamSource

use of de.lmu.ifi.dbs.elki.datasource.bundle.BundleStreamSource in project elki by elki-project.

In the class ConcatenateFilesDatabaseConnection, method loadData:

/**
 * Loads every configured input file and concatenates the parsed objects into
 * a single bundle.
 * <p>
 * Column 0 of the result holds the path of the file an object was read from;
 * the columns reported by the parser follow at offset 1. Columns are created
 * on demand from the first meta data that mentions them; meta data seen later
 * must be assignable to the column type already registered at that position.
 * <p>
 * NOTE(review): a file that introduces additional columns after objects were
 * already appended gets a fresh, empty column here - confirm that
 * MultipleObjectsBundle tolerates columns of different length.
 *
 * @return the concatenated objects, after being passed through
 *         {@code invokeBundleFilters}
 * @throws AbortException if a file cannot be read, if a column type is not
 *         assignable to the column registered earlier, or if a stream delivers
 *         an object before any meta data
 */
@Override
public MultipleObjectsBundle loadData() {
    MultipleObjectsBundle objects = new MultipleObjectsBundle();
    // Column 0: path of the source file of each object.
    objects.appendColumn(TypeUtil.STRING, new ArrayList<>());
    for (File file : files) {
        String filestr = file.getPath();
        // The raw file stream is a resource of its own, so it is closed even when
        // wrapping it fails. Closing it a second time via the wrapper is harmless.
        try (InputStream fileStream = new FileInputStream(file);
            InputStream inputStream = FileUtil.tryGzipInput(new BufferedInputStream(fileStream))) {
            final BundleStreamSource source;
            if (parser instanceof StreamingParser) {
                // Streaming parsers are consumed directly as event source.
                final StreamingParser streamParser = (StreamingParser) parser;
                streamParser.initStream(inputStream);
                source = streamParser;
            } else {
                // Other parsers produce a complete bundle, replayed as a stream.
                MultipleObjectsBundle parsingResult = parser.parse(inputStream);
                source = parsingResult.asStream();
            }
            // Meta data of the current file; unset until the first META_CHANGED.
            BundleMeta meta = null;
            loop: for (Event e = source.nextEvent(); ; e = source.nextEvent()) {
                switch(e) {
                    case END_OF_STREAM:
                        break loop;
                    case META_CHANGED:
                        meta = source.getMeta();
                        for (int i = 0; i < meta.size(); i++) {
                            // Parser column i is stored in result column i + 1.
                            if (i + 1 >= objects.metaLength()) {
                                objects.appendColumn(meta.get(i), new ArrayList<>());
                            } else {
                                // Ensure compatibility:
                                if (!objects.meta(i + 1).isAssignableFromType(meta.get(i))) {
                                    throw new AbortException("Incompatible files loaded. Cannot concatenate with unaligned columns, please preprocess manually.");
                                }
                            }
                        }
                        break;
                    case NEXT_OBJECT:
                        if (meta == null) {
                            // Invalid stream: report it instead of failing with a
                            // NullPointerException below.
                            throw new AbortException("Loading file " + filestr + " failed: object received before any meta data.");
                        }
                        // Sized to the result bundle; columns this file does not
                        // provide remain null.
                        Object[] o = new Object[objects.metaLength()];
                        o[0] = filestr;
                        for (int i = 0; i < meta.size(); i++) {
                            o[i + 1] = source.data(i);
                        }
                        objects.appendSimple(o);
                        break;
                    default:
                        // Any other event carries nothing to concatenate.
                        break;
                }
            }
        } catch (IOException e) {
            throw new AbortException("Loading file " + filestr + " failed: " + e.toString(), e);
        }
    }
    parser.cleanup();
    // Invoke filters
    if (LOG.isDebugging()) {
        LOG.debugFine("Invoking filters.");
    }
    return invokeBundleFilters(objects);
}
Also used : StreamingParser(de.lmu.ifi.dbs.elki.datasource.parser.StreamingParser) BundleMeta(de.lmu.ifi.dbs.elki.datasource.bundle.BundleMeta) BufferedInputStream(java.io.BufferedInputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) ArrayList(java.util.ArrayList) IOException(java.io.IOException) FileInputStream(java.io.FileInputStream) BufferedInputStream(java.io.BufferedInputStream) Event(de.lmu.ifi.dbs.elki.datasource.bundle.BundleStreamSource.Event) File(java.io.File) BundleStreamSource(de.lmu.ifi.dbs.elki.datasource.bundle.BundleStreamSource) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 2 with BundleStreamSource

use of de.lmu.ifi.dbs.elki.datasource.bundle.BundleStreamSource in project elki by elki-project.

In the class AbstractDatabaseConnection, method invokeBundleFilters:

/**
 * Runs all configured filters over the given bundle, in order.
 * <p>
 * The data is kept either as a materialized bundle or as a stream, whichever
 * the filter applied last produced. It is converted to the other form only
 * when the next filter requires it, and once more at the end if the last
 * filter left a stream behind.
 *
 * @param bundle the objects to process
 * @return processed objects; the input itself if no filters are configured
 */
protected MultipleObjectsBundle invokeBundleFilters(MultipleObjectsBundle bundle) {
    if (filters == null) {
        return bundle;
    }
    // Invariant after every step: at most one of the two forms is non-null.
    MultipleObjectsBundle materialized = bundle;
    BundleStreamSource streamed = null;
    for (ObjectFilter filter : filters) {
        if (!(filter instanceof StreamFilter)) {
            // Bundle filter: materialize a pending stream first.
            if (materialized == null) {
                materialized = streamed.asMultipleObjectsBundle();
            }
            materialized = filter.filter(materialized);
            streamed = null;
            continue;
        }
        // Stream filter: chain onto the current stream, or open one on the bundle.
        if (streamed == null) {
            streamed = materialized.asStream();
        }
        streamed = ((StreamFilter) filter).init(streamed);
        materialized = null;
    }
    if (materialized == null) {
        // The last filter was a stream filter; collect its output.
        materialized = streamed.asMultipleObjectsBundle();
    }
    return materialized;
}
Also used : ObjectFilter(de.lmu.ifi.dbs.elki.datasource.filter.ObjectFilter) StreamFilter(de.lmu.ifi.dbs.elki.datasource.filter.StreamFilter) BundleStreamSource(de.lmu.ifi.dbs.elki.datasource.bundle.BundleStreamSource)

Aggregations

BundleStreamSource (de.lmu.ifi.dbs.elki.datasource.bundle.BundleStreamSource)2 BundleMeta (de.lmu.ifi.dbs.elki.datasource.bundle.BundleMeta)1 Event (de.lmu.ifi.dbs.elki.datasource.bundle.BundleStreamSource.Event)1 MultipleObjectsBundle (de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle)1 ObjectFilter (de.lmu.ifi.dbs.elki.datasource.filter.ObjectFilter)1 StreamFilter (de.lmu.ifi.dbs.elki.datasource.filter.StreamFilter)1 StreamingParser (de.lmu.ifi.dbs.elki.datasource.parser.StreamingParser)1 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)1 BufferedInputStream (java.io.BufferedInputStream)1 File (java.io.File)1 FileInputStream (java.io.FileInputStream)1 IOException (java.io.IOException)1 InputStream (java.io.InputStream)1 ArrayList (java.util.ArrayList)1