Search in sources:

Example 1 with BundleMeta

use of de.lmu.ifi.dbs.elki.datasource.bundle.BundleMeta in project elki by elki-project.

the class SimpleTransactionParser method getMeta.

/**
 * Returns the bundle meta data of this parser, creating it on first use.
 *
 * The meta data is cached in the {@code meta} field; once created, the same
 * instance is returned by every later call. It holds a single
 * {@link VectorTypeInformation} entry built from the {@link BitVector}
 * factory and serializer, {@code 0} and the current {@code numterms} value
 * (presumably the minimum and maximum dimensionality - verify against
 * {@code VectorTypeInformation}).
 *
 * @return the cached (possibly freshly created) meta data
 */
@Override
public BundleMeta getMeta() {
    // Fast path: meta data was already built by an earlier call.
    if (meta != null) {
        return meta;
    }
    meta = new BundleMeta(1);
    meta.add(new VectorTypeInformation<>(BitVector.FACTORY, BitVector.SHORT_SERIALIZER, 0, numterms));
    return meta;
}
Also used : BundleMeta(de.lmu.ifi.dbs.elki.datasource.bundle.BundleMeta)

Example 2 with BundleMeta

use of de.lmu.ifi.dbs.elki.datasource.bundle.BundleMeta in project elki by elki-project.

the class EvaluatePrecomputedOutlierScores method run.

/**
 * Streams the input file through the parser and hands every object to
 * {@code processRow}, writing to the output file in append mode.
 *
 * The output channel is locked before anything is written. A header is
 * written via {@code writeHeader} only when the channel position is 0;
 * otherwise an informational message about appending is logged.
 *
 * Each meta data change re-detects the column layout: exactly one column
 * assignable to {@code TypeUtil.NUMBER_VECTOR_VARIABLE_LENGTH} and one
 * assignable to {@code TypeUtil.GUESSED_LABEL} are accepted.
 *
 * @throws AbortException on I/O errors, on duplicate or unexpected column
 *         types, or when an object arrives while the label or vector column
 *         is unknown
 */
@Override
public void run() {
    try (FileInputStream rawIn = new FileInputStream(infile);
        InputStream in = new BufferedInputStream(FileUtil.tryGzipInput(rawIn));
        FileOutputStream rawOut = new FileOutputStream(outfile, true);
        PrintStream out = new PrintStream(rawOut);
        FileChannel channel = rawOut.getChannel()) {
        // Attach the parser to the input.
        parser.initStream(in);
        // Take the lock on the output file before writing anything.
        channel.lock();
        if (channel.position() != 0L) {
            LOG.info("Appending to existing output " + outfile);
        } else {
            writeHeader(out);
        }
        // Column indexes; negative means "not seen yet".
        int labelColumn = -1;
        int vectorColumn = -1;
        boolean finished = false;
        while (!finished) {
            switch(parser.nextEvent()) {
                case END_OF_STREAM:
                    finished = true;
                    break;
                case META_CHANGED: {
                    // Layout changed: forget the old columns and rescan.
                    BundleMeta layout = parser.getMeta();
                    labelColumn = -1;
                    vectorColumn = -1;
                    for (int col = 0; col < layout.size(); col++) {
                        SimpleTypeInformation<?> type = layout.get(col);
                        if (TypeUtil.NUMBER_VECTOR_VARIABLE_LENGTH.isAssignableFromType(type)) {
                            if (vectorColumn >= 0) {
                                throw new AbortException("More than one vector column.");
                            }
                            vectorColumn = col;
                            continue;
                        }
                        if (TypeUtil.GUESSED_LABEL.isAssignableFromType(type)) {
                            if (labelColumn >= 0) {
                                throw new AbortException("More than one label column.");
                            }
                            labelColumn = col;
                            continue;
                        }
                        throw new AbortException("Unexpected data column type: " + type);
                    }
                    break;
                }
                case NEXT_OBJECT: {
                    if (labelColumn < 0) {
                        throw new AbortException("No label column available.");
                    }
                    if (vectorColumn < 0) {
                        throw new AbortException("No vector column available.");
                    }
                    // Fetch the vector before the label, as a single call expression would.
                    NumberVector scores = (NumberVector) parser.data(vectorColumn);
                    String label = parser.data(labelColumn).toString();
                    processRow(out, scores, label);
                    break;
                }
                default:
                    // Any other event is ignored.
                    break;
            }
        }
    } catch (IOException e) {
        throw new AbortException("IO error.", e);
    }
}
Also used : BundleMeta(de.lmu.ifi.dbs.elki.datasource.bundle.BundleMeta) FileChannel(java.nio.channels.FileChannel) SimpleTypeInformation(de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 3 with BundleMeta

use of de.lmu.ifi.dbs.elki.datasource.bundle.BundleMeta in project elki by elki-project.

the class ClusteringVectorParser method getMeta.

/**
 * Returns the bundle meta data of this parser, creating it on first use.
 *
 * The meta data is cached in the {@code meta} field. It always contains
 * {@code Clustering.TYPE}; when {@code haslbl} is set,
 * {@code TypeUtil.LABELLIST} is added after it.
 *
 * @return the cached (possibly freshly created) meta data
 */
@Override
public BundleMeta getMeta() {
    // Already built by an earlier call.
    if (meta != null) {
        return meta;
    }
    // One entry for the clustering, plus one for labels if present.
    int columns = 1;
    if (haslbl) {
        columns = 2;
    }
    meta = new BundleMeta(columns);
    meta.add(Clustering.TYPE);
    if (haslbl) {
        meta.add(TypeUtil.LABELLIST);
    }
    return meta;
}
Also used : BundleMeta(de.lmu.ifi.dbs.elki.datasource.bundle.BundleMeta)

Example 4 with BundleMeta

use of de.lmu.ifi.dbs.elki.datasource.bundle.BundleMeta in project elki by elki-project.

the class ByLabelFilter method nextEvent.

/**
 * Advances the wrapped source and returns the next event that passes the
 * label filter.
 *
 * End-of-stream and meta-change events are always forwarded. On a meta
 * change the label column is looked up via
 * {@code FilterUtil.findLabelColumn} unless one is already known. Object
 * events are forwarded only if the label matches the pattern (or does not
 * match it, when {@code inverted} is set); other objects are skipped
 * silently. While no label column is known, every object counts as "not
 * matching".
 *
 * @return the next event to deliver to the consumer
 */
@Override
public Event nextEvent() {
    while (true) {
        Event ev = source.nextEvent();
        switch(ev) {
            case END_OF_STREAM:
                if (lblcol < 0) {
                    LOG.warning("By label filter was used, but never saw a label relation!");
                }
                return Event.END_OF_STREAM;
            case META_CHANGED:
                // Search for the first label column
                if (lblcol < 0) {
                    BundleMeta meta = source.getMeta();
                    lblcol = FilterUtil.findLabelColumn(meta);
                }
                return Event.META_CHANGED;
            case NEXT_OBJECT:
                // Column 0 is a valid label column; only negative values
                // mean "no label column found" (see the checks above).
                if (lblcol >= 0) {
                    Object l = source.data(lblcol);
                    boolean good = false;
                    if (l instanceof LabelList) {
                        // A label list matches if any of its labels matches.
                        final LabelList ll = (LabelList) l;
                        for (int i = 0; i < ll.size(); i++) {
                            matcher.reset(ll.get(i));
                            if (matcher.matches()) {
                                good = true;
                                break;
                            }
                        }
                    } else {
                        matcher.reset(l.toString());
                        good = matcher.matches();
                    }
                    // Apply the inversion uniformly to both label kinds.
                    if (good == inverted) {
                        continue;
                    }
                } else {
                    // No labels known yet.
                    if (!inverted) {
                        continue;
                    }
                }
                return Event.NEXT_OBJECT;
            default:
                LOG.warning("Unknown event: " + ev);
        }
    }
}
Also used : BundleMeta(de.lmu.ifi.dbs.elki.datasource.bundle.BundleMeta) LabelList(de.lmu.ifi.dbs.elki.data.LabelList)

Example 5 with BundleMeta

use of de.lmu.ifi.dbs.elki.datasource.bundle.BundleMeta in project elki by elki-project.

the class ConcatenateFilesDatabaseConnection method loadData.

/**
 * Loads all configured files and concatenates their objects into a single
 * bundle.
 *
 * Column 0 of the result (type {@code TypeUtil.STRING}) holds the path of
 * the file each object came from; column {@code i} of a parsed file is
 * stored in column {@code i + 1}. Result columns are created from the meta
 * data of the first file that provides them. Later meta data must be
 * assignable to the existing column types, otherwise loading is aborted.
 * After all files are read, {@code parser.cleanup()} is called and the
 * bundle is passed through {@code invokeBundleFilters}.
 *
 * @return the concatenated bundle, after filtering
 * @throws AbortException if a file cannot be read, if the files have
 *         incompatible columns, or if a stream delivers an object before
 *         any meta data
 */
@Override
public MultipleObjectsBundle loadData() {
    MultipleObjectsBundle objects = new MultipleObjectsBundle();
    objects.appendColumn(TypeUtil.STRING, new ArrayList<>());
    for (File file : files) {
        String filestr = file.getPath();
        // The raw file stream is its own resource so that it is closed even
        // when wrapping it (e.g. gzip detection) fails with an IOException.
        try (FileInputStream fis = new FileInputStream(file);
            InputStream inputStream = FileUtil.tryGzipInput(new BufferedInputStream(fis))) {
            final BundleStreamSource source;
            if (parser instanceof StreamingParser) {
                final StreamingParser streamParser = (StreamingParser) parser;
                streamParser.initStream(inputStream);
                source = streamParser;
            } else {
                MultipleObjectsBundle parsingResult = parser.parse(inputStream);
                // normalize objects and transform labels
                source = parsingResult.asStream();
            }
            // Stays null until the stream announces its meta data.
            BundleMeta meta = null;
            loop: for (Event e = source.nextEvent(); ; e = source.nextEvent()) {
                switch(e) {
                    case END_OF_STREAM:
                        break loop;
                    case META_CHANGED:
                        meta = source.getMeta();
                        for (int i = 0; i < meta.size(); i++) {
                            // Offset by one: column 0 is the file name.
                            if (i + 1 >= objects.metaLength()) {
                                objects.appendColumn(meta.get(i), new ArrayList<>());
                            } else {
                                // Ensure compatibility:
                                if (!objects.meta(i + 1).isAssignableFromType(meta.get(i))) {
                                    throw new AbortException("Incompatible files loaded. Cannot concatenate with unaligned columns, please preprocess manually.");
                                }
                            }
                        }
                        // switch
                        break;
                    case NEXT_OBJECT:
                        if (meta == null) {
                            // Invalid stream: fail with a clear message
                            // instead of a NullPointerException below.
                            throw new AbortException("Invalid stream in file " + filestr + ": received an object before any meta data.");
                        }
                        Object[] o = new Object[objects.metaLength()];
                        o[0] = filestr;
                        for (int i = 0; i < meta.size(); i++) {
                            o[i + 1] = source.data(i);
                        }
                        objects.appendSimple(o);
                        // switch
                        break;
                }
            }
        } catch (IOException e) {
            throw new AbortException("Loading file " + filestr + " failed: " + e.toString(), e);
        }
    }
    parser.cleanup();
    // Invoke filters
    if (LOG.isDebugging()) {
        LOG.debugFine("Invoking filters.");
    }
    return invokeBundleFilters(objects);
}
Also used : StreamingParser(de.lmu.ifi.dbs.elki.datasource.parser.StreamingParser) BundleMeta(de.lmu.ifi.dbs.elki.datasource.bundle.BundleMeta) BufferedInputStream(java.io.BufferedInputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) ArrayList(java.util.ArrayList) IOException(java.io.IOException) FileInputStream(java.io.FileInputStream) BufferedInputStream(java.io.BufferedInputStream) Event(de.lmu.ifi.dbs.elki.datasource.bundle.BundleStreamSource.Event) File(java.io.File) BundleStreamSource(de.lmu.ifi.dbs.elki.datasource.bundle.BundleStreamSource) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Aggregations

BundleMeta (de.lmu.ifi.dbs.elki.datasource.bundle.BundleMeta): 11, AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException): 3, SimpleTypeInformation (de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation): 2, IOException (java.io.IOException): 2, BitVector (de.lmu.ifi.dbs.elki.data.BitVector): 1, LabelList (de.lmu.ifi.dbs.elki.data.LabelList): 1, NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector): 1, VectorFieldTypeInformation (de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation): 1, VectorTypeInformation (de.lmu.ifi.dbs.elki.data.type.VectorTypeInformation): 1, BundleStreamSource (de.lmu.ifi.dbs.elki.datasource.bundle.BundleStreamSource): 1, Event (de.lmu.ifi.dbs.elki.datasource.bundle.BundleStreamSource.Event): 1, MultipleObjectsBundle (de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle): 1, StreamingParser (de.lmu.ifi.dbs.elki.datasource.parser.StreamingParser): 1, Object2IntMap (it.unimi.dsi.fastutil.objects.Object2IntMap): 1, BufferedInputStream (java.io.BufferedInputStream): 1, File (java.io.File): 1, FileInputStream (java.io.FileInputStream): 1, InputStream (java.io.InputStream): 1, FileChannel (java.nio.channels.FileChannel): 1, ArrayList (java.util.ArrayList): 1