Search in sources :

Example 91 with AbortException

use of de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException in project elki by elki-project.

the class GeneratorXMLDatabaseConnection method parseVector.

/**
 * Parse a string into a vector.
 *
 * The input is split with {@code WHITESPACE_PATTERN} and every resulting
 * entry is parsed as a double.
 *
 * TODO: move this into utility package?
 *
 * @param s String to parse
 * @return Vector with one component per entry of the split input
 * @throws AbortException if an entry cannot be parsed as a number; the
 *         underlying {@link NumberFormatException} is attached as cause
 */
private double[] parseVector(String s) {
    String[] entries = WHITESPACE_PATTERN.split(s);
    double[] d = new double[entries.length];
    for (int i = 0; i < entries.length; i++) {
        try {
            d[i] = ParseUtil.parseDouble(entries[i]);
        } catch (NumberFormatException e) {
            // Chain the cause so the underlying parse failure is not lost.
            throw new AbortException("Could not parse vector.", e);
        }
    }
    return d;
}
Also used : AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 92 with AbortException

use of de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException in project elki by elki-project.

the class GeneratorXMLDatabaseConnection method loadXMLSpecification.

/**
 * Read the XML specification file and build the generator it describes.
 *
 * Schema validation is set up on a best-effort basis: if the schema resource
 * cannot be located or installed, a warning is logged and parsing continues
 * without it.
 *
 * @return Generator
 */
private GeneratorMain loadXMLSpecification() {
    try {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
        URL schemaUrl = ClassLoader.getSystemResource(GENERATOR_SCHEMA_FILE);
        if (schemaUrl == null) {
            LOG.warning("Could not set up XML Schema validation for specification file.");
        } else {
            try {
                SchemaFactory schemaFactory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);
                factory.setSchema(schemaFactory.newSchema(schemaUrl));
                factory.setIgnoringElementContentWhitespace(true);
            } catch (Exception e) {
                // Validation is optional; fall back to non-validating parsing.
                LOG.warning("Could not set up XML Schema validation for specification file.", e);
            }
        }
        Node root = factory.newDocumentBuilder().parse(specfile).getDocumentElement();
        String rootName = root.getNodeName();
        // Reject documents whose top-level element is not the dataset tag.
        if (!TAG_DATASET.equals(rootName)) {
            throw new AbortException("Experiment specification has incorrect document element: " + rootName);
        }
        GeneratorMain generator = new GeneratorMain();
        processElementDataset(generator, root);
        return generator;
    } catch (FileNotFoundException e) {
        // Must precede the IOException handler (it is a subclass).
        throw new AbortException("Can't open specification file.", e);
    } catch (IOException e) {
        throw new AbortException("IO Exception loading specification file.", e);
    } catch (SAXException e) {
        throw new AbortException("Error parsing specification file.", e);
    } catch (ParserConfigurationException e) {
        throw new AbortException("Parser Configuration Error", e);
    }
}
Also used : GeneratorMain(de.lmu.ifi.dbs.elki.data.synthetic.bymodel.GeneratorMain) DocumentBuilderFactory(javax.xml.parsers.DocumentBuilderFactory) Schema(javax.xml.validation.Schema) Node(org.w3c.dom.Node) FileNotFoundException(java.io.FileNotFoundException) IOException(java.io.IOException) ParserConfigurationException(javax.xml.parsers.ParserConfigurationException) Document(org.w3c.dom.Document) URL(java.net.URL) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException) IOException(java.io.IOException) FileNotFoundException(java.io.FileNotFoundException) ParserConfigurationException(javax.xml.parsers.ParserConfigurationException) SAXException(org.xml.sax.SAXException) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException) SAXException(org.xml.sax.SAXException)

Example 93 with AbortException

use of de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException in project elki by elki-project.

the class GeneratorXMLDatabaseConnection method processElementPoint.

/**
 * Parse a 'point' element (point vector for a static cluster)
 *
 * The vector attribute of the element is parsed and appended to the list of
 * points. Child elements are not expected; each one found is reported with a
 * warning.
 *
 * @param points current list of points (to append to)
 * @param cur Current document node
 * @throws AbortException if the vector attribute is missing or empty, or if
 *         it cannot be parsed
 */
private void processElementPoint(List<double[]> points, Node cur) {
    String vstr = ((Element) cur).getAttribute(ATTR_VECTOR);
    if (vstr == null || vstr.isEmpty()) {
        // This element carries a point, not a translation.
        throw new AbortException("No point vector given.");
    }
    // *** add new point
    points.add(parseVector(vstr));
    // TODO: check for unknown attributes.
    XMLNodeIterator iter = new XMLNodeIterator(cur.getFirstChild());
    while (iter.hasNext()) {
        Node child = iter.next();
        if (child.getNodeType() == Node.ELEMENT_NODE) {
            LOG.warning("Unknown element in XML specification file: " + child.getNodeName());
        }
    }
}
Also used : Element(org.w3c.dom.Element) Node(org.w3c.dom.Node) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException) XMLNodeIterator(de.lmu.ifi.dbs.elki.utilities.xml.XMLNodeIterator)

Example 94 with AbortException

use of de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException in project elki by elki-project.

the class ConcatenateFilesDatabaseConnection method loadData.

/**
 * Load every configured file and concatenate the parsed objects into a single
 * bundle.
 *
 * Column 0 of the result holds the path of the file an object was read from;
 * the columns reported by the parser follow. Columns of later files must be
 * assignable to the columns established by earlier files.
 *
 * @return the concatenated bundle, after the bundle filters were applied
 * @throws AbortException if a file cannot be read, if the columns of the
 *         files are incompatible, or if a stream delivers an object before
 *         any column metadata
 */
@Override
public MultipleObjectsBundle loadData() {
    MultipleObjectsBundle objects = new MultipleObjectsBundle();
    // First column: name of the source file.
    objects.appendColumn(TypeUtil.STRING, new ArrayList<>());
    for (File file : files) {
        String filestr = file.getPath();
        try (InputStream inputStream = FileUtil.tryGzipInput(new BufferedInputStream(new FileInputStream(file)))) {
            final BundleStreamSource source;
            if (parser instanceof StreamingParser) {
                final StreamingParser streamParser = (StreamingParser) parser;
                streamParser.initStream(inputStream);
                source = streamParser;
            } else {
                MultipleObjectsBundle parsingResult = parser.parse(inputStream);
                // normalize objects and transform labels
                source = parsingResult.asStream();
            }
            // Stays null until the stream announces its columns.
            BundleMeta meta = null;
            loop: for (Event e = source.nextEvent(); ; e = source.nextEvent()) {
                switch(e) {
                    case END_OF_STREAM:
                        break loop;
                    case META_CHANGED:
                        meta = source.getMeta();
                        for (int i = 0; i < meta.size(); i++) {
                            // Offset by one: column 0 is the file name.
                            if (i + 1 >= objects.metaLength()) {
                                objects.appendColumn(meta.get(i), new ArrayList<>());
                            } else {
                                // Ensure compatibility:
                                if (!objects.meta(i + 1).isAssignableFromType(meta.get(i))) {
                                    throw new AbortException("Incompatible files loaded. Cannot concatenate with unaligned columns, please preprocess manually.");
                                }
                            }
                        }
                        // switch
                        break;
                    case NEXT_OBJECT:
                        if (meta == null) {
                            // Fail with a clear message instead of a NullPointerException.
                            throw new AbortException("Invalid stream in file " + filestr + ": object received before any column metadata.");
                        }
                        Object[] o = new Object[objects.metaLength()];
                        o[0] = filestr;
                        for (int i = 0; i < meta.size(); i++) {
                            o[i + 1] = source.data(i);
                        }
                        objects.appendSimple(o);
                        // switch
                        break;
                }
            }
        } catch (IOException e) {
            throw new AbortException("Loading file " + filestr + " failed: " + e.toString(), e);
        }
    }
    parser.cleanup();
    // Invoke filters
    if (LOG.isDebugging()) {
        LOG.debugFine("Invoking filters.");
    }
    return invokeBundleFilters(objects);
}
Also used : StreamingParser(de.lmu.ifi.dbs.elki.datasource.parser.StreamingParser) BundleMeta(de.lmu.ifi.dbs.elki.datasource.bundle.BundleMeta) BufferedInputStream(java.io.BufferedInputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) ArrayList(java.util.ArrayList) IOException(java.io.IOException) FileInputStream(java.io.FileInputStream) BufferedInputStream(java.io.BufferedInputStream) Event(de.lmu.ifi.dbs.elki.datasource.bundle.BundleStreamSource.Event) File(java.io.File) BundleStreamSource(de.lmu.ifi.dbs.elki.datasource.bundle.BundleStreamSource) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 95 with AbortException

use of de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException in project elki by elki-project.

the class LabelJoinDatabaseConnection method loadData.

/**
 * Load all sources and join them on their label column.
 *
 * The first source defines the rows of the result. Every further source
 * contributes its non-label columns; its objects are matched to rows of the
 * first source by label. Unmatched objects, missing labels, duplicate labels
 * and null values remaining after the join are reported as warnings.
 *
 * @return the first bundle, extended by the joined columns
 * @throws AbortException if no source is configured or a source has no label
 *         column
 */
@Override
public MultipleObjectsBundle loadData() {
    if (sources.size() == 0) {
        throw new AbortException("No data sources given, cannot join.");
    }
    List<MultipleObjectsBundle> bundles = new ArrayList<>(sources.size());
    for (DatabaseConnection dbc : sources) {
        bundles.add(dbc.loadData());
    }
    MultipleObjectsBundle first = bundles.get(0);
    // Maps each label to its row in the first bundle; -1 means "unknown".
    Object2IntOpenHashMap<String> labelmap = new Object2IntOpenHashMap<>(first.dataLength());
    labelmap.defaultReturnValue(-1);
    // Process first bundle
    {
        // Identify a label column
        final int lblcol = FilterUtil.findLabelColumn(first);
        if (lblcol == -1) {
            throw new AbortException("No label column found in first source, cannot join (do you want to use " + ExternalIDJoinDatabaseConnection.class.getSimpleName() + " instead?)");
        }
        for (int i = 0; i < first.dataLength(); i++) {
            Object data = first.data(i, lblcol);
            if (data == null) {
                LOG.warning("Object without label encountered.");
                continue;
            }
            if (data instanceof LabelList) {
                // Every label of the list identifies this row.
                final LabelList ll = (LabelList) data;
                for (int j = 0; j < ll.size(); j++) {
                    registerLabel(labelmap, ll.get(j), i);
                }
            } else {
                // Strings and any other label type are keyed by their string form.
                registerLabel(labelmap, data.toString(), i);
            }
        }
    }
    // Process additional columns
    for (int c = 1; c < sources.size(); c++) {
        MultipleObjectsBundle cur = bundles.get(c);
        final int lblcol = FilterUtil.findLabelColumn(cur);
        if (lblcol == -1) {
            throw new AbortException("No label column found in source " + (c + 1) + ", cannot join (do you want to use " + ExternalIDJoinDatabaseConnection.class.getSimpleName() + " instead?)");
        }
        // Destination columns, indexed like the columns of cur.
        List<ArrayList<Object>> dcol = new ArrayList<>(cur.metaLength());
        for (int i = 0; i < cur.metaLength(); i++) {
            // Skip the label columns
            if (i == lblcol) {
                dcol.add(null);
                continue;
            }
            ArrayList<Object> newcol = new ArrayList<>(first.dataLength());
            // Pre-fill with nulls.
            for (int j = 0; j < first.dataLength(); j++) {
                newcol.add(null);
            }
            first.appendColumn(cur.meta(i), newcol);
            dcol.add(newcol);
        }
        for (int i = 0; i < cur.dataLength(); i++) {
            Object data = cur.data(i, lblcol);
            if (data == null) {
                LOG.warning("Object without label encountered.");
                continue;
            }
            int row = -1;
            if (data instanceof LabelList) {
                // Use the first label of the list that is known.
                final LabelList ll = (LabelList) data;
                for (int j = 0; j < ll.size(); j++) {
                    row = labelmap.getInt(ll.get(j));
                    if (row >= 0) {
                        break;
                    }
                }
            } else {
                row = labelmap.getInt(data.toString());
            }
            if (row < 0) {
                LOG.warning("Label not found for join: " + data + " in row " + i);
                continue;
            }
            for (int d = 0; d < cur.metaLength(); d++) {
                if (d == lblcol) {
                    continue;
                }
                List<Object> col = dcol.get(d);
                assert (col != null);
                col.set(row, cur.data(i, d));
            }
        }
    }
    // Report rows that still contain null values after the join.
    for (int i = 0; i < first.dataLength(); i++) {
        for (int d = 0; d < first.metaLength(); d++) {
            if (first.data(i, d) == null) {
                StringBuilder buf = new StringBuilder();
                for (int d2 = 0; d2 < first.metaLength(); d2++) {
                    // Separate by column index, so empty values keep their separator.
                    if (d2 > 0) {
                        buf.append(", ");
                    }
                    if (first.data(i, d2) == null) {
                        buf.append("null");
                    } else {
                        buf.append(first.data(i, d2));
                    }
                }
                LOG.warning("null value in joined data, row " + i + " column " + d + FormatUtil.NEWLINE + "[" + buf.toString() + "]");
                // One warning per row is enough.
                break;
            }
        }
    }
    return first;
}

/**
 * Associate a label with a row of the first bundle, warning if the label was
 * already associated with another row (the new row replaces the old one).
 *
 * @param labelmap Label to row map; its default return value must be -1
 * @param label Label to register
 * @param row Row the label belongs to
 */
private void registerLabel(Object2IntOpenHashMap<String> labelmap, String label, int row) {
    int old = labelmap.put(label, row);
    if (old != -1) {
        LOG.warning("Duplicate label encountered: " + label + " in rows " + old + " and " + row);
    }
}
Also used : LabelList(de.lmu.ifi.dbs.elki.data.LabelList) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) ArrayList(java.util.ArrayList) Object2IntOpenHashMap(it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Aggregations

AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)99 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)25 IOException (java.io.IOException)24 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)22 ArrayList (java.util.ArrayList)16 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)13 MultipleObjectsBundle (de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle)13 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)10 DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)9 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)9 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)8 Model (de.lmu.ifi.dbs.elki.data.model.Model)8 VectorFieldTypeInformation (de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation)8 Database (de.lmu.ifi.dbs.elki.database.Database)8 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)8 DBIDRange (de.lmu.ifi.dbs.elki.database.ids.DBIDRange)8 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)8 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)6 ClassLabel (de.lmu.ifi.dbs.elki.data.ClassLabel)5 DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector)5