use of de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException in project elki by elki-project.
the class GeneratorXMLDatabaseConnection method parseVector.
/**
 * Parse a string into a vector.
 *
 * The input is split with {@code WHITESPACE_PATTERN} and every resulting
 * token is converted with {@code ParseUtil.parseDouble}.
 *
 * TODO: move this into utility package?
 *
 * @param s String to parse
 * @return Vector with one entry per token of {@code s}
 * @throws AbortException if a token cannot be parsed as a number; the
 *         underlying {@link NumberFormatException} is attached as cause
 */
private double[] parseVector(String s) {
  final String[] entries = WHITESPACE_PATTERN.split(s);
  final double[] d = new double[entries.length];
  for (int i = 0; i < entries.length; i++) {
    try {
      d[i] = ParseUtil.parseDouble(entries[i]);
    } catch (NumberFormatException e) {
      // Keep the cause and name the offending token, so that a broken
      // specification can be located without a debugger.
      throw new AbortException("Could not parse vector. Invalid entry '" + entries[i] + "' in: " + s, e);
    }
  }
  return d;
}
use of de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException in project elki by elki-project.
the class GeneratorXMLDatabaseConnection method loadXMLSpecification.
/**
 * Read the generator specification from the XML file.
 *
 * XML Schema validation is enabled when the schema resource can be located
 * and loaded; otherwise a warning is logged and parsing continues without
 * it. The document element must be the dataset tag.
 *
 * @return Generator populated from the document's dataset element
 */
private GeneratorMain loadXMLSpecification() {
  try {
    final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
    final URL schemaUrl = ClassLoader.getSystemResource(GENERATOR_SCHEMA_FILE);
    if (schemaUrl == null) {
      // Schema resource is not available on the class path.
      LOG.warning("Could not set up XML Schema validation for specification file.");
    } else {
      try {
        final SchemaFactory schemaFactory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);
        factory.setSchema(schemaFactory.newSchema(schemaUrl));
        factory.setIgnoringElementContentWhitespace(true);
      } catch (Exception e) {
        // Validation is best effort only; fall back to unvalidated parsing.
        LOG.warning("Could not set up XML Schema validation for specification file.", e);
      }
    }
    final Document document = factory.newDocumentBuilder().parse(specfile);
    final Node root = document.getDocumentElement();
    if (!TAG_DATASET.equals(root.getNodeName())) {
      throw new AbortException("Experiment specification has incorrect document element: " + root.getNodeName());
    }
    final GeneratorMain generator = new GeneratorMain();
    processElementDataset(generator, root);
    return generator;
  } catch (FileNotFoundException e) {
    throw new AbortException("Can't open specification file.", e);
  } catch (SAXException e) {
    throw new AbortException("Error parsing specification file.", e);
  } catch (IOException e) {
    throw new AbortException("IO Exception loading specification file.", e);
  } catch (ParserConfigurationException e) {
    throw new AbortException("Parser Configuration Error", e);
  }
}
use of de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException in project elki by elki-project.
the class GeneratorXMLDatabaseConnection method processElementPoint.
/**
 * Parse a 'point' element (point vector for a static cluster).
 *
 * The vector is read from the element's {@code ATTR_VECTOR} attribute,
 * parsed, and appended to {@code points}. Child elements are not expected
 * here; each one found is reported with a warning.
 *
 * @param points current list of points (to append to)
 * @param cur Current document node; it is cast to {@link Element}
 * @throws AbortException if the vector attribute is missing or empty
 */
private void processElementPoint(List<double[]> points, Node cur) {
  final String vstr = ((Element) cur).getAttribute(ATTR_VECTOR);
  if (vstr == null || vstr.isEmpty()) {
    // This element describes a point, not a translation.
    throw new AbortException("No point vector given.");
  }
  // *** add new point
  points.add(parseVector(vstr));
  // TODO: check for unknown attributes.
  XMLNodeIterator iter = new XMLNodeIterator(cur.getFirstChild());
  while (iter.hasNext()) {
    Node child = iter.next();
    if (child.getNodeType() == Node.ELEMENT_NODE) {
      LOG.warning("Unknown element in XML specification file: " + child.getNodeName());
    }
  }
}
use of de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException in project elki by elki-project.
the class ConcatenateFilesDatabaseConnection method loadData.
/**
 * Load all configured files and concatenate them into a single bundle.
 *
 * Column 0 of the result holds the path of the file each object was read
 * from; the columns produced by the parser follow at offset 1. Columns of
 * later files must be assignable to the columns established so far.
 *
 * @return the concatenated bundle after {@code invokeBundleFilters}
 * @throws AbortException if a file cannot be read, if the columns of the
 *         files are incompatible, or if a stream delivers an object before
 *         any meta data
 */
@Override
public MultipleObjectsBundle loadData() {
  MultipleObjectsBundle objects = new MultipleObjectsBundle();
  // First column: source file name of each object.
  objects.appendColumn(TypeUtil.STRING, new ArrayList<>());
  for (File file : files) {
    String filestr = file.getPath();
    try (InputStream inputStream = //
    FileUtil.tryGzipInput(new BufferedInputStream(new FileInputStream(file)))) {
      final BundleStreamSource source;
      if (parser instanceof StreamingParser) {
        final StreamingParser streamParser = (StreamingParser) parser;
        streamParser.initStream(inputStream);
        source = streamParser;
      } else {
        MultipleObjectsBundle parsingResult = parser.parse(inputStream);
        // normalize objects and transform labels
        source = parsingResult.asStream();
      }
      // Remains null until the stream announces its meta data.
      BundleMeta meta = null;
      loop: for (Event e = source.nextEvent(); ; e = source.nextEvent()) {
        switch(e) {
          case END_OF_STREAM:
            break loop;
          case META_CHANGED:
            meta = source.getMeta();
            for (int i = 0; i < meta.size(); i++) {
              // Offset by one, because column 0 is the file name.
              if (i + 1 >= objects.metaLength()) {
                objects.appendColumn(meta.get(i), new ArrayList<>());
              } else {
                // Ensure compatibility:
                if (!objects.meta(i + 1).isAssignableFromType(meta.get(i))) {
                  throw new AbortException("Incompatible files loaded. Cannot concatenate with unaligned columns, please preprocess manually.");
                }
              }
            }
            // switch
            break;
          case NEXT_OBJECT:
            if (meta == null) {
              // Fail with a descriptive error instead of a bare
              // NullPointerException on invalid streams.
              throw new AbortException("Invalid stream in file " + filestr + ": object encountered before any meta data.");
            }
            Object[] o = new Object[objects.metaLength()];
            o[0] = filestr;
            for (int i = 0; i < meta.size(); i++) {
              o[i + 1] = source.data(i);
            }
            objects.appendSimple(o);
            // switch
            break;
        }
      }
    } catch (IOException e) {
      throw new AbortException("Loading file " + filestr + " failed: " + e.toString(), e);
    }
  }
  parser.cleanup();
  // Invoke filters
  if (LOG.isDebugging()) {
    LOG.debugFine("Invoking filters.");
  }
  return invokeBundleFilters(objects);
}
use of de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException in project elki by elki-project.
the class LabelJoinDatabaseConnection method loadData.
/**
 * Load every source and join the additional sources onto the first one,
 * using the label column as join key.
 *
 * The non-label columns of each further source are appended to the first
 * bundle, pre-filled with {@code null}; rows whose label is found in the
 * first bundle overwrite the matching row. Missing labels, duplicate
 * labels and remaining {@code null} cells are reported as warnings.
 *
 * @return the first bundle, extended by the joined columns
 * @throws AbortException if a source does not have a label column
 */
@Override
public MultipleObjectsBundle loadData() {
  final List<MultipleObjectsBundle> loaded = new ArrayList<>(sources.size());
  for (DatabaseConnection connection : sources) {
    loaded.add(connection.loadData());
  }
  final MultipleObjectsBundle first = loaded.get(0);
  final Object2IntOpenHashMap<String> labelmap = new Object2IntOpenHashMap<>(first.dataLength());
  labelmap.defaultReturnValue(-1);

  // Index the labels of the first bundle: label -> row number.
  final int firstLabelColumn = FilterUtil.findLabelColumn(first);
  if (firstLabelColumn == -1) {
    throw new AbortException("No label column found in first source, cannot join (do you want to use " + ExternalIDJoinDatabaseConnection.class.getSimpleName() + " instead?)");
  }
  for (int r = 0; r < first.dataLength(); r++) {
    final Object key = first.data(r, firstLabelColumn);
    if (key == null) {
      LOG.warning("Object without label encountered.");
      continue;
    }
    if (key instanceof LabelList) {
      // Every label of the list points to this row.
      final LabelList labels = (LabelList) key;
      for (int k = 0; k < labels.size(); k++) {
        final String label = labels.get(k);
        final int previous = labelmap.put(label, r);
        if (previous != -1) {
          LOG.warning("Duplicate label encountered: " + label + " in rows " + previous + " and " + r);
        }
      }
    } else {
      // Plain strings and any other label type are keyed by their string form.
      final String label = key.toString();
      final int previous = labelmap.put(label, r);
      if (previous != -1) {
        LOG.warning("Duplicate label encountered: " + label + " in rows " + previous + " and " + r);
      }
    }
  }

  // Join the remaining sources.
  for (int s = 1; s < sources.size(); s++) {
    final MultipleObjectsBundle other = loaded.get(s);
    final int labelColumn = FilterUtil.findLabelColumn(other);
    if (labelColumn == -1) {
      throw new AbortException("No label column found in source " + (s + 1) + ", cannot join (do you want to use " + ExternalIDJoinDatabaseConnection.class.getSimpleName() + " instead?)");
    }
    // Destination column per source column; null marks the label column.
    final List<ArrayList<Object>> targets = new ArrayList<>(other.metaLength());
    for (int m = 0; m < other.metaLength(); m++) {
      if (m != labelColumn) {
        final ArrayList<Object> column = new ArrayList<>(first.dataLength());
        // Start with nulls, to be overwritten by matched rows.
        for (int n = 0; n < first.dataLength(); n++) {
          column.add(null);
        }
        first.appendColumn(other.meta(m), column);
        targets.add(column);
      } else {
        targets.add(null);
      }
    }
    for (int r = 0; r < other.dataLength(); r++) {
      final Object key = other.data(r, labelColumn);
      if (key == null) {
        LOG.warning("Object without label encountered.");
        continue;
      }
      int match = -1;
      if (key instanceof LabelList) {
        // The first label known to the map decides the target row.
        final LabelList labels = (LabelList) key;
        for (int k = 0; k < labels.size(); k++) {
          match = labelmap.getInt(labels.get(k));
          if (match >= 0) {
            break;
          }
        }
      } else {
        match = labelmap.getInt(key.toString());
      }
      if (match < 0) {
        LOG.warning("Label not found for join: " + key + " in row " + r);
        continue;
      }
      for (int m = 0; m < other.metaLength(); m++) {
        if (m != labelColumn) {
          final List<Object> column = targets.get(m);
          assert (column != null);
          column.set(match, other.data(r, m));
        }
      }
    }
  }

  // Report each row that still contains a null cell (once per row).
  for (int r = 0; r < first.dataLength(); r++) {
    for (int m = 0; m < first.metaLength(); m++) {
      if (first.data(r, m) != null) {
        continue;
      }
      final StringBuilder rowText = new StringBuilder();
      for (int m2 = 0; m2 < first.metaLength(); m2++) {
        if (rowText.length() > 0) {
          rowText.append(", ");
        }
        final Object cell = first.data(r, m2);
        rowText.append(cell == null ? "null" : cell);
      }
      LOG.warning("null value in joined data, row " + r + " column " + m + FormatUtil.NEWLINE + "[" + rowText.toString() + "]");
      break;
    }
  }
  return first;
}
Aggregations