Use of de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException in project elki by elki-project.
The class BundleDatabaseConnection, method loadData.
/**
 * Read the bundle stored in {@code infile}, run it through the stream
 * filters and materialize the result.
 *
 * @return the filtered contents as a {@link MultipleObjectsBundle}
 * @throws AbortException wrapping any {@link IOException} raised while
 *         opening, reading or closing the file
 */
@Override
public MultipleObjectsBundle loadData() {
  // try-with-resources closes the channel first and the stream second, and
  // does so even when reading or filtering fails half way through.
  try (FileInputStream fis = new FileInputStream(infile);
      FileChannel channel = fis.getChannel()) {
    // The bundle is fully materialized before the resources are released.
    return invokeStreamFilters(new BundleReader(channel)).asMultipleObjectsBundle();
  } catch (IOException e) {
    throw new AbortException("IO error loading bundle", e);
  }
}
Use of de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException in project elki by elki-project.
The class ExternalIDJoinDatabaseConnection, method loadData.
/**
 * Load every source and join the additional sources onto the first one,
 * matching rows by their external ID.
 *
 * The first source defines the rows of the result. Each non-ID column of a
 * further source is appended to the first bundle, pre-filled with
 * {@code null}, and then populated for every row whose external ID is known
 * from the first source. Rows that stay incomplete are reported with a
 * warning, but are still returned.
 *
 * @return the first bundle, extended by the joined columns
 * @throws AbortException if no source is configured, or if a source has no
 *         external ID column
 */
@Override
public MultipleObjectsBundle loadData() {
  // Without this guard, bundles.get(0) below fails with a bare
  // IndexOutOfBoundsException.
  if (sources.isEmpty()) {
    throw new AbortException("No data sources given to join.");
  }
  List<MultipleObjectsBundle> bundles = new ArrayList<>(sources.size());
  for (DatabaseConnection dbc : sources) {
    bundles.add(dbc.loadData());
  }
  MultipleObjectsBundle first = bundles.get(0);
  // Maps external ID -> row in the first bundle; -1 signals "unknown ID".
  Object2IntOpenHashMap<ExternalID> labelmap = new Object2IntOpenHashMap<>(first.dataLength());
  labelmap.defaultReturnValue(-1);
  // Process first bundle
  {
    final int lblcol = findExternalIDColumn(first);
    if (lblcol == -1) {
      throw new AbortException("No external ID column found in primary source.");
    }
    for (int i = 0; i < first.dataLength(); i++) {
      ExternalID data = (ExternalID) first.data(i, lblcol);
      if (data == null) {
        LOG.debug("Object without ID encountered.");
        continue;
      }
      // put() replaces an earlier mapping, so the later row wins on duplicates.
      int old = labelmap.put(data, i);
      if (old != -1) {
        LOG.debug("Duplicate id encountered: " + data + " in rows " + old + " and " + i);
      }
    }
  }
  // Process additional columns
  for (int c = 1; c < sources.size(); c++) {
    MultipleObjectsBundle cur = bundles.get(c);
    final int lblcol = findExternalIDColumn(cur);
    if (lblcol == -1) {
      // List the available column types to help diagnosing the input.
      StringBuilder buf = new StringBuilder();
      for (int i = 0; i < cur.metaLength(); i++) {
        if (buf.length() > 0) {
          buf.append(',');
        }
        buf.append(cur.meta(i));
      }
      throw new AbortException("No external ID column found in source " + (c + 1) + " to join with. Got: " + buf.toString());
    }
    // Destination columns, indexed like the columns of cur. The slot of the
    // ID column stays null, because that column is not copied.
    List<ArrayList<Object>> dcol = new ArrayList<>(cur.metaLength());
    for (int i = 0; i < cur.metaLength(); i++) {
      // Skip the label columns
      if (i == lblcol) {
        dcol.add(null);
        continue;
      }
      ArrayList<Object> newcol = new ArrayList<>(first.dataLength());
      // Pre-fill with nulls, so that rows can be set in arbitrary order.
      for (int j = 0; j < first.dataLength(); j++) {
        newcol.add(null);
      }
      first.appendColumn(cur.meta(i), newcol);
      dcol.add(newcol);
    }
    for (int i = 0; i < cur.dataLength(); i++) {
      ExternalID data = (ExternalID) cur.data(i, lblcol);
      if (data == null) {
        LOG.warning("Object without label encountered.");
        continue;
      }
      int row = labelmap.getInt(data);
      if (row == -1) {
        LOG.debug("ID not found for join: " + data + " in row " + i);
        continue;
      }
      for (int d = 0; d < cur.metaLength(); d++) {
        if (d == lblcol) {
          continue;
        }
        List<Object> col = dcol.get(d);
        assert (col != null);
        col.set(row, cur.data(i, d));
      }
    }
  }
  // Report rows that are still incomplete after joining; one warning per row.
  for (int i = 0; i < first.dataLength(); i++) {
    for (int d = 0; d < first.metaLength(); d++) {
      if (first.data(i, d) == null) {
        LOG.warning("null value in joined data, row " + i + " column " + d + FormatUtil.NEWLINE + "[" + formatRow(first, i) + "]");
        break;
      }
    }
  }
  return first;
}

/**
 * Find the first column whose type is compatible with
 * {@code TypeUtil.EXTERNALID}.
 *
 * @param bundle bundle to inspect
 * @return index of the first matching column, or {@code -1} if there is none
 */
private static int findExternalIDColumn(MultipleObjectsBundle bundle) {
  for (int i = 0; i < bundle.metaLength(); i++) {
    if (TypeUtil.EXTERNALID.isAssignableFromType(bundle.meta(i))) {
      return i;
    }
  }
  return -1;
}

/**
 * Render all values of one row as a comma separated list for log output.
 *
 * @param bundle bundle to read from
 * @param row row index
 * @return the values of the row joined by {@code ", "}, with missing values
 *         shown as {@code null}
 */
private static String formatRow(MultipleObjectsBundle bundle, int row) {
  StringBuilder buf = new StringBuilder();
  for (int d = 0; d < bundle.metaLength(); d++) {
    if (buf.length() > 0) {
      buf.append(", ");
    }
    if (bundle.data(row, d) == null) {
      buf.append("null");
    } else {
      buf.append(bundle.data(row, d));
    }
  }
  return buf.toString();
}
Use of de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException in project elki by elki-project.
The class ArrayAdapterDatabaseConnection, method loadData.
/**
 * Wrap the rows of {@code data} as {@link DoubleVector}s and return them as
 * a bundle, optionally with static DBIDs and a string label column.
 *
 * @return the bundle after applying the bundle filters
 * @throws AbortException if labels are given, but their number differs from
 *         the number of data rows
 */
@Override
public MultipleObjectsBundle loadData() {
  MultipleObjectsBundle b = new MultipleObjectsBundle();
  if (startid != null) {
    b.setDBIDs(DBIDFactory.FACTORY.generateStaticDBIDRange(startid, data.length));
  }
  // Track the smallest and largest row length for the type information.
  int mind = Integer.MAX_VALUE, maxd = 0;
  List<DoubleVector> vecs = new ArrayList<>(data.length);
  for (int i = 0; i < data.length; i++) {
    final int d = data[i].length;
    mind = Math.min(mind, d);
    maxd = Math.max(maxd, d);
    // wrap() is handed the original row array.
    vecs.add(DoubleVector.wrap(data[i]));
  }
  // Only possible for an empty data array: the minimum was never updated.
  // Clamp it so that the dimensionality range is not inverted.
  if (mind > maxd) {
    mind = maxd;
  }
  SimpleTypeInformation<DoubleVector> type = new VectorFieldTypeInformation<>(DoubleVector.FACTORY, mind, maxd, DoubleVector.FACTORY.getDefaultSerializer());
  b.appendColumn(type, vecs);
  if (labels != null) {
    if (labels.length != data.length) {
      throw new AbortException("Label and DBID columns must have the same size.");
    }
    b.appendColumn(TypeUtil.STRING, Arrays.asList(labels));
  }
  return invokeBundleFilters(b);
}
Use of de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException in project elki by elki-project.
The class SortByLabelFilter, method filter.
/**
 * Produce a copy of the bundle whose rows are ordered by the string form of
 * their label, compared case-insensitively.
 *
 * @param objects bundle to sort; it is only read, never modified
 * @return a new bundle containing every column of the input in sorted row order
 * @throws AbortException if the bundle has no label column
 */
@Override
public MultipleObjectsBundle filter(final MultipleObjectsBundle objects) {
  if (LOG.isDebugging()) {
    LOG.debug("Sorting the data set");
  }
  final int rows = objects.dataLength();
  // Sorting needs a label column to compare on.
  final int labelColumn = FilterUtil.findLabelColumn(objects);
  if (labelColumn == -1) {
    throw new AbortException("No label column found - cannot sort by label.");
  }
  // Sort a permutation of row indexes instead of moving the data itself.
  final int[] order = new int[rows];
  for (int pos = 0; pos < rows; pos++) {
    order[pos] = pos;
  }
  final IntegerComparator byLabel = new IntegerComparator() {
    @Override
    public int compare(int o1, int o2) {
      final String left = objects.data(o1, labelColumn).toString();
      final String right = objects.data(o2, labelColumn).toString();
      return left.compareToIgnoreCase(right);
    }
  };
  IntegerArrayQuickSort.sort(order, byLabel);
  // Rebuild every column following the sorted permutation.
  final MultipleObjectsBundle sorted = new MultipleObjectsBundle();
  for (int col = 0; col < objects.metaLength(); col++) {
    final List<?> source = objects.getColumn(col);
    final List<Object> reordered = new ArrayList<>(rows);
    for (final int srcRow : order) {
      reordered.add(source.get(srcRow));
    }
    sorted.appendColumn(objects.meta(col), reordered);
  }
  return sorted;
}
Use of de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException in project elki by elki-project.
The class GlobalPrincipalComponentAnalysisTransform, method prepareStart.
/**
 * Check the input type and set up the covariance matrix for it.
 *
 * @param in type information of the input column
 * @return always {@code true}
 * @throws AbortException if the input is not a {@link VectorFieldTypeInformation}
 */
@Override
protected boolean prepareStart(SimpleTypeInformation<O> in) {
  if (in instanceof VectorFieldTypeInformation) {
    final VectorFieldTypeInformation<?> vectorType = (VectorFieldTypeInformation<?>) in;
    dim = vectorType.getDimensionality();
    // The covariance matrix is sized by the dimensionality of the input.
    covmat = new CovarianceMatrix(dim);
    return true;
  }
  throw new AbortException("PCA can only applied to fixed dimensionality vectors");
}
Aggregations