use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.
the class ConvertToBundleApplication method run.
/**
 * Loads the data from the configured input and serializes it as a bundle
 * stream into the output file.
 *
 * The output stream and its channel are managed by try-with-resources, so
 * both are closed even when writing the bundle fails part-way. I/O failures
 * are reported through the logger rather than propagated.
 */
@Override
public void run() {
  if (LOG.isVerbose()) {
    LOG.verbose("Loading data.");
  }
  MultipleObjectsBundle bundle = input.loadData();
  if (LOG.isVerbose()) {
    LOG.verbose("Serializing to output file: " + outfile.toString());
  }
  // TODO: make configurable?
  BundleWriter writer = new BundleWriter();
  // Resources are closed in reverse declaration order: channel first, then
  // the underlying stream.
  try (FileOutputStream fos = new FileOutputStream(outfile);
      FileChannel channel = fos.getChannel()) {
    writer.writeBundleStream(bundle.asStream(), channel);
  } catch (IOException e) {
    LOG.exception("IO Error", e);
  }
}
use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.
the class ArrayAdapterDatabaseConnection method loadData.
/**
 * Builds a bundle from the in-memory {@code data} array.
 *
 * Each row of {@code data} is wrapped in a {@link DoubleVector}; the vector
 * column's type information carries the smallest and largest row length
 * seen. If {@code startid} is set, a static DBID range starting there is
 * attached. If {@code labels} is set, it is appended as a string column.
 *
 * @return the bundle after passing it through {@code invokeBundleFilters}
 * @throws AbortException if {@code labels} is present but its length differs
 *         from the number of data rows
 */
@Override
public MultipleObjectsBundle loadData() {
  final MultipleObjectsBundle result = new MultipleObjectsBundle();
  if (startid != null) {
    result.setDBIDs(DBIDFactory.FACTORY.generateStaticDBIDRange(startid, data.length));
  }

  // Track the range of row lengths while wrapping the rows.
  int minDim = Integer.MAX_VALUE;
  int maxDim = 0;
  final List<DoubleVector> vectors = new ArrayList<>(data.length);
  for (int row = 0; row < data.length; row++) {
    final int dim = data[row].length;
    minDim = Math.min(minDim, dim);
    maxDim = Math.max(maxDim, dim);
    vectors.add(DoubleVector.wrap(data[row]));
  }
  final SimpleTypeInformation<DoubleVector> vectorType = new VectorFieldTypeInformation<>(DoubleVector.FACTORY, minDim, maxDim, DoubleVector.FACTORY.getDefaultSerializer());
  result.appendColumn(vectorType, vectors);

  // Optional label column; must line up with the data rows.
  if (labels != null && labels.length != data.length) {
    throw new AbortException("Label and DBID columns must have the same size.");
  }
  if (labels != null) {
    result.appendColumn(TypeUtil.STRING, Arrays.asList(labels));
  }
  return invokeBundleFilters(result);
}
use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.
the class IntegerRankTieNormalization method filter.
/**
 * Replaces every number-vector column by a column of integer vectors holding
 * tie-aware rank positions; all other columns are passed through unchanged.
 *
 * For each dimension the objects are sorted by their value in that
 * dimension. A run of objects whose values are not strictly increasing forms
 * a tie group, and every member receives the same position: the sum of the
 * group's first and last index in the sorted order.
 *
 * @param objects input bundle
 * @return a new bundle with the same columns, number-vector columns replaced
 */
@Override
public MultipleObjectsBundle filter(MultipleObjectsBundle objects) {
  final int len = objects.dataLength();
  final MultipleObjectsBundle result = new MultipleObjectsBundle();

  // Permutation array; it is re-sorted for every dimension of every column.
  final int[] perm = new int[len];
  for (int k = 0; k < len; k++) {
    perm[k] = k;
  }
  final Sorter sorter = new Sorter();

  for (int col = 0; col < objects.metaLength(); col++) {
    final SimpleTypeInformation<?> meta = objects.meta(col);
    final List<?> values = objects.getColumn(col);
    if (TypeUtil.NUMBER_VECTOR_FIELD.isAssignableFromType(meta)) {
      @SuppressWarnings("unchecked")
      final List<? extends NumberVector> vectors = (List<? extends NumberVector>) values;
      // Replacement type information: integer vectors of the same dimensionality.
      final int dim = ((VectorFieldTypeInformation<?>) meta).getDimensionality();
      final VectorFieldTypeInformation<IntegerVector> outType = new VectorFieldTypeInformation<>(IntegerVector.STATIC, dim);
      final int[][] ranks = new int[len][dim];

      for (int d = 0; d < dim; d++) {
        sorter.setup(vectors, d);
        IntegerArrayQuickSort.sort(perm, sorter);

        // Walk the sorted order one tie group at a time.
        int begin = 0;
        while (begin < len) {
          final double value = vectors.get(perm[begin]).doubleValue(d);
          // Extend the group until a strictly larger value is found.
          int stop = begin + 1;
          for (; stop < len; stop++) {
            if (value < vectors.get(perm[stop]).doubleValue(d)) {
              break;
            }
          }
          // First index plus last index (stop - 1) of the group.
          final int tiedPos = begin + stop - 1;
          for (int k = begin; k < stop; k++) {
            ranks[perm[k]][d] = tiedPos;
          }
          begin = stop;
        }
      }

      // Wrap the rank rows as output vectors.
      final List<IntegerVector> ranked = new ArrayList<>(len);
      for (int[] row : ranks) {
        ranked.add(new IntegerVector(row));
      }
      result.appendColumn(outType, ranked);
    } else {
      // Not a number vector field: keep the column as it is.
      result.appendColumn(meta, values);
    }
  }
  return result;
}
use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.
the class ClassicMultidimensionalScalingTransform method filter.
/**
 * Applies classic multidimensional scaling to every column that the
 * configured distance function accepts; other columns are passed through.
 *
 * For a matching column, a squared distance matrix is computed and double
 * centered, then decomposed by SVD. Each object is replaced by a
 * {@code tdim}-dimensional vector whose coordinates are the leading entries
 * of its row of U scaled by the corresponding singular values (square-rooted
 * unless the distance function reports itself as squared).
 *
 * Note that the list obtained from {@code objects.getColumn(r)} is both
 * appended to the output bundle and overwritten in place with the new
 * vectors, so the input bundle's column is modified as well.
 *
 * @param objects input bundle; returned as-is when it contains no data
 * @return bundle with matching columns replaced by projected vectors
 */
@Override
public MultipleObjectsBundle filter(MultipleObjectsBundle objects) {
  final int size = objects.dataLength();
  if (size == 0) {
    return objects;
  }
  final MultipleObjectsBundle result = new MultipleObjectsBundle();
  for (int col = 0; col < objects.metaLength(); col++) {
    @SuppressWarnings("unchecked")
    final SimpleTypeInformation<Object> meta = (SimpleTypeInformation<Object>) objects.meta(col);
    @SuppressWarnings("unchecked")
    final List<Object> values = (List<Object>) objects.getColumn(col);
    if (!dist.getInputTypeRestriction().isAssignableFromType(meta)) {
      // Column not usable with the distance function: copy through.
      result.appendColumn(meta, values);
      continue;
    }

    // Register the column under its replacement type; contents are filled in below.
    @SuppressWarnings("unchecked")
    final List<I> inputs = (List<I>) values;
    result.appendColumn(new VectorFieldTypeInformation<>(factory, tdim), inputs);

    StepProgress progress = null;
    if (LOG.isVerbose()) {
      progress = new StepProgress("Classic MDS", 2);
    }

    // Step 1: squared distance matrix, double centered.
    LOG.beginStep(progress, 1, "Computing distance matrix");
    final double[][] matrix = computeSquaredDistanceMatrix(inputs, dist);
    doubleCenterSymmetric(matrix);

    // Step 2: decomposition.
    LOG.beginStep(progress, 2, "Computing singular value decomposition");
    final SingularValueDecomposition svd = new SingularValueDecomposition(matrix);
    final double[][] u = svd.getU();
    final double[] scale = svd.getSingularValues();
    // Undo squared, unless we were given a squared distance function:
    if (!dist.isSquared()) {
      for (int k = 0; k < tdim; k++) {
        scale[k] = FastMath.sqrt(Math.abs(scale[k]));
      }
    }

    // Project each object; the coordinate buffer is reused across rows.
    final double[] coords = new double[tdim];
    for (int row = 0; row < size; row++) {
      final double[] urow = u[row];
      for (int k = 0; k < coords.length; k++) {
        coords[k] = scale[k] * urow[k];
      }
      values.set(row, factory.newNumberVector(coords));
    }
    LOG.setCompleted(progress);
  }
  return result;
}
use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.
the class SortByLabelFilter method filter.
/**
 * Returns a copy of the bundle with all rows reordered by label.
 *
 * The label column is located via {@code FilterUtil.findLabelColumn}; rows
 * are ordered by the case-insensitive comparison of the string form of their
 * label, and every column is rebuilt in that order.
 *
 * @param objects input bundle
 * @return a new bundle with the same columns in sorted row order
 * @throws AbortException if no label column is found
 */
@Override
public MultipleObjectsBundle filter(final MultipleObjectsBundle objects) {
  if (LOG.isDebugging()) {
    LOG.debug("Sorting the data set");
  }

  // Identity permutation; sorting it is cheaper than moving the rows themselves.
  final int size = objects.dataLength();
  final int[] perm = new int[size];
  for (int k = 0; k < size; k++) {
    perm[k] = k;
  }

  final int labelColumn = FilterUtil.findLabelColumn(objects);
  if (labelColumn == -1) {
    throw new AbortException("No label column found - cannot sort by label.");
  }

  // Order row indexes by their label text, ignoring case.
  final IntegerComparator byLabel = new IntegerComparator() {
    @Override
    public int compare(int left, int right) {
      final String leftLabel = objects.data(left, labelColumn).toString();
      final String rightLabel = objects.data(right, labelColumn).toString();
      return leftLabel.compareToIgnoreCase(rightLabel);
    }
  };
  IntegerArrayQuickSort.sort(perm, byLabel);

  // Rebuild every column following the sorted permutation.
  final MultipleObjectsBundle sorted = new MultipleObjectsBundle();
  for (int col = 0; col < objects.metaLength(); col++) {
    final List<?> source = objects.getColumn(col);
    final List<Object> reordered = new ArrayList<>(size);
    for (int idx : perm) {
      reordered.add(source.get(idx));
    }
    sorted.appendColumn(objects.meta(col), reordered);
  }
  return sorted;
}
Aggregations