use of de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation in project elki by elki-project.
the class ArffParser method setupBundleHeaders.
/**
 * Setup the headers for the object bundle.
 * <p>
 * Consecutive input attributes that share the same target column are merged
 * into one output column. {@code targ} is walked with the input index
 * {@code in}, while {@code etyp} and {@code dimsize} are read with the output
 * index {@code out}. Attribute names are looked up with the input index.
 *
 * @param names Attribute names
 * @param targ Target columns
 * @param etyp ELKI type information
 * @param dimsize Number of dimensions in the individual types
 * @param bundle Output bundle
 * @param sparse Flag to create sparse vectors
 * @throws AbortException if the type of an output column is not supported
 */
private void setupBundleHeaders(ArrayList<String> names, int[] targ, TypeInformation[] etyp, int[] dimsize, MultipleObjectsBundle bundle, boolean sparse) {
  for (int in = 0, out = 0; in < targ.length; out++) {
    // Find the first input attribute that belongs to the next output column.
    int nin = in + 1;
    while (nin < targ.length && targ[nin] == targ[in]) {
      nin++;
    }
    final int dim = dimsize[out];
    if (TypeUtil.NUMBER_VECTOR_FIELD.equals(etyp[out])) {
      // Collect labels. The names belong to the input attributes, so the
      // labels of this column start at "in"; "out" only agrees with "in" as
      // long as no earlier column merged several attributes.
      String[] labels = new String[dim];
      for (int i = 0; i < dim; i++) {
        labels[i] = names.get(in + i);
      }
      if (!sparse) {
        VectorFieldTypeInformation<DoubleVector> type = new VectorFieldTypeInformation<>(DoubleVector.FACTORY, dim, labels);
        bundle.appendColumn(type, new ArrayList<DoubleVector>());
      } else {
        VectorFieldTypeInformation<SparseDoubleVector> type = new VectorFieldTypeInformation<>(SparseDoubleVector.FACTORY, dim, labels);
        bundle.appendColumn(type, new ArrayList<SparseDoubleVector>());
      }
    } else if (TypeUtil.LABELLIST.equals(etyp[out])) {
      // Join the names of all merged attributes, separated by spaces.
      StringBuilder label = new StringBuilder(names.get(in));
      for (int i = 1; i < dim; i++) {
        label.append(' ').append(names.get(in + i));
      }
      bundle.appendColumn(new SimpleTypeInformation<>(LabelList.class, label.toString()), new ArrayList<LabelList>());
    } else if (TypeUtil.EXTERNALID.equals(etyp[out])) {
      bundle.appendColumn(new SimpleTypeInformation<>(ExternalID.class, names.get(in)), new ArrayList<ExternalID>());
    } else if (TypeUtil.CLASSLABEL.equals(etyp[out])) {
      bundle.appendColumn(new SimpleTypeInformation<>(ClassLabel.class, names.get(in)), new ArrayList<ClassLabel>());
    } else {
      throw new AbortException("Unsupported type for column " + in + "->" + out + ": " + ((etyp[out] != null) ? etyp[out].toString() : "null"));
    }
    // Exactly one bundle column must have been appended per output index.
    assert (out == bundle.metaLength() - 1);
    in = nin;
  }
}
use of de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation in project elki by elki-project.
the class CASH method buildDB.
/**
 * Projects every parameterization function of the given ids into the
 * subspace defined by the basis and stores the results in a new
 * materialized relation.
 * <p>
 * The new relation is also registered with a fresh {@link ProxyDatabase}
 * created for the same ids.
 *
 * @param dim the dimensionality of the database (only used for the debug
 *        message, which reports {@code dim - 1})
 * @param basis the basis defining the subspace
 * @param ids the ids for the new database
 * @param relation the database storing the parameterization functions
 * @return the relation containing the projected parameterization functions
 */
private MaterializedRelation<ParameterizationFunction> buildDB(int dim, double[][] basis, DBIDs ids, Relation<ParameterizationFunction> relation) {
  ProxyDatabase proxyDb = new ProxyDatabase(ids);
  SimpleTypeInformation<ParameterizationFunction> typeInfo = new SimpleTypeInformation<>(ParameterizationFunction.class);
  WritableDataStore<ParameterizationFunction> projected = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT, ParameterizationFunction.class);

  // Store the projection of each function under its original id.
  DBIDIter it = ids.iter();
  while (it.valid()) {
    projected.put(it, project(basis, relation.get(it)));
    it.advance();
  }

  if (LOG.isDebugging()) {
    final int subspaceDim = dim - 1;
    LOG.debugFine("db fuer dim " + subspaceDim + ": " + ids.size());
  }

  MaterializedRelation<ParameterizationFunction> result = new MaterializedRelation<>(typeInfo, ids, null, projected);
  proxyDb.addRelation(result);
  return result;
}
use of de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation in project elki by elki-project.
the class AttributeWiseBetaNormalization method filter.
/**
 * Normalizes every column compatible with
 * {@code TypeUtil.NUMBER_VECTOR_FIELD} in place. Other columns are left
 * untouched.
 * <p>
 * For each dimension a distribution is chosen with {@code findBestFit}. Each
 * value is then mapped through that distribution's cdf and the quantile
 * function of a symmetric beta distribution.
 *
 * @param objects bundle to normalize
 * @return the same bundle instance, with the vector columns replaced
 */
@Override
public MultipleObjectsBundle filter(MultipleObjectsBundle objects) {
  if (objects.dataLength() == 0) {
    return objects;
  }
  for (int col = 0; col < objects.metaLength(); col++) {
    SimpleTypeInformation<?> meta = (SimpleTypeInformation<?>) objects.meta(col);
    final List<?> rawColumn = (List<?>) objects.getColumn(col);
    if (!TypeUtil.NUMBER_VECTOR_FIELD.isAssignableFromType(meta)) {
      continue;
    }
    @SuppressWarnings("unchecked")
    final List<V> vectors = (List<V>) rawColumn;
    @SuppressWarnings("unchecked")
    final VectorFieldTypeInformation<V> vectorMeta = (VectorFieldTypeInformation<V>) meta;
    factory = FilterUtil.guessFactory(vectorMeta);

    final int dim = vectorMeta.getDimensionality();
    dists = new ArrayList<>(dim);
    // Scratch buffer handed to findBestFit, one slot per object.
    double[] scratch = new double[vectors.size()];
    // The adapter is reused and pointed at one dimension at a time.
    Adapter adapter = new Adapter();
    for (int d = 0; d < dim; d++) {
      adapter.dim = d;
      Distribution best = findBestFit(vectors, adapter, d, scratch);
      if (LOG.isVerbose()) {
        LOG.verbose("Best fit for dimension " + d + ": " + best.toString());
      }
      dists.add(best);
    }

    // Both shape parameters of the beta distribution share the same value.
    final double shape = FastMath.pow(alpha, -1 / FastMath.sqrt(dim));
    BetaDistribution beta = new BetaDistribution(shape, shape);

    // Rewrite every vector of the column.
    double[] values = new double[dim];
    for (int row = 0; row < objects.dataLength(); row++) {
      final V vec = vectors.get(row);
      for (int d = 0; d < dim; d++) {
        // TODO: when available, use logspace for better numerical precision!
        final double prob = dists.get(d).cdf(vec.doubleValue(d));
        values[d] = beta.quantile(prob);
      }
      vectors.set(row, factory.newNumberVector(values));
    }
  }
  return objects;
}
use of de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation in project elki by elki-project.
the class ClassicMultidimensionalScalingTransform method filter.
/**
 * Builds a new bundle from the input bundle. Columns that the distance
 * function's input type restriction does not accept are passed through
 * unchanged.
 * <p>
 * Accepted columns are re-declared as vector fields of dimensionality
 * {@code tdim}. Their entries are overwritten in place with vectors built
 * from the singular value decomposition of the double-centered squared
 * distance matrix.
 *
 * @param objects input bundle
 * @return the input bundle itself if it is empty, otherwise a new bundle
 */
@Override
public MultipleObjectsBundle filter(MultipleObjectsBundle objects) {
  final int size = objects.dataLength();
  if (size == 0) {
    return objects;
  }
  MultipleObjectsBundle result = new MultipleObjectsBundle();
  for (int col = 0; col < objects.metaLength(); col++) {
    @SuppressWarnings("unchecked")
    SimpleTypeInformation<Object> meta = (SimpleTypeInformation<Object>) objects.meta(col);
    @SuppressWarnings("unchecked")
    final List<Object> data = (List<Object>) objects.getColumn(col);
    if (dist.getInputTypeRestriction().isAssignableFromType(meta)) {
      // The column is registered under its new type first; the list contents
      // are replaced further below.
      @SuppressWarnings("unchecked")
      final List<I> input = (List<I>) data;
      result.appendColumn(new VectorFieldTypeInformation<>(factory, tdim), input);
      StepProgress progress = LOG.isVerbose() ? new StepProgress("Classic MDS", 2) : null;

      // Step 1: distance matrix, double centered.
      LOG.beginStep(progress, 1, "Computing distance matrix");
      double[][] distances = computeSquaredDistanceMatrix(input, dist);
      doubleCenterSymmetric(distances);

      // Step 2: decomposition.
      LOG.beginStep(progress, 2, "Computing singular value decomposition");
      SingularValueDecomposition svd = new SingularValueDecomposition(distances);
      double[][] u = svd.getU();
      double[] scale = svd.getSingularValues();
      // Undo squared, unless we were given a squared distance function:
      if (!dist.isSquared()) {
        for (int k = 0; k < tdim; k++) {
          scale[k] = FastMath.sqrt(Math.abs(scale[k]));
        }
      }

      // Scale the leading components of each row of U and store the vector.
      double[] coords = new double[tdim];
      for (int obj = 0; obj < size; obj++) {
        final double[] urow = u[obj];
        for (int k = 0; k < coords.length; k++) {
          coords[k] = scale[k] * urow[k];
        }
        data.set(obj, factory.newNumberVector(coords));
      }
      LOG.setCompleted(progress);
    } else {
      // Not a column the distance function can handle: pass through.
      result.appendColumn(meta, data);
    }
  }
  return result;
}
use of de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation in project elki by elki-project.
the class EvaluatePrecomputedOutlierScores method run.
/**
 * Streams the input file through the parser and passes every object to
 * {@code processRow}, writing to the output file opened in append mode.
 * <p>
 * The output channel is locked before anything is written. The header is
 * only written when the channel position is 0; otherwise an info message
 * about appending is logged. The stream must provide exactly one
 * variable-length number vector column and exactly one label column.
 *
 * @throws AbortException on I/O errors, on unexpected or duplicate column
 *         types, or when an object arrives before both columns are known
 */
@Override
public void run() {
  try (FileInputStream rawIn = new FileInputStream(infile);
      InputStream in = new BufferedInputStream(FileUtil.tryGzipInput(rawIn));
      FileOutputStream rawOut = new FileOutputStream(outfile, true);
      PrintStream out = new PrintStream(rawOut);
      FileChannel channel = rawOut.getChannel()) {
    // Setup the input stream.
    parser.initStream(in);
    // Lock the output file:
    channel.lock();
    if (channel.position() != 0L) {
      LOG.info("Appending to existing output " + outfile);
    } else {
      writeHeader(out);
    }
    // Column indexes of the label and the vector data; -1 while unknown.
    int labelCol = -1, vectorCol = -1;
    for (BundleStreamSource.Event ev = parser.nextEvent(); ev != BundleStreamSource.Event.END_OF_STREAM; ev = parser.nextEvent()) {
      switch(ev) {
      case META_CHANGED: {
        // Re-detect both columns from the new meta data.
        BundleMeta meta = parser.getMeta();
        labelCol = -1;
        vectorCol = -1;
        for (int c = 0; c < meta.size(); c++) {
          SimpleTypeInformation<?> colType = meta.get(c);
          if (TypeUtil.NUMBER_VECTOR_VARIABLE_LENGTH.isAssignableFromType(colType)) {
            if (vectorCol >= 0) {
              throw new AbortException("More than one vector column.");
            }
            vectorCol = c;
            continue;
          }
          if (TypeUtil.GUESSED_LABEL.isAssignableFromType(colType)) {
            if (labelCol >= 0) {
              throw new AbortException("More than one label column.");
            }
            labelCol = c;
            continue;
          }
          throw new AbortException("Unexpected data column type: " + colType);
        }
        break;
      }
      case NEXT_OBJECT:
        if (labelCol < 0) {
          throw new AbortException("No label column available.");
        }
        if (vectorCol < 0) {
          throw new AbortException("No vector column available.");
        }
        processRow(out, (NumberVector) parser.data(vectorCol), parser.data(labelCol).toString());
        break;
      default:
        // Any other event is ignored.
        break;
      }
    }
  } catch (IOException e) {
    throw new AbortException("IO error.", e);
  }
}
Aggregations