use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.
the class StratifiedCrossValidation method nextPartitioning.
/**
 * Builds the training/test split for the current fold and then advances to
 * the next fold.
 * <p>
 * Rows whose entry in {@code assignment} equals the current fold go to the
 * test bundle; all other rows go to the training bundle. Both bundles get the
 * same columns (meta information) as the source bundle.
 *
 * @return the split for the current fold, or {@code null} when {@code fold}
 *         has already reached {@code nfold}
 */
@Override
public TrainingAndTestSet nextPartitioning() {
  // All folds have been handed out already.
  if (fold >= nfold) {
    return null;
  }
  final int testSize = sizes[fold];
  final int trainSize = bundle.dataLength() - testSize;
  MultipleObjectsBundle trainBundle = new MultipleObjectsBundle();
  MultipleObjectsBundle testBundle = new MultipleObjectsBundle();
  // Split one column at a time, so each output column is built in one pass.
  for (int col = 0, cols = bundle.metaLength(); col < cols; ++col) {
    ArrayList<Object> trainColumn = new ArrayList<>(trainSize);
    ArrayList<Object> testColumn = new ArrayList<>(testSize);
    for (int row = 0, rows = bundle.dataLength(); row < rows; ++row) {
      if (assignment[row] == fold) {
        testColumn.add(bundle.data(row, col));
      } else {
        trainColumn.add(bundle.data(row, col));
      }
    }
    trainBundle.appendColumn(bundle.meta(col), trainColumn);
    testBundle.appendColumn(bundle.meta(col), testColumn);
  }
  ++fold;
  return new TrainingAndTestSet(trainBundle, testBundle, labels);
}
use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.
the class GeneratorMain method generate.
/**
 * Main loop to generate data set.
 * <p>
 * The result bundle has three columns: the generated vectors, their class
 * labels, and their models. For every generator, points are requested until
 * as many points as the generator's size have been kept; each point is passed
 * to the selected assignment strategy, and a negative assignment means the
 * point is rejected and counted as discarded on the generator.
 *
 * @return Generated data set
 * @throws AbortException if no generators are configured, if the generators
 *         disagree on dimensionality, or if a point is rejected for a
 *         generator that is not a {@code GeneratorInterfaceDynamic}
 */
public MultipleObjectsBundle generate() {
  // we actually need some clusters.
  if (generators.isEmpty()) {
    throw new AbortException("No clusters specified.");
  }
  // Assert that cluster dimensions agree.
  final int dim = generators.get(0).getDim();
  for (GeneratorInterface c : generators) {
    if (c.getDim() != dim) {
      throw new AbortException("Cluster dimensions do not agree.");
    }
  }
  // Prepare result bundle
  MultipleObjectsBundle bundle = new MultipleObjectsBundle();
  VectorFieldTypeInformation<DoubleVector> type = new VectorFieldTypeInformation<>(DoubleVector.FACTORY, dim);
  bundle.appendColumn(type, new ArrayList<>());
  bundle.appendColumn(TypeUtil.CLASSLABEL, new ArrayList<>());
  bundle.appendColumn(Model.TYPE, new ArrayList<Model>());
  // generate clusters
  ClassLabel[] labels = new ClassLabel[generators.size()];
  Model[] models = new Model[generators.size()];
  initLabelsAndModels(generators, labels, models, relabelClusters);
  // Select how generated points are assigned to (or rejected from) clusters.
  final AssignPoint assignment;
  if (!testAgainstModel) {
    assignment = new AssignPoint();
  } else if (relabelClusters == null) {
    assignment = new TestModel();
  } else if (!relabelDistance) {
    assignment = new AssignLabelsByDensity(labels);
  } else {
    assignment = new AssignLabelsByDistance(labels);
  }
  for (int i = 0; i < labels.length; i++) {
    final GeneratorInterface curclus = generators.get(i);
    assignment.newCluster(i, curclus);
    // Only dynamic generators allow rejection / model testing:
    GeneratorInterfaceDynamic cursclus = (curclus instanceof GeneratorInterfaceDynamic) ? (GeneratorInterfaceDynamic) curclus : null;
    int kept = 0;
    while (kept < curclus.getSize()) {
      // generate the "missing" number of points
      List<double[]> newp = curclus.generate(curclus.getSize() - kept);
      for (double[] p : newp) {
        int bestc = assignment.getAssignment(i, p);
        if (bestc < 0) {
          // A rejection can only be recorded on a dynamic generator. Fail
          // with a clear message instead of a bare NullPointerException.
          if (cursclus == null) {
            throw new AbortException("Point was rejected for a cluster generator that does not support rejection.");
          }
          cursclus.incrementDiscarded();
          continue;
        }
        bundle.appendSimple(DoubleVector.wrap(p), labels[bestc], models[bestc]);
        ++kept;
      }
    }
  }
  return bundle;
}
use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.
the class AbstractDatabaseConnection method invokeStreamFilters.
/**
 * Applies the configured filters, in order, to the given stream.
 * <p>
 * Filters that are {@code StreamFilter}s are initialized on a stream; all
 * other filters are applied to a {@code MultipleObjectsBundle}. The data is
 * converted between the two representations whenever consecutive filters
 * need different ones, and the final result is returned as a stream.
 *
 * @param stream the objects to process
 * @return processed objects; the input itself when {@code filters} is
 *         {@code null}
 */
protected BundleStreamSource invokeStreamFilters(BundleStreamSource stream) {
  if (filters == null) {
    return stream;
  }
  // The data lives in either 'pending' (stream form) or 'materialized'
  // (bundle form); after each filter the form it did not produce is null.
  BundleStreamSource pending = stream;
  MultipleObjectsBundle materialized = null;
  for (ObjectFilter filter : filters) {
    if (!(filter instanceof StreamFilter)) {
      // Bundle filter: materialize the stream first if necessary.
      if (materialized == null) {
        materialized = pending.asMultipleObjectsBundle();
      }
      materialized = filter.filter(materialized);
      pending = null;
      continue;
    }
    // Stream filter: turn the bundle back into a stream if necessary.
    if (pending == null) {
      pending = materialized.asStream();
    }
    pending = ((StreamFilter) filter).init(pending);
    materialized = null;
  }
  if (pending == null) {
    return materialized.asStream();
  }
  return pending;
}
use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.
the class BundleDatabaseConnection method loadData.
/**
 * Reads the bundle from {@code infile}, runs it through the configured
 * stream filters and returns the fully materialized result.
 *
 * @return the loaded (and filtered) data
 * @throws AbortException if an I/O error occurs while opening, reading or
 *         closing the file
 */
@Override
public MultipleObjectsBundle loadData() {
  // try-with-resources closes the channel and then the stream on every exit
  // path, including when reading or filtering throws.
  try (FileInputStream fis = new FileInputStream(infile);
      FileChannel channel = fis.getChannel()) {
    return invokeStreamFilters(new BundleReader(channel)).asMultipleObjectsBundle();
  } catch (IOException e) {
    throw new AbortException("IO error loading bundle", e);
  }
}
use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.
the class ExternalIDJoinDatabaseConnection method loadData.
/**
 * Loads all sources and joins them on their external ID column.
 * <p>
 * The rows of the first source define the rows of the result. For each
 * further source, every column except its external ID column is appended to
 * the first bundle; values are copied into the row of the first source that
 * has the same external ID, and stay {@code null} where no row of that source
 * matches. Rows of further sources whose ID is missing or unknown in the
 * first source are skipped. Rows of the result that still contain a
 * {@code null} value are reported with a warning, but are kept.
 *
 * @return the first source's bundle, extended by the joined columns
 * @throws AbortException if there are no sources, or if a source has no
 *         external ID column
 */
@Override
public MultipleObjectsBundle loadData() {
  // Fail early and clearly instead of with an IndexOutOfBoundsException below.
  if (sources.isEmpty()) {
    throw new AbortException("No data sources specified to join.");
  }
  List<MultipleObjectsBundle> bundles = new ArrayList<>(sources.size());
  for (DatabaseConnection dbc : sources) {
    bundles.add(dbc.loadData());
  }
  MultipleObjectsBundle first = bundles.get(0);
  // Maps external ID -> row number in the first bundle; -1 means "unknown".
  Object2IntOpenHashMap<ExternalID> labelmap = new Object2IntOpenHashMap<>(first.dataLength());
  labelmap.defaultReturnValue(-1);
  // Process first bundle
  {
    final int lblcol = findExternalIdColumn(first);
    if (lblcol == -1) {
      throw new AbortException("No external ID column found in primary source.");
    }
    for (int i = 0; i < first.dataLength(); i++) {
      ExternalID data = (ExternalID) first.data(i, lblcol);
      if (data == null) {
        LOG.debug("Object without ID encountered.");
        continue;
      }
      // On duplicates, the later row replaces the earlier one in the lookup.
      int old = labelmap.put(data, i);
      if (old != -1) {
        LOG.debug("Duplicate id encountered: " + data + " in rows " + old + " and " + i);
      }
    }
  }
  // Process additional columns
  for (int c = 1; c < sources.size(); c++) {
    MultipleObjectsBundle cur = bundles.get(c);
    final int lblcol = findExternalIdColumn(cur);
    if (lblcol == -1) {
      StringBuilder buf = new StringBuilder();
      for (int i = 0; i < cur.metaLength(); i++) {
        if (buf.length() > 0) {
          buf.append(',');
        }
        buf.append(cur.meta(i));
      }
      throw new AbortException("No external ID column found in source " + (c + 1) + " to join with. Got: " + buf.toString());
    }
    // Destination columns, indexed like the columns of the current source
    // (null at the position of its ID column).
    List<ArrayList<Object>> dcol = new ArrayList<>(cur.metaLength());
    for (int i = 0; i < cur.metaLength(); i++) {
      // Skip the label columns
      if (i == lblcol) {
        dcol.add(null);
        continue;
      }
      ArrayList<Object> newcol = new ArrayList<>(first.dataLength());
      // Pre-fill with nulls.
      for (int j = 0; j < first.dataLength(); j++) {
        newcol.add(null);
      }
      first.appendColumn(cur.meta(i), newcol);
      dcol.add(newcol);
    }
    // Copy each row of the current source into the matching row of the result.
    for (int i = 0; i < cur.dataLength(); i++) {
      ExternalID data = (ExternalID) cur.data(i, lblcol);
      if (data == null) {
        LOG.warning("Object without label encountered.");
        continue;
      }
      int row = labelmap.getInt(data);
      if (row == -1) {
        LOG.debug("ID not found for join: " + data + " in row " + i);
        continue;
      }
      for (int d = 0; d < cur.metaLength(); d++) {
        if (d == lblcol) {
          continue;
        }
        List<Object> col = dcol.get(d);
        assert (col != null);
        col.set(row, cur.data(i, d));
      }
    }
  }
  // Report (once per row) rows that still contain a null value after joining.
  for (int i = 0; i < first.dataLength(); i++) {
    for (int d = 0; d < first.metaLength(); d++) {
      if (first.data(i, d) == null) {
        StringBuilder buf = new StringBuilder();
        for (int d2 = 0; d2 < first.metaLength(); d2++) {
          if (buf.length() > 0) {
            buf.append(", ");
          }
          // append(Object) already renders a null reference as "null".
          buf.append(first.data(i, d2));
        }
        LOG.warning("null value in joined data, row " + i + " column " + d + FormatUtil.NEWLINE + "[" + buf.toString() + "]");
        break;
      }
    }
  }
  return first;
}

/**
 * Finds the first column of a bundle whose type is accepted by
 * {@code TypeUtil.EXTERNALID}.
 *
 * @param bundle the bundle to search
 * @return the column index, or {@code -1} if there is no such column
 */
private static int findExternalIdColumn(MultipleObjectsBundle bundle) {
  for (int i = 0; i < bundle.metaLength(); i++) {
    if (TypeUtil.EXTERNALID.isAssignableFromType(bundle.meta(i))) {
      return i;
    }
  }
  return -1;
}
Aggregations