Use of it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap in project elki by elki-project.
The class ArffParser, method loadSparseInstance.
private Object[] loadSparseInstance(StreamTokenizer tokenizer, int[] targ, int[] dimsize, TypeInformation[] elkitypes, int metaLength) throws IOException {
  Int2ObjectOpenHashMap<Object> map = new Int2ObjectOpenHashMap<>();
  while (true) {
    nextToken(tokenizer);
    assert (tokenizer.ttype != StreamTokenizer.TT_EOF && tokenizer.ttype != StreamTokenizer.TT_EOL);
    if (tokenizer.ttype == '}') {
      nextToken(tokenizer);
      assert (tokenizer.ttype == StreamTokenizer.TT_EOF || tokenizer.ttype == StreamTokenizer.TT_EOL);
      break;
    } else {
      // Sparse token: "index value".
      if (tokenizer.ttype != StreamTokenizer.TT_WORD) {
        throw new AbortException("Unexpected token type encountered: " + tokenizer.toString() + " type: " + tokenizer.ttype);
      }
      int dim = ParseUtil.parseIntBase10(tokenizer.sval);
      if (map.containsKey(dim)) {
        throw new AbortException("Duplicate key in sparse vector: " + tokenizer.toString());
      }
      nextToken(tokenizer);
      if (tokenizer.ttype == StreamTokenizer.TT_WORD) {
        map.put(dim, TypeUtil.NUMBER_VECTOR_FIELD.equals(elkitypes[targ[dim]]) ? (Double) ParseUtil.parseDouble(tokenizer.sval) : tokenizer.sval);
      } else {
        throw new AbortException("Unexpected token type encountered: " + tokenizer.toString());
      }
    }
  }
  Object[] data = new Object[metaLength];
  for (int out = 0; out < metaLength; out++) {
    // Find the first index belonging to this output column:
    int s = -1;
    for (int i = 0; i < targ.length; i++) {
      if (targ[i] == out && s < 0) {
        s = i;
        break;
      }
    }
    assert (s >= 0);
    if (TypeUtil.NUMBER_VECTOR_FIELD.equals(elkitypes[out])) {
      Int2DoubleOpenHashMap f = new Int2DoubleOpenHashMap(dimsize[out]);
      for (ObjectIterator<Int2ObjectMap.Entry<Object>> iter = map.int2ObjectEntrySet().fastIterator(); iter.hasNext();) {
        Int2ObjectMap.Entry<Object> entry = iter.next();
        int i = entry.getIntKey();
        if (i < s || i >= s + dimsize[out]) {
          continue;
        }
        double v = ((Double) entry.getValue()).doubleValue();
        f.put(i - s, v);
      }
      data[out] = new SparseDoubleVector(f, dimsize[out]);
    } else if (TypeUtil.LABELLIST.equals(elkitypes[out])) {
      // Build a label list out of successive labels.
      labels.clear();
      for (ObjectIterator<Int2ObjectMap.Entry<Object>> iter = map.int2ObjectEntrySet().fastIterator(); iter.hasNext();) {
        Int2ObjectMap.Entry<Object> entry = iter.next();
        int i = entry.getIntKey();
        if (i < s) {
          continue;
        }
        if (i >= s + dimsize[out]) {
          break;
        }
        if (labels.size() < i - s) {
          LOG.warning("Sparse consecutive labels are currently not correctly supported.");
        }
        labels.add((String) entry.getValue());
      }
      data[out] = LabelList.make(labels);
    } else if (TypeUtil.EXTERNALID.equals(elkitypes[out])) {
      String val = (String) map.get(s);
      if (val == null) {
        throw new AbortException("External ID column not set in sparse instance. " + tokenizer.toString());
      }
      data[out] = new ExternalID(val);
    } else if (TypeUtil.CLASSLABEL.equals(elkitypes[out])) {
      Object val = map.get(s);
      if (val == null) {
        throw new AbortException("Class label column not set in sparse instance. " + tokenizer.toString());
      }
      // TODO: support other class label types.
      ClassLabel lbl = new SimpleClassLabel(String.valueOf(val));
      data[out] = lbl;
    } else {
      throw new AbortException("Unsupported type for column " + out + ": " + ((elkitypes[out] != null) ? elkitypes[out].toString() : "null"));
    }
  }
  return data;
}
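The method above buffers one sparse ARFF record in an Int2ObjectOpenHashMap keyed by dimension index, then splits it into typed columns. A minimal, self-contained sketch of the same fastutil idiom, assuming only fastutil on the classpath (the class name SparseRecordDemo and the sample values are ours, for illustration):

import it.unimi.dsi.fastutil.ints.Int2ObjectMap;
import it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap;
import it.unimi.dsi.fastutil.objects.ObjectIterator;

public class SparseRecordDemo {
  public static void main(String[] args) {
    // Dimension index -> parsed token; numeric and string columns may mix.
    Int2ObjectOpenHashMap<Object> map = new Int2ObjectOpenHashMap<>();
    map.put(0, Double.valueOf(1.5)); // numeric column
    map.put(7, "positive"); // label column
    // containsKey(int) takes a primitive key, so no boxing occurs; the
    // parser uses the same check to reject duplicate dimensions.
    if (map.containsKey(0)) {
      System.out.println("dimension 0 already set");
    }
    // fastIterator() reuses a single Entry object across iterations, and
    // getIntKey() reads the key as a primitive int:
    for (ObjectIterator<Int2ObjectMap.Entry<Object>> iter = map.int2ObjectEntrySet().fastIterator(); iter.hasNext();) {
      Int2ObjectMap.Entry<Object> entry = iter.next();
      System.out.println(entry.getIntKey() + " -> " + entry.getValue());
    }
  }
}

Because the fast iterator recycles its Entry object, entries must be consumed immediately, as the parser does; they cannot be stored for later use.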
Use of it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap in project elki by elki-project.
The class MiniMaxAnderberg, method run.
/**
* Run the algorithm
*
* @param db Database
* @param relation Relation
* @return Clustering hierarchy
*/
public PointerPrototypeHierarchyRepresentationResult run(Database db, Relation<O> relation) {
  DistanceQuery<O> dq = DatabaseUtil.precomputedDistanceQuery(db, relation, getDistanceFunction(), LOG);
  final DBIDs ids = relation.getDBIDs();
  final int size = ids.size();
  // Initialize space for result:
  PointerHierarchyRepresentationBuilder builder = new PointerHierarchyRepresentationBuilder(ids, dq.getDistanceFunction().isSquared());
  Int2ObjectOpenHashMap<ModifiableDBIDs> clusters = new Int2ObjectOpenHashMap<>();
  // Compute the initial (lower triangular) distance matrix.
  MatrixParadigm mat = new MatrixParadigm(ids);
  ArrayModifiableDBIDs prots = DBIDUtil.newArray(MatrixParadigm.triangleSize(size));
  DBIDArrayMIter protiter = prots.iter();
  MiniMax.initializeMatrices(mat, prots, dq);
  // Arrays used for caching:
  double[] bestd = new double[size];
  int[] besti = new int[size];
  initializeNNCache(mat.matrix, bestd, besti);
  // Repeat until everything has been merged into a single cluster:
  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Agglomerative clustering", size - 1, LOG) : null;
  DBIDArrayIter ix = mat.ix;
  for (int i = 1, end = size; i < size; i++) {
    end = AGNES.shrinkActiveSet(ix, builder, end, findMerge(end, mat, protiter, builder, clusters, bestd, besti, dq));
    LOG.incrementProcessed(prog);
  }
  LOG.ensureCompleted(prog);
  return (PointerPrototypeHierarchyRepresentationResult) builder.complete();
}
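The clusters map here is populated lazily: an entry is created only once a cluster gains more than one member during merging, so singleton clusters never occupy space. A reduced sketch of that get-or-create pattern, independent of ELKI (class and variable names are illustrative, not from the project):

import it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap;
import java.util.ArrayList;
import java.util.List;

public class GetOrCreateDemo {
  public static void main(String[] args) {
    // Cluster number -> member positions; a plain-Java stand-in for
    // ELKI's Int2ObjectOpenHashMap<ModifiableDBIDs>.
    Int2ObjectOpenHashMap<List<Integer>> clusters = new Int2ObjectOpenHashMap<>();
    int[] assignment = { 3, 1, 3, 3, 1 };
    for (int i = 0; i < assignment.length; i++) {
      List<Integer> members = clusters.get(assignment[i]);
      if (members == null) { // create the entry on first use only
        clusters.put(assignment[i], members = new ArrayList<>());
      }
      members.add(i);
    }
    // Iteration order is unspecified; e.g. {3=>[0, 2, 3], 1=>[1, 4]}
    System.out.println(clusters);
  }
}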
Use of it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap in project elki by elki-project.
The class MiniMax, method run.
/**
* Run the algorithm on a database.
*
* @param db Database
* @param relation Relation to process.
* @return Hierarchical result
*/
public PointerPrototypeHierarchyRepresentationResult run(Database db, Relation<O> relation) {
  DistanceQuery<O> dq = DatabaseUtil.precomputedDistanceQuery(db, relation, getDistanceFunction(), LOG);
  final DBIDs ids = relation.getDBIDs();
  final int size = ids.size();
  // Initialize space for result:
  PointerHierarchyRepresentationBuilder builder = new PointerHierarchyRepresentationBuilder(ids, dq.getDistanceFunction().isSquared());
  Int2ObjectOpenHashMap<ModifiableDBIDs> clusters = new Int2ObjectOpenHashMap<>(size);
  // Allocate working space:
  MatrixParadigm mat = new MatrixParadigm(ids);
  ArrayModifiableDBIDs prots = DBIDUtil.newArray(MatrixParadigm.triangleSize(size));
  initializeMatrices(mat, prots, dq);
  DBIDArrayMIter protiter = prots.iter();
  FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("MiniMax clustering", size - 1, LOG) : null;
  DBIDArrayIter ix = mat.ix;
  for (int i = 1, end = size; i < size; i++) {
    end = AGNES.shrinkActiveSet(ix, builder, end, findMerge(end, mat, protiter, builder, clusters, dq));
    LOG.incrementProcessed(progress);
  }
  LOG.ensureCompleted(progress);
  return (PointerPrototypeHierarchyRepresentationResult) builder.complete();
}
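Unlike MiniMaxAnderberg above, MiniMax presizes the map with new Int2ObjectOpenHashMap<>(size). In fastutil the constructor argument is the expected number of entries, not a raw table size. A short sketch of the effect (class name and values are hypothetical):

import it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap;

public class PresizeDemo {
  public static void main(String[] args) {
    int expected = 100_000;
    // fastutil picks a table large enough that inserting `expected` keys
    // never triggers a rehash at the default load factor of 0.75:
    Int2ObjectOpenHashMap<String> presized = new Int2ObjectOpenHashMap<>(expected);
    for (int i = 0; i < expected; i++) {
      presized.put(i, "v" + i);
    }
    // trim() shrinks the table to the smallest size that fits the content:
    presized.trim();
    System.out.println(presized.size());
  }
}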
Use of it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap in project elki by elki-project.
The class AffinityPropagationClusteringAlgorithm, method run.
/**
* Perform affinity propagation clustering.
*
* @param db Database
* @param relation Relation
* @return Clustering result
*/
public Clustering<MedoidModel> run(Database db, Relation<O> relation) {
  ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
  final int size = ids.size();
  int[] assignment = new int[size];
  double[][] s = initialization.getSimilarityMatrix(db, relation, ids);
  double[][] r = new double[size][size];
  double[][] a = new double[size][size];
  IndefiniteProgress prog = LOG.isVerbose() ? new IndefiniteProgress("Affinity Propagation Iteration", LOG) : null;
  MutableProgress aprog = LOG.isVerbose() ? new MutableProgress("Stable assignments", size + 1, LOG) : null;
  int inactive = 0;
  for (int iteration = 0; iteration < maxiter && inactive < convergence; iteration++) {
    // Update responsibility matrix:
    for (int i = 0; i < size; i++) {
      double[] ai = a[i], ri = r[i], si = s[i];
      // Find the two largest values (as initially maxk == i):
      double max1 = Double.NEGATIVE_INFINITY, max2 = Double.NEGATIVE_INFINITY;
      int maxk = -1;
      for (int k = 0; k < size; k++) {
        double val = ai[k] + si[k];
        if (val > max1) {
          max2 = max1;
          max1 = val;
          maxk = k;
        } else if (val > max2) {
          max2 = val;
        }
      }
      // With the maximum value known, update r:
      for (int k = 0; k < size; k++) {
        double val = si[k] - ((k != maxk) ? max1 : max2);
        ri[k] = ri[k] * lambda + val * (1. - lambda);
      }
    }
    // Update availability matrix:
    for (int k = 0; k < size; k++) {
      // Compute the sum of max(0, r_ik) over all i; for r_kk, skip the max.
      double colposum = 0.;
      for (int i = 0; i < size; i++) {
        if (i == k || r[i][k] > 0.) {
          colposum += r[i][k];
        }
      }
      for (int i = 0; i < size; i++) {
        double val = colposum;
        // Adjust the column sum by the one extra term:
        if (i == k || r[i][k] > 0.) {
          val -= r[i][k];
        }
        if (i != k && val > 0.) {
          // min(0, val)
          val = 0.;
        }
        a[i][k] = a[i][k] * lambda + val * (1. - lambda);
      }
    }
    int changed = 0;
    for (int i = 0; i < size; i++) {
      double[] ai = a[i], ri = r[i];
      double max = Double.NEGATIVE_INFINITY;
      int maxj = -1;
      for (int j = 0; j < size; j++) {
        double v = ai[j] + ri[j];
        if (v > max || (i == j && v >= max)) {
          max = v;
          maxj = j;
        }
      }
      if (assignment[i] != maxj) {
        changed += 1;
        assignment[i] = maxj;
      }
    }
    inactive = (changed > 0) ? 0 : (inactive + 1);
    LOG.incrementProcessed(prog);
    if (aprog != null) {
      aprog.setProcessed(size - changed, LOG);
    }
  }
  if (aprog != null) {
    aprog.setProcessed(aprog.getTotal(), LOG);
  }
  LOG.setCompleted(prog);
  // Cluster map, by lead object:
  Int2ObjectOpenHashMap<ModifiableDBIDs> map = new Int2ObjectOpenHashMap<>();
  DBIDArrayIter i1 = ids.iter();
  for (int i = 0; i1.valid(); i1.advance(), i++) {
    int c = assignment[i];
    // Add to cluster members:
    ModifiableDBIDs cids = map.get(c);
    if (cids == null) {
      cids = DBIDUtil.newArray();
      map.put(c, cids);
    }
    cids.add(i1);
  }
  // If we stopped early, the cluster lead might be in a different cluster.
  for (ObjectIterator<Int2ObjectOpenHashMap.Entry<ModifiableDBIDs>> iter = map.int2ObjectEntrySet().fastIterator(); iter.hasNext();) {
    Int2ObjectOpenHashMap.Entry<ModifiableDBIDs> entry = iter.next();
    final int key = entry.getIntKey();
    int targetkey = key;
    ModifiableDBIDs tids = null;
    // Chase arrows until we reach a self-assigned lead or a known cluster:
    while (tids == null && assignment[targetkey] != targetkey) {
      targetkey = assignment[targetkey];
      tids = map.get(targetkey);
    }
    if (tids != null && targetkey != key) {
      tids.addDBIDs(entry.getValue());
      iter.remove();
    }
  }
  Clustering<MedoidModel> clustering = new Clustering<>("Affinity Propagation Clustering", "ap-clustering");
  ModifiableDBIDs noise = DBIDUtil.newArray();
  for (ObjectIterator<Int2ObjectOpenHashMap.Entry<ModifiableDBIDs>> iter = map.int2ObjectEntrySet().fastIterator(); iter.hasNext();) {
    Int2ObjectOpenHashMap.Entry<ModifiableDBIDs> entry = iter.next();
    i1.seek(entry.getIntKey());
    if (entry.getValue().size() > 1) {
      MedoidModel mod = new MedoidModel(DBIDUtil.deref(i1));
      clustering.addToplevelCluster(new Cluster<>(entry.getValue(), mod));
    } else {
      noise.add(i1);
    }
  }
  if (noise.size() > 0) {
    MedoidModel mod = new MedoidModel(DBIDUtil.deref(noise.iter()));
    clustering.addToplevelCluster(new Cluster<>(noise, true, mod));
  }
  return clustering;
}
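The arrow-chasing pass relies on a fastutil-specific feature: the entry-set fastIterator() supports iter.remove(), so entries can be merged away while the map is being traversed. A minimal standalone sketch of that remove-while-iterating idiom (class name and sample data are ours):

import it.unimi.dsi.fastutil.ints.Int2ObjectMap;
import it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap;
import it.unimi.dsi.fastutil.objects.ObjectIterator;

public class RemoveWhileIteratingDemo {
  public static void main(String[] args) {
    Int2ObjectOpenHashMap<String> map = new Int2ObjectOpenHashMap<>();
    map.put(1, "keep");
    map.put(2, "drop");
    map.put(3, "keep");
    // iter.remove() deletes the entry returned by the last next() call
    // without invalidating the ongoing iteration:
    for (ObjectIterator<Int2ObjectMap.Entry<String>> iter = map.int2ObjectEntrySet().fastIterator(); iter.hasNext();) {
      Int2ObjectMap.Entry<String> entry = iter.next();
      if ("drop".equals(entry.getValue())) {
        iter.remove();
      }
    }
    // Only the two "keep" entries remain.
    System.out.println(map);
  }
}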
Use of it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap in project elki by elki-project.
The class ClusteringVectorParser, method nextEvent.
@Override
public Event nextEvent() {
  if (nextevent != null) {
    Event ret = nextevent;
    nextevent = null;
    return ret;
  }
  try {
    while (reader.nextLineExceptComments()) {
      buf1.clear();
      lbl.clear();
      Int2IntOpenHashMap csize = new Int2IntOpenHashMap();
      String name = null;
      // The tokenizer was already initialized by nextLineExceptComments():
      for (; tokenizer.valid(); tokenizer.advance()) {
        try {
          int cnum = tokenizer.getIntBase10();
          buf1.add(cnum);
          // Update cluster sizes:
          csize.addTo(cnum, 1);
        } catch (NumberFormatException e) {
          final String label = tokenizer.getSubstring();
          lbl.add(label);
          if (name == null) {
            name = label;
          }
        }
      }
      if (name == null) {
        name = "Cluster";
      }
      // Update meta on the first record:
      boolean metaupdate = (range == null);
      if (range == null) {
        range = DBIDUtil.generateStaticDBIDRange(buf1.size());
      }
      if (buf1.size() != range.size()) {
        throw new AbortException("Clusterings do not contain the same number of elements!");
      }
      // Build the clustering to store in the relation.
      Int2ObjectOpenHashMap<ModifiableDBIDs> clusters = new Int2ObjectOpenHashMap<>(csize.size());
      curclu = new Clustering<>(name, name);
      for (ObjectIterator<Int2IntMap.Entry> iter = csize.int2IntEntrySet().fastIterator(); iter.hasNext();) {
        Int2IntMap.Entry entry = iter.next();
        if (entry.getIntValue() > 0) {
          clusters.put(entry.getIntKey(), DBIDUtil.newArray(entry.getIntValue()));
        }
      }
      DBIDArrayIter iter = range.iter();
      for (int i = 0; i < buf1.size(); i++) {
        clusters.get(buf1.getInt(i)).add(iter.seek(i));
      }
      for (ModifiableDBIDs cids : clusters.values()) {
        curclu.addToplevelCluster(new Cluster<Model>(cids, ClusterModel.CLUSTER));
      }
      // Label handling.
      if (!haslbl && !lbl.isEmpty()) {
        haslbl = true;
        metaupdate = true;
      }
      curlbl = LabelList.make(lbl);
      if (metaupdate) {
        // Force a meta update.
        nextevent = Event.NEXT_OBJECT;
        return Event.META_CHANGED;
      }
      return Event.NEXT_OBJECT;
    }
    return Event.END_OF_STREAM;
  } catch (IOException e) {
    throw new IllegalArgumentException("Error while parsing line " + reader.getLineNumber() + ".", e);
  }
}
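The parser works in two passes: Int2IntOpenHashMap.addTo() counts the members of each cluster while tokenizing, and those counts then presize both the Int2ObjectOpenHashMap and the per-cluster arrays, so no container ever grows. A condensed sketch of the same two-pass grouping, with plain Java lists standing in for ELKI's DBID arrays (class and variable names are ours):

import it.unimi.dsi.fastutil.ints.Int2IntMap;
import it.unimi.dsi.fastutil.ints.Int2IntOpenHashMap;
import it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap;
import it.unimi.dsi.fastutil.objects.ObjectIterator;
import java.util.ArrayList;
import java.util.List;

public class TwoPassGroupingDemo {
  public static void main(String[] args) {
    int[] assignment = { 0, 2, 2, 0, 2 };
    // Pass 1: count members per cluster; addTo() increments without boxing.
    Int2IntOpenHashMap csize = new Int2IntOpenHashMap();
    for (int c : assignment) {
      csize.addTo(c, 1);
    }
    // Pass 2: presize the map and the per-cluster lists from the counts.
    Int2ObjectOpenHashMap<List<Integer>> clusters = new Int2ObjectOpenHashMap<>(csize.size());
    for (ObjectIterator<Int2IntMap.Entry> iter = csize.int2IntEntrySet().fastIterator(); iter.hasNext();) {
      Int2IntMap.Entry entry = iter.next();
      clusters.put(entry.getIntKey(), new ArrayList<>(entry.getIntValue()));
    }
    for (int i = 0; i < assignment.length; i++) {
      clusters.get(assignment[i]).add(i);
    }
    System.out.println(clusters);
  }
}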