Use of de.lmu.ifi.dbs.elki.utilities.datastructures.arrays.IntegerComparator in project elki by elki-project.
Class FPGrowth, method buildIndex.
/**
 * Computes the forward index of the frequent items, ordered by decreasing
 * count, and records the inverse mapping in {@code positions}.
 *
 * @param counts Item counts, indexed by item id (dimension)
 * @param positions Output array: receives, for every frequent item id, its
 *        position in the returned index, and -1 for all other entries
 * @param minsupp Minimum support; items with a count below it are dropped
 * @return Forward index: item ids with {@code counts[id] >= minsupp}, sorted
 *         with the comparator below (higher counts compare first)
 */
private int[] buildIndex(final int[] counts, int[] positions, int minsupp) {
  // First pass: determine how many items reach the minimum support.
  int numfreq = 0;
  for (final int count : counts) {
    if (count >= minsupp) {
      numfreq++;
    }
  }
  // Second pass: collect the ids of these items, in ascending id order.
  final int[] idx = new int[numfreq];
  int item = 0, filled = 0;
  while (item < counts.length) {
    if (counts[item] >= minsupp) {
      idx[filled] = item;
      filled++;
    }
    item++;
  }
  // Order the item ids by their counts; the arguments are swapped so that
  // larger counts compare as "smaller", i.e. are placed first.
  final IntegerComparator byDescendingCount = new IntegerComparator() {
    @Override
    public int compare(int x, int y) {
      return Integer.compare(counts[y], counts[x]);
    }
  };
  IntegerArrayQuickSort.sort(idx, byDescendingCount);
  // Inverse mapping: everything starts out as "not frequent" (-1).
  Arrays.fill(positions, -1);
  int rank = 0;
  for (final int id : idx) {
    positions[id] = rank;
    rank++;
  }
  return idx;
}
Use of de.lmu.ifi.dbs.elki.utilities.datastructures.arrays.IntegerComparator in project elki by elki-project.
Class AbstractDependenceMeasure, method sortedIndex.
/**
 * Produces an index array over the data, sorted by the values the adapter
 * reads for each index.
 *
 * @param adapter Data adapter used to read the values
 * @param data Data array
 * @param len Length of data
 * @return Index array obtained from {@code MathUtil.sequence(0, len)} and
 *         sorted by comparing the referenced values with
 *         {@link Double#compare(double, double)}
 */
public static <A> int[] sortedIndex(final NumberArrayAdapter<?, A> adapter, final A data, int len) {
  // Compares two positions by the values stored at them.
  final IntegerComparator byValue = new IntegerComparator() {
    @Override
    public int compare(int x, int y) {
      final double vx = adapter.getDouble(data, x);
      final double vy = adapter.getDouble(data, y);
      return Double.compare(vx, vy);
    }
  };
  final int[] order = MathUtil.sequence(0, len);
  IntegerArrayQuickSort.sort(order, byValue);
  return order;
}
Use of de.lmu.ifi.dbs.elki.utilities.datastructures.arrays.IntegerComparator in project elki by elki-project.
Class SortByLabelFilter, method filter.
/**
 * Returns a new bundle containing the same columns as the input, with the
 * rows reordered by the string form of their label, compared ignoring case.
 *
 * @param objects Input bundle
 * @return New bundle with every column reordered by label
 * @throws AbortException if no label column is found
 */
@Override
public MultipleObjectsBundle filter(final MultipleObjectsBundle objects) {
  if (LOG.isDebugging()) {
    LOG.debug("Sorting the data set");
  }
  // Identity permutation; sorting this instead of the data keeps the
  // reordering cheap and lets every column reuse the same permutation.
  final int size = objects.dataLength();
  final int[] offsets = new int[size];
  int next = 0;
  while (next < size) {
    offsets[next] = next;
    next++;
  }
  // The sort key is taken from the label column, which must exist.
  final int lblcol = FilterUtil.findLabelColumn(objects);
  if (lblcol == -1) {
    throw new AbortException("No label column found - cannot sort by label.");
  }
  final IntegerComparator byLabelIgnoringCase = new IntegerComparator() {
    @Override
    public int compare(int o1, int o2) {
      final String first = objects.data(o1, lblcol).toString();
      final String second = objects.data(o2, lblcol).toString();
      return first.compareToIgnoreCase(second);
    }
  };
  IntegerArrayQuickSort.sort(offsets, byLabelIgnoringCase);
  // Copy each column into the result, following the sorted permutation.
  final MultipleObjectsBundle sorted = new MultipleObjectsBundle();
  for (int col = 0; col < objects.metaLength(); col++) {
    final List<?> source = objects.getColumn(col);
    final List<Object> reordered = new ArrayList<>(size);
    for (final int row : offsets) {
      reordered.add(source.get(row));
    }
    sorted.appendColumn(objects.meta(col), reordered);
  }
  return sorted;
}
Use of de.lmu.ifi.dbs.elki.utilities.datastructures.arrays.IntegerComparator in project elki by elki-project.
Class HiCSDependenceMeasure, method dependence.
/**
 * Estimates the dependence of two attributes as the average deviation
 * between the full value distribution of one attribute and its values
 * restricted to a random slice of the other attribute.
 *
 * For the first {@code m / 2} rounds the first attribute is the reference:
 * its values are compared against the first attribute's values of those
 * objects that fall into a random window of the second attribute's sorted
 * order. The remaining rounds swap the roles of the two attributes.
 *
 * @param adapter1 Adapter to read the first attribute
 * @param data1 Data of the first attribute
 * @param adapter2 Adapter to read the second attribute
 * @param data2 Data of the second attribute
 * @return Sum of the contrasts reported by {@code statTest}, divided by
 *         {@code m}
 * @throws AbortException if either attribute contains a NaN value
 */
@Override
public <A, B> double dependence(final NumberArrayAdapter<?, A> adapter1, final A data1, final NumberArrayAdapter<?, B> adapter2, final B data2) {
  final int len = size(adapter1, data1, adapter2, data2);
  final int windowsize = (int) (len * alphasqrt);
  final Random random = rnd.getSingleThreadedRandom();
  // Object indexes ordered by attribute value. A contiguous range of such an
  // index is a value slice of the respective attribute.
  int[] s1 = MathUtil.sequence(0, len), s2 = MathUtil.sequence(0, len);
  IntegerArrayQuickSort.sort(s1, new IntegerComparator() {
    @Override
    public int compare(int x, int y) {
      return Double.compare(adapter1.getDouble(data1, x), adapter1.getDouble(data1, y));
    }
  });
  IntegerArrayQuickSort.sort(s2, new IntegerComparator() {
    @Override
    public int compare(int x, int y) {
      return Double.compare(adapter2.getDouble(data2, x), adapter2.getDouble(data2, y));
    }
  });
  // Distributions for testing
  double[] fullValues = new double[len];
  double[] sampleValues = new double[windowsize];
  double deviationSum = 0.;
  // For the first half, we use the first dimension as reference
  for (int i = 0; i < len; i++) {
    fullValues[i] = adapter1.getDouble(data1, i);
    // x != x holds only for NaN.
    if (fullValues[i] != fullValues[i]) {
      throw new AbortException("NaN values are not allowed by this implementation!");
    }
  }
  // TODO: remove bias?
  int half = m >> 1;
  // NOTE(review): Random.nextInt(bound) throws IllegalArgumentException when
  // len - windowsize is not positive; confirm alphasqrt < 1 and len > 0 are
  // guaranteed by the callers.
  // NOTE(review): the retry below is unbounded; if statTest.deviation keeps
  // returning NaN (e.g. for an empty sample), this loop does not terminate.
  for (int i = 0; i < half; ++i) {
    // Build the sample: values of the FIRST attribute for the objects inside
    // a random slice of the SECOND attribute. Previously this read the second
    // attribute at raw positions, leaving the sorted indexes unused and
    // comparing two different attributes with each other.
    for (int j = random.nextInt(len - windowsize), k = 0; k < windowsize; ++k, ++j) {
      sampleValues[k] = adapter1.getDouble(data1, s2[j]);
    }
    double contrast = statTest.deviation(fullValues, sampleValues);
    if (Double.isNaN(contrast)) {
      // Retry.
      --i;
      continue;
    }
    deviationSum += contrast;
  }
  // For the second half, we use the second dimension as reference
  for (int i = 0; i < len; i++) {
    fullValues[i] = adapter2.getDouble(data2, i);
    if (fullValues[i] != fullValues[i]) {
      throw new AbortException("NaN values are not allowed by this implementation!");
    }
  }
  for (int i = half; i < m; ++i) {
    // Build the sample: values of the SECOND attribute for the objects inside
    // a random slice of the FIRST attribute.
    for (int j = random.nextInt(len - windowsize), k = 0; k < windowsize; ++k, ++j) {
      sampleValues[k] = adapter2.getDouble(data2, s1[j]);
    }
    double contrast = statTest.deviation(fullValues, sampleValues);
    if (Double.isNaN(contrast)) {
      // Retry.
      --i;
      continue;
    }
    deviationSum += contrast;
  }
  return deviationSum / m;
}
Use of de.lmu.ifi.dbs.elki.utilities.datastructures.arrays.IntegerComparator in project elki by elki-project.
Class MCEDependenceMeasure, method buildPartitions.
/**
 * Partitions an attribute: sorts the object indexes and the values of the
 * attribute, then delegates the splitting to {@code divide}.
 *
 * @param adapter1 Data adapter
 * @param data1 Data set
 * @param len Length of data
 * @param depth Splitting depth; also used to presize the result list to
 *        {@code 1 << depth} entries
 * @return List of index arrays, as filled by {@code divide}
 */
private <A> ArrayList<int[]> buildPartitions(NumberArrayAdapter<?, A> adapter1, A data1, int len, int depth) {
  // Snapshot of the values, plus the identity permutation of the positions.
  final double[] tmp = new double[len];
  final int[] idx = new int[len];
  int pos = 0;
  while (pos < len) {
    tmp[pos] = adapter1.getDouble(data1, pos);
    idx[pos] = pos;
    pos++;
  }
  final IntegerComparator byValue = new IntegerComparator() {
    @Override
    public int compare(int x, int y) {
      return Double.compare(tmp[x], tmp[y]);
    }
  };
  // The indexes have to be sorted first: the comparator looks values up by
  // their original position, which is only valid while tmp is still unsorted.
  IntegerArrayQuickSort.sort(idx, byValue);
  // Sorting the values afterwards should yield the same ordering.
  Arrays.sort(tmp);
  final ArrayList<int[]> partitions = new ArrayList<>(1 << depth);
  divide(idx, tmp, partitions, 0, len, depth);
  return partitions;
}
Aggregations