use of cern.colt.list.IntArrayList in project tdq-studio-se by Talend.
the class AbstractIntDoubleMap method keys.
/**
 * Collects every key held by the receiver into a freshly allocated list.
 * The size of the returned list equals <tt>this.size()</tt>, and the keys appear in exactly
 * the order in which {@link #forEachKey(IntProcedure)} visits them.
 * <p>
 * Use this method to iterate over the keys of the receiver.
 *
 * @return a new list holding the keys.
 */
public IntArrayList keys() {
    // Pre-size the result to the current number of entries, then let the
    // list-filling overload populate it.
    final IntArrayList result = new IntArrayList(size());
    keys(result);
    return result;
}
use of cern.colt.list.IntArrayList in project tdq-studio-se by Talend.
the class AbstractIntObjectMap method keys.
/**
 * Builds and returns a new list containing all keys of the receiver.
 * The returned list has <tt>this.size()</tt> elements; their order is <i>identical</i> to the
 * order in which {@link #forEachKey(IntProcedure)} traverses the keys.
 * <p>
 * This is a convenient way to iterate over the keys of the receiver.
 *
 * @return the keys, in a newly created list.
 */
public IntArrayList keys() {
    // Allocate with capacity for every entry and delegate the actual fill
    // to the overload that writes into a caller-supplied list.
    final IntArrayList keyList = new IntArrayList(size());
    keys(keyList);
    return keyList;
}
use of cern.colt.list.IntArrayList in project Gemma by PavlidisLab.
the class RowMissingValueFilter method filter.
/**
 * Removes rows of the matrix that have too few present values.
 * <p>
 * A value counts as present when it is not NaN and, if {@code absentPresentCalls} has a row for the
 * same design element, the corresponding call is {@code true}. A row is retained when its present count
 * is at least both {@code ABSOLUTE_MIN_PRESENT} and {@code minPresentCount}.
 * <p>
 * If a minimum present fraction was configured, it is converted to a count and used when it is higher
 * than (or in the absence of) an explicitly configured count. If, after the first pass, more than
 * {@code maxFractionRemoved} of the rows would be dropped, {@code minPresentCount} is relaxed to the
 * value found at position {@code numRows * maxFractionRemoved} of the ascending-sorted present counts
 * and the rows are scanned again.
 * <p>
 * Note that this method may modify {@code minPresentCount} on this filter instance.
 *
 * @param data the matrix to filter
 * @return {@code data} itself when no minimum was requested, otherwise a new matrix restricted to the
 *         retained rows
 * @throws IllegalStateException if the required present count exceeds the number of columns
 */
@Override
public ExpressionDataDoubleMatrix filter(ExpressionDataDoubleMatrix data) {
    int numRows = data.rows();
    int numCols = data.columns();
    IntArrayList present = new IntArrayList(numRows);
    List<CompositeSequence> kept = new ArrayList<>();
    /*
     * Do not allow minPresentFraction to override minPresent if minPresent is higher.
     */
    if (minPresentFractionIsSet) {
        int proposedMinimumNumberOfSamples = (int) Math.ceil(minPresentFraction * numCols);
        if (!minPresentIsSet) {
            // NOTE(review): presumably setMinPresentCount also flags minPresentIsSet - confirm.
            this.setMinPresentCount(proposedMinimumNumberOfSamples);
        } else if (proposedMinimumNumberOfSamples > minPresentCount) {
            RowMissingValueFilter.log.info("The minimum number of samples is already set to " + this.minPresentCount + " but computed missing threshold from fraction of " + minPresentFraction + " is higher (" + proposedMinimumNumberOfSamples + ")");
            this.setMinPresentCount(proposedMinimumNumberOfSamples);
        } else {
            RowMissingValueFilter.log.info("The minimum number of samples is already set to " + this.minPresentCount + " and computed missing threshold from fraction of " + minPresentFraction + " is lower (" + proposedMinimumNumberOfSamples + "), keeping higher value.");
        }
    }
    if (minPresentCount > numCols) {
        throw new IllegalStateException("Minimum present count is set to " + minPresentCount + " but there are only " + numCols + " columns in the matrix.");
    }
    if (!minPresentIsSet) {
        RowMissingValueFilter.log.info("No filtering was requested");
        return data;
    }
    /* first pass - determine how many missing values there are per row */
    for (int i = 0; i < numRows; i++) {
        CompositeSequence designElementForRow = data.getDesignElementForRow(i);
        /* allow for the possibility that the absent/present matrix is not in the same order, etc. */
        int absentPresentRow = absentPresentCalls == null ? -1 : absentPresentCalls.getRowIndex(designElementForRow);
        int presentCount = 0;
        for (int j = 0; j < numCols; j++) {
            boolean callIsPresent = true;
            if (absentPresentRow >= 0) {
                callIsPresent = absentPresentCalls.get(absentPresentRow, j);
            }
            if (!Double.isNaN(data.get(i, j)) && callIsPresent) {
                presentCount++;
            }
        }
        present.add(presentCount);
        if (presentCount >= RowMissingValueFilter.ABSOLUTE_MIN_PRESENT && presentCount >= minPresentCount) {
            kept.add(designElementForRow);
        }
    }
    /* decide whether we need to invoke the 'too many removed' clause, to avoid removing too many rows. */
    if (maxFractionRemoved != 0.0 && kept.size() < numRows * (1.0 - maxFractionRemoved)) {
        /*
         * Sort ascending and take the value at position numRows * maxFractionRemoved: at most that many
         * rows lie below it, so at least numRows * (1 - maxFractionRemoved) rows meet the relaxed
         * threshold. (Reading the same position from a descending list would keep only about
         * maxFractionRemoved of the rows and could even raise the threshold.)
         */
        IntArrayList sortedPresent = present.copy();
        sortedPresent.sort();
        int relaxedMinPresent = sortedPresent.get((int) (numRows * (maxFractionRemoved)));
        RowMissingValueFilter.log.info("There are " + kept.size() + " rows that meet criterion of at least " + minPresentCount + " non-missing values, but that's too many given the max fraction of " + maxFractionRemoved + "; minPresent adjusted to " + relaxedMinPresent);
        minPresentCount = relaxedMinPresent;
        // Do another pass to add rows we missed before.
        for (int i = 0; i < numRows; i++) {
            if (present.get(i) >= minPresentCount && present.get(i) >= RowMissingValueFilter.ABSOLUTE_MIN_PRESENT) {
                CompositeSequence designElementForRow = data.getDesignElementForRow(i);
                if (kept.contains(designElementForRow)) {
                    // already retained by the first pass
                    continue;
                }
                kept.add(designElementForRow);
            }
        }
    }
    RowMissingValueFilter.log.info("Retaining " + kept.size() + " rows that meet criterion of at least " + minPresentCount + " non-missing values");
    return new ExpressionDataDoubleMatrix(data, kept);
}
use of cern.colt.list.IntArrayList in project Gemma by PavlidisLab.
the class BatchConfound method factorBatchConfoundTest.
/**
 * Tests each non-batch factor of an experiment for association with the batch factor.
 * <p>
 * The first factor recognised by {@code ExperimentalDesignUtils.isBatch} is used as the batch factor.
 * For every other factor, a continuous factor is passed to {@code KruskalWallis} with the batch index
 * as the grouping, while any other factor is cross-tabulated against batch and tested with a
 * chi-square test. One {@link BatchConfoundValueObject} is produced per tested factor.
 * <p>
 * Biomaterials that have no batch assignment are skipped with a warning in both branches. If the
 * chi-square statistic cannot be computed (for example because the table has fewer than two rows or
 * columns), the statistic and the p-value are reported as NaN.
 *
 * @param ee                   the experiment; used for logging and for the resulting value objects
 * @param bioMaterialFactorMap for each factor, a map of biomaterial id to a numeric value. For the
 *                             batch factor and non-continuous factors the value is truncated to a long
 *                             and looked up as a factor value id.
 * @return the test results, empty if no batch factor was found
 * @throws IllegalArgumentException declared by the original signature; presumably raised by the
 *                                  underlying statistics routines - confirm
 */
private static Collection<BatchConfoundValueObject> factorBatchConfoundTest(ExpressionExperiment ee, Map<ExperimentalFactor, Map<Long, Double>> bioMaterialFactorMap) throws IllegalArgumentException {
    Map<Long, Long> batchMembership = new HashMap<>();
    ExperimentalFactor batchFactor = null;
    Map<Long, Integer> batchIndexes = new HashMap<>();
    for (ExperimentalFactor ef : bioMaterialFactorMap.keySet()) {
        if (ExperimentalDesignUtils.isBatch(ef)) {
            batchFactor = ef;
            Map<Long, Double> bmToFv = bioMaterialFactorMap.get(batchFactor);
            if (bmToFv == null) {
                log.warn("No biomaterial --> factor value map for batch factor: " + batchFactor);
                continue;
            }
            // Give each batch (factor value id) a dense 0-based index.
            int index = 0;
            for (FactorValue fv : batchFactor.getFactorValues()) {
                batchIndexes.put(fv.getId(), index++);
            }
            for (Map.Entry<Long, Double> bmEntry : bmToFv.entrySet()) {
                batchMembership.put(bmEntry.getKey(), bmEntry.getValue().longValue());
            }
            break;
        }
    }
    Set<BatchConfoundValueObject> result = new HashSet<>();
    if (batchFactor == null) {
        return result;
    }
    for (ExperimentalFactor ef : bioMaterialFactorMap.keySet()) {
        if (ef.equals(batchFactor)) {
            continue;
        }
        Map<Long, Double> bmToFv = bioMaterialFactorMap.get(ef);
        int numBioMaterials = bmToFv.keySet().size();
        assert numBioMaterials > 0 : "No biomaterials for " + ef;
        double p = Double.NaN;
        double chiSquare;
        int df;
        int numBatches = batchFactor.getFactorValues().size();
        if (ExperimentalDesignUtils.isContinuous(ef)) {
            // Parallel lists: the factor value of each biomaterial and the index of its batch.
            DoubleArrayList factorValues = new DoubleArrayList(numBioMaterials);
            IntArrayList batches = new IntArrayList(numBioMaterials);
            for (Map.Entry<Long, Double> bmEntry : bmToFv.entrySet()) {
                Long batch = batchMembership.get(bmEntry.getKey());
                if (batch == null) {
                    // Same treatment as the categorical branch: skip rather than fail on unboxing.
                    log.warn("No batch membership for : " + bmEntry.getKey());
                    continue;
                }
                factorValues.add(bmEntry.getValue());
                batches.add(batchIndexes.get(batch));
            }
            p = KruskalWallis.test(factorValues, batches);
            df = KruskalWallis.dof(factorValues, batches);
            chiSquare = KruskalWallis.kwStatistic(factorValues, batches);
            log.debug("KWallis\t" + ee.getId() + "\t" + ee.getShortName() + "\t" + ef.getId() + "\t" + ef.getName() + "\t" + String.format("%.2f", chiSquare) + "\t" + df + "\t" + String.format("%.2g", p) + "\t" + numBatches);
        } else {
            Map<Long, Integer> factorValueIndexes = new HashMap<>();
            int index = 0;
            for (FactorValue fv : ef.getFactorValues()) {
                factorValueIndexes.put(fv.getId(), index++);
            }
            int numLevels = ef.getFactorValues().size();
            // Contingency table of batch x factor level; Java zero-initialises the cells.
            long[][] counts = new long[numBatches][numLevels];
            for (Map.Entry<Long, Double> bmEntry : bmToFv.entrySet()) {
                Long bm = bmEntry.getKey();
                long fv = bmEntry.getValue().longValue();
                Long batch = batchMembership.get(bm);
                if (batch == null) {
                    log.warn("No batch membership for : " + bm);
                    continue;
                }
                int batchIndex = batchIndexes.get(batch);
                int factorIndex = factorValueIndexes.get(fv);
                counts[batchIndex][factorIndex]++;
            }
            ChiSquareTest cst = new ChiSquareTest();
            try {
                chiSquare = cst.chiSquare(counts);
            } catch (IllegalArgumentException e) {
                log.warn("IllegalArgumentException exception computing ChiSq for : " + ef + "; Error was: " + e.getMessage());
                chiSquare = Double.NaN;
            }
            // Derived from the dimensions rather than counts[0], which does not exist with zero batches.
            df = (numBatches - 1) * (numLevels - 1);
            if (!Double.isNaN(chiSquare)) {
                // Only build the distribution for a valid statistic: a degenerate table has df <= 0,
                // which the distribution constructor rejects.
                ChiSquaredDistribution distribution = new ChiSquaredDistribution(df);
                p = 1.0 - distribution.cumulativeProbability(chiSquare);
            }
            log.debug("ChiSq\t" + ee.getId() + "\t" + ee.getShortName() + "\t" + ef.getId() + "\t" + ef.getName() + "\t" + String.format("%.2f", chiSquare) + "\t" + df + "\t" + String.format("%.2g", p) + "\t" + numBatches);
        }
        BatchConfoundValueObject summary = new BatchConfoundValueObject(ee, ef, chiSquare, df, p, numBatches);
        result.add(summary);
    }
    return result;
}
use of cern.colt.list.IntArrayList in project Gemma by PavlidisLab.
the class GeneCoexpressionNodeDegreeValueObject method asIntArray.
/**
 * Expands a sparse key-to-value map into a dense array indexed by key.
 * <p>
 * The result has {@code lastKey + 1} positions; position {@code k} holds the value mapped to key
 * {@code k}, or 0 when the map has no (or a null) value for {@code k}. An empty map yields whatever
 * {@code toPrimitive} returns for an empty list.
 *
 * @param nodeDegreesNeg map to expand; presumably support level to node degree - confirm
 * @return the dense representation produced by {@code toPrimitive}
 */
private int[] asIntArray(TreeMap<Integer, Integer> nodeDegreesNeg) {
    IntArrayList list = new IntArrayList();
    if (nodeDegreesNeg.isEmpty()) {
        return this.toPrimitive(list);
    }
    // Primitive bound and counter avoid boxing on every loop iteration.
    int maxSupport = nodeDegreesNeg.lastKey();
    list.setSize(maxSupport + 1);
    for (int s = 0; s <= maxSupport; s++) {
        // Single lookup; a missing key is written explicitly as 0.
        Integer count = nodeDegreesNeg.get(s);
        list.set(s, count == null ? 0 : count);
    }
    return this.toPrimitive(list);
}
Aggregations