Search in sources :

Example 46 with FactorValue

use of ubic.gemma.model.expression.experiment.FactorValue in project Gemma by PavlidisLab.

the class ExpressionDataMatrixColumnSort method organizeByFactorValues.

/**
 * Organizes a chunk of biomaterials by the values of a single factor.
 * <p>
 * The factor values are visited in the order given. For each one, the biomaterials of {@code bioMaterialChunk} that
 * are listed for it in {@code fv2bms} are recorded in {@code chunks} under that factor value and then appended to
 * {@code organized}. Biomaterials of the chunk that were not listed for any of the factor values are appended last,
 * in the order in which they occur in {@code bioMaterialChunk}, and are recorded in {@code chunks} under the
 * {@code null} key.
 *
 * @param fv2bms           master map of factor values to the biomaterials that have them
 * @param bioMaterialChunk biomaterials to organize
 * @param factorValues     factor values to consider - biomaterials will be organized in the order given
 * @param chunks           map of factor values to chunks goes here
 * @param organized        the results go here
 */
private static void organizeByFactorValues(Map<FactorValue, List<BioMaterial>> fv2bms, List<BioMaterial> bioMaterialChunk, List<FactorValue> factorValues, LinkedHashMap<FactorValue, List<BioMaterial>> chunks, List<BioMaterial> organized) {
    // Hash-based view of the chunk so that membership tests do not rescan the list for every biomaterial.
    Collection<BioMaterial> inChunk = new HashSet<>(bioMaterialChunk);
    Collection<BioMaterial> seenBioMaterials = new HashSet<>();
    for (FactorValue fv : factorValues) {
        if (!fv2bms.containsKey(fv)) {
            /*
             * This can happen if a factorvalue has been created but not yet associated with any biomaterials. This
             * can also be cruft.
             */
            continue;
        }
        // all in entire experiment, so we might not want them all as we may just be processing a small chunk.
        List<BioMaterial> bioMsForFv = fv2bms.get(fv);
        for (BioMaterial bioMaterial : bioMsForFv) {
            if (inChunk.contains(bioMaterial)) {
                List<BioMaterial> chunk = chunks.get(fv);
                if (chunk == null) {
                    chunk = new ArrayList<>();
                    chunks.put(fv, chunk);
                }
                if (!chunk.contains(bioMaterial)) {
                    /*
                     * shouldn't be twice, but ya never know.
                     */
                    chunk.add(bioMaterial);
                }
            }
            // Recorded whether or not it is part of the chunk; only used below to detect leftovers.
            seenBioMaterials.add(bioMaterial);
        }
        // If we used that fv ...
        if (chunks.containsKey(fv)) {
            // now at least this is in order of this factor
            organized.addAll(chunks.get(fv));
        }
    }
    // Leftovers contains biomaterials which have no factorvalue assigned for this factor. They are collected in the
    // order of the incoming chunk so that the result is deterministic.
    List<BioMaterial> leftovers = new ArrayList<>();
    for (BioMaterial bm : bioMaterialChunk) {
        // add() returns false when bm was already seen, which also drops repeated entries of the chunk.
        if (seenBioMaterials.add(bm)) {
            leftovers.add(bm);
        }
    }
    if (!leftovers.isEmpty()) {
        organized.addAll(leftovers);
        chunks.put(null, leftovers);
    }
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) FactorValue(ubic.gemma.model.expression.experiment.FactorValue)

Example 47 with FactorValue

use of ubic.gemma.model.expression.experiment.FactorValue in project Gemma by PavlidisLab.

the class ExpressionDataMatrixColumnSort method getBaselineLevels.

/**
 * Identify the FactorValue that should be treated as 'Baseline' for each of the given factors. This is done
 * heuristically, and if all else fails we choose arbitrarily. For continuous factors, the minimum value is treated
 * as baseline.
 *
 * @param samplesUsed These are used to make sure we don't bother using factor values as baselines if they are not
 *                    used by any of the samples. This is important for subsets. If null, this is ignored.
 * @param factors     factors
 * @return map of factors to the baseline factorvalue for that factor.
 * @throws IllegalStateException if a factor has no factor values, if a candidate value of a continuous factor has
 *                               no measurement, or if no baseline can be identified for a factor (for example
 *                               because none of its values is used by the given samples).
 */
public static Map<ExperimentalFactor, FactorValue> getBaselineLevels(List<BioMaterial> samplesUsed, Collection<ExperimentalFactor> factors) {
    Map<ExperimentalFactor, FactorValue> result = new HashMap<>();
    for (ExperimentalFactor factor : factors) {
        if (factor.getFactorValues().isEmpty()) {
            throw new IllegalStateException("Factor has no factor values: " + factor);
        }
        if (ExperimentalDesignUtils.isContinuous(factor)) {
            // then there is no baseline, but we'll take the minimum value.
            TreeMap<Double, FactorValue> sortedVals = new TreeMap<>();
            for (FactorValue fv : factor.getFactorValues()) {
                /*
                 * Check that this factor value is used by at least one of the given samples. Only matters if this
                 * is a subset of the full data set.
                 */
                if (samplesUsed != null && !ExpressionDataMatrixColumnSort.used(fv, samplesUsed)) {
                    // this factorValue cannot be a candidate baseline for this subset.
                    continue;
                }
                if (fv.getMeasurement() == null) {
                    throw new IllegalStateException("Continuous factors should have Measurements as values");
                }
                Double v = Double.parseDouble(fv.getMeasurement().getValue());
                sortedVals.put(v, fv);
            }
            if (sortedVals.isEmpty()) {
                // Every value was filtered out above; firstEntry() would be null, so fail with a clear message.
                throw new IllegalStateException("No baseline could be identified for factor:  " + factor + " has " + factor.getFactorValues().size() + " factor values");
            }
            result.put(factor, sortedVals.firstEntry().getValue());
        } else {
            for (FactorValue fv : factor.getFactorValues()) {
                /*
                 * Check that this factor value is used by at least one of the given samples. Only matters if this
                 * is a subset of the full data set.
                 */
                if (samplesUsed != null && !ExpressionDataMatrixColumnSort.used(fv, samplesUsed)) {
                    // this factorValue cannot be a candidate baseline for this subset.
                    continue;
                }
                if (BaselineSelection.isForcedBaseline(fv)) {
                    // A forced baseline overrides any candidate picked earlier and ends the search.
                    ExpressionDataMatrixColumnSort.log.info("Baseline chosen: " + fv);
                    result.put(factor, fv);
                    break;
                }
                if (BaselineSelection.isBaselineCondition(fv)) {
                    if (result.containsKey(factor)) {
                        ExpressionDataMatrixColumnSort.log.warn("A second potential baseline was found for " + factor + ": " + fv);
                        continue;
                    }
                    ExpressionDataMatrixColumnSort.log.info("Baseline chosen: " + fv);
                    result.put(factor, fv);
                }
            }
            if (!result.containsKey(factor)) {
                // fallback
                FactorValue arbitraryBaselineFV;
                if (samplesUsed != null) {
                    // make sure we choose a fv that is actually used (see above for non-arbitrary case)
                    arbitraryBaselineFV = firstUsedFactorValue(factor, samplesUsed);
                } else {
                    arbitraryBaselineFV = factor.getFactorValues().iterator().next();
                }
                if (arbitraryBaselineFV == null) {
                    throw new IllegalStateException("No baseline could be identified for factor:  " + factor + " has " + factor.getFactorValues().size() + " factor values");
                }
                ExpressionDataMatrixColumnSort.log.info("Falling back on choosing baseline arbitrarily: " + arbitraryBaselineFV);
                result.put(factor, arbitraryBaselineFV);
            }
        }
    }
    return result;
}

/**
 * Finds the first factor value of the given factor (in the factor's own iteration order) that is equal to a factor
 * value of at least one of the given samples.
 *
 * @param factor  factor whose values are searched
 * @param samples samples whose factor values are checked; must not be null
 * @return the first matching factor value, or null if none of the factor's values is carried by any sample
 */
private static FactorValue firstUsedFactorValue(ExperimentalFactor factor, List<BioMaterial> samples) {
    for (FactorValue fv : factor.getFactorValues()) {
        for (BioMaterial bm : samples) {
            for (FactorValue bfv : bm.getFactorValues()) {
                if (fv.equals(bfv)) {
                    return fv;
                }
            }
        }
    }
    return null;
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) FactorValue(ubic.gemma.model.expression.experiment.FactorValue) ExperimentalFactor(ubic.gemma.model.expression.experiment.ExperimentalFactor)

Example 48 with FactorValue

use of ubic.gemma.model.expression.experiment.FactorValue in project Gemma by PavlidisLab.

the class ExpressionDataMatrixColumnSort method orderByFactor.

/**
 * Orders a chunk of biomaterials according to the values of one factor.
 * <p>
 * The factor's values are first put in order (via {@code sortByControl} for non-continuous factors, via
 * {@code sortIfMeasurement} for continuous ones) and the biomaterials are then arranged in that order by
 * {@code organizeByFactorValues}.
 *
 * @param ef     the factor to order by.
 * @param fv2bms map of factorValues to lists of biomaterials that have that factorValue.
 * @param bms    Chunk of biomaterials to organize. Returned as-is when it has exactly one element or when the
 *               factor has fewer than two values.
 * @return ordered list, or null if there was a problem (the reorganized list does not have the same size as
 * {@code bms}).
 */
private static List<BioMaterial> orderByFactor(ExperimentalFactor ef, Map<FactorValue, List<BioMaterial>> fv2bms, List<BioMaterial> bms) {
    if (bms.size() == 1) {
        return bms;
    }
    ExpressionDataMatrixColumnSort.log.debug("Ordering " + bms.size() + " biomaterials by " + ef);
    // probably redundant.
    ExpressionDataMatrixColumnSort.sortBioMaterials(bms);
    List<FactorValue> factorValues = new ArrayList<>(ef.getFactorValues());
    if (factorValues.size() < 2) {
        /*
         * Not strictly disallowed, but useless.
         */
        return bms;
    }
    if (!ExperimentalDesignUtils.isContinuous(ef)) {
        ExpressionDataMatrixColumnSort.sortByControl(factorValues);
    } else {
        ExpressionDataMatrixColumnSort.sortIfMeasurement(factorValues);
    }
    LinkedHashMap<FactorValue, List<BioMaterial>> chunks = new LinkedHashMap<>();
    List<BioMaterial> organized = new ArrayList<>();
    ExpressionDataMatrixColumnSort.organizeByFactorValues(fv2bms, bms, factorValues, chunks, organized);
    if (ExpressionDataMatrixColumnSort.log.isDebugEnabled()) {
        // Dump the resulting order through the logger (not stderr) so it honours the logging configuration.
        for (BioMaterial b : organized) {
            for (FactorValue f : b.getFactorValues()) {
                if (f.getExperimentalFactor().equals(ef)) {
                    ExpressionDataMatrixColumnSort.log.debug(b.getId() + " " + f);
                }
            }
        }
    }
    if (organized.size() != bms.size()) {
        // fail gracefully.
        ExpressionDataMatrixColumnSort.log.error("Could not order by factor: " + ef + " Biomaterial count (" + bms.size() + ") does not equal the size of the reorganized biomaterial list (" + organized.size() + "). Check the experimental design for completeness/correctness");
        return null;
    }
    return organized;
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) FactorValue(ubic.gemma.model.expression.experiment.FactorValue)

Example 49 with FactorValue

use of ubic.gemma.model.expression.experiment.FactorValue in project Gemma by PavlidisLab.

the class ExpressionDataMatrixColumnSort method buildFv2BmMap.

/**
 * Builds a map from each factor value to the biomaterials that carry it.
 * <p>
 * Every biomaterial is added to the list of each of its factor values; biomaterials without factor values do not
 * appear in the map. Each list is passed through {@code sortBioMaterials} before the map is returned.
 *
 * @param bms biomaterials to index
 * @return map of factor values to the biomaterials that have that factor value
 */
private static Map<FactorValue, List<BioMaterial>> buildFv2BmMap(Collection<BioMaterial> bms) {
    Map<FactorValue, List<BioMaterial>> fv2bms = new HashMap<>();
    for (BioMaterial bm : bms) {
        for (FactorValue fv : bm.getFactorValues()) {
            List<BioMaterial> bmsForFv = fv2bms.get(fv);
            if (bmsForFv == null) {
                bmsForFv = new ArrayList<>();
                fv2bms.put(fv, bmsForFv);
            }
            bmsForFv.add(bm);
        }
    }
    for (List<BioMaterial> biomaterials : fv2bms.values()) {
        ExpressionDataMatrixColumnSort.sortBioMaterials(biomaterials);
    }
    return fv2bms;
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) FactorValue(ubic.gemma.model.expression.experiment.FactorValue)

Example 50 with FactorValue

use of ubic.gemma.model.expression.experiment.FactorValue in project Gemma by PavlidisLab.

the class ExpressionDataMatrixColumnSort method chooseSimplestFactor.

/**
 * Picks the factor to order by next: the one with the fewest distinct values actually present among the given
 * biomaterials.
 * <p>
 * Two special cases apply. A continuous factor is returned as soon as it is encountered, without looking at the
 * remaining factors. A 'batch' factor is skipped whenever more than one factor is offered.
 *
 * @param bms     biomaterials whose factor values determine which values count as used
 * @param factors candidate factors
 * @return null if no factor has at least 2 values represented, or the factor with the fewest number of values (at
 * least 2 values that is)
 */
private static ExperimentalFactor chooseSimplestFactor(List<BioMaterial> bms, Collection<ExperimentalFactor> factors) {
    // Pool every factor value carried by at least one of the biomaterials.
    Collection<FactorValue> valuesInUse = new HashSet<>();
    for (BioMaterial sample : bms) {
        valuesInUse.addAll(sample.getFactorValues());
    }

    ExperimentalFactor best = null;
    int bestCount = Integer.MAX_VALUE;
    for (ExperimentalFactor candidate : factors) {
        if (ExperimentalDesignUtils.isContinuous(candidate)) {
            // Continuous factors win immediately.
            return candidate;
        }

        // 'batch' is only eligible when it is the sole factor on offer.
        boolean skipAsBatch = factors.size() > 1 && ExperimentalDesignUtils.isBatch(candidate);
        if (!skipAsBatch) {
            int represented = 0;
            for (FactorValue value : candidate.getFactorValues()) {
                if (valuesInUse.contains(value)) {
                    represented += 1;
                }
            }
            if (represented >= 2 && represented < bestCount) {
                best = candidate;
                bestCount = represented;
            }
        }
    }
    return best;
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) FactorValue(ubic.gemma.model.expression.experiment.FactorValue) ExperimentalFactor(ubic.gemma.model.expression.experiment.ExperimentalFactor)

Aggregations

FactorValue (ubic.gemma.model.expression.experiment.FactorValue)55 ExperimentalFactor (ubic.gemma.model.expression.experiment.ExperimentalFactor)30 BioMaterial (ubic.gemma.model.expression.biomaterial.BioMaterial)27 Test (org.junit.Test)12 VocabCharacteristic (ubic.gemma.model.common.description.VocabCharacteristic)8 BioAssay (ubic.gemma.model.expression.bioAssay.BioAssay)8 ExpressionExperiment (ubic.gemma.model.expression.experiment.ExpressionExperiment)8 HashSet (java.util.HashSet)7 Characteristic (ubic.gemma.model.common.description.Characteristic)6 DifferentialExpressionAnalysis (ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysis)5 ArrayList (java.util.ArrayList)4 AbstractGeoServiceTest (ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest)4 CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence)4 FactorValueValueObject (ubic.gemma.model.expression.experiment.FactorValueValueObject)4 StopWatch (org.apache.commons.lang3.time.StopWatch)3 DifferentialExpressionAnalysisResult (ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisResult)3 ExpressionAnalysisResultSet (ubic.gemma.model.analysis.expression.diff.ExpressionAnalysisResultSet)3 AnnotationValueObject (ubic.gemma.model.common.description.AnnotationValueObject)3 Measurement (ubic.gemma.model.common.measurement.Measurement)3 BioAssaySet (ubic.gemma.model.expression.experiment.BioAssaySet)3