Search in sources :

Example 76 with ExperimentalFactor

use of ubic.gemma.model.expression.experiment.ExperimentalFactor in project Gemma by PavlidisLab.

In the class DifferentialExpressionAnalysisUtil, the method filterFactorValuesFromBiomaterials.

/**
 * Restricts the factor values of each biomaterial to those that belong to one of the given experimental factors.
 * <p>
 * Note that the biomaterials are modified in place: each one has its factor values replaced (via
 * {@code setFactorValues}) by the filtered set before being added to the returned collection.
 *
 * @param factors      experimental factors whose factor values should be kept; asserted to be non-empty
 * @param biomaterials biomaterials to filter; asserted to be non-empty
 * @return the same biomaterials, each now holding only factor values contained in one of the given factors
 * @throws IllegalStateException if none of the biomaterials carried any factor value at all (before filtering)
 */
private static Collection<BioMaterial> filterFactorValuesFromBiomaterials(Collection<ExperimentalFactor> factors, Collection<BioMaterial> biomaterials) {
    assert !biomaterials.isEmpty();
    assert !factors.isEmpty();

    // Pool every factor value that belongs to one of the requested factors.
    Collection<FactorValue> permitted = new HashSet<>();
    for (ExperimentalFactor factor : factors) {
        permitted.addAll(factor.getFactorValues());
    }

    Collection<BioMaterial> filtered = new HashSet<>();
    // Running total of factor values seen across all biomaterials, counted before filtering.
    int factorValuesSeen = 0;
    for (BioMaterial bm : biomaterials) {
        Collection<FactorValue> current = bm.getFactorValues();
        factorValuesSeen += current.size();

        Collection<FactorValue> kept = new HashSet<>();
        for (FactorValue candidate : current) {
            if (permitted.contains(candidate)) {
                kept.add(candidate);
            }
        }

        bm.setFactorValues(kept);
        filtered.add(bm);
    }

    if (factorValuesSeen == 0) {
        throw new IllegalStateException("No biomaterials had any factor values");
    }
    return filtered;
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) FactorValue(ubic.gemma.model.expression.experiment.FactorValue) ExperimentalFactor(ubic.gemma.model.expression.experiment.ExperimentalFactor)

Example 77 with ExperimentalFactor

use of ubic.gemma.model.expression.experiment.ExperimentalFactor in project Gemma by PavlidisLab.

In the class DifferentialExpressionAnalysisUtil, the method generateFactorValuePairings.

/**
 * Builds every unordered pair of factor values that come from two different experimental factors.
 * Factor values sharing the same experimental factor are never paired with each other.
 *
 * @param experimentalFactors the experimental factors whose factor values are to be paired
 * @return a collection of two-element sets, one per distinct pairing; empty if no cross-factor pair exists
 */
private static Collection<Set<FactorValue>> generateFactorValuePairings(Collection<ExperimentalFactor> experimentalFactors) {
    // Pool the factor values of all the factors.
    Collection<FactorValue> pooled = new HashSet<>();
    for (ExperimentalFactor ef : experimentalFactors) {
        pooled.addAll(ef.getFactorValues());
    }

    // Set semantics collapse {a, b} and {b, a} into a single pairing.
    Collection<Set<FactorValue>> pairings = new HashSet<>();
    for (FactorValue outer : pooled) {
        for (FactorValue inner : pooled) {
            if (!inner.getExperimentalFactor().equals(outer.getExperimentalFactor())) {
                Set<FactorValue> pair = new HashSet<>();
                pair.add(outer);
                pair.add(inner);
                pairings.add(pair);
            }
        }
    }
    return pairings;
}
Also used : FactorValue(ubic.gemma.model.expression.experiment.FactorValue) BioAssaySet(ubic.gemma.model.expression.experiment.BioAssaySet) ExperimentalFactor(ubic.gemma.model.expression.experiment.ExperimentalFactor)

Example 78 with ExperimentalFactor

use of ubic.gemma.model.expression.experiment.ExperimentalFactor in project Gemma by PavlidisLab.

In the class ExpressionDataMatrixColumnSort, the method orderBiomaterialsBySortedFactors.

/**
 * Orders biomaterials according to an already-sorted list of factors. The biomaterials are ordered by the first
 * factor, split into chunks on that factor, and each chunk of two or more biomaterials is then ordered recursively
 * by the remaining factors. If the first factor is continuous, ordering stops after that factor.
 *
 * @param start   biomaterials to sort
 * @param factors sorted list of factors to define sort order for biomaterials, cannot be null (but may be empty)
 * @return the ordered biomaterials; {@code start} itself is returned when it has a single element, when there is
 * no usable first factor, or when chunking on the first factor yields {@code null}
 * @throws IllegalArgumentException if {@code start} is empty, or if {@code factors} is null while {@code start}
 *                                  holds more than one biomaterial
 */
private static List<BioMaterial> orderBiomaterialsBySortedFactors(List<BioMaterial> start, List<ExperimentalFactor> factors) {
    // A single biomaterial is trivially ordered; this is checked before any argument validation.
    if (start.size() == 1) {
        return start;
    }
    if (start.isEmpty()) {
        throw new IllegalArgumentException("Must provide some biomaterials");
    }
    if (factors == null) {
        throw new IllegalArgumentException("Must provide sorted factors, or at least an empty list");
    }

    // Nothing to order by: either no factors are left or the leading entry is null.
    ExperimentalFactor current = factors.isEmpty() ? null : factors.get(0);
    if (current == null) {
        return start;
    }

    // Order the whole input by the leading factor.
    Map<FactorValue, List<BioMaterial>> bmsByFactorValue = ExpressionDataMatrixColumnSort.buildFv2BmMap(start);
    List<BioMaterial> sorted = ExpressionDataMatrixColumnSort.orderByFactor(current, bmsByFactorValue, start);

    // Abort ordering, so we are ordered only by the first continuous factor.
    if (ExperimentalDesignUtils.isContinuous(current)) {
        assert sorted != null;
        return sorted;
    }

    List<ExperimentalFactor> remaining = new ArrayList<>(factors);
    remaining.remove(current);
    if (remaining.isEmpty()) {
        // No more ordering is necessary.
        return sorted;
    }

    ExpressionDataMatrixColumnSort.log.debug("Factors: " + factors.size());

    // Split into chunks on the leading factor, retaining the order we just made.
    LinkedHashMap<FactorValue, List<BioMaterial>> chunks = ExpressionDataMatrixColumnSort.chunkOnFactor(current, sorted);
    if (chunks == null) {
        // this means we should bail, gracefully.
        return start;
    }

    // Recurse into every chunk that still has something to order; smaller chunks are appended as they are.
    List<BioMaterial> result = new ArrayList<>();
    for (List<BioMaterial> chunk : chunks.values()) {
        if (chunk.size() >= 2) {
            result.addAll(ExpressionDataMatrixColumnSort.orderBiomaterialsBySortedFactors(chunk, remaining));
        } else {
            result.addAll(chunk);
        }
    }
    return result;
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) FactorValue(ubic.gemma.model.expression.experiment.FactorValue) ExperimentalFactor(ubic.gemma.model.expression.experiment.ExperimentalFactor)

Example 79 with ExperimentalFactor

use of ubic.gemma.model.expression.experiment.ExperimentalFactor in project Gemma by PavlidisLab.

In the class ExpressionDataMatrixColumnSort, the method getBaselineLevels.

/**
 * Identify the FactorValue that should be treated as 'Baseline' for each of the given factors. This is done
 * heuristically, and if all else fails we choose arbitrarily. For continuous factors, the minimum value is treated
 * as baseline.
 * <p>
 * For categorical factors the order of preference is: a forced baseline (per
 * {@code BaselineSelection.isForcedBaseline}), then the first value recognised by
 * {@code BaselineSelection.isBaselineCondition}, then an arbitrary value (restricted to values used by the given
 * samples when {@code samplesUsed} is not null).
 *
 * @param samplesUsed These are used to make sure we don't bother using factor values as baselines if they are not
 *                    used by any of the samples. This is important for subsets. If null, this is ignored.
 * @param factors     factors
 * @return map of factors to the baseline factorvalue for that factor.
 * @throws IllegalStateException if a factor has no factor values, if a candidate value of a continuous factor has
 *                               no measurement, or if no baseline can be identified for a factor (for example
 *                               because none of its values is used by the given samples)
 */
public static Map<ExperimentalFactor, FactorValue> getBaselineLevels(List<BioMaterial> samplesUsed, Collection<ExperimentalFactor> factors) {
    Map<ExperimentalFactor, FactorValue> result = new HashMap<>();
    for (ExperimentalFactor factor : factors) {
        if (factor.getFactorValues().isEmpty()) {
            throw new IllegalStateException("Factor has no factor values: " + factor);
        }
        if (ExperimentalDesignUtils.isContinuous(factor)) {
            // then there is no baseline, but we'll take the minimum value.
            TreeMap<Double, FactorValue> sortedVals = new TreeMap<>();
            for (FactorValue fv : factor.getFactorValues()) {
                /*
                 * Check that this factor value is used by at least one of the given samples. Only matters if this
                 * is a subset of the full data set.
                 */
                if (samplesUsed != null && !ExpressionDataMatrixColumnSort.used(fv, samplesUsed)) {
                    // this factorValue cannot be a candidate baseline for this subset.
                    continue;
                }
                if (fv.getMeasurement() == null) {
                    throw new IllegalStateException("Continuous factors should have Measurements as values");
                }
                double v = Double.parseDouble(fv.getMeasurement().getValue());
                sortedVals.put(v, fv);
            }
            if (sortedVals.isEmpty()) {
                // Every value was filtered out by samplesUsed; firstEntry() would return null, so fail clearly.
                throw new IllegalStateException("No baseline could be identified for continuous factor: " + factor
                        + "; none of its " + factor.getFactorValues().size()
                        + " factor values are used by the given samples");
            }
            result.put(factor, sortedVals.firstEntry().getValue());
        } else {
            for (FactorValue fv : factor.getFactorValues()) {
                /*
                 * Check that this factor value is used by at least one of the given samples. Only matters if this
                 * is a subset of the full data set.
                 */
                if (samplesUsed != null && !ExpressionDataMatrixColumnSort.used(fv, samplesUsed)) {
                    // this factorValue cannot be a candidate baseline for this subset.
                    continue;
                }
                if (BaselineSelection.isForcedBaseline(fv)) {
                    // A forced baseline wins outright, replacing any heuristic choice made earlier in this loop.
                    ExpressionDataMatrixColumnSort.log.info("Baseline chosen: " + fv);
                    result.put(factor, fv);
                    break;
                }
                if (BaselineSelection.isBaselineCondition(fv)) {
                    if (result.containsKey(factor)) {
                        // Keep the first heuristic match; later ones are only reported.
                        ExpressionDataMatrixColumnSort.log.warn("A second potential baseline was found for " + factor + ": " + fv);
                        continue;
                    }
                    ExpressionDataMatrixColumnSort.log.info("Baseline chosen: " + fv);
                    result.put(factor, fv);
                }
            }
            if (!result.containsKey(factor)) {
                // fallback
                FactorValue arbitraryBaselineFV = null;
                if (samplesUsed != null) {
                    // make sure we choose a fv that is actually used (see above for non-arbitrary case)
                    for (FactorValue fv : factor.getFactorValues()) {
                        for (BioMaterial bm : samplesUsed) {
                            for (FactorValue bfv : bm.getFactorValues()) {
                                if (fv.equals(bfv)) {
                                    arbitraryBaselineFV = fv;
                                    break;
                                }
                            }
                            if (arbitraryBaselineFV != null) {
                                break;
                            }
                        }
                        if (arbitraryBaselineFV != null) {
                            break;
                        }
                    }
                } else {
                    arbitraryBaselineFV = factor.getFactorValues().iterator().next();
                }
                if (arbitraryBaselineFV == null) {
                    throw new IllegalStateException("No baseline could be identified for factor:  " + factor + " has " + factor.getFactorValues().size() + " factor values");
                }
                ExpressionDataMatrixColumnSort.log.info("Falling back on choosing baseline arbitrarily: " + arbitraryBaselineFV);
                result.put(factor, arbitraryBaselineFV);
            }
        }
    }
    return result;
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) FactorValue(ubic.gemma.model.expression.experiment.FactorValue) ExperimentalFactor(ubic.gemma.model.expression.experiment.ExperimentalFactor)

Example 80 with ExperimentalFactor

use of ubic.gemma.model.expression.experiment.ExperimentalFactor in project Gemma by PavlidisLab.

In the class ExpressionDataMatrixColumnSort, the method chooseSimplestFactor.

/**
 * Picks the factor that has the fewest values actually represented among the given biomaterials, requiring at
 * least two represented values. Two special cases apply: the first continuous factor encountered is returned
 * immediately, and a 'batch' factor is skipped whenever more than one factor was supplied.
 *
 * @param bms     biomaterials whose factor values determine which values count as represented
 * @param factors candidate factors
 * @return the first continuous factor encountered, otherwise the eligible factor with the fewest (but at least 2)
 * represented values, or null if no factor qualifies
 */
private static ExperimentalFactor chooseSimplestFactor(List<BioMaterial> bms, Collection<ExperimentalFactor> factors) {
    // Every factor value carried by at least one of the biomaterials.
    Collection<FactorValue> represented = new HashSet<>();
    for (BioMaterial sample : bms) {
        represented.addAll(sample.getFactorValues());
    }

    // 'Batch' is only pushed down the list when there is some other factor to prefer.
    boolean skipBatch = factors.size() > 1;

    ExperimentalFactor best = null;
    int bestCount = Integer.MAX_VALUE;
    for (ExperimentalFactor candidate : factors) {
        if (ExperimentalDesignUtils.isContinuous(candidate)) {
            return candidate;
        }
        if (skipBatch && ExperimentalDesignUtils.isBatch(candidate)) {
            continue;
        }

        int representedCount = 0;
        for (FactorValue value : candidate.getFactorValues()) {
            if (represented.contains(value)) {
                representedCount++;
            }
        }

        // Strict comparison: on ties the factor encountered first is kept.
        if (representedCount >= 2 && representedCount < bestCount) {
            bestCount = representedCount;
            best = candidate;
        }
    }
    return best;
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) FactorValue(ubic.gemma.model.expression.experiment.FactorValue) ExperimentalFactor(ubic.gemma.model.expression.experiment.ExperimentalFactor)

Aggregations

ExperimentalFactor (ubic.gemma.model.expression.experiment.ExperimentalFactor)88 Test (org.junit.Test)31 FactorValue (ubic.gemma.model.expression.experiment.FactorValue)30 DifferentialExpressionAnalysis (ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysis)26 BioMaterial (ubic.gemma.model.expression.biomaterial.BioMaterial)22 ExpressionExperiment (ubic.gemma.model.expression.experiment.ExpressionExperiment)18 HashSet (java.util.HashSet)17 AbstractGeoServiceTest (ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest)17 ExpressionAnalysisResultSet (ubic.gemma.model.analysis.expression.diff.ExpressionAnalysisResultSet)16 BioAssay (ubic.gemma.model.expression.bioAssay.BioAssay)14 DifferentialExpressionAnalysisResult (ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisResult)12 CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence)11 GeoDomainObjectGeneratorLocal (ubic.gemma.core.loader.expression.geo.GeoDomainObjectGeneratorLocal)10 AlreadyExistsInSystemException (ubic.gemma.core.loader.util.AlreadyExistsInSystemException)10 Before (org.junit.Before)8 Collection (java.util.Collection)7 ContrastResult (ubic.gemma.model.analysis.expression.diff.ContrastResult)6 AnalysisType (ubic.gemma.core.analysis.expression.diff.DifferentialExpressionAnalyzerServiceImpl.AnalysisType)5 ExperimentalFactorValueObject (ubic.gemma.model.expression.experiment.ExperimentalFactorValueObject)5 InputStream (java.io.InputStream)4