use of ubic.gemma.model.expression.designElement.CompositeSequence in project Gemma by PavlidisLab.
the class DifferentialExpressionAnalyzerServiceImpl method extendResultSet.
/**
 * Merge the results of a temporary (transient) result set into an existing, persistent one.
 * <p>
 * Every result in {@code temprs} is re-pointed at {@code oldrs}. Only results whose probe is not already
 * represented in {@code oldrs} are actually added to it.
 *
 * @param oldrs  the existing result set; must already have an id
 * @param temprs the result set whose results are candidates for being added to {@code oldrs}
 */
private void extendResultSet(ExpressionAnalysisResultSet oldrs, ExpressionAnalysisResultSet temprs) {
    assert oldrs.getId() != null;

    // Index the existing results by probe so that duplicates can be detected.
    Map<CompositeSequence, DifferentialExpressionAnalysisResult> p2der = new HashMap<>();
    for (DifferentialExpressionAnalysisResult der : oldrs.getResults()) {
        p2der.put(der.getProbe(), der);
    }

    // Collect the results for probes that the old result set does not cover yet.
    Collection<DifferentialExpressionAnalysisResult> toAdd = new ArrayList<>();
    for (DifferentialExpressionAnalysisResult newr : temprs.getResults()) {
        if (!p2der.containsKey(newr.getProbe())) {
            toAdd.add(newr);
        }
        newr.setResultSet(oldrs);
    }

    if (toAdd.isEmpty()) {
        // Nothing to copy over. Collection.addAll would return false here, so the "added" assertion
        // below must not be evaluated in this (unusual but legal) case.
        DifferentialExpressionAnalyzerServiceImpl.log.warn("Somewhat surprisingly, no new results were added");
        return;
    }

    DifferentialExpressionAnalyzerServiceImpl.log.info(toAdd.size() + " transient results added to the old analysis result set: " + oldrs.getId());
    boolean added = oldrs.getResults().addAll(toAdd);
    assert added;
    assert oldrs.getResults().size() >= toAdd.size();
}
use of ubic.gemma.model.expression.designElement.CompositeSequence in project Gemma by PavlidisLab.
the class QuantitationTypeData method isTwoColor.
/**
 * Check whether any of the vectors looks like two-color data.
 *
 * @return true if at least one vector is on a platform whose technology type is neither ONECOLOR nor NONE and
 *         has a quantitation type that is a ratio and is preferred or masked-preferred; false otherwise
 */
private boolean isTwoColor() {
    for (DesignElementDataVector vector : vectors) {
        TechnologyType platformTechnology = vector.getDesignElement().getArrayDesign().getTechnologyType();
        boolean oneColorOrNone = platformTechnology.equals(TechnologyType.ONECOLOR)
                || platformTechnology.equals(TechnologyType.NONE);
        if (!oneColorOrNone) {
            QuantitationType quantitationType = vector.getQuantitationType();
            boolean preferred = quantitationType.getIsPreferred() || quantitationType.getIsMaskedPreferred();
            if (preferred && quantitationType.getIsRatio()) {
                return true;
            }
        }
    }
    return false;
}
use of ubic.gemma.model.expression.designElement.CompositeSequence in project Gemma by PavlidisLab.
the class LinkAnalysis method getGenesTested.
/**
 * Get the genes that were tested, according to the rows that are currently in the dataMatrix (so call this after
 * filtering!). Rows without an entry in the probe-to-gene map are skipped; a warning is logged for them until
 * the warning limit is exceeded.
 *
 * @return the union of the genes mapped to the rows of the data matrix
 */
public Set<Gene> getGenesTested() {
    Set<Gene> tested = new HashSet<>();
    for (CompositeSequence probe : dataMatrix.getRowNames()) {
        Set<Gene> mapped = this.probeToGeneMap.get(probe);
        if (mapped != null) {
            tested.addAll(mapped);
            continue;
        }

        // No genes for this probe: warn, unless too many warnings were issued already.
        if (numWarnings > LinkAnalysis.MAX_WARNINGS) {
            continue;
        }
        LinkAnalysis.log.warn("No genes for: " + probe);
        numWarnings++;
        if (numWarnings > LinkAnalysis.MAX_WARNINGS) {
            LinkAnalysis.log.warn("Further warnings will be suppressed");
        }
    }
    return tested;
}
use of ubic.gemma.model.expression.designElement.CompositeSequence in project Gemma by PavlidisLab.
the class LinkAnalysisServiceImpl method writeLinks.
/**
 * Write links as text.
 * <p>
 * The output starts with the analysis and filter configurations, followed by three {@code #}-prefixed summary
 * lines (total links, printed links, links skipped due to high node degree) and then one tab-delimited line per
 * printed link: probe 1 id, probe 2 id, gene symbols for probe 1, gene symbols for probe 2 (each joined by
 * {@code |}), and the formatted link weight.
 * <p>
 * A link is skipped when either probe exceeds the probe degree threshold (if the threshold is positive), when
 * either probe has no genes, or when both probes yield the same gene string. If subsetting is in use, retained
 * links are additionally printed only with probability {@code subsetSize / links.size()}.
 *
 * @param la           the link analysis holding the links, probes and probe-to-gene map
 * @param filterConfig filter configuration, written to the output header
 * @param wr           destination; it is flushed but not closed
 * @throws IOException if writing fails
 */
private void writeLinks(final LinkAnalysis la, FilterConfig filterConfig, Writer wr) throws IOException {
    Map<CompositeSequence, Set<Gene>> probeToGeneMap = la.getProbeToGeneMap();
    ObjectArrayList links = la.getKeep();
    double subsetSize = la.getConfig().getSubsetSize();
    // Lines are buffered because the summary header (which needs the final counts) is written before them.
    List<String> buf = new ArrayList<>();
    if (la.getConfig().isSubset() && links.size() > subsetSize) {
        la.getConfig().setSubsetUsed(true);
    }
    wr.write(la.getConfig().toString());
    wr.write(filterConfig.toString());
    NumberFormat nf = NumberFormat.getInstance();
    nf.setMaximumFractionDigits(4);
    Integer probeDegreeThreshold = la.getConfig().getProbeDegreeThreshold();
    int keptLinksCount = 0;
    Random generator = new Random();
    double fraction = subsetSize / links.size();
    int skippedDueToDegree = 0;
    for (int i = 0, n = links.size(); i < n; i++) {
        Object val = links.getQuick(i);
        if (val == null) {
            continue;
        }
        Link m = (Link) val;
        Double w = m.getWeight();
        int x = m.getx();
        int y = m.gety();
        if (probeDegreeThreshold > 0 && (la.getProbeDegree(x) > probeDegreeThreshold || la.getProbeDegree(y) > probeDegreeThreshold)) {
            skippedDueToDegree++;
            continue;
        }
        CompositeSequence p1 = la.getProbe(x);
        CompositeSequence p2 = la.getProbe(y);
        Set<Gene> g1 = probeToGeneMap.get(p1);
        Set<Gene> g2 = probeToGeneMap.get(p2);
        if (g1 == null || g2 == null) {
            // A probe without an entry in the probe-to-gene map has no genes; treat it like an empty gene set
            // instead of failing with a NullPointerException.
            continue;
        }
        List<String> genes1 = new ArrayList<>();
        for (Gene cluster : g1) {
            genes1.add(cluster.getOfficialSymbol());
        }
        List<String> genes2 = new ArrayList<>();
        for (Gene cluster : g2) {
            genes2.add(cluster.getOfficialSymbol());
        }
        if (genes2.isEmpty() || genes1.isEmpty()) {
            continue;
        }
        String gene1String = StringUtils.join(genes1.iterator(), "|");
        String gene2String = StringUtils.join(genes2.iterator(), "|");
        if (gene1String.equals(gene2String)) {
            // Both probes yield the same gene string; not written.
            continue;
        }
        if (++keptLinksCount % 50000 == 0) {
            LinkAnalysisServiceImpl.log.info(keptLinksCount + " links retained");
        }
        if (la.getConfig().isSubsetUsed()) {
            // Random subsampling: print each retained link with probability 'fraction'.
            double rand = generator.nextDouble();
            if (rand > fraction) {
                continue;
            }
        }
        // save links
        buf.add(p1.getId() + "\t" + p2.getId() + "\t" + gene1String + "\t" + gene2String + "\t" + nf.format(w) + "\n");
    }
    wr.write("# totalLinks:" + keptLinksCount + "\n");
    wr.write("# printedLinks:" + buf.size() + "\n");
    wr.write("# skippedDueToHighNodeDegree:" + skippedDueToDegree + "\n");
    for (String line : buf) {
        // write links to file
        wr.write(line);
    }
    if (la.getConfig().isSubsetUsed()) {
        // subset option activated
        LinkAnalysisServiceImpl.log.info("Done, " + keptLinksCount + "/" + links.size() + " links kept, " + buf.size() + " links printed");
    } else {
        LinkAnalysisServiceImpl.log.info("Done, " + keptLinksCount + "/" + links.size() + " links printed (some may have been filtered)");
    }
    wr.flush();
}
use of ubic.gemma.model.expression.designElement.CompositeSequence in project Gemma by PavlidisLab.
the class LinkAnalysisServiceImpl method getProbe2GeneMap.
/**
 * Fills in the probe2gene map for the linkAnalysis. Note that the collection DOES NOT contain probes that have NO
 * genes mapped.
 *
 * @param la            the link analysis that receives the probe-to-gene map
 * @param dataVectors   vectors whose design elements are the candidate probes
 * @param eeDoubleMatrix only probes that still have a row in this matrix are considered
 * @throws IllegalStateException if no probe ends up mapped to any gene
 */
private void getProbe2GeneMap(LinkAnalysis la, Collection<ProcessedExpressionDataVector> dataVectors, ExpressionDataDoubleMatrix eeDoubleMatrix) {
    // Restrict to probes that are still present in the data matrix.
    Collection<CompositeSequence> probesForVectors = new HashSet<>();
    for (DesignElementDataVector v : dataVectors) {
        CompositeSequence cs = v.getDesignElement();
        if (eeDoubleMatrix.getRow(cs) != null) {
            probesForVectors.add(cs);
        }
    }
    Map<CompositeSequence, Collection<BioSequence2GeneProduct>> specificityData = csService.getGenesWithSpecificity(probesForVectors);
    assert !specificityData.isEmpty();
    /*
     * Convert the specificity
     */
    Map<CompositeSequence, Set<Gene>> probeToGeneMap = new HashMap<>();
    for (Map.Entry<CompositeSequence, Collection<BioSequence2GeneProduct>> entry : specificityData.entrySet()) {
        Set<Gene> genes = new HashSet<>();
        for (BioSequence2GeneProduct bioSequence2GeneProduct : entry.getValue()) {
            genes.add(bioSequence2GeneProduct.getGeneProduct().getGene());
        }
        probeToGeneMap.put(entry.getKey(), genes);
    }
    /*
     * Remove the probes that have no mapping
     */
    int startingSize = probeToGeneMap.size();
    int numRemoved = 0;
    for (Iterator<Set<Gene>> it = probeToGeneMap.values().iterator(); it.hasNext(); ) {
        if (it.next().isEmpty()) {
            it.remove();
            numRemoved++;
        }
    }
    if (numRemoved > 0) {
        LinkAnalysisServiceImpl.log.info(numRemoved + "/" + startingSize + " elements had no genes mapped and were removed.");
    }
    if (probeToGeneMap.isEmpty()) {
        // The map is empty here, so the example probe must come from the candidate probes; asking the empty
        // map for one would throw NoSuchElementException and hide this error.
        Iterator<CompositeSequence> candidates = probesForVectors.iterator();
        String example = candidates.hasNext() ? String.valueOf(candidates.next()) : "(no probes were available)";
        throw new IllegalStateException("No probes are mapped to genes; example=" + example);
    }
    la.setProbeToGeneMap(probeToGeneMap);
}
Aggregations