use of ubic.gemma.model.genome.biosequence.BioSequence in project Gemma by PavlidisLab.
the class ArrayDesignMergeServiceImpl method makeNewProbes.
/**
 * Makes the new or additional probes (non-persistent) for the merged array design. If mergeWithExisting=true,
 * probes that already belong to arrayDesign are skipped; only the ones that need to be added to it are returned.
 *
 * @param arrayDesign       the array design every new probe is assigned to
 * @param globalBsMap       Map that tells us, in effect, how many probes to make for the sequence: one new probe
 *                          is made for each existing probe listed under a sequence (minus any skipped ones).
 * @param mergeWithExisting if true, probes whose array design equals arrayDesign are not copied
 * @return the newly created probes
 */
private Collection<CompositeSequence> makeNewProbes(ArrayDesign arrayDesign, Map<BioSequence, Collection<CompositeSequence>> globalBsMap, boolean mergeWithExisting) {
    Collection<CompositeSequence> newProbes = new HashSet<>();
    ArrayDesignMergeServiceImpl.log.info(globalBsMap.size() + " unique sequences");
    // Names handed out so far; passed to getProbeName for every new probe.
    Collection<String> probeNames = new HashSet<>();
    // Walk the entries directly rather than keySet() + get(): one map lookup per sequence instead of two.
    for (Map.Entry<BioSequence, Collection<CompositeSequence>> entry : globalBsMap.entrySet()) {
        BioSequence bs = entry.getKey();
        // A null key is not expected here: a missing sequence should be represented by the placeholder
        // NULL_BIOSEQUENCE instead.
        assert bs != null;
        for (CompositeSequence cs : entry.getValue()) {
            if (mergeWithExisting && cs.getArrayDesign().equals(arrayDesign)) {
                assert arrayDesign.getId() != null;
                /*
                 * Only add probes from the _other_ array designs.
                 */
                continue;
            }
            CompositeSequence newCs = CompositeSequence.Factory.newInstance();
            // The placeholder only stands in for "no sequence"; it is never attached to a probe.
            if (!bs.equals(ExpressionExperimentPlatformSwitchService.NULL_BIOSEQUENCE)) {
                newCs.setBiologicalCharacteristic(bs);
            }
            String name = this.getProbeName(probeNames, cs);
            probeNames.add(name);
            newCs.setName(name);
            newCs.setDescription((cs.getDescription() == null ? "" : cs.getDescription()) + " (via merge)");
            newCs.setArrayDesign(arrayDesign);
            newProbes.add(newCs);
            if (ArrayDesignMergeServiceImpl.log.isDebugEnabled()) {
                ArrayDesignMergeServiceImpl.log.debug("Made merged probe for " + bs + ": " + newCs + " for old probe on " + cs.getArrayDesign().getShortName());
            }
        }
    }
    ArrayDesignMergeServiceImpl.log.info("Made " + newProbes.size() + " new probes");
    return newProbes;
}
use of ubic.gemma.model.genome.biosequence.BioSequence in project Gemma by PavlidisLab.
the class CompositeSequenceParser method parseOneLine.
/**
 * Parses one tab-delimited line into a probe with an attached sequence. The line must consist of exactly three
 * fields, in this order: probe id, GenBank accession, description.
 *
 * @param line the raw line to parse; may be null
 * @return a new CompositeSequence named after the probe id, carrying a new BioSequence built from the accession
 *         and description; or null if the line is null or does not have exactly three tab-separated fields
 */
@Override
public CompositeSequence parseOneLine(String line) {
    // A null line has no fields; report it like any other malformed line instead of failing on tokens.length.
    if (line == null) {
        return null;
    }
    String[] tokens = StringUtils.splitPreserveAllTokens(line, '\t');
    if (tokens.length != 3) {
        return null;
    }
    String probeid = tokens[0];
    String genbankAcc = tokens[1];
    String description = tokens[2];
    CompositeSequence result = CompositeSequence.Factory.newInstance();
    result.setName(probeid);
    result.setDescription(description);
    DatabaseEntry dbEntry = ExternalDatabaseUtils.getGenbankAccession(genbankAcc);
    BioSequence biologicalCharacteristic = BioSequence.Factory.newInstance();
    // The name and description set here are provisional: they will be changed later, typically.
    biologicalCharacteristic.setName(genbankAcc);
    biologicalCharacteristic.setDescription(description + " (From platform source)");
    biologicalCharacteristic.setSequenceDatabaseEntry(dbEntry);
    result.setBiologicalCharacteristic(biologicalCharacteristic);
    return result;
}
use of ubic.gemma.model.genome.biosequence.BioSequence in project Gemma by PavlidisLab.
the class ExpressionExperimentPrePersistServiceImpl method loadOrPersistArrayDesignAndAddToCache.
/**
 * Makes sure the given array design is in the cache, persisting it first when the cache has no entry for its
 * short name. Caching is needed when loading designelementdatavectors, for example, to avoid repeated (and
 * one-at-a-time) fetching of designelement.
 *
 * @param arrayDesign the array design to look up or persist; must have a non-blank short name
 * @param cache       the cache consulted by short name and filled on a miss
 * @return the array design held in the cache for that short name, or the freshly persisted array design
 * @throws IllegalArgumentException if the short name is blank
 */
private ArrayDesign loadOrPersistArrayDesignAndAddToCache(ArrayDesign arrayDesign, ArrayDesignsForExperimentCache cache) {
    assert arrayDesign != null;
    final String shortName = arrayDesign.getShortName();
    if (StringUtils.isBlank(shortName)) {
        throw new IllegalArgumentException("Array design must have a 'short name'");
    }

    // Cache hit: nothing to persist or load.
    if (cache.getArrayDesignCache().containsKey(shortName)) {
        return cache.getArrayDesignCache().get(shortName);
    }

    StopWatch stopWatch = new StopWatch();
    stopWatch.start();

    // Runs in a transaction, but is fast if the design already exists.
    ArrayDesign persistentDesign = (ArrayDesign) persisterHelper.persist(arrayDesign);

    // A full thaw (arrayDesignService.thawRawAndProcessed) is deliberately not done here: it was noted as
    // wasteful when the design already exists. Only the probe -> sequence map is fetched, and just its
    // probes (the keys) go into the cache.
    Map<CompositeSequence, BioSequence> probeToSequence = arrayDesignService.getBioSequences(persistentDesign);
    cache.add(persistentDesign, probeToSequence.keySet());

    // Only report slow loads (over 20 s). NOTE(review): the message still mentions thawRawAndProcessed
    // although that call is no longer made.
    if (stopWatch.getTime() > 20000) {
        ExpressionExperimentPrePersistServiceImpl.log.info("Load/persist & thawRawAndProcessed array design: " + stopWatch.getTime() + "ms");
    }
    return persistentDesign;
}
use of ubic.gemma.model.genome.biosequence.BioSequence in project Gemma by PavlidisLab.
the class ExpressionExperimentPrePersistServiceImpl method addNewDesignElementToPersistentArrayDesign.
/**
 * Attaches a transient design element to the given array design and creates it through
 * compositeSequenceService, persisting its sequence first when persisterHelper reports that as transient.
 *
 * @param arrayDesign   the array design to attach the element to; asserted to have an id
 * @param designElement the element to add; may be null
 * @return null if designElement is null; designElement itself if it is not transient; otherwise the result of
 *         compositeSequenceService.create
 */
private CompositeSequence addNewDesignElementToPersistentArrayDesign(ArrayDesign arrayDesign, CompositeSequence designElement) {
    // A missing element, or one that is not transient, is handed back untouched.
    if (designElement == null || !persisterHelper.isTransient(designElement)) {
        return designElement;
    }

    /*
     * The element may have no sequence, or the sequence name may not be provided. Of course, if there is no
     * sequence it isn't going to be very useful.
     * NOTE(review): how persisterHelper treats a null sequence is not visible here - confirm.
     */
    BioSequence sequence = designElement.getBiologicalCharacteristic();

    assert arrayDesign.getId() != null;
    designElement.setArrayDesign(arrayDesign);

    if (persisterHelper.isTransient(sequence)) {
        // Runs in a transaction.
        BioSequence persistedSequence = (BioSequence) persisterHelper.persist(sequence);
        designElement.setBiologicalCharacteristic(persistedSequence);
    }

    return compositeSequenceService.create(designElement);
}
use of ubic.gemma.model.genome.biosequence.BioSequence in project Gemma by PavlidisLab.
the class BlatAssociationScorer method organizeBlatAssociationsByGeneProductAndInitializeScores.
/**
 * Groups blat associations by gene product. Along the way each association gets its score computed and its
 * specificity set to an initial value of 1.0. Every group is a HashSet, so associations that are equal to one
 * another collapse into a single entry of their group.
 *
 * @param blatAssociations blat associations, all of which must be for the same query sequence
 * @return map of gene product to the associations found for it
 * @throws IllegalArgumentException if the associations refer to more than one distinct query sequence
 */
private static Map<GeneProduct, Collection<BlatAssociation>> organizeBlatAssociationsByGeneProductAndInitializeScores(Collection<BlatAssociation> blatAssociations) {
    Map<GeneProduct, Collection<BlatAssociation>> byGeneProduct = new HashMap<>();
    // Distinct query sequences seen so far; more than one means the input is invalid.
    Collection<BioSequence> querySequences = new HashSet<>();

    for (BlatAssociation association : blatAssociations) {
        assert association.getBioSequence() != null;
        BlatAssociationScorer.computeScore(association);

        querySequences.add(association.getBioSequence());
        if (querySequences.size() > 1) {
            throw new IllegalArgumentException("Blat associations must all be for the same query sequence");
        }

        assert association.getGeneProduct() != null;
        GeneProduct product = association.getGeneProduct();

        // Fetch the group for this gene product, starting a new one on first sight.
        Collection<BlatAssociation> group = byGeneProduct.get(product);
        if (group == null) {
            group = new HashSet<>();
            byGeneProduct.put(product, group);
        }
        group.add(association);

        // Just an initial value.
        association.setSpecificity(1.0);
    }

    return byGeneProduct;
}
Aggregations