use of ubic.gemma.model.genome.biosequence.BioSequence in project Gemma by PavlidisLab.
the class ArrayDesignSequenceProcessingServiceImpl method findOrUpdateSequences.
/**
 * Walks over the retrieved sequences, assigns each one the given taxon, hands it to
 * {@code createOrUpdateGenbankSequence} and indexes the sequence that call returns by its accession.
 * <p>
 * Side effect: every accession that gets indexed is also removed from {@code accessionsToFetch}, so after the
 * call that collection only holds accessions for which nothing was retrieved.
 *
 * @param accessionsToFetch accessions still wanted by the caller; modified in place (see above)
 * @param retrievedSequences sequences that were retrieved and should be reconciled
 * @param taxon taxon set on every retrieved sequence before it is created or updated
 * @param force If true, and an existing BioSequence that matches is found in the system, any existing sequence
 * information in that BioSequence will be overwritten.
 * @return Items that were found, keyed by the accession of their sequence database entry.
 */
private Map<String, BioSequence> findOrUpdateSequences(Collection<String> accessionsToFetch, Collection<BioSequence> retrievedSequences, Taxon taxon, boolean force) {
    final Map<String, BioSequence> byAccession = new HashMap<>();

    for (BioSequence retrieved : retrievedSequences) {
        if (ArrayDesignSequenceProcessingServiceImpl.log.isDebugEnabled()) {
            ArrayDesignSequenceProcessingServiceImpl.log.debug("Processing retrieved sequence: " + retrieved);
        }

        retrieved.setTaxon(taxon);

        // From here on only the instance returned by the create-or-update call is used.
        final BioSequence resolved = this.createOrUpdateGenbankSequence(retrieved, force);
        final String accession = resolved.getSequenceDatabaseEntry().getAccession();

        byAccession.put(accession, resolved);
        accessionsToFetch.remove(accession);
    }

    return byAccession;
}
use of ubic.gemma.model.genome.biosequence.BioSequence in project Gemma by PavlidisLab.
the class IlluminaProbeReader method parseOneLine.
/**
 * Parses one tab-delimited line into a {@link Reporter}: the first field becomes the reporter name and the
 * tenth field becomes the sequence of its immobilized characteristic.
 *
 * @param line a single line of input; must not be null
 * @return the populated reporter, or null when the first field starts with "Search"
 * @throws IllegalArgumentException if the line yields no fields, the first field is empty, there are fewer than
 * ten fields, or the sequence field is empty
 */
@Override
public Reporter parseOneLine(String line) {
    // Zero-based position of the sequence column.
    final int sequenceColumn = 9;

    final String[] fields = line.split("\t");
    // split() drops trailing empty strings, so a line made up only of tabs produces no fields at all.
    if (fields.length == 0) {
        throw new IllegalArgumentException("Line format is not valid");
    }

    final String probeId = fields[0];
    if (probeId == null || probeId.isEmpty()) {
        throw new IllegalArgumentException("Probe id invalid");
    }

    // Lines whose first field starts with "Search" are skipped rather than rejected.
    if (probeId.startsWith("Search")) {
        return null;
    }

    if (fields.length <= sequenceColumn) {
        throw new IllegalArgumentException("Line format is not valid");
    }

    final String sequence = fields[sequenceColumn];
    if (sequence == null || sequence.isEmpty()) {
        throw new IllegalArgumentException("Sequence is invalid");
    }

    final Reporter reporter = Reporter.Factory.newInstance();
    final BioSequence probeSequence = BioSequence.Factory.newInstance();
    probeSequence.setSequence(sequence);

    reporter.setName(probeId);
    reporter.setImmobilizedCharacteristic(probeSequence);
    return reporter;
}
use of ubic.gemma.model.genome.biosequence.BioSequence in project Gemma by PavlidisLab.
the class GeoConverterImpl method createMinimalBioSequence.
/**
 * Creates a new BioSequence with its sequence type and polymer type both set to DNA and the given taxon
 * attached.
 *
 * @param taxon Can be null; it is set on the new sequence as given. NOTE(review): the previous comment said the
 * taxon "will be discarded" - that is not visible in this method, confirm against the callers.
 * @return the newly created sequence
 */
private BioSequence createMinimalBioSequence(Taxon taxon) {
    final BioSequence minimal = BioSequence.Factory.newInstance();
    minimal.setType(SequenceType.DNA);
    minimal.setPolymerType(PolymerType.DNA);
    minimal.setTaxon(taxon);
    return minimal;
}
use of ubic.gemma.model.genome.biosequence.BioSequence in project Gemma by PavlidisLab.
the class AffyProbeReader method parseOneLine.
/**
 * Parses one tab-delimited probe line into a {@link Reporter} and files it under its probe set in
 * {@code reporterMap}.
 * <p>
 * Column layout, as read by this method: field 0 is the probe set id and field {@code sequenceField} is the
 * sequence. When {@code sequenceField} is 4 the coordinates are in fields 1 and 2; otherwise field 1 is an
 * index and the coordinates are in fields 2 and 3. The start-in-sequence value is always the field right
 * before the sequence.
 *
 * @param line a single line of input
 * @return the collection held in {@code reporterMap} for the line's probe set (including the reporter just
 * added), or null when the line is empty, its first field starts with "Probe", or the coordinates or
 * the start position cannot be parsed as numbers
 * @throws IllegalArgumentException if the line yields no fields or fewer than {@code sequenceField + 1} fields
 */
@Override
public Collection<Reporter> parseOneLine(String line) {
    if (StringUtils.isEmpty(line)) {
        return null;
    }

    final String[] fields = line.split("\t");
    if (fields.length == 0) {
        throw new IllegalArgumentException("Line format is not valid (not tab-delimited or no fields found)");
    }

    final String probeSetId = fields[0];
    // Presumably the column-header row; it is skipped without complaint.
    if (probeSetId.startsWith("Probe")) {
        return null;
    }

    final int requiredFields = sequenceField + 1;
    if (fields.length < requiredFields) {
        throw new IllegalArgumentException("Too few fields in line, expected at least " + requiredFields + " but got " + fields.length);
    }

    final String sequence = fields[sequenceField];
    if (StringUtils.isBlank(sequence)) {
        // Only a warning: processing of the line continues with the blank sequence.
        log.warn("No sequence");
    }

    // With the sequence in column 4 there is no index column and the coordinates start at column 1;
    // in every other layout column 1 is the index and the coordinates are shifted right by one.
    final boolean hasIndexColumn = sequenceField != 4;
    final String index = hasIndexColumn ? fields[1] : null;
    final int xColumn = hasIndexColumn ? 2 : 1;
    final String xcoord = fields[xColumn];
    final String ycoord = fields[xColumn + 1];
    final String startInSequence = fields[sequenceField - 1];

    final Reporter reporter = Reporter.Factory.newInstance();

    try {
        reporter.setRow(Integer.parseInt(xcoord));
        reporter.setCol(Integer.parseInt(ycoord));
    } catch (NumberFormatException e) {
        log.warn("Invalid row: could not parse coordinates: " + xcoord + ", " + ycoord);
        return null;
    }

    try {
        reporter.setStartInBioChar(Long.parseLong(startInSequence));
    } catch (NumberFormatException e) {
        if ("---".equals(startInSequence)) {
            // Controls have no start/end information. We really have to bail on these.
            log.debug("Control sequence");
        } else {
            log.warn("Invalid row: could not parse start in sequence: " + startInSequence);
        }
        return null;
    }

    // Name format: probeSetId[#index]:x:y
    final StringBuilder reporterName = new StringBuilder(probeSetId);
    if (index != null) {
        reporterName.append('#').append(index);
    }
    reporterName.append(':').append(xcoord).append(':').append(ycoord);
    reporter.setName(reporterName.toString());

    final BioSequence probeSequence = BioSequence.Factory.newInstance();
    probeSequence.setSequence(sequence);
    probeSequence.setIsApproximateLength(false);
    probeSequence.setLength((long) sequence.length());
    probeSequence.setType(SequenceType.AFFY_PROBE);
    probeSequence.setPolymerType(PolymerType.DNA);
    reporter.setImmobilizedCharacteristic(probeSequence);

    // A fresh CompositeSequence carrying only the name serves as the lookup key for the probe set.
    final CompositeSequence probeSet = CompositeSequence.Factory.newInstance();
    probeSet.setName(probeSetId);
    if (!reporterMap.containsKey(probeSet)) {
        reporterMap.put(probeSet, new HashSet<Reporter>());
    }
    reporter.setCompositeSequence(probeSet);

    final Collection<Reporter> reportersForProbeSet = reporterMap.get(probeSet);
    reportersForProbeSet.add(reporter);
    return reportersForProbeSet;
}
use of ubic.gemma.model.genome.biosequence.BioSequence in project Gemma by PavlidisLab.
the class ArrayDesignMergeServiceImpl method merge.
/**
 * Merges {@code arrayDesign} with the given other array designs.
 * <p>
 * The method validates the inputs, builds a map of biosequence to design elements across all participating
 * designs via {@code makeBioSeqMap}, and then delegates the actual creation to {@code createMerged}.
 * Entries of {@code otherArrayDesigns} that are equal to {@code arrayDesign} are silently skipped.
 *
 * @param arrayDesign the first design; must not already have been merged into another design
 * @param otherArrayDesigns the designs to merge with it; must not be empty, and none of them may already have
 * been merged into another design
 * @param nameOfNewDesign passed through to {@code createMerged}
 * @param shortNameOfNewDesign passed through to {@code createMerged}
 * @param add passed through to {@code createMerged}; only allowed when {@code arrayDesign} already has
 * mergees, i.e. is itself a merged design
 * @return whatever {@code createMerged} returns
 * @throws IllegalArgumentException if {@code otherArrayDesigns} is empty, if any participating design is
 * already a mergee, or if {@code add} is requested for a design without mergees
 */
@Override
public ArrayDesign merge(ArrayDesign arrayDesign, Collection<ArrayDesign> otherArrayDesigns, String nameOfNewDesign, String shortNameOfNewDesign, boolean add) {
    if (otherArrayDesigns.isEmpty()) {
        throw new IllegalArgumentException("Must merge at least one array design");
    }

    /*
     * We allow merging of, or into, an already merged design, but array designs can't be merged into more than one.
     */
    if (arrayDesign.getMergedInto() != null) {
        throw new IllegalArgumentException("Sorry, can't merge an array design that is already a mergee (" + arrayDesign + ")");
    }

    // 'add' extends an existing merged design, so the target must already have mergees. The message used to
    // say "isn't already a mergee", which contradicted the check above (mergees are rejected there).
    if (add && arrayDesign.getMergees().isEmpty()) {
        throw new IllegalArgumentException("Can't use 'add' when arrayDesign isn't already a merged design (" + arrayDesign + ")");
    }

    // make map of biosequence -> design elements for all the array designs. But watch out for biosequences that
    // appear more than once per array design.
    Map<BioSequence, Collection<CompositeSequence>> globalBsMap = new HashMap<>();
    ArrayDesign thawed = this.makeBioSeqMap(globalBsMap, arrayDesign);
    ArrayDesignMergeServiceImpl.log.info(globalBsMap.size() + " sequences in first array design.");

    // Now check the other designs, add slots for additional probes if necessary.
    Collection<ArrayDesign> thawedOthers = new HashSet<>();
    for (ArrayDesign otherArrayDesign : otherArrayDesigns) {
        if (otherArrayDesign.getMergedInto() != null) {
            throw new IllegalArgumentException("Sorry, can't merge an array design that is already a mergee (" + otherArrayDesign + ")");
        }
        if (arrayDesign.equals(otherArrayDesign)) {
            // defensive: never merge a design with itself.
            continue;
        }
        ArrayDesignMergeServiceImpl.log.info("Examining " + otherArrayDesign);
        thawedOthers.add(this.makeBioSeqMap(globalBsMap, otherArrayDesign));
        ArrayDesignMergeServiceImpl.log.info(globalBsMap.size() + " unique sequences encountered in total so far");
    }

    return this.createMerged(thawed, thawedOthers, globalBsMap, nameOfNewDesign, shortNameOfNewDesign, add);
}
Aggregations