use of ubic.gemma.model.genome.biosequence.BioSequence in project Gemma by PavlidisLab.
the class NcbiGeneConverter method getChromosomeDetails.
/**
 * Builds a whole-chromosome {@link BioSequence} from the genomic nucleotide accession carried by
 * {@code acc} and attaches it to the chromosome of the gene's physical location.
 *
 * @param acc  supplies the genomic nucleotide accession (used as sequence name and database accession)
 *             and its version
 * @param gene gene whose physical location's chromosome receives the sequence; the gene's taxon is
 *             copied onto the new sequence
 * @throws IllegalStateException if the chromosome's {@code sequence} field cannot be written
 */
private void getChromosomeDetails(NCBIGene2Accession acc, Gene gene) {
    Chromosome chrom = gene.getPhysicalLocation().getChromosome();

    BioSequence chromSeq = BioSequence.Factory.newInstance();
    chromSeq.setName(acc.getGenomicNucleotideAccession());
    chromSeq.setType(SequenceType.WHOLE_CHROMOSOME);
    chromSeq.setTaxon(gene.getTaxon());

    DatabaseEntry dbe = DatabaseEntry.Factory.newInstance();
    dbe.setExternalDatabase(NcbiGeneConverter.genBank);
    dbe.setAccession(acc.getGenomicNucleotideAccession());
    dbe.setAccessionVersion(acc.getGenomicNucleotideAccessionVersion());
    chromSeq.setSequenceDatabaseEntry(dbe);

    try {
        // The field is written reflectively with forced access; presumably Chromosome exposes no
        // setter for it (verify against the Chromosome class).
        FieldUtils.writeField(chrom, "sequence", chromSeq, true);
    } catch (IllegalAccessException e) {
        // Fail loudly rather than printing a stack trace and leaving the chromosome without a sequence.
        throw new IllegalStateException(
                "Could not set sequence " + acc.getGenomicNucleotideAccession() + " on chromosome", e);
    }
}
use of ubic.gemma.model.genome.biosequence.BioSequence in project Gemma by PavlidisLab.
the class FastaParser method parseOneRecord.
/**
 * Parses one record: the header is handed to {@code parseHeader}, and every remaining match of
 * {@code pattern} is concatenated into the sequence text, skipping matches that start with ';'
 * (treated as comments).
 *
 * @param record raw text of a single record
 * @return the collection of {@link BioSequence}s produced from the header, each filled in with the
 *         sequence text and its exact length; {@code null} if the record is blank, the header yields
 *         no sequences, or no sequence text is found
 */
@Override
public Object parseOneRecord(String record) {
    if (StringUtils.isBlank(record)) {
        return null;
    }

    final Matcher lines = pattern.matcher(record);
    final Collection<BioSequence> parsed = this.parseHeader(lines);
    if (parsed.isEmpty()) {
        return null;
    }

    // Whatever the matcher still yields after the header is sequence text or a comment line.
    final StringBuilder residues = new StringBuilder();
    while (lines.find()) {
        final String chunk = lines.group(1);
        final boolean isComment = chunk.startsWith(";");
        if (!isComment) {
            residues.append(chunk);
        }
    }

    if (residues.length() == 0) {
        return null;
    }

    final String sequenceText = residues.toString();
    final long sequenceLength = sequenceText.length();
    for (BioSequence entry : parsed) {
        entry.setLength(sequenceLength);
        entry.setIsApproximateLength(false);
        entry.setSequence(sequenceText);
    }
    return parsed;
}
use of ubic.gemma.model.genome.biosequence.BioSequence in project Gemma by PavlidisLab.
the class SimpleFastaCmd method getSequencesFromFastaCmdOutput.
/**
 * Collects the sequences written by a fastacmd process to its standard output.
 * <p>
 * Standard output is fed to a {@link FastaParser} via a {@code ParsingStreamConsumer}; standard error
 * is handed to a {@code GenericStreamConsumer}. Both consumers are started before waiting for the
 * process to exit. Both streams are closed when this method returns.
 *
 * @param pr the running fastacmd process
 * @return the results accumulated by the parser
 * @throws IOException      if closing the process streams fails
 * @throws RuntimeException wrapping an {@link InterruptedException} if the calling thread is
 *                          interrupted while waiting; the thread's interrupt status is restored first
 */
private Collection<BioSequence> getSequencesFromFastaCmdOutput(Process pr) throws IOException {
    try (final InputStream is = new BufferedInputStream(pr.getInputStream());
            InputStream err = pr.getErrorStream()) {
        final FastaParser parser = new FastaParser();
        ParsingStreamConsumer<BioSequence> sg = new ParsingStreamConsumer<>(parser, is);
        GenericStreamConsumer gsc = new GenericStreamConsumer(err);
        sg.start();
        gsc.start();

        try {
            int exitVal = pr.waitFor();
            // Makes sure results are flushed.
            Thread.sleep(200);
            // The exit value is often nonzero if some sequences are not found.
            SimpleFastaCmd.log.debug("fastacmd exit value=" + exitVal);
            return parser.getResults();
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers further up the stack can still observe it.
            Thread.currentThread().interrupt();
            throw new RuntimeException(e);
        }
    }
}
use of ubic.gemma.model.genome.biosequence.BioSequence in project Gemma by PavlidisLab.
the class ProbeSequenceParser method parse.
/**
 * Reads the stream line by line, passing each line to {@code parseOneLine}, and logs progress
 * periodically plus a summary at the end. The stream is closed when parsing finishes.
 *
 * @param is stream to read; must not be {@code null}
 * @throws IllegalArgumentException if {@code is} is {@code null}
 * @throws IOException              if reading from the stream fails
 */
@Override
public void parse(InputStream is) throws IOException {
    if (is == null) {
        throw new IllegalArgumentException("InputStream was null");
    }

    try (BufferedReader reader = new BufferedReader(new InputStreamReader(is))) {
        final StopWatch progressTimer = new StopWatch();
        progressTimer.start();

        int total = 0;
        int unparsed = 0;
        for (String row = reader.readLine(); row != null; row = reader.readLine()) {
            final boolean noResult = this.parseOneLine(row) == null;

            total++;
            // Report only on every PARSE_ALERT_FREQUENCY-th line, and only if enough time has
            // elapsed since the last report.
            if (total % Parser.PARSE_ALERT_FREQUENCY == 0
                    && progressTimer.getTime() > LineParser.PARSE_ALERT_TIME_FREQUENCY_MS) {
                log.info("Parsed " + total + " lines ");
                progressTimer.reset();
                progressTimer.start();
            }

            if (noResult) {
                unparsed++;
            }
        }

        String suffix = "";
        if (unparsed > 0) {
            suffix = unparsed + " yielded no parse result (they may have been filtered).";
        }
        log.info("Parsed " + total + " lines. " + suffix);
    }
}
use of ubic.gemma.model.genome.biosequence.BioSequence in project Gemma by PavlidisLab.
the class ArrayDesignPersister method persistArrayDesignCompositeSequenceAssociations.
/**
 * For every transient composite sequence of the array design: clears its id, links it to the array
 * design, and replaces its biological characteristic with the result of {@code persistBioSequence}.
 * Composite sequences that are not transient are left untouched.
 *
 * @param arrayDesign the array design whose composite sequences are processed
 * @return the same {@code arrayDesign} instance that was passed in
 */
private ArrayDesign persistArrayDesignCompositeSequenceAssociations(ArrayDesign arrayDesign) {
    final int total = arrayDesign.getCompositeSequences().size();
    if (total == 0) {
        return arrayDesign;
    }

    AbstractPersister.log.info("Filling in or updating sequences in composite seqences for " + arrayDesign);

    final int reportEvery = this.numElementsPerUpdate(arrayDesign.getCompositeSequences());
    int examined = 0;

    for (CompositeSequence probe : arrayDesign.getCompositeSequences()) {
        // Only transient elements are processed; others are skipped (in case of retry).
        if (this.isTransient(probe)) {
            probe.setId(null);
            probe.setArrayDesign(arrayDesign);
            probe.setBiologicalCharacteristic(this.persistBioSequence(probe.getBiologicalCharacteristic()));

            examined++;
            // Progress is only reported for designs with more than 1000 elements.
            if (examined % reportEvery == 0 && total > 1000) {
                AbstractPersister.log.info(
                        examined + "/" + total + " compositeSequence sequences examined for " + arrayDesign);
            }
        }
    }

    if (examined > 0) {
        AbstractPersister.log.info(
                "Total of " + examined + " compositeSequence sequences examined for " + arrayDesign);
    }
    return arrayDesign;
}
Aggregations