use of htsjdk.samtools.reference.ReferenceSequence in project gatk by broadinstitute.
the class ClipReads method onTraversalStart.
/**
* The initialize function.
*/
@Override
public void onTraversalStart() {
if (qTrimmingThreshold >= 0) {
logger.info(String.format("Creating Q-score clipper with threshold %d", qTrimmingThreshold));
}
//
if (clipSequencesArgs != null) {
int i = 0;
for (String toClip : clipSequencesArgs) {
i++;
ReferenceSequence rs = new ReferenceSequence("CMDLINE-" + i, -1, StringUtil.stringToBytes(toClip));
addSeqToClip(rs.getName(), rs.getBases());
}
}
if (clipSequenceFile != null) {
ReferenceSequenceFile rsf = ReferenceSequenceFileFactory.getReferenceSequenceFile(new File(clipSequenceFile));
while (true) {
ReferenceSequence rs = rsf.nextSequence();
if (rs == null)
break;
else {
addSeqToClip(rs.getName(), rs.getBases());
}
}
}
//
if (cyclesToClipArg != null) {
cyclesToClip = new ArrayList<>();
for (String range : cyclesToClipArg.split(",")) {
try {
String[] elts = range.split("-");
int start = Integer.parseInt(elts[0]) - 1;
int stop = Integer.parseInt(elts[1]) - 1;
if (start < 0)
throw new Exception();
if (stop < start)
throw new Exception();
logger.info(String.format("Creating cycle clipper %d-%d", start, stop));
cyclesToClip.add(new MutablePair<>(start, stop));
} catch (Exception e) {
throw new RuntimeException("Badly formatted cyclesToClip argument: " + cyclesToClipArg);
}
}
}
final boolean presorted = EnumSet.of(ClippingRepresentation.WRITE_NS, ClippingRepresentation.WRITE_NS_Q0S, ClippingRepresentation.WRITE_Q0S).contains(clippingRepresentation);
outputBam = createSAMWriter(OUTPUT, presorted);
accumulator = new ClippingData(sequencesToClip);
try {
outputStats = STATSOUTPUT == null ? null : new PrintStream(STATSOUTPUT);
} catch (FileNotFoundException e) {
throw new UserException.CouldNotCreateOutputFile(STATSOUTPUT, e);
}
}
use of htsjdk.samtools.reference.ReferenceSequence in project gatk by broadinstitute.
the class ReferenceFileSource method getAllReferenceBases.
public Map<String, ReferenceBases> getAllReferenceBases() throws IOException {
try (ReferenceSequenceFile referenceSequenceFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(new File(referencePath))) {
Map<String, ReferenceBases> bases = new LinkedHashMap<>();
ReferenceSequence seq;
while ((seq = referenceSequenceFile.nextSequence()) != null) {
String name = seq.getName();
bases.put(name, new ReferenceBases(seq.getBases(), new SimpleInterval(name, 1, seq.length())));
}
return bases;
}
}
use of htsjdk.samtools.reference.ReferenceSequence in project gatk by broadinstitute.
the class ReferenceHadoopSource method getReferenceBases.
@Override
public ReferenceBases getReferenceBases(final PipelineOptions pipelineOptions, final SimpleInterval interval) {
ReferenceSequenceFile referenceSequenceFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(IOUtils.getPath(referencePath));
ReferenceSequence sequence = referenceSequenceFile.getSubsequenceAt(interval.getContig(), interval.getStart(), interval.getEnd());
return new ReferenceBases(sequence.getBases(), interval);
}
use of htsjdk.samtools.reference.ReferenceSequence in project gatk by broadinstitute.
the class SinglePassSamProgram method makeItSo.
public static void makeItSo(final File input, final File referenceSequence, final boolean assumeSorted, final long stopAfter, final Collection<SinglePassSamProgram> programs) {
// Setup the standard inputs
IOUtil.assertFileIsReadable(input);
final SamReader in = SamReaderFactory.makeDefault().referenceSequence(referenceSequence).open(input);
// Optionally load up the reference sequence and double check sequence dictionaries
final ReferenceSequenceFileWalker walker;
if (referenceSequence == null) {
walker = null;
} else {
IOUtil.assertFileIsReadable(referenceSequence);
walker = new ReferenceSequenceFileWalker(referenceSequence);
if (!in.getFileHeader().getSequenceDictionary().isEmpty()) {
SequenceUtil.assertSequenceDictionariesEqual(in.getFileHeader().getSequenceDictionary(), walker.getSequenceDictionary());
}
}
// Check on the sort order of the BAM file
{
final SortOrder sort = in.getFileHeader().getSortOrder();
if (sort != SortOrder.coordinate) {
if (assumeSorted) {
logger.warn("File reports sort order '" + sort + "', assuming it's coordinate sorted anyway.");
} else {
throw new UserException("File " + input.getAbsolutePath() + " should be coordinate sorted but " + "the header says the sort order is " + sort + ". If you believe the file " + "to be coordinate sorted you may pass ASSUME_SORTED=true");
}
}
}
// Call the abstract setup method!
boolean anyUseNoRefReads = false;
for (final SinglePassSamProgram program : programs) {
program.setup(in.getFileHeader(), input);
anyUseNoRefReads = anyUseNoRefReads || program.usesNoRefReads();
}
final ProgressLogger progress = new ProgressLogger(logger);
for (final SAMRecord rec : in) {
final ReferenceSequence ref;
if (walker == null || rec.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
ref = null;
} else {
ref = walker.get(rec.getReferenceIndex());
}
for (final SinglePassSamProgram program : programs) {
program.acceptRead(rec, ref);
}
progress.record(rec);
// See if we need to terminate early?
if (stopAfter > 0 && progress.getCount() >= stopAfter) {
break;
}
// And see if we're into the unmapped reads at the end
if (!anyUseNoRefReads && rec.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
break;
}
}
CloserUtil.close(in);
for (final SinglePassSamProgram program : programs) {
program.finish();
}
}
use of htsjdk.samtools.reference.ReferenceSequence in project gatk by broadinstitute.
the class CachingIndexedFastaSequenceFileUnitTest method testCachingIndexedFastaReaderTwoStage.
// Tests grabbing sequences around a middle cached value.
@Test(dataProvider = "fastas", enabled = !DEBUG)
public void testCachingIndexedFastaReaderTwoStage(File fasta, int cacheSize, int querySize) throws FileNotFoundException {
final IndexedFastaSequenceFile uncached = new IndexedFastaSequenceFile(fasta);
final CachingIndexedFastaSequenceFile caching = new CachingIndexedFastaSequenceFile(fasta, getCacheSize(cacheSize), true, false);
SAMSequenceRecord contig = uncached.getSequenceDictionary().getSequence(0);
int middleStart = (contig.getSequenceLength() - querySize) / 2;
int middleStop = middleStart + querySize;
logger.warn(String.format("Checking contig %s length %d with cache size %d and query size %d with intermediate query", contig.getSequenceName(), contig.getSequenceLength(), cacheSize, querySize));
for (int i = 0; i < contig.getSequenceLength(); i += 10) {
int start = i;
int stop = start + querySize;
if (stop <= contig.getSequenceLength()) {
ReferenceSequence grabMiddle = caching.getSubsequenceAt(contig.getSequenceName(), middleStart, middleStop);
ReferenceSequence cachedVal = caching.getSubsequenceAt(contig.getSequenceName(), start, stop);
ReferenceSequence uncachedVal = uncached.getSubsequenceAt(contig.getSequenceName(), start, stop);
Assert.assertEquals(cachedVal.getName(), uncachedVal.getName());
Assert.assertEquals(cachedVal.getContigIndex(), uncachedVal.getContigIndex());
Assert.assertEquals(cachedVal.getBases(), uncachedVal.getBases());
}
}
}
Aggregations