use of htsjdk.samtools.reference.ReferenceSequenceFile in project gatk by broadinstitute.
the class IntervalUtils method getContigSizes.
/**
* Returns a map of contig names with their sizes.
* @param reference The reference for the intervals.
* @return A map of contig names with their sizes.
*/
public static Map<String, Integer> getContigSizes(final File reference) {
final ReferenceSequenceFile referenceSequenceFile = createReference(reference);
final List<GenomeLoc> locs = GenomeLocSortedSet.createSetFromSequenceDictionary(referenceSequenceFile.getSequenceDictionary()).toList();
final Map<String, Integer> lengths = new LinkedHashMap<>();
for (final GenomeLoc loc : locs) {
lengths.put(loc.getContig(), loc.size());
}
return lengths;
}
use of htsjdk.samtools.reference.ReferenceSequenceFile in project gatk by broadinstitute.
the class ClipReads method onTraversalStart.
/**
* The initialize function.
*/
@Override
public void onTraversalStart() {
if (qTrimmingThreshold >= 0) {
logger.info(String.format("Creating Q-score clipper with threshold %d", qTrimmingThreshold));
}
//
if (clipSequencesArgs != null) {
int i = 0;
for (String toClip : clipSequencesArgs) {
i++;
ReferenceSequence rs = new ReferenceSequence("CMDLINE-" + i, -1, StringUtil.stringToBytes(toClip));
addSeqToClip(rs.getName(), rs.getBases());
}
}
if (clipSequenceFile != null) {
ReferenceSequenceFile rsf = ReferenceSequenceFileFactory.getReferenceSequenceFile(new File(clipSequenceFile));
while (true) {
ReferenceSequence rs = rsf.nextSequence();
if (rs == null)
break;
else {
addSeqToClip(rs.getName(), rs.getBases());
}
}
}
//
if (cyclesToClipArg != null) {
cyclesToClip = new ArrayList<>();
for (String range : cyclesToClipArg.split(",")) {
try {
String[] elts = range.split("-");
int start = Integer.parseInt(elts[0]) - 1;
int stop = Integer.parseInt(elts[1]) - 1;
if (start < 0)
throw new Exception();
if (stop < start)
throw new Exception();
logger.info(String.format("Creating cycle clipper %d-%d", start, stop));
cyclesToClip.add(new MutablePair<>(start, stop));
} catch (Exception e) {
throw new RuntimeException("Badly formatted cyclesToClip argument: " + cyclesToClipArg);
}
}
}
final boolean presorted = EnumSet.of(ClippingRepresentation.WRITE_NS, ClippingRepresentation.WRITE_NS_Q0S, ClippingRepresentation.WRITE_Q0S).contains(clippingRepresentation);
outputBam = createSAMWriter(OUTPUT, presorted);
accumulator = new ClippingData(sequencesToClip);
try {
outputStats = STATSOUTPUT == null ? null : new PrintStream(STATSOUTPUT);
} catch (FileNotFoundException e) {
throw new UserException.CouldNotCreateOutputFile(STATSOUTPUT, e);
}
}
use of htsjdk.samtools.reference.ReferenceSequenceFile in project gatk by broadinstitute.
the class ReferenceFileSource method getAllReferenceBases.
public Map<String, ReferenceBases> getAllReferenceBases() throws IOException {
try (ReferenceSequenceFile referenceSequenceFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(new File(referencePath))) {
Map<String, ReferenceBases> bases = new LinkedHashMap<>();
ReferenceSequence seq;
while ((seq = referenceSequenceFile.nextSequence()) != null) {
String name = seq.getName();
bases.put(name, new ReferenceBases(seq.getBases(), new SimpleInterval(name, 1, seq.length())));
}
return bases;
}
}
use of htsjdk.samtools.reference.ReferenceSequenceFile in project gatk by broadinstitute.
the class ReferenceHadoopSource method getReferenceBases.
@Override
public ReferenceBases getReferenceBases(final PipelineOptions pipelineOptions, final SimpleInterval interval) {
ReferenceSequenceFile referenceSequenceFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(IOUtils.getPath(referencePath));
ReferenceSequence sequence = referenceSequenceFile.getSubsequenceAt(interval.getContig(), interval.getStart(), interval.getEnd());
return new ReferenceBases(sequence.getBases(), interval);
}
use of htsjdk.samtools.reference.ReferenceSequenceFile in project gatk by broadinstitute.
the class ReorderSam method doWork.
@Override
protected Object doWork() {
IOUtil.assertFileIsReadable(INPUT);
IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE);
IOUtil.assertFileIsWritable(OUTPUT);
final SamReader in = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT);
ReferenceSequenceFile reference = ReferenceSequenceFileFactory.getReferenceSequenceFile(REFERENCE_SEQUENCE);
SAMSequenceDictionary refDict = reference.getSequenceDictionary();
if (refDict == null) {
CloserUtil.close(in);
throw new UserException("No reference sequence dictionary found. Aborting. " + "You can create a sequence dictionary for the reference fasta using CreateSequenceDictionary.jar.");
}
printDictionary("SAM/BAM file", in.getFileHeader().getSequenceDictionary());
printDictionary("Reference", refDict);
Map<Integer, Integer> newOrder = buildSequenceDictionaryMap(refDict, in.getFileHeader().getSequenceDictionary());
// has to be after we create the newOrder
SAMFileHeader outHeader = ReadUtils.cloneSAMFileHeader(in.getFileHeader());
outHeader.setSequenceDictionary(refDict);
logger.info("Writing reads...");
if (in.hasIndex()) {
try (final SAMFileWriter out = createSAMWriter(OUTPUT, REFERENCE_SEQUENCE, outHeader, true)) {
// write the reads in contig order
for (final SAMSequenceRecord contig : refDict.getSequences()) {
final SAMRecordIterator it = in.query(contig.getSequenceName(), 0, 0, false);
writeReads(out, it, newOrder, contig.getSequenceName());
}
// don't forget the unmapped reads
writeReads(out, in.queryUnmapped(), newOrder, "unmapped");
}
} else {
try (final SAMFileWriter out = createSAMWriter(OUTPUT, REFERENCE_SEQUENCE, outHeader, false)) {
writeReads(out, in.iterator(), newOrder, "All reads");
}
}
// cleanup
CloserUtil.close(in);
return null;
}
Aggregations