use of htsjdk.samtools.SamReader in project gridss by PapenfussLab.
the class SplitReadRealigner method createSupplementaryAlignments.
/**
 * Streams every record of {@code input} through the split read realignment helpers,
 * writing results to {@code output}.
 *
 * <p>Each input record is handed to {@code processInputRecord} with the root extractor;
 * after every input record, and again after each {@code aligner.flush()}, all alignment
 * records the aligner currently reports are handed to {@code processAlignmentRecord}
 * with the recursive extractor. Flushing is repeated until a flush yields no further
 * alignment records.
 *
 * <p>The output header is a clone of the input header with its sort order set to
 * {@code unsorted}.
 *
 * @param aligner streaming aligner that is polled and flushed for alignment records
 * @param input file opened through {@code readerFactory}
 * @param output file created through {@code writerFactory}
 * @throws IOException if reading or closing the underlying resources fails
 */
public void createSupplementaryAlignments(StreamingAligner aligner, File input, File output) throws IOException {
    SplitReadFastqExtractor rootExtractor = new SplitReadFastqExtractor(
            false, minSoftClipLength, minSoftClipQuality, isProcessSecondaryAlignments(), eidgen);
    SplitReadFastqExtractor recursiveExtractor = new SplitReadFastqExtractor(
            true, minSoftClipLength, minSoftClipQuality, false, eidgen);
    // Bookkeeping shared with processInputRecord/processAlignmentRecord.
    Map<String, SplitReadRealignmentInfo> pending = new HashMap<>();
    try (SamReader samReader = readerFactory.open(input)) {
        // Work on a copy so the reader's own header is left untouched.
        SAMFileHeader outputHeader = samReader.getFileHeader().clone();
        outputHeader.setSortOrder(SortOrder.unsorted);
        // Resources are closed in reverse order: iterator first, then writer, then reader.
        try (SAMFileWriter samWriter = writerFactory.makeSAMOrBAMWriter(outputHeader, true, output);
                AsyncBufferedIterator<SAMRecord> records =
                        new AsyncBufferedIterator<>(samReader.iterator(), input.getName())) {
            while (records.hasNext()) {
                SAMRecord record = records.next();
                processInputRecord(aligner, rootExtractor, pending, samWriter, record);
                // Consume whatever the aligner has ready before reading more input.
                while (aligner.hasAlignmentRecord()) {
                    processAlignmentRecord(aligner, recursiveExtractor, pending, samWriter);
                }
            }
            // Input exhausted: flush out all realignments.
            aligner.flush();
            while (aligner.hasAlignmentRecord()) {
                // Perform nested realignment; handling a record may queue more work,
                // hence the flush after each drain.
                while (aligner.hasAlignmentRecord()) {
                    processAlignmentRecord(aligner, recursiveExtractor, pending, samWriter);
                }
                aligner.flush();
            }
        }
    }
    assert pending.isEmpty();
}
use of htsjdk.samtools.SamReader in project gridss by PapenfussLab.
the class ComputeSamTags method doWork.
/**
 * Runs the tool: reads {@code INPUT}, passes its records to {@code compute}, and writes
 * the result to {@code OUTPUT} (via a temporary working file when
 * {@code gridss.Defaults.OUTPUT_TO_TEMP_FILE} is set).
 *
 * <p>Unless {@code ASSUME_SORTED} is set, the input header must declare queryname
 * sort order; otherwise an error is logged and the method fails.
 *
 * @return {@code 0} on success, {@code -1} if the sort order check fails or an
 *         {@link IOException} occurs
 */
@Override
protected int doWork() {
    log.debug("Setting language-neutral locale");
    java.util.Locale.setDefault(Locale.ROOT);
    validateParameters();
    final SamReaderFactory samReaderFactory = SamReaderFactory.make();
    final SAMFileWriterFactory samWriterFactory = new SAMFileWriterFactory();
    try (SamReader samReader = samReaderFactory.open(INPUT)) {
        final SAMFileHeader inputHeader = samReader.getFileHeader();
        // The header is only consulted when the caller has not vouched for the ordering.
        final boolean orderingAccepted = ASSUME_SORTED || inputHeader.getSortOrder() == SortOrder.queryname;
        if (!orderingAccepted) {
            log.error("INPUT is not sorted by queryname. " + "ComputeSamTags requires that reads with the same name be sorted together. " + "If the input file satisfies this constraint (the output from many aligners do)," + " this check can be disabled with the ASSUME_SORTED option.");
            return -1;
        }
        try (SAMRecordIterator records = samReader.iterator()) {
            final File tmpoutput;
            if (gridss.Defaults.OUTPUT_TO_TEMP_FILE) {
                tmpoutput = FileSystemContext.getWorkingFileFor(OUTPUT, "gridss.tmp.ComputeSamTags.");
            } else {
                tmpoutput = OUTPUT;
            }
            try (SAMFileWriter samWriter = samWriterFactory.makeSAMOrBAMWriter(inputHeader, true, tmpoutput)) {
                compute(records, samWriter, getReference(), TAGS, SOFTEN_HARD_CLIPS, FIX_MATE_INFORMATION, RECALCULATE_SA_SUPPLEMENTARY, INPUT.getName() + "-");
            }
            // The writer is closed at this point; move the working file into place
            // if one was used (identity comparison: same object means no temp file).
            if (tmpoutput != OUTPUT) {
                FileHelper.move(tmpoutput, OUTPUT, true);
            }
        }
    } catch (IOException e) {
        log.error(e);
        return -1;
    }
    return 0;
}
use of htsjdk.samtools.SamReader in project gridss by PapenfussLab.
the class ByReadNameSinglePassSamProgram method makeItSo.
/**
 * Makes a single pass over {@code input}, grouping consecutive records that share a read
 * name and handing each group to every program via {@code acceptFragment}.
 *
 * <p>Records whose read name is {@code null} are never grouped with a neighbour; each is
 * delivered as its own single-record fragment. After the pass completes and the input
 * has been closed, {@code finish()} is called on every program.
 *
 * <p>The reader is closed on every exit path, including failures during header
 * validation or program setup.
 *
 * @param input SAM/BAM file to read; must be readable
 * @param referenceSequence reference FASTA, or {@code null} to run without a reference
 *        lookup (programs then receive a {@code null} reference)
 * @param assumeSorted when {@code true}, a header sort order other than queryname only
 *        produces a warning instead of an exception
 * @param stopAfter when greater than zero, processing stops at the first read name
 *        boundary at which the progress count has reached this value
 * @param programs programs to set up, feed, and finish
 * @throws FileNotFoundException declared by the reference file construction
 * @throws PicardException if the header sort order is not queryname and
 *         {@code assumeSorted} is {@code false}
 */
public static void makeItSo(final File input, final File referenceSequence, final boolean assumeSorted, final long stopAfter, final Collection<ByReadNameSinglePassSamProgram> programs) throws FileNotFoundException {
    // Setup the standard inputs
    IOUtil.assertFileIsReadable(input);
    final SamReader in = SamReaderFactory.makeDefault().referenceSequence(referenceSequence).open(input);
    try {
        // Optionally load up the reference sequence and double check sequence dictionaries
        final ReferenceLookup lookup;
        if (referenceSequence == null) {
            lookup = null;
        } else {
            IOUtil.assertFileIsReadable(referenceSequence);
            lookup = new TwoBitBufferedReferenceSequenceFile(new IndexedFastaSequenceFile(referenceSequence));
            if (!in.getFileHeader().getSequenceDictionary().isEmpty()) {
                SequenceUtil.assertSequenceDictionariesEqual(in.getFileHeader().getSequenceDictionary(), lookup.getSequenceDictionary());
            }
        }
        // Check on the sort order of the BAM file
        final SortOrder sort = in.getFileHeader().getSortOrder();
        if (sort != SortOrder.queryname) {
            if (assumeSorted) {
                log.warn("File reports sort order '" + sort + "', assuming it's queryname sorted anyway.");
            } else {
                throw new PicardException("File " + input.getAbsolutePath() + " should be queryname sorted but " + "the header says the sort order is " + sort + ". If you believe the file " + "to be queryname sorted you may pass ASSUME_SORTED=true");
            }
        }
        for (final ByReadNameSinglePassSamProgram program : programs) {
            program.setReference(lookup);
            program.setup(in.getFileHeader(), input);
        }
        final ProgressLogger progress = new ProgressLogger(log);
        final SAMRecordIterator rawit = in.iterator();
        final CloseableIterator<SAMRecord> it = new AsyncBufferedIterator<SAMRecord>(rawit, "ByReadNameSinglePassSamProgram " + input.getName());
        try {
            List<SAMRecord> currentRecords = new ArrayList<>();
            String currentReadName = null;
            while (it.hasNext()) {
                SAMRecord r = it.next();
                String readname = r.getReadName();
                // if the read name is null we have to just treat it as a single read
                if (readname == null || !readname.equals(currentReadName)) {
                    // Read name boundary: emit the completed group before starting a new one.
                    if (!currentRecords.isEmpty()) {
                        for (final ByReadNameSinglePassSamProgram program : programs) {
                            program.acceptFragment(currentRecords, lookup);
                        }
                    }
                    currentRecords.clear();
                    currentReadName = readname;
                    // Only stop between groups so a fragment is never split.
                    if (stopAfter > 0 && progress.getCount() >= stopAfter) {
                        break;
                    }
                }
                currentRecords.add(r);
                progress.record(r);
            }
            // Emit the trailing group, if any.
            if (!currentRecords.isEmpty()) {
                for (final ByReadNameSinglePassSamProgram program : programs) {
                    program.acceptFragment(currentRecords, lookup);
                }
            }
        } finally {
            CloserUtil.close(it);
            CloserUtil.close(rawit);
        }
    } finally {
        // Previously the reader leaked when validation or setup threw before iteration began.
        CloserUtil.close(in);
    }
    for (final ByReadNameSinglePassSamProgram program : programs) {
        program.finish();
    }
}
use of htsjdk.samtools.SamReader in project gridss by PapenfussLab.
the class MultipleSamFileCommandLineProgram method getCategoryLabelFor.
/**
 * Chooses the category label for an input file.
 *
 * <p>The label defaults to the file name. When the file exists and the
 * {@code useReadGroupSampleNameCategoryLabel} configuration flag is set, the file header
 * is read; if it declares exactly one read group with a non-empty sample name, that
 * sample name is used instead.
 *
 * @param file input file to label
 * @return the read group sample name when available under the conditions above,
 *         otherwise the file name
 */
private String getCategoryLabelFor(File file) {
    final String fileName = file.getName();
    // The configuration is only consulted for files that exist.
    if (!file.exists() || !getContext().getConfig().useReadGroupSampleNameCategoryLabel) {
        return fileName;
    }
    String label = fileName;
    try (SamReader samReader = SamReaderFactory.makeDefault().open(file)) {
        SAMFileHeader fileHeader = samReader.getFileHeader();
        // A sample name is only taken when there is exactly one read group to take it from.
        boolean singleReadGroup = fileHeader.getReadGroups().size() == 1;
        String sampleName = singleReadGroup ? fileHeader.getReadGroups().get(0).getSample() : null;
        if (sampleName != null && !sampleName.isEmpty()) {
            label = sampleName;
            log.info(String.format("Using Read Group sample name %s for file %s", label, file));
        }
    } catch (IOException e) {
        // Best effort: keep whatever label has been chosen so far.
        log.debug(e, "Unable to load read group headers for ", file);
    }
    return label;
}
use of htsjdk.samtools.SamReader in project gridss by PapenfussLab.
the class MultipleSamFileCommandLineProgram method ensureDictionariesMatch.
/**
 * Verifies that the sequence dictionary of every file in {@code INPUT} equals the
 * sequence dictionary of {@code REFERENCE_SEQUENCE}.
 *
 * <p>Both the reference and each reader are managed with try-with-resources, so a
 * failure while closing no longer replaces a dictionary mismatch exception; it is
 * attached to it as a suppressed exception instead.
 *
 * @throws htsjdk.samtools.util.SequenceUtil.SequenceListsDifferException if an input's
 *         dictionary differs from the reference dictionary (an error is logged first)
 * @throws IOException if closing the reference or a reader fails
 */
public void ensureDictionariesMatch() throws IOException {
    try (ReferenceSequenceFile ref = ReferenceSequenceFileFactory.getReferenceSequenceFile(REFERENCE_SEQUENCE)) {
        final SAMSequenceDictionary dictionary = ref.getSequenceDictionary();
        final SamReaderFactory samFactory = SamReaderFactory.makeDefault();
        for (File f : INPUT) {
            try (SamReader reader = samFactory.open(f)) {
                SequenceUtil.assertSequenceDictionariesEqual(reader.getFileHeader().getSequenceDictionary(), dictionary, f, REFERENCE_SEQUENCE);
            } catch (htsjdk.samtools.util.SequenceUtil.SequenceListsDifferException e) {
                // Log a user-oriented explanation, then let the original exception propagate.
                log.error("Reference genome used by ", f, " does not match reference genome ", REFERENCE_SEQUENCE, ". ", "The reference supplied must match the reference used for every input.");
                throw e;
            }
        }
    }
}
Aggregations