Use of htsjdk.samtools.SAMFileWriter in project gridss by PapenfussLab.
The class SplitReadRealigner, method createSupplementaryAlignments.
public void createSupplementaryAlignments(StreamingAligner aligner, File input, File output) throws IOException {
    SplitReadFastqExtractor rootExtractor = new SplitReadFastqExtractor(false, minSoftClipLength, minSoftClipQuality, isProcessSecondaryAlignments(), eidgen);
    SplitReadFastqExtractor recursiveExtractor = new SplitReadFastqExtractor(true, minSoftClipLength, minSoftClipQuality, false, eidgen);
    Map<String, SplitReadRealignmentInfo> realignments = new HashMap<>();
    try (SamReader reader = readerFactory.open(input)) {
        SAMFileHeader header = reader.getFileHeader().clone();
        header.setSortOrder(SortOrder.unsorted);
        try (SAMFileWriter writer = writerFactory.makeSAMOrBAMWriter(header, true, output)) {
            try (AsyncBufferedIterator<SAMRecord> bufferedIt = new AsyncBufferedIterator<>(reader.iterator(), input.getName())) {
                while (bufferedIt.hasNext()) {
                    SAMRecord r = bufferedIt.next();
                    processInputRecord(aligner, rootExtractor, realignments, writer, r);
                    while (aligner.hasAlignmentRecord()) {
                        processAlignmentRecord(aligner, recursiveExtractor, realignments, writer);
                    }
                }
                // flush out all realignments
                aligner.flush();
                while (aligner.hasAlignmentRecord()) {
                    // perform nested realignment
                    while (aligner.hasAlignmentRecord()) {
                        processAlignmentRecord(aligner, recursiveExtractor, realignments, writer);
                    }
                    aligner.flush();
                }
            }
        }
    }
    assert (realignments.size() == 0);
}
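All of the examples on this page share the same writing pattern: clone the header from a SamReader, declare the output sort order, open a SAMFileWriter through SAMFileWriterFactory.makeSAMOrBAMWriter in a try-with-resources block, and add records one at a time. The following is a minimal stand-alone sketch of just that pattern; the input.bam/output.bam paths and the plain copy loop are placeholders, not gridss code.

import java.io.File;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMFileHeader.SortOrder;
import htsjdk.samtools.SAMFileWriter;
import htsjdk.samtools.SAMFileWriterFactory;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;

public class CopyBamSketch {
    public static void main(String[] args) throws Exception {
        File input = new File("input.bam");   // placeholder path
        File output = new File("output.bam"); // placeholder path
        try (SamReader reader = SamReaderFactory.makeDefault().open(input)) {
            // Clone the header so the input header is not mutated.
            SAMFileHeader header = reader.getFileHeader().clone();
            header.setSortOrder(SortOrder.unsorted);
            // presorted=true asserts that records are added in the declared sort order.
            try (SAMFileWriter writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(header, true, output)) {
                for (SAMRecord r : reader) {
                    writer.addAlignment(r);
                }
            }
        }
    }
}

makeSAMOrBAMWriter chooses the output format from the file extension, so the same call covers both .sam and .bam targets.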
Use of htsjdk.samtools.SAMFileWriter in project gridss by PapenfussLab.
The class SmithWatermanFastqAligner, method align.
@Override
public void align(File fastq, File output, File reference, int threads) throws IOException {
    try (ReferenceSequenceFile ref = new IndexedFastaSequenceFile(reference)) {
        SAMFileHeader header = new SAMFileHeader();
        header.setSequenceDictionary(ref.getSequenceDictionary());
        byte[] bases = ref.getSequence(ref.getSequenceDictionary().getSequence(referenceIndex).getSequenceName()).getBases();
        try (FastqReader reader = new FastqReader(fastq)) {
            try (SAMFileWriter writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(header, true, output)) {
                for (FastqRecord fqr : reader) {
                    Alignment aln = aligner.align_smith_waterman(fqr.getReadString().getBytes(), bases);
                    SAMRecord r = new SAMRecord(header);
                    r.setReadName(fqr.getReadName());
                    r.setReferenceIndex(referenceIndex);
                    r.setAlignmentStart(aln.getStartPosition() + 1);
                    r.setCigarString(aln.getCigar());
                    r.setReadBases(fqr.getReadString().getBytes());
                    r.setBaseQualities(SAMUtils.fastqToPhred(fqr.getBaseQualityString()));
                    writer.addAlignment(r);
                }
            }
        }
    }
}
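The loop above fills a SAMRecord from each FastqRecord plus a Smith-Waterman alignment. The same accessors also cover the degenerate case where no alignment is available and the read should be emitted unmapped. A minimal sketch of that case; the class and method names are hypothetical and this is not part of SmithWatermanFastqAligner.

import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMFileWriter;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMUtils;
import htsjdk.samtools.fastq.FastqRecord;

final class UnmappedRecordSketch {
    // Write a FASTQ record to an already-open SAMFileWriter as an unmapped read.
    static void writeUnmapped(FastqRecord fqr, SAMFileHeader header, SAMFileWriter writer) {
        SAMRecord r = new SAMRecord(header);
        r.setReadName(fqr.getReadName());
        r.setReadBases(fqr.getReadString().getBytes());
        // Convert the FASTQ quality string (ASCII offset 33) to binary phred scores.
        r.setBaseQualities(SAMUtils.fastqToPhred(fqr.getBaseQualityString()));
        r.setReadUnmappedFlag(true);
        writer.addAlignment(r);
    }
}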
Use of htsjdk.samtools.SAMFileWriter in project gridss by PapenfussLab.
The class ComputeSamTags, method doWork.
@Override
protected int doWork() {
    log.debug("Setting language-neutral locale");
    java.util.Locale.setDefault(Locale.ROOT);
    validateParameters();
    SamReaderFactory readerFactory = SamReaderFactory.make();
    SAMFileWriterFactory writerFactory = new SAMFileWriterFactory();
    try {
        try (SamReader reader = readerFactory.open(INPUT)) {
            SAMFileHeader header = reader.getFileHeader();
            if (!ASSUME_SORTED) {
                if (header.getSortOrder() != SortOrder.queryname) {
                    log.error("INPUT is not sorted by queryname. "
                            + "ComputeSamTags requires that reads with the same name be sorted together. "
                            + "If the input file satisfies this constraint (the output from many aligners does), "
                            + "this check can be disabled with the ASSUME_SORTED option.");
                    return -1;
                }
            }
            try (SAMRecordIterator it = reader.iterator()) {
                File tmpoutput = gridss.Defaults.OUTPUT_TO_TEMP_FILE ? FileSystemContext.getWorkingFileFor(OUTPUT, "gridss.tmp.ComputeSamTags.") : OUTPUT;
                try (SAMFileWriter writer = writerFactory.makeSAMOrBAMWriter(header, true, tmpoutput)) {
                    compute(it, writer, getReference(), TAGS, SOFTEN_HARD_CLIPS, FIX_MATE_INFORMATION, RECALCULATE_SA_SUPPLEMENTARY, INPUT.getName() + "-");
                }
                if (tmpoutput != OUTPUT) {
                    FileHelper.move(tmpoutput, OUTPUT, true);
                }
            }
        }
    } catch (IOException e) {
        log.error(e);
        return -1;
    }
    return 0;
}
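FileHelper.move above is a gridss utility; the underlying write-to-temp-then-move pattern can also be expressed with java.nio alone. A minimal sketch under that assumption; the class and method names are hypothetical, and the atomic-move-with-fallback choice is a common convention rather than the ComputeSamTags behaviour.

import java.io.IOException;
import java.nio.file.AtomicMoveNotSupportedException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;

final class MoveIntoPlaceSketch {
    // Move a fully-written temporary file onto the final output path.
    static void moveIntoPlace(Path tmp, Path output) throws IOException {
        try {
            // Prefer an atomic rename so readers never see a half-written file.
            Files.move(tmp, output, StandardCopyOption.ATOMIC_MOVE);
        } catch (AtomicMoveNotSupportedException e) {
            // Fall back to a plain replacing move on filesystems without atomic rename.
            Files.move(tmp, output, StandardCopyOption.REPLACE_EXISTING);
        }
    }
}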
Use of htsjdk.samtools.SAMFileWriter in project gridss by PapenfussLab.
The class AssemblyEvidenceSource, method assembleChunk.
private void assembleChunk(File output, int chunkNumber, QueryInterval[] qi) throws IOException {
    AssemblyIdGenerator assemblyNameGenerator = new SequentialIdGenerator(String.format("asm%d-", chunkNumber));
    String chuckName = String.format("chunk %d (%s:%d-%s:%d)", chunkNumber,
            getContext().getDictionary().getSequence(qi[0].referenceIndex).getSequenceName(), qi[0].start,
            getContext().getDictionary().getSequence(qi[qi.length - 1].referenceIndex).getSequenceName(), qi[qi.length - 1].end);
    log.info(String.format("Starting assembly on %s", chuckName));
    Stopwatch timer = Stopwatch.createStarted();
    SAMFileHeader header = getContext().getBasicSamHeader();
    // TODO: add assembly @PG header
    File filteredout = FileSystemContext.getWorkingFileFor(output, "filtered.");
    File tmpout = FileSystemContext.getWorkingFileFor(output, "gridss.tmp.");
    try (SAMFileWriter writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(header, false, tmpout)) {
        if (getContext().getAssemblyParameters().writeFiltered) {
            try (SAMFileWriter filteredWriter = new SAMFileWriterFactory().makeSAMOrBAMWriter(header, false, filteredout)) {
                for (BreakendDirection direction : BreakendDirection.values()) {
                    assembleChunk(writer, filteredWriter, chunkNumber, qi, direction, assemblyNameGenerator);
                }
            }
        } else {
            for (BreakendDirection direction : BreakendDirection.values()) {
                assembleChunk(writer, null, chunkNumber, qi, direction, assemblyNameGenerator);
            }
        }
    } catch (Exception e) {
        log.error(e, "Error assembling ", chuckName);
        if (getContext().getConfig().terminateOnFirstError) {
            System.exit(1);
        }
        throw e;
    } finally {
        timer.stop();
        log.info(String.format("Completed assembly on %s in %ds (%s)", chuckName, timer.elapsed(TimeUnit.SECONDS), timer.toString()));
    }
    SAMFileUtil.sort(getContext().getFileSystemContext(), tmpout, output, SortOrder.coordinate);
    if (gridss.Defaults.DELETE_TEMPORARY_FILES) {
        tmpout.delete();
        filteredout.delete();
    }
    if (gridss.Defaults.DEFENSIVE_GC) {
        log.debug("Requesting defensive GC to ensure OS file handles are closed");
        System.gc();
        System.runFinalization();
    }
}
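assembleChunk opens its writer with presorted=false and then coordinate-sorts the temporary file in a separate SAMFileUtil.sort step. When the data fits htsjdk's built-in sorting machinery, an alternative is to declare coordinate order in the header and let the writer sort as it closes. A minimal sketch of that alternative, assuming a caller-supplied SamReader and output file; this is not the gridss implementation.

import java.io.File;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMFileHeader.SortOrder;
import htsjdk.samtools.SAMFileWriter;
import htsjdk.samtools.SAMFileWriterFactory;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SamReader;

final class SortOnCloseSketch {
    // Copy reader contents into a coordinate-sorted BAM, sorting as the writer closes.
    static void writeCoordinateSorted(SamReader reader, File output) {
        SAMFileHeader header = reader.getFileHeader().clone();
        header.setSortOrder(SortOrder.coordinate);
        // presorted=false: the writer buffers records and sorts them into the
        // declared order (spilling to temporary files if necessary) before writing.
        try (SAMFileWriter writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(header, false, output)) {
            for (SAMRecord r : reader) {
                writer.addAlignment(r);
            }
        }
    }
}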
Use of htsjdk.samtools.SAMFileWriter in project jvarkit by lindenb.
The class Biostar154220, method doWork.
@SuppressWarnings("resource")
private int doWork(final SamReader in) throws IOException {
    SAMFileHeader header = in.getFileHeader();
    if (header.getSortOrder() != SAMFileHeader.SortOrder.unsorted) {
        LOG.error("input should be unsorted, reads sorted on REF/query-name e.g: see https://github.com/lindenb/jvarkit/wiki/SortSamRefName");
        return -1;
    }
    SAMSequenceDictionary dict = header.getSequenceDictionary();
    if (dict == null) {
        LOG.error("no dict !");
        return -1;
    }
    SAMFileWriter out = null;
    SAMRecordIterator iter = null;
    int prev_tid = -1;
    int[] depth_array = null;
    try {
        SAMFileHeader header2 = header.clone();
        header2.addComment("Biostar154220" + " " + getVersion() + " " + getProgramCommandLine());
        out = this.writingBams.openSAMFileWriter(outputFile, header2, true);
        SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(dict);
        iter = in.iterator();
        List<SAMRecord> buffer = new ArrayList<>();
        for (;;) {
            SAMRecord rec = null;
            if (iter.hasNext()) {
                rec = progress.watch(iter.next());
            }
            if (rec != null && rec.getReadUnmappedFlag()) {
                out.addAlignment(rec);
                continue;
            }
            // either extend the current buffer, start a new one, or dump the buffer
            if (!buffer.isEmpty() && rec != null && buffer.get(0).getReadName().equals(rec.getReadName()) && buffer.get(0).getReferenceIndex().equals(rec.getReferenceIndex())) {
                buffer.add(progress.watch(rec));
            } else if (buffer.isEmpty() && rec != null) {
                buffer.add(progress.watch(rec));
            } else {
                // dump buffer
                if (!buffer.isEmpty()) {
                    final int tid = buffer.get(0).getReferenceIndex();
                    if (prev_tid == -1 || prev_tid != tid) {
                        SAMSequenceRecord ssr = dict.getSequence(tid);
                        prev_tid = tid;
                        depth_array = null;
                        System.gc();
                        LOG.info("Alloc memory for contig " + ssr.getSequenceName() + " N=" + ssr.getSequenceLength() + "*sizeof(int)");
                        // use a +1 pos
                        depth_array = new int[ssr.getSequenceLength() + 1];
                        Arrays.fill(depth_array, 0);
                    }
                    // position->coverage for this set of reads
                    Counter<Integer> readposition2coverage = new Counter<Integer>();
                    boolean dump_this_buffer = true;
                    for (final SAMRecord sr : buffer) {
                        if (!dump_this_buffer)
                            break;
                        if (this.filter.filterOut(sr))
                            continue;
                        final Cigar cigar = sr.getCigar();
                        if (cigar == null) {
                            throw new IOException("Cigar missing in " + sr.getSAMString());
                        }
                        int refPos1 = sr.getAlignmentStart();
                        for (final CigarElement ce : cigar.getCigarElements()) {
                            final CigarOperator op = ce.getOperator();
                            if (!op.consumesReferenceBases())
                                continue;
                            if (op.consumesReadBases()) {
                                for (int x = 0; x < ce.getLength() && refPos1 + x < depth_array.length; ++x) {
                                    int cov = (int) readposition2coverage.incr(refPos1 + x);
                                    if (depth_array[refPos1 + x] + cov > this.capDepth) {
                                        dump_this_buffer = false;
                                        break;
                                    }
                                }
                            }
                            if (!dump_this_buffer)
                                break;
                            refPos1 += ce.getLength();
                        }
                    }
                    if (dump_this_buffer) {
                        // consume this coverage
                        for (Integer pos : readposition2coverage.keySet()) {
                            depth_array[pos] += (int) readposition2coverage.count(pos);
                        }
                        for (SAMRecord sr : buffer) {
                            out.addAlignment(sr);
                        }
                    }
                    buffer.clear();
                }
                if (rec == null)
                    break;
                buffer.add(rec);
            }
        }
        depth_array = null;
        progress.finish();
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(iter);
        CloserUtil.close(out);
    }
}
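The core of doWork above is the per-buffer capping decision: coverage for the buffered reads is counted tentatively, the whole buffer is rejected if any position would exceed capDepth, and the counts are committed only when every position stays under the cap. The following is a simplified sketch of just that decision, assuming each read covers a contiguous [start, end] window; the jvarkit code instead walks the CIGAR and only counts operators that consume both reference and read bases.

import java.util.HashMap;
import java.util.List;
import java.util.Map;

final class DepthCapSketch {
    // Decide whether a group of reads (given as {start, end} reference windows,
    // 1-based inclusive) can be emitted without pushing any position past capDepth.
    // If so, commit its coverage into depth[] and return true; otherwise leave
    // depth[] untouched and return false.
    static boolean tryAccept(int[] depth, List<int[]> readWindows, int capDepth) {
        Map<Integer, Integer> pending = new HashMap<>();
        for (int[] w : readWindows) {
            for (int pos = w[0]; pos <= w[1] && pos < depth.length; pos++) {
                int added = pending.merge(pos, 1, Integer::sum);
                if (depth[pos] + added > capDepth) {
                    return false; // reject the whole group
                }
            }
        }
        // Cap not exceeded anywhere: commit the tentative coverage.
        for (Map.Entry<Integer, Integer> e : pending.entrySet()) {
            depth[e.getKey()] += e.getValue();
        }
        return true;
    }
}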