use of htsjdk.samtools.SamReader in project jvarkit by lindenb.
the class ExtendReferenceWithReads method scanRegion.
/**
*scanRegion
* @param contig Reference sequence of interest.
* @param start 0-based, inclusive start of interval of interest. Zero implies start of the reference sequence.
* @param end 0-based, exclusive end of interval of interest. Zero implies end of the reference sequence.
*/
private Map<Integer, Counter<Byte>> scanRegion(final SAMSequenceRecord contig, final int chromStart, final int chromEnd, final Rescue rescue) {
final Map<Integer, Counter<Byte>> pos2bases = new HashMap<>(1 + chromEnd - chromStart);
LOG.info("Scanning :" + contig.getSequenceName() + ":" + chromStart + "-" + chromEnd);
for (int side = 0; side < 2; ++side) {
if (// 5' or 3'
!rescue.equals(Rescue.CENTER) && side > 0) {
// already done
break;
}
for (final SamReader samReader : samReaders) {
final SAMSequenceDictionary dict2 = samReader.getFileHeader().getSequenceDictionary();
final SAMSequenceRecord ssr2 = dict2.getSequence(contig.getSequenceName());
if (ssr2 == null || ssr2.getSequenceLength() != contig.getSequenceLength()) {
LOG.info("No contig " + contig.getSequenceName() + " with L=" + contig.getSequenceLength() + " bases in " + samReader.getResourceDescription());
continue;
}
int mappedPos = -1;
switch(rescue) {
case LEFT:
mappedPos = 1;
break;
case RIGHT:
mappedPos = contig.getSequenceLength();
break;
case CENTER:
mappedPos = (side == 0 ? chromStart + 1 : chromEnd);
break;
default:
throw new IllegalStateException();
}
final SAMRecordIterator iter = samReader.query(contig.getSequenceName(), mappedPos, mappedPos, false);
while (iter.hasNext()) {
final SAMRecord rec = iter.next();
if (rec.getReadUnmappedFlag())
continue;
if (this.filter.filterOut(rec))
continue;
final Cigar cigar = rec.getCigar();
if (cigar == null)
continue;
final byte[] bases = rec.getReadBases();
if (bases == null || bases.length == 0)
continue;
int refPos1 = rec.getUnclippedStart();
int readpos = 0;
for (final CigarElement ce : cigar) {
final CigarOperator op = ce.getOperator();
for (int L = 0; L < ce.getLength(); ++L) {
if (op.consumesReadBases()) {
Counter<Byte> count = pos2bases.get(refPos1 - 1);
if (count == null) {
count = new Counter<>();
pos2bases.put(refPos1 - 1, count);
}
count.incr((byte) Character.toLowerCase(bases[readpos]));
readpos++;
}
if (op.consumesReferenceBases()) {
refPos1++;
}
}
}
}
iter.close();
}
}
return pos2bases;
}
use of htsjdk.samtools.SamReader in project jvarkit by lindenb.
the class ExtendReferenceWithReads method doWork.
@Override
public int doWork(List<String> args) {
if (this.faidx == null) {
LOG.error("No REF defined");
return -1;
}
this.samReaders.clear();
PrintStream out = null;
try {
this.indexedFastaSequenceFile = new IndexedFastaSequenceFile(faidx);
SAMSequenceDictionary dict = this.indexedFastaSequenceFile.getSequenceDictionary();
if (dict == null) {
LOG.error("Reference file is missing a sequence dictionary (use picard)");
return -1;
}
final SamReaderFactory srf = super.createSamReaderFactory();
srf.setOption(Option.CACHE_FILE_BASED_INDEXES, true);
for (final String uri : IOUtils.unrollFiles(args)) {
LOG.info("opening BAM " + uri);
final SamReader sr = srf.open(SamInputResource.of(uri));
/* doesn't work with remote ?? */
if (!sr.hasIndex()) {
LOG.error("file " + uri + " is not indexed");
return -1;
}
final SAMFileHeader header = sr.getFileHeader();
if (!header.getSortOrder().equals(SortOrder.coordinate)) {
LOG.error("file " + uri + " not sorted on coordinate but " + header.getSortOrder());
return -1;
}
final SAMSequenceDictionary dict2 = header.getSequenceDictionary();
if (dict2 == null) {
LOG.error("file " + uri + " needs a sequence dictionary");
return -1;
}
samReaders.add(sr);
}
if (samReaders.isEmpty()) {
LOG.error("No BAM defined");
return -1;
}
out = super.openFileOrStdoutAsPrintStream(this.outputFile);
final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(dict);
for (final SAMSequenceRecord ssr : dict.getSequences()) {
final GenomicSequence genomic = new GenomicSequence(this.indexedFastaSequenceFile, ssr.getSequenceName());
int chromStart = 0;
int nPrinted = 0;
out.print(">");
out.print(ssr.getSequenceName());
for (final Rescue side : Rescue.values()) {
switch(side) {
case LEFT:
/* look before position 0 of chromosome */
{
final Map<Integer, Counter<Byte>> pos2bases = scanRegion(ssr, -1, -1, side);
int newstart = 0;
for (final Integer pos : pos2bases.keySet()) {
if (pos >= 0)
continue;
newstart = Math.min(newstart, pos);
}
while (newstart < 0) {
final Counter<Byte> count = pos2bases.get(newstart);
if (nPrinted % 60 == 0)
out.println();
out.print(consensus(count));
newstart++;
++nPrinted;
}
break;
}
case RIGHT:
/* look after position > length(chromosome) */
{
final Map<Integer, Counter<Byte>> pos2bases = this.scanRegion(ssr, -1, -1, side);
int newend = ssr.getSequenceLength();
for (final Integer pos : pos2bases.keySet()) {
if (pos < ssr.getSequenceLength())
continue;
newend = Math.max(newend, pos + 1);
}
for (int i = ssr.getSequenceLength(); i < newend; i++) {
final Counter<Byte> count = pos2bases.get(i);
if (nPrinted % 60 == 0)
out.println();
out.print(consensus(count));
++nPrinted;
}
break;
}
case CENTER:
/* 0 to chromosome-length */
{
chromStart = 0;
while (chromStart < genomic.length()) {
final char base = Character.toUpperCase(genomic.charAt(chromStart));
if (base != 'N') {
progress.watch(ssr.getSequenceName(), chromStart);
if (nPrinted % 60 == 0)
out.println();
out.print(base);
++chromStart;
++nPrinted;
continue;
}
int chromEnd = chromStart + 1;
while (chromEnd < genomic.length() && Character.toUpperCase(genomic.charAt(chromEnd)) == 'N') {
++chromEnd;
}
if (chromEnd - chromStart < this.minLenNNNNContig) {
while (chromStart < chromEnd) {
progress.watch(ssr.getSequenceName(), chromStart);
if (nPrinted % 60 == 0)
out.println();
out.print(base);
++chromStart;
++nPrinted;
}
continue;
}
final Map<Integer, Counter<Byte>> pos2bases = scanRegion(ssr, chromStart, chromEnd, side);
while (chromStart < chromEnd) {
final Counter<Byte> count = pos2bases.get(chromStart);
if (nPrinted % 60 == 0)
out.println();
if (count == null) {
out.print('N');
} else {
out.print(consensus(count));
}
++chromStart;
++nPrinted;
continue;
}
}
break;
}
}
// end switch type
}
out.println();
}
progress.finish();
out.flush();
out.close();
return RETURN_OK;
} catch (final Exception e) {
LOG.error(e);
return -1;
} finally {
CloserUtil.close(this.indexedFastaSequenceFile);
for (final SamReader r : samReaders) CloserUtil.close(r);
samReaders.clear();
}
}
use of htsjdk.samtools.SamReader in project jvarkit by lindenb.
the class BamToFastq method doWork.
@Override
public int doWork(List<String> args) {
SamReader sfr = null;
SortingCollection<MappedFastq> fastqCollection = null;
try {
boolean found_single = false;
boolean found_paired = false;
long non_primary_alignmaned_flag = 0L;
sfr = super.openSamReader(oneFileOrNull(args));
fastqCollection = SortingCollection.newInstance(MappedFastq.class, new MappedFastqCodec(), new MappedFastqComparator(), this.maxRecordsInRam, this.tmpDir.toPath());
fastqCollection.setDestructiveIteration(true);
SAMRecordIterator iter = sfr.iterator();
SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(sfr.getFileHeader().getSequenceDictionary());
while (iter.hasNext()) {
final SAMRecord rec = progress.watch(iter.next());
if (rec.isSecondaryOrSupplementary()) {
if (non_primary_alignmaned_flag == 0) {
LOG.warn("SKIPPING NON-PRIMARY " + (non_primary_alignmaned_flag + 1) + " ALIGNMENTS");
}
non_primary_alignmaned_flag++;
continue;
}
MappedFastq m = new MappedFastq();
m.name = rec.getReadName();
if (m.name == null)
m.name = "";
m.seq = rec.getReadString();
if (m.seq.equals(SAMRecord.NULL_SEQUENCE_STRING))
m.seq = "";
m.qual = rec.getBaseQualityString();
if (m.qual.equals(SAMRecord.NULL_QUALS_STRING))
m.qual = "";
if (!rec.getReadUnmappedFlag() && rec.getReadNegativeStrandFlag()) {
m.seq = AcidNucleics.reverseComplement(m.seq);
m.qual = new StringBuilder(m.qual).reverse().toString();
}
if (m.seq.length() != m.qual.length()) {
LOG.error("length(seq)!=length(qual) in " + m.name);
continue;
}
if (m.seq.isEmpty() && m.qual.isEmpty()) {
m.seq = "N";
m.qual = "#";
}
if (rec.getReadPairedFlag()) {
found_paired = true;
if (found_single) {
sfr.close();
throw new RuntimeException("input is a mix of paired/singled reads");
}
m.side = (byte) (rec.getSecondOfPairFlag() ? 2 : 1);
} else {
found_single = true;
if (found_paired) {
sfr.close();
throw new RuntimeException("input is a mix of paired/singled reads");
}
m.side = (byte) 0;
}
fastqCollection.add(m);
}
iter.close();
CloserUtil.close(iter);
CloserUtil.close(sfr);
progress.finish();
fastqCollection.doneAdding();
LOG.info("Done reading.");
if (found_paired) {
FastqWriter fqw1 = null;
FastqWriter fqw2 = null;
if (forwardFile != null) {
LOG.info("Writing to " + forwardFile);
fqw1 = new BasicFastqWriter(forwardFile);
} else {
LOG.info("Writing to stdout");
fqw1 = new BasicFastqWriter(new PrintStream(stdout()));
}
if (reverseFile != null) {
LOG.info("Writing to " + reverseFile);
fqw2 = new BasicFastqWriter(reverseFile);
} else {
LOG.info("Writing to interlaced stdout");
fqw2 = fqw1;
}
List<MappedFastq> row = new ArrayList<MappedFastq>();
CloseableIterator<MappedFastq> r = fastqCollection.iterator();
for (; ; ) {
MappedFastq curr = null;
if (r.hasNext())
curr = r.next();
if (curr == null || (!row.isEmpty() && !row.get(0).name.equals(curr.name))) {
if (!row.isEmpty()) {
if (row.size() > 2) {
LOG.warn("WTF :" + row);
}
boolean found_F = false;
boolean found_R = false;
for (MappedFastq m : row) {
switch((int) m.side) {
case 1:
if (found_F)
throw new RuntimeException("two forward reads found for " + row.get(0).name);
found_F = true;
echo(fqw1, m);
break;
case 2:
if (found_R)
throw new RuntimeException("two reverse reads found for " + row.get(0).name);
found_R = true;
echo(fqw2, m);
break;
default:
throw new IllegalStateException("uh???");
}
}
if (!found_F) {
if (this.repair_missing_read) {
LOG.warn("forward not found for " + row.get(0));
MappedFastq pad = new MappedFastq();
pad.side = (byte) 1;
pad.name = row.get(0).name;
pad.seq = "N";
pad.qual = "#";
echo(fqw1, pad);
} else {
throw new RuntimeException("forward not found for " + row);
}
}
if (!found_R) {
if (repair_missing_read) {
LOG.warn("reverse not found for " + row.get(0));
MappedFastq pad = new MappedFastq();
pad.side = (byte) 2;
pad.name = row.get(0).name;
pad.seq = "N";
pad.qual = "#";
echo(fqw2, pad);
} else {
throw new RuntimeException("reverse not found for " + row);
}
}
}
if (curr == null)
break;
row.clear();
}
row.add(curr);
}
r.close();
fqw1.close();
fqw2.close();
} else if (found_single) {
FastqWriter fqw1 = null;
if (forwardFile != null) {
LOG.info("Writing to " + forwardFile);
fqw1 = new BasicFastqWriter(forwardFile);
} else {
LOG.info("Writing to stdout");
fqw1 = new BasicFastqWriter(new PrintStream(stdout()));
}
final CloseableIterator<MappedFastq> r = fastqCollection.iterator();
while (r.hasNext()) {
echo(fqw1, r.next());
}
r.close();
fqw1.close();
}
return RETURN_OK;
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
if (fastqCollection != null)
fastqCollection.cleanup();
}
}
use of htsjdk.samtools.SamReader in project jvarkit by lindenb.
the class SamScanSplitReads method doWork.
@Override
public int doWork(final List<String> args) {
SamReader r = null;
SAMSequenceDictionary dic = null;
final Set<String> sampleNames = new TreeSet<>();
try {
if (args.isEmpty()) {
LOG.info("read stdin");
r = openSamReader(null);
sampleNames.addAll(samples(r.getFileHeader()));
dic = r.getFileHeader().getSequenceDictionary();
if (dic == null) {
LOG.error("SAM input is missing a dictionary");
return -1;
}
scanFile(r);
r.close();
r = null;
} else
for (final String filename : args) {
LOG.info("read " + filename);
r = openSamReader(filename);
sampleNames.addAll(samples(r.getFileHeader()));
final SAMSequenceDictionary dict2 = r.getFileHeader().getSequenceDictionary();
if (dict2 == null) {
LOG.error("SAM input is missing a dictionary");
return -1;
} else if (dic == null) {
dic = dict2;
} else if (!SequenceUtil.areSequenceDictionariesEqual(dic, dict2)) {
LOG.error("incompatibles sequences dictionaries");
return -1;
}
scanFile(r);
r.close();
r = null;
}
if ("vcf".equalsIgnoreCase(this.outputFormat) || (this.outputFile != null && (this.outputFile.getName().endsWith(".vcf") || this.outputFile.getName().endsWith(".vcf.gz")))) {
saveAsVcf(sampleNames, dic);
} else {
saveAsText();
}
return 0;
} catch (Exception e) {
LOG.error(e);
return -1;
} finally {
CloserUtil.close(r);
}
}
use of htsjdk.samtools.SamReader in project jvarkit by lindenb.
the class TestNg01 method assertIsBam.
private static void assertIsBam(final File f) {
boolean b = false;
try {
final SamReader r = SamReaderFactory.makeDefault().open(f);
r.iterator().stream().forEach(R -> R.getReadName());
r.close();
b = true;
} catch (final Exception e) {
b = false;
}
Assert.assertTrue(b, "file " + f + " should be bam.");
}
Aggregations