use of htsjdk.samtools.SamReaderFactory in project jvarkit by lindenb.
the class Biostar103303 method doWork.
@Override
public int doWork(final List<String> args) {
if (this.gtfuri == null) {
LOG.error("GTF input misssing");
return -1;
}
SamReader samReader = null;
SAMRecordIterator iter = null;
PrintWriter out = null;
try {
out = super.openFileOrStdoutAsPrintWriter(outputFile);
SamReaderFactory srf = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.LENIENT);
if (args.isEmpty()) {
LOG.info("Reading sfomr stdin");
samReader = srf.open(SamInputResource.of(stdin()));
} else if (args.isEmpty()) {
final File filename = new File(args.get(0));
LOG.info("Reading from " + filename);
samReader = srf.open(filename);
} else {
LOG.error("Illegal number of arguments.");
return -1;
}
this.readGTF(gtfuri, samReader.getFileHeader().getSequenceDictionary());
if (this.exonMap.isEmpty()) {
LOG.error("no exon found");
return -1;
}
iter = samReader.iterator();
SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(samReader.getFileHeader().getSequenceDictionary());
while (iter.hasNext()) {
SAMRecord rec = iter.next();
progress.watch(rec);
if (rec.getReadUnmappedFlag())
continue;
if (rec.getReadFailsVendorQualityCheckFlag())
continue;
Cigar cigar = rec.getCigar();
if (cigar == null)
continue;
for (List<GTFGene.Exon> L : this.exonMap.getOverlapping(new Interval(rec.getReferenceName(), rec.getAlignmentStart(), rec.getAlignmentEnd()))) {
for (GTFGene.Exon exon : L) {
boolean found_in_prev = false;
boolean found_in_next = false;
boolean found_in_curr = false;
List<GTFGene.Exon> prev = exon.getPrev();
List<GTFGene.Exon> next = exon.getNext();
int refPos = rec.getAlignmentStart();
for (CigarElement ce : cigar.getCigarElements()) {
switch(ce.getOperator()) {
case M:
case X:
case EQ:
{
for (int i = 0; i < ce.getLength(); ++i) {
for (GTFGene.Exon ex2 : prev) {
if (ex2.contains(refPos)) {
found_in_prev = true;
}
}
for (GTFGene.Exon ex2 : next) {
if (ex2.contains(refPos)) {
found_in_next = true;
}
}
if (exon.contains(refPos)) {
found_in_curr = true;
}
refPos++;
}
break;
}
default:
{
if (ce.getOperator().consumesReferenceBases()) {
refPos += ce.getLength();
}
break;
}
}
}
if (found_in_prev && found_in_next && !found_in_curr) {
exon.count_prev_and_next++;
} else if (found_in_prev && !found_in_next && found_in_curr) {
exon.count_prev_and_curr++;
} else if (!found_in_prev && found_in_next && found_in_curr) {
exon.count_curr_and_next++;
} else if (!found_in_prev && !found_in_next && found_in_curr) {
exon.count_curr_only++;
} else if (!found_in_curr && !found_in_next && !found_in_prev) {
// ??
} else {
exon.count_others++;
}
}
}
}
progress.finish();
out.print("#chrom");
out.print("\t");
out.print("exon.start");
out.print("\t");
out.print("exon.end");
out.print("\t");
out.print("exon.exon_id");
out.print("\t");
out.print("exon.index5_3");
out.print("\t");
out.print("transcript_id");
out.print("\t");
out.print("gene_name");
out.print("\t");
out.print("gene_id");
out.print("\t");
out.print("exon.count_prev_and_next");
out.print("\t");
out.print("exon.count_prev_and_curr");
out.print("\t");
out.print("exon.count_curr_and_next");
out.print("\t");
out.print("exon.count_curr_only");
out.print("\t");
out.print("exon.count_others");
out.println();
for (List<GTFGene.Exon> L : exonMap.values()) {
for (GTFGene.Exon exon : L) {
out.print(exon.getGene().chrom);
out.print("\t");
out.print(exon.start);
out.print("\t");
out.print(exon.end);
out.print("\t");
out.print(notnull(exon.exon_id));
out.print("\t");
out.print("" + (exon.index + 1) + "/" + exon.getGene().exons.size());
out.print("\t");
out.print(exon.getGene().transcript_id);
out.print("\t");
out.print(notnull(exon.getGene().gene_name));
out.print("\t");
out.print(notnull(exon.getGene().gene_id));
out.print("\t");
out.print(exon.count_prev_and_next);
out.print("\t");
out.print(exon.count_prev_and_curr);
out.print("\t");
out.print(exon.count_curr_and_next);
out.print("\t");
out.print(exon.count_curr_only);
out.print("\t");
out.print(exon.count_others);
out.println();
}
}
out.flush();
return 0;
} catch (Exception e) {
LOG.error(e);
return -1;
} finally {
CloserUtil.close(iter);
CloserUtil.close(samReader);
CloserUtil.close(out);
}
}
use of htsjdk.samtools.SamReaderFactory in project jvarkit by lindenb.
the class LowResBam2Raster method doWork.
@Override
public int doWork(final List<String> args) {
if (this.regionStr == null) {
LOG.error("Region was not defined.");
return -1;
}
if (this.WIDTH < 100) {
LOG.info("adjusting WIDTH to 100");
this.WIDTH = 100;
}
if (this.gcWinSize <= 0) {
LOG.info("adjusting GC win size to 5");
this.gcWinSize = 5;
}
SamReader samFileReader = null;
try {
if (this.referenceFile != null) {
LOG.info("loading reference");
this.indexedFastaSequenceFile = new IndexedFastaSequenceFile(this.referenceFile);
}
final SamReaderFactory srf = super.createSamReaderFactory();
this.interval = new IntervalParser(this.indexedFastaSequenceFile == null ? null : this.indexedFastaSequenceFile.getSequenceDictionary()).parse(this.regionStr);
if (this.interval == null) {
LOG.error("Cannot parse interval " + regionStr + " or chrom doesn't exists in sam dictionary.");
return -1;
}
LOG.info("Interval is " + this.interval);
loadVCFs();
if (this.knownGeneUrl != null) {
IntervalTreeMap<List<KnownGene>> map = KnownGene.loadUriAsIntervalTreeMap(this.knownGeneUrl, (KG) -> (KG.getContig().equals(this.interval.getContig()) && !(KG.getEnd() < this.interval.getStart() || KG.getStart() + 1 > this.interval.getEnd())));
this.knownGenes.addAll(map.values().stream().flatMap(L -> L.stream()).collect(Collectors.toList()));
}
for (final String bamFile : IOUtils.unrollFiles(args)) {
samFileReader = srf.open(SamInputResource.of(bamFile));
final SAMFileHeader header = samFileReader.getFileHeader();
final SAMSequenceDictionary dict = header.getSequenceDictionary();
if (dict == null) {
LOG.error("no dict in " + bamFile);
return -1;
}
if (dict.getSequence(this.interval.getContig()) == null) {
LOG.error("no such chromosome in " + bamFile + " " + this.interval);
return -1;
}
scan(samFileReader);
samFileReader.close();
samFileReader = null;
}
if (this.key2partition.isEmpty()) {
LOG.error("No data was found. no Read-Group specified ? no data in that region ?");
return -1;
}
this.key2partition.values().stream().forEach(P -> P.make());
saveImages(this.key2partition.values().stream().map(P -> P.image).collect(Collectors.toList()));
return RETURN_OK;
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(samFileReader);
}
}
Aggregations