Search in sources :

Example 56 with SamReaderFactory

use of htsjdk.samtools.SamReaderFactory in project jvarkit by lindenb.

the class Biostar103303 method doWork.

@Override
public int doWork(final List<String> args) {
    if (this.gtfuri == null) {
        LOG.error("GTF input misssing");
        return -1;
    }
    SamReader samReader = null;
    SAMRecordIterator iter = null;
    PrintWriter out = null;
    try {
        out = super.openFileOrStdoutAsPrintWriter(outputFile);
        SamReaderFactory srf = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.LENIENT);
        if (args.isEmpty()) {
            LOG.info("Reading sfomr stdin");
            samReader = srf.open(SamInputResource.of(stdin()));
        } else if (args.isEmpty()) {
            final File filename = new File(args.get(0));
            LOG.info("Reading from " + filename);
            samReader = srf.open(filename);
        } else {
            LOG.error("Illegal number of arguments.");
            return -1;
        }
        this.readGTF(gtfuri, samReader.getFileHeader().getSequenceDictionary());
        if (this.exonMap.isEmpty()) {
            LOG.error("no exon found");
            return -1;
        }
        iter = samReader.iterator();
        SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(samReader.getFileHeader().getSequenceDictionary());
        while (iter.hasNext()) {
            SAMRecord rec = iter.next();
            progress.watch(rec);
            if (rec.getReadUnmappedFlag())
                continue;
            if (rec.getReadFailsVendorQualityCheckFlag())
                continue;
            Cigar cigar = rec.getCigar();
            if (cigar == null)
                continue;
            for (List<GTFGene.Exon> L : this.exonMap.getOverlapping(new Interval(rec.getReferenceName(), rec.getAlignmentStart(), rec.getAlignmentEnd()))) {
                for (GTFGene.Exon exon : L) {
                    boolean found_in_prev = false;
                    boolean found_in_next = false;
                    boolean found_in_curr = false;
                    List<GTFGene.Exon> prev = exon.getPrev();
                    List<GTFGene.Exon> next = exon.getNext();
                    int refPos = rec.getAlignmentStart();
                    for (CigarElement ce : cigar.getCigarElements()) {
                        switch(ce.getOperator()) {
                            case M:
                            case X:
                            case EQ:
                                {
                                    for (int i = 0; i < ce.getLength(); ++i) {
                                        for (GTFGene.Exon ex2 : prev) {
                                            if (ex2.contains(refPos)) {
                                                found_in_prev = true;
                                            }
                                        }
                                        for (GTFGene.Exon ex2 : next) {
                                            if (ex2.contains(refPos)) {
                                                found_in_next = true;
                                            }
                                        }
                                        if (exon.contains(refPos)) {
                                            found_in_curr = true;
                                        }
                                        refPos++;
                                    }
                                    break;
                                }
                            default:
                                {
                                    if (ce.getOperator().consumesReferenceBases()) {
                                        refPos += ce.getLength();
                                    }
                                    break;
                                }
                        }
                    }
                    if (found_in_prev && found_in_next && !found_in_curr) {
                        exon.count_prev_and_next++;
                    } else if (found_in_prev && !found_in_next && found_in_curr) {
                        exon.count_prev_and_curr++;
                    } else if (!found_in_prev && found_in_next && found_in_curr) {
                        exon.count_curr_and_next++;
                    } else if (!found_in_prev && !found_in_next && found_in_curr) {
                        exon.count_curr_only++;
                    } else if (!found_in_curr && !found_in_next && !found_in_prev) {
                    // ??
                    } else {
                        exon.count_others++;
                    }
                }
            }
        }
        progress.finish();
        out.print("#chrom");
        out.print("\t");
        out.print("exon.start");
        out.print("\t");
        out.print("exon.end");
        out.print("\t");
        out.print("exon.exon_id");
        out.print("\t");
        out.print("exon.index5_3");
        out.print("\t");
        out.print("transcript_id");
        out.print("\t");
        out.print("gene_name");
        out.print("\t");
        out.print("gene_id");
        out.print("\t");
        out.print("exon.count_prev_and_next");
        out.print("\t");
        out.print("exon.count_prev_and_curr");
        out.print("\t");
        out.print("exon.count_curr_and_next");
        out.print("\t");
        out.print("exon.count_curr_only");
        out.print("\t");
        out.print("exon.count_others");
        out.println();
        for (List<GTFGene.Exon> L : exonMap.values()) {
            for (GTFGene.Exon exon : L) {
                out.print(exon.getGene().chrom);
                out.print("\t");
                out.print(exon.start);
                out.print("\t");
                out.print(exon.end);
                out.print("\t");
                out.print(notnull(exon.exon_id));
                out.print("\t");
                out.print("" + (exon.index + 1) + "/" + exon.getGene().exons.size());
                out.print("\t");
                out.print(exon.getGene().transcript_id);
                out.print("\t");
                out.print(notnull(exon.getGene().gene_name));
                out.print("\t");
                out.print(notnull(exon.getGene().gene_id));
                out.print("\t");
                out.print(exon.count_prev_and_next);
                out.print("\t");
                out.print(exon.count_prev_and_curr);
                out.print("\t");
                out.print(exon.count_curr_and_next);
                out.print("\t");
                out.print(exon.count_curr_only);
                out.print("\t");
                out.print(exon.count_others);
                out.println();
            }
        }
        out.flush();
        return 0;
    } catch (Exception e) {
        LOG.error(e);
        return -1;
    } finally {
        CloserUtil.close(iter);
        CloserUtil.close(samReader);
        CloserUtil.close(out);
    }
}
Also used : SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SamReaderFactory(htsjdk.samtools.SamReaderFactory) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) CigarElement(htsjdk.samtools.CigarElement) IOException(java.io.IOException) SamReader(htsjdk.samtools.SamReader) Cigar(htsjdk.samtools.Cigar) SAMRecord(htsjdk.samtools.SAMRecord) File(java.io.File) PrintWriter(java.io.PrintWriter) Interval(htsjdk.samtools.util.Interval)

Example 57 with SamReaderFactory

use of htsjdk.samtools.SamReaderFactory in project jvarkit by lindenb.

the class LowResBam2Raster method doWork.

@Override
public int doWork(final List<String> args) {
    if (this.regionStr == null) {
        LOG.error("Region was not defined.");
        return -1;
    }
    if (this.WIDTH < 100) {
        LOG.info("adjusting WIDTH to 100");
        this.WIDTH = 100;
    }
    if (this.gcWinSize <= 0) {
        LOG.info("adjusting GC win size to 5");
        this.gcWinSize = 5;
    }
    SamReader samFileReader = null;
    try {
        if (this.referenceFile != null) {
            LOG.info("loading reference");
            this.indexedFastaSequenceFile = new IndexedFastaSequenceFile(this.referenceFile);
        }
        final SamReaderFactory srf = super.createSamReaderFactory();
        this.interval = new IntervalParser(this.indexedFastaSequenceFile == null ? null : this.indexedFastaSequenceFile.getSequenceDictionary()).parse(this.regionStr);
        if (this.interval == null) {
            LOG.error("Cannot parse interval " + regionStr + " or chrom doesn't exists in sam dictionary.");
            return -1;
        }
        LOG.info("Interval is " + this.interval);
        loadVCFs();
        if (this.knownGeneUrl != null) {
            IntervalTreeMap<List<KnownGene>> map = KnownGene.loadUriAsIntervalTreeMap(this.knownGeneUrl, (KG) -> (KG.getContig().equals(this.interval.getContig()) && !(KG.getEnd() < this.interval.getStart() || KG.getStart() + 1 > this.interval.getEnd())));
            this.knownGenes.addAll(map.values().stream().flatMap(L -> L.stream()).collect(Collectors.toList()));
        }
        for (final String bamFile : IOUtils.unrollFiles(args)) {
            samFileReader = srf.open(SamInputResource.of(bamFile));
            final SAMFileHeader header = samFileReader.getFileHeader();
            final SAMSequenceDictionary dict = header.getSequenceDictionary();
            if (dict == null) {
                LOG.error("no dict in " + bamFile);
                return -1;
            }
            if (dict.getSequence(this.interval.getContig()) == null) {
                LOG.error("no such chromosome in " + bamFile + " " + this.interval);
                return -1;
            }
            scan(samFileReader);
            samFileReader.close();
            samFileReader = null;
        }
        if (this.key2partition.isEmpty()) {
            LOG.error("No data was found. no Read-Group specified ? no data in that region ?");
            return -1;
        }
        this.key2partition.values().stream().forEach(P -> P.make());
        saveImages(this.key2partition.values().stream().map(P -> P.image).collect(Collectors.toList()));
        return RETURN_OK;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(samFileReader);
    }
}
Also used : SamReader(htsjdk.samtools.SamReader) IntervalParser(com.github.lindenb.jvarkit.util.bio.IntervalParser) SamReaderFactory(htsjdk.samtools.SamReaderFactory) ArrayList(java.util.ArrayList) List(java.util.List) SAMFileHeader(htsjdk.samtools.SAMFileHeader) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile)

Aggregations

SamReaderFactory (htsjdk.samtools.SamReaderFactory)57 SamReader (htsjdk.samtools.SamReader)51 File (java.io.File)43 SAMRecord (htsjdk.samtools.SAMRecord)27 IOException (java.io.IOException)26 SAMFileHeader (htsjdk.samtools.SAMFileHeader)18 SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)17 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)17 IndexedFastaSequenceFile (htsjdk.samtools.reference.IndexedFastaSequenceFile)14 ArrayList (java.util.ArrayList)13 List (java.util.List)11 SAMFileWriterFactory (htsjdk.samtools.SAMFileWriterFactory)10 HashSet (java.util.HashSet)10 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)9 SAMFileWriter (htsjdk.samtools.SAMFileWriter)8 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)8 URL (java.net.URL)8 HashMap (java.util.HashMap)8 BufferedReader (java.io.BufferedReader)7 PrintWriter (java.io.PrintWriter)7