Search in sources :

Example 6 with SAMProgramRecord

use of htsjdk.samtools.SAMProgramRecord in project gatk by broadinstitute.

the class GATKTool method getHeaderForSAMWriter.

/**
     * Returns the SAM header suitable for writing SAM/BAM/CRAM files produced by this tool.
     *
     * The default implementation calls {@link #getHeaderForReads} (and makes an empty header if that call returns null)
     * and optionally adds program tag to the header with a program version {@link #getVersion()}, program name {@link #getToolName()}
     * and command line {@link #getCommandLine()}.
     *
     * Subclasses may override.
     *
     * @return SAM header for the SAM writer with (optionally, if {@link #addOutputSAMProgramRecord} is true) program record appropriately.
     */
protected SAMFileHeader getHeaderForSAMWriter() {
    final SAMFileHeader header = getHeaderForReads() == null ? new SAMFileHeader() : getHeaderForReads();
    if (addOutputSAMProgramRecord) {
        final SAMProgramRecord programRecord = new SAMProgramRecord(createProgramGroupID(header));
        programRecord.setProgramVersion(getVersion());
        programRecord.setCommandLine(getCommandLine());
        programRecord.setProgramName(getToolName());
        header.addProgramRecord(programRecord);
    }
    return header;
}
Also used : SAMFileHeader(htsjdk.samtools.SAMFileHeader) SAMProgramRecord(htsjdk.samtools.SAMProgramRecord)

Example 7 with SAMProgramRecord

use of htsjdk.samtools.SAMProgramRecord in project polyGembler by c-zhou.

the class SamFileSplit method run.

@Override
public void run() {
    // TODO Auto-generated method stub
    Utils.makeOutputDir(bam_out);
    final File[] beds = new File(bed_in).listFiles();
    final String[] out_prefix = new String[beds.length];
    for (int i = 0; i < beds.length; i++) {
        out_prefix[i] = bam_out + "/" + beds[i].getName().replaceAll(".bed$", "");
        Utils.makeOutputDir(out_prefix[i]);
    }
    final File[] bams = new File(bam_in).listFiles(new FilenameFilter() {

        @Override
        public boolean accept(File dir, String name) {
            return name.endsWith(".bam");
        }
    });
    this.initial_thread_pool();
    for (File bam : bams) {
        executor.submit(new Runnable() {

            private File bam;

            @Override
            public void run() {
                // TODO Auto-generated method stub
                try {
                    final SamReaderFactory factory = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS).validationStringency(ValidationStringency.SILENT);
                    final SamReader inputSam = factory.open(bam);
                    final SAMFileHeader header = inputSam.getFileHeader();
                    final SAMRecordIterator iter = inputSam.iterator();
                    final SAMSequenceDictionary seqdic = header.getSequenceDictionary();
                    final SAMFileWriter[] outputSam = new SAMFileWriter[beds.length];
                    final SAMSequenceDictionary[] seqdics = new SAMSequenceDictionary[beds.length];
                    final Map<String, Integer> outMap = new HashMap<String, Integer>();
                    final String out = bam.getName();
                    for (int i = 0; i < beds.length; i++) {
                        Set<String> bed_seq = new HashSet<String>();
                        String tmp;
                        BufferedReader br = new BufferedReader(new FileReader(beds[i]));
                        String line;
                        while ((line = br.readLine()) != null) {
                            tmp = line.split("\\s+")[0];
                            bed_seq.add(tmp);
                            outMap.put(tmp, i);
                        }
                        br.close();
                        final SAMFileHeader header_i = new SAMFileHeader();
                        final SAMSequenceDictionary seqdic_i = new SAMSequenceDictionary();
                        header_i.setAttribute("VN", header.getAttribute("VN"));
                        header_i.setAttribute("SO", header.getAttribute("SO"));
                        List<SAMSequenceRecord> seqs = seqdic.getSequences();
                        for (SAMSequenceRecord seq : seqs) if (bed_seq.contains(seq.getSequenceName()))
                            seqdic_i.addSequence(seq);
                        header_i.setSequenceDictionary(seqdic_i);
                        for (SAMReadGroupRecord rg : header.getReadGroups()) header_i.addReadGroup(rg);
                        for (SAMProgramRecord pg : header.getProgramRecords()) header_i.addProgramRecord(pg);
                        outputSam[i] = new SAMFileWriterFactory().makeSAMOrBAMWriter(header_i, true, new File(out_prefix[i] + "/" + out));
                        seqdics[i] = seqdic_i;
                    }
                    Set<String> refs = outMap.keySet();
                    String ref;
                    int f;
                    while (iter.hasNext()) {
                        SAMRecord rec = iter.next();
                        if (refs.contains(ref = rec.getReferenceName())) {
                            f = outMap.get(ref);
                            rec.setReferenceIndex(seqdics[f].getSequenceIndex(ref));
                            outputSam[f].addAlignment(rec);
                        }
                    }
                    iter.close();
                    inputSam.close();
                    for (int i = 0; i < outputSam.length; i++) outputSam[i].close();
                    myLogger.info(out + " return true");
                } catch (Exception e) {
                    Thread t = Thread.currentThread();
                    t.getUncaughtExceptionHandler().uncaughtException(t, e);
                    e.printStackTrace();
                    executor.shutdown();
                    System.exit(1);
                }
            }

            public Runnable init(File bam) {
                this.bam = bam;
                return (this);
            }
        }.init(bam));
    }
    this.waitFor();
}
Also used : SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) HashMap(java.util.HashMap) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) SAMProgramRecord(htsjdk.samtools.SAMProgramRecord) FilenameFilter(java.io.FilenameFilter) SamReader(htsjdk.samtools.SamReader) FileReader(java.io.FileReader) HashSet(java.util.HashSet) SamReaderFactory(htsjdk.samtools.SamReaderFactory) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) IOException(java.io.IOException) SAMRecord(htsjdk.samtools.SAMRecord) BufferedReader(java.io.BufferedReader) SAMFileHeader(htsjdk.samtools.SAMFileHeader) File(java.io.File)

Example 8 with SAMProgramRecord

use of htsjdk.samtools.SAMProgramRecord in project jvarkit by lindenb.

the class PcrClipReads method run.

private int run(final SamReader reader) {
    final SAMFileHeader header1 = reader.getFileHeader();
    final SAMFileHeader header2 = header1.clone();
    Optional<SAMProgramRecord> samProgramRecord = Optional.empty();
    if (this.programId) {
        final SAMProgramRecord spr = header2.createProgramRecord();
        samProgramRecord = Optional.of(spr);
        spr.setProgramName(PcrClipReads.class.getSimpleName());
        spr.setProgramVersion(this.getGitHash());
        spr.setCommandLine(getProgramCommandLine().replace('\t', ' '));
    }
    header2.addComment(getProgramName() + " " + getVersion() + ": Processed with " + getProgramCommandLine());
    header2.setSortOrder(SortOrder.unsorted);
    SAMFileWriter sw = null;
    SAMRecordIterator iter = null;
    try {
        sw = this.writingBamArgs.openSAMFileWriter(outputFile, header2, false);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header1).logger(LOG);
        iter = reader.iterator();
        while (iter.hasNext()) {
            SAMRecord rec = progress.watch(iter.next());
            if (this.onlyFlag != -1 && (rec.getFlags() & this.onlyFlag) != 0) {
                sw.addAlignment(rec);
                continue;
            }
            if (rec.getReadUnmappedFlag()) {
                sw.addAlignment(rec);
                continue;
            }
            final Interval fragment = findInterval(rec);
            if (fragment == null) {
                rec.setMappingQuality(0);
                sw.addAlignment(rec);
                continue;
            }
            // strand is '-' and overap in 5' of PCR fragment
            if (rec.getReadNegativeStrandFlag() && fragment.getStart() < rec.getAlignmentStart() && rec.getAlignmentStart() < fragment.getEnd()) {
                rec.setMappingQuality(0);
                sw.addAlignment(rec);
                continue;
            }
            // strand is '+' and overap in 3' of PCR fragment
            if (!rec.getReadNegativeStrandFlag() && fragment.getStart() < rec.getAlignmentEnd() && rec.getAlignmentEnd() < fragment.getEnd()) {
                rec.setMappingQuality(0);
                sw.addAlignment(rec);
                continue;
            }
            // contained int PCR fragment
            if (rec.getAlignmentStart() >= fragment.getStart() && rec.getAlignmentEnd() <= fragment.getEnd()) {
                sw.addAlignment(rec);
                continue;
            }
            final ReadClipper readClipper = new ReadClipper();
            if (samProgramRecord.isPresent()) {
                readClipper.setProgramGroup(samProgramRecord.get().getId());
            }
            rec = readClipper.clip(rec, fragment);
            sw.addAlignment(rec);
        }
        progress.finish();
        return 0;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(iter);
        CloserUtil.close(sw);
    }
}
Also used : SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) SAMProgramRecord(htsjdk.samtools.SAMProgramRecord) Interval(htsjdk.samtools.util.Interval)

Example 9 with SAMProgramRecord

use of htsjdk.samtools.SAMProgramRecord in project jvarkit by lindenb.

the class SamRetrieveSeqAndQual method doWork.

@Override
public int doWork(List<String> args) {
    FastqReader[] fastqReaders = null;
    SamReader samReader = null;
    SAMFileWriter samWriter = null;
    SAMRecordIterator iter = null;
    try {
        if (fastqFin == null) {
            LOG.error("undefined fastq file");
            return -1;
        } else {
            LOG.info("opening " + fastqFin);
            FastqReader r1 = new FastqReader(fastqFin);
            if (fastqRin == null) {
                fastqReaders = new FastqReader[] { r1 };
            } else {
                LOG.info("opening " + fastqRin);
                FastqReader r2 = new FastqReader(fastqRin);
                fastqReaders = new FastqReader[] { r1, r2 };
            }
        }
        samReader = super.openSamReader(oneFileOrNull(args));
        final SAMFileHeader.SortOrder sortOrder = samReader.getFileHeader().getSortOrder();
        if (sortOrder == null) {
            LOG.warning("undefined sort order read are in the sam order");
        } else if (!sortOrder.equals(SAMFileHeader.SortOrder.queryname)) {
            LOG.error("Bad Sort Order. Sort this input on read name");
            return -1;
        }
        SAMFileHeader header = samReader.getFileHeader().clone();
        SAMProgramRecord prg = header.createProgramRecord();
        prg.setCommandLine(this.getProgramCommandLine());
        prg.setProgramName(this.getProgramName());
        prg.setProgramVersion(this.getVersion());
        samWriter = writingBamArgs.openSAMFileWriter(bamOut, header, true);
        iter = samReader.iterator();
        SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header.getSequenceDictionary());
        FastqRecord[] currFastq = new FastqRecord[] { null, null };
        while (iter.hasNext()) {
            SAMRecord rec = progress.watch(iter.next());
            String readName = rec.getReadName();
            int fastq_index = 0;
            if (rec.getReadPairedFlag()) {
                if (fastqReaders.length != 2) {
                    LOG.error("Not paired but number of fastq!=2");
                    return -1;
                }
                fastq_index = (rec.getFirstOfPairFlag() ? 0 : 1);
            } else {
                if (fastqReaders.length != 1) {
                    LOG.error("Not paired but number of fastq!=1");
                    return -1;
                }
                fastq_index = 0;
            }
            if (sortOrder == SAMFileHeader.SortOrder.queryname) {
                while (currFastq[fastq_index] == null || normalizeFastqName(currFastq[fastq_index].getReadName()).compareTo(readName) < 0) {
                    if (!fastqReaders[fastq_index].hasNext()) {
                        LOG.error("Read Missing for " + readName);
                        return -1;
                    }
                    currFastq[fastq_index] = fastqReaders[fastq_index].next();
                    if (normalizeFastqName(currFastq[fastq_index].getReadName()).compareTo(readName) > 0) {
                        LOG.error("Read Missing for " + readName);
                        return -1;
                    }
                }
            } else {
                if (!fastqReaders[fastq_index].hasNext()) {
                    LOG.error("Read Missing for " + readName);
                    return -1;
                }
                currFastq[fastq_index] = fastqReaders[fastq_index].next();
            }
            if (normalizeFastqName(currFastq[fastq_index].getReadName()).compareTo(readName) != 0) {
                LOG.error("Read Missing/Error for " + readName + " current:" + currFastq[fastq_index].getReadName());
                return -1;
            }
            String fastqBases = currFastq[fastq_index].getReadString();
            String fastqQuals = currFastq[fastq_index].getBaseQualityString();
            /* handle orientation */
            if (!rec.getReadUnmappedFlag() && rec.getReadNegativeStrandFlag()) {
                fastqBases = AcidNucleics.reverseComplement(fastqBases);
                StringBuilder sb = new StringBuilder(fastqQuals.length());
                for (int i = fastqQuals.length() - 1; i >= 0; --i) sb.append(fastqQuals.charAt(i));
                fastqQuals = sb.toString();
            }
            /* remove hard clip */
            Cigar cigar = rec.getCigar();
            if (cigar != null) {
                List<CigarElement> ceList = cigar.getCigarElements();
                if (!ceList.isEmpty()) {
                    CigarElement ce = ceList.get(ceList.size() - 1);
                    if (ce.getOperator() == CigarOperator.HARD_CLIP) {
                        fastqBases = fastqBases.substring(0, fastqBases.length() - ce.getLength());
                        fastqQuals = fastqQuals.substring(0, fastqQuals.length() - ce.getLength());
                    }
                    ce = ceList.get(0);
                    if (ce.getOperator() == CigarOperator.HARD_CLIP) {
                        fastqBases = fastqBases.substring(ce.getLength());
                        fastqQuals = fastqQuals.substring(ce.getLength());
                    }
                }
            }
            rec.setBaseQualityString(fastqQuals);
            rec.setReadString(fastqBases);
            samWriter.addAlignment(rec);
        }
        progress.finish();
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        if (fastqReaders != null)
            for (FastqReader r : fastqReaders) CloserUtil.close(r);
        CloserUtil.close(iter);
        CloserUtil.close(samReader);
        CloserUtil.close(samWriter);
    }
}
Also used : SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) SAMFileWriter(htsjdk.samtools.SAMFileWriter) FastqRecord(htsjdk.samtools.fastq.FastqRecord) CigarElement(htsjdk.samtools.CigarElement) SAMProgramRecord(htsjdk.samtools.SAMProgramRecord) SamReader(htsjdk.samtools.SamReader) Cigar(htsjdk.samtools.Cigar) FastqReader(htsjdk.samtools.fastq.FastqReader) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader)

Example 10 with SAMProgramRecord

use of htsjdk.samtools.SAMProgramRecord in project jvarkit by lindenb.

the class FindNewSpliceSites method doWork.

@Override
public int doWork(final List<String> args) {
    if (this.knownGeneUri == null || this.knownGeneUri.trim().isEmpty()) {
        LOG.error("known Gene file undefined");
        return -1;
    }
    SamReader sfr = null;
    try {
        final Pattern tab = Pattern.compile("[\t]");
        {
            LOG.info("Opening " + this.knownGeneUri);
            LineIterator r = IOUtils.openURIForLineIterator(this.knownGeneUri);
            while (r.hasNext()) {
                final KnownGene g = new KnownGene(tab.split(r.next()));
                // need spliced one
                if (g.getExonCount() == 1)
                    continue;
                final Interval interval = new Interval(g.getContig(), g.getTxStart() + 1, g.getTxEnd());
                List<KnownGene> L = this.knownGenesMap.get(interval);
                if (L == null) {
                    L = new ArrayList<>();
                    this.knownGenesMap.put(interval, L);
                }
                L.add(g);
            }
            LOG.info("Done reading: " + this.knownGeneUri);
        }
        sfr = super.openSamReader(oneFileOrNull(args));
        SAMFileHeader header = sfr.getFileHeader().clone();
        SAMProgramRecord p = header.createProgramRecord();
        p.setCommandLine(getProgramCommandLine());
        p.setProgramVersion(getVersion());
        p.setProgramName(getProgramName());
        this.sfw = this.writingBamArgs.openSAMFileWriter(outputFile, header, true);
        header = sfr.getFileHeader().clone();
        p = header.createProgramRecord();
        p.setCommandLine(getProgramCommandLine());
        p.setProgramVersion(getVersion());
        p.setProgramName(getProgramName());
        this.weird = this.writingBamArgs.createSAMFileWriterFactory().makeSAMWriter(header, true, new NullOuputStream());
        scan(sfr);
        sfr.close();
        LOG.info("Done");
        return 0;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(sfr);
        CloserUtil.close(this.sfw);
        CloserUtil.close(this.weird);
    }
}
Also used : SamReader(htsjdk.samtools.SamReader) Pattern(java.util.regex.Pattern) ArrayList(java.util.ArrayList) NullOuputStream(com.github.lindenb.jvarkit.io.NullOuputStream) KnownGene(com.github.lindenb.jvarkit.util.ucsc.KnownGene) ArrayList(java.util.ArrayList) List(java.util.List) SAMFileHeader(htsjdk.samtools.SAMFileHeader) LineIterator(htsjdk.tribble.readers.LineIterator) SAMProgramRecord(htsjdk.samtools.SAMProgramRecord) Interval(htsjdk.samtools.util.Interval)

Aggregations

SAMProgramRecord (htsjdk.samtools.SAMProgramRecord)13 SAMFileHeader (htsjdk.samtools.SAMFileHeader)11 SAMFileWriter (htsjdk.samtools.SAMFileWriter)8 SAMRecord (htsjdk.samtools.SAMRecord)7 SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)6 SamReader (htsjdk.samtools.SamReader)6 File (java.io.File)5 SAMFileWriterFactory (htsjdk.samtools.SAMFileWriterFactory)4 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)4 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)4 Interval (htsjdk.samtools.util.Interval)4 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)3 Cigar (htsjdk.samtools.Cigar)3 CigarElement (htsjdk.samtools.CigarElement)3 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)3 SamReaderFactory (htsjdk.samtools.SamReaderFactory)3 IOException (java.io.IOException)3 ArrayList (java.util.ArrayList)3 HashSet (java.util.HashSet)3 FastqReader (htsjdk.samtools.fastq.FastqReader)2