Search in sources :

Example 1 with SAMProgramRecord

use of htsjdk.samtools.SAMProgramRecord in project gatk by broadinstitute.

the class MergeBamAlignment method doWork.

@Override
protected Object doWork() {
    // Check the files are readable/writable
    SAMProgramRecord prod = null;
    if (PROGRAM_RECORD_ID != null) {
        prod = new SAMProgramRecord(PROGRAM_RECORD_ID);
        prod.setProgramVersion(PROGRAM_GROUP_VERSION);
        prod.setCommandLine(PROGRAM_GROUP_COMMAND_LINE);
        prod.setProgramName(PROGRAM_GROUP_NAME);
    }
    if (EXPECTED_ORIENTATIONS == null || EXPECTED_ORIENTATIONS.isEmpty()) {
        EXPECTED_ORIENTATIONS = Arrays.asList(SamPairUtil.PairOrientation.FR);
    }
    final SamAlignmentMerger merger = new SamAlignmentMerger(UNMAPPED_BAM, OUTPUT, REFERENCE_SEQUENCE, prod, CLIP_ADAPTERS, IS_BISULFITE_SEQUENCE, ALIGNED_READS_ONLY, ALIGNED_BAM, MAX_INSERTIONS_OR_DELETIONS, ATTRIBUTES_TO_RETAIN, ATTRIBUTES_TO_REMOVE, READ1_TRIM, READ2_TRIM, READ1_ALIGNED_BAM, READ2_ALIGNED_BAM, EXPECTED_ORIENTATIONS, SORT_ORDER, PRIMARY_ALIGNMENT_STRATEGY.newInstance(), ADD_MATE_CIGAR);
    merger.setClipOverlappingReads(CLIP_OVERLAPPING_READS);
    merger.setKeepAlignerProperPairFlags(ALIGNER_PROPER_PAIR_FLAGS);
    merger.setIncludeSecondaryAlignments(INCLUDE_SECONDARY_ALIGNMENTS);
    merger.mergeAlignment(REFERENCE_SEQUENCE);
    merger.close();
    return null;
}
Also used : SAMProgramRecord(htsjdk.samtools.SAMProgramRecord)

Example 2 with SAMProgramRecord

use of htsjdk.samtools.SAMProgramRecord in project polyGembler by c-zhou.

the class SamFileExtract method run.

@Override
public void run() {
    // TODO Auto-generated method stub
    final SamReaderFactory factory = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS).validationStringency(ValidationStringency.SILENT);
    final SamReader inputSam = factory.open(new File(bam_in));
    final SAMFileHeader header = inputSam.getFileHeader();
    final SAMSequenceDictionary seqdic = header.getSequenceDictionary();
    final SAMFileHeader header_out = new SAMFileHeader();
    final SAMSequenceDictionary seqdic_out = new SAMSequenceDictionary();
    SAMRecordIterator iter = inputSam.iterator();
    File bed_file = new File(bed_in);
    final Set<String> extract = new HashSet<String>();
    try (BufferedReader br = new BufferedReader(new FileReader(bed_file))) {
        String line;
        while ((line = br.readLine()) != null) extract.add(line.split("\\s+")[0]);
    } catch (IOException e) {
        e.printStackTrace();
        System.exit(1);
    }
    header_out.setAttribute("VN", header.getAttribute("VN"));
    header_out.setAttribute("SO", header.getAttribute("SO"));
    List<SAMSequenceRecord> seqs = seqdic.getSequences();
    for (SAMSequenceRecord seq : seqs) if (extract.contains(seq.getSequenceName()))
        seqdic_out.addSequence(seq);
    header_out.setSequenceDictionary(seqdic_out);
    for (SAMReadGroupRecord rg : header.getReadGroups()) header_out.addReadGroup(rg);
    for (SAMProgramRecord pg : header.getProgramRecords()) header_out.addProgramRecord(pg);
    final SAMFileWriter outputSam = new SAMFileWriterFactory().makeSAMOrBAMWriter(header_out, true, new File(bam_out));
    while (iter.hasNext()) {
        SAMRecord rec = iter.next();
        if (extract.contains(rec.getReferenceName()))
            outputSam.addAlignment(rec);
    }
    iter.close();
    try {
        inputSam.close();
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
    outputSam.close();
    System.err.println(bam_in + " return true");
}
Also used : SamReaderFactory(htsjdk.samtools.SamReaderFactory) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) IOException(java.io.IOException) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) SAMProgramRecord(htsjdk.samtools.SAMProgramRecord) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) BufferedReader(java.io.BufferedReader) FileReader(java.io.FileReader) SAMFileHeader(htsjdk.samtools.SAMFileHeader) File(java.io.File) HashSet(java.util.HashSet)

Example 3 with SAMProgramRecord

use of htsjdk.samtools.SAMProgramRecord in project jvarkit by lindenb.

the class BlastToSam method doWork.

@Override
public int doWork(List<String> args) {
    if (this.faidx == null || !this.faidx.exists() || !this.faidx.isFile()) {
        LOG.error("Option -R was not defined or dictionary missing");
        return -1;
    }
    final boolean interleaved_input = this.EXPECTED_SIZE > 0;
    final int maxRecordsInRam = 5000;
    SAMFileWriter sfw = null;
    XMLEventReader rx = null;
    final SAMFileWriterFactory sfwf = new SAMFileWriterFactory();
    sfwf.setCreateIndex(false);
    sfwf.setMaxRecordsInRam(maxRecordsInRam);
    sfwf.setCreateMd5File(false);
    sfwf.setUseAsyncIo(false);
    final SAMFileHeader header = new SAMFileHeader();
    try {
        LOG.info("opening " + faidx);
        this.dictionary = SAMSequenceDictionaryExtractor.extractDictionary(faidx);
        header.setSortOrder(SortOrder.unsorted);
        header.setSequenceDictionary(this.dictionary);
        final JAXBContext jc = JAXBContext.newInstance("gov.nih.nlm.ncbi.blast");
        this.unmarshaller = jc.createUnmarshaller();
        final XMLInputFactory xmlInputFactory = XMLInputFactory.newFactory();
        xmlInputFactory.setProperty(XMLInputFactory.IS_NAMESPACE_AWARE, Boolean.FALSE);
        xmlInputFactory.setProperty(XMLInputFactory.IS_COALESCING, Boolean.TRUE);
        xmlInputFactory.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, Boolean.TRUE);
        xmlInputFactory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.FALSE);
        xmlInputFactory.setXMLResolver(new XMLResolver() {

            @Override
            public Object resolveEntity(String arg0, String arg1, String arg2, String arg3) throws XMLStreamException {
                LOG.info("resolveEntity:" + arg0 + "/" + arg1 + "/" + arg2);
                return null;
            }
        });
        final String inputName = oneFileOrNull(args);
        if (inputName == null) {
            LOG.info("Reading from stdin");
            rx = xmlInputFactory.createXMLEventReader(stdin());
        } else if (args.size() == 1) {
            LOG.info("Reading from " + inputName);
            rx = xmlInputFactory.createXMLEventReader(IOUtils.openURIForBufferedReading(inputName));
        } else {
            LOG.error("Illegal number of args");
            return -1;
        }
        final SAMProgramRecord prg2 = header.createProgramRecord();
        fillHeader(rx, prg2);
        final SAMProgramRecord prg1 = header.createProgramRecord();
        prg1.setCommandLine(getProgramCommandLine());
        prg1.setProgramVersion(getVersion());
        prg1.setProgramName(getProgramName());
        prg1.setPreviousProgramGroupId(prg2.getId());
        final SAMReadGroupRecord rg1 = new SAMReadGroupRecord("g1");
        rg1.setLibrary("blast");
        rg1.setSample("blast");
        rg1.setDescription("blast");
        header.addReadGroup(rg1);
        sfw = this.writingBamArgs.openSAMFileWriter(outputFile, header, true);
        if (interleaved_input) {
            run_paired(sfw, rx, header);
        } else {
            run_single(sfw, rx, header);
        }
        return RETURN_OK;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(sfw);
        CloserUtil.close(rx);
    }
}
Also used : SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) JAXBContext(javax.xml.bind.JAXBContext) SAMProgramRecord(htsjdk.samtools.SAMProgramRecord) XMLStreamException(javax.xml.stream.XMLStreamException) JAXBException(javax.xml.bind.JAXBException) XMLStreamException(javax.xml.stream.XMLStreamException) XMLEventReader(javax.xml.stream.XMLEventReader) XMLResolver(javax.xml.stream.XMLResolver) SAMFileHeader(htsjdk.samtools.SAMFileHeader) XMLInputFactory(javax.xml.stream.XMLInputFactory)

Example 4 with SAMProgramRecord

use of htsjdk.samtools.SAMProgramRecord in project gatk by broadinstitute.

the class IntervalListTools method doWork.

@Override
protected Object doWork() {
    // Check inputs
    for (final File f : INPUT) IOUtil.assertFileIsReadable(f);
    for (final File f : SECOND_INPUT) IOUtil.assertFileIsReadable(f);
    if (OUTPUT != null) {
        if (SCATTER_COUNT == 1) {
            IOUtil.assertFileIsWritable(OUTPUT);
        } else {
            IOUtil.assertDirectoryIsWritable(OUTPUT);
        }
    }
    // Read in the interval lists and apply any padding
    final List<IntervalList> lists = openIntervalLists(INPUT);
    // same for the second list
    final List<IntervalList> secondLists = openIntervalLists(SECOND_INPUT);
    if (UNIQUE && !SORT) {
        LOG.warn("UNIQUE=true requires sorting but SORT=false was specified.  Results will be sorted!");
    }
    final IntervalList result = ACTION.act(lists, secondLists);
    if (SCATTER_COUNT > 1) {
        // Scattering requires a uniqued, sorted interval list.  We want to do this up front (before BREAKING AT BANDS)
        SORT = true;
        UNIQUE = true;
    }
    if (INVERT) {
        // no need to sort, since return will be sorted by definition.
        SORT = false;
        UNIQUE = true;
    }
    final IntervalList possiblySortedResult = SORT ? result.sorted() : result;
    final IntervalList possiblyInvertedResult = INVERT ? IntervalList.invert(possiblySortedResult) : possiblySortedResult;
    //only get unique if this has been asked unless inverting (since the invert will return a unique list)
    List<Interval> finalIntervals = UNIQUE ? possiblyInvertedResult.uniqued().getIntervals() : possiblyInvertedResult.getIntervals();
    if (BREAK_BANDS_AT_MULTIPLES_OF > 0) {
        finalIntervals = IntervalList.breakIntervalsAtBandMultiples(finalIntervals, BREAK_BANDS_AT_MULTIPLES_OF);
    }
    // Decide on a PG ID and make a program group
    final SAMFileHeader header = result.getHeader();
    final Set<String> pgs = new HashSet<>();
    for (final SAMProgramRecord pg : header.getProgramRecords()) pgs.add(pg.getId());
    for (int i = 1; i < Integer.MAX_VALUE; ++i) {
        if (!pgs.contains(String.valueOf(i))) {
            final SAMProgramRecord pg = new SAMProgramRecord(String.valueOf(i));
            pg.setCommandLine(getCommandLine());
            pg.setProgramName(getClass().getSimpleName());
            header.addProgramRecord(pg);
            break;
        }
    }
    // Add any comments
    if (COMMENT != null) {
        for (final String comment : COMMENT) {
            header.addComment(comment);
        }
    }
    final IntervalList output = new IntervalList(header);
    for (final Interval i : finalIntervals) {
        output.add(i);
    }
    final List<IntervalList> resultIntervals;
    if (OUTPUT != null) {
        if (SCATTER_COUNT == 1) {
            output.write(OUTPUT);
            resultIntervals = Arrays.asList(output);
        } else {
            final List<IntervalList> scattered = writeScatterIntervals(output);
            LOG.info(String.format("Wrote %s scatter subdirectories to %s.", scattered.size(), OUTPUT));
            if (scattered.size() != SCATTER_COUNT) {
                LOG.warn(String.format("Requested scatter width of %s, but only emitted %s.  (This may be an expected consequence of running in %s mode.)", SCATTER_COUNT, scattered.size(), IntervalListScatterer.Mode.BALANCING_WITHOUT_INTERVAL_SUBDIVISION));
            }
            resultIntervals = scattered;
        }
    } else {
        resultIntervals = Arrays.asList(output);
    }
    long totalUniqueBaseCount = 0;
    long intervalCount = 0;
    for (final IntervalList finalInterval : resultIntervals) {
        totalUniqueBaseCount = finalInterval.getUniqueBaseCount();
        intervalCount += finalInterval.size();
    }
    LOG.info("Produced " + intervalCount + " intervals totalling " + totalUniqueBaseCount + " unique bases.");
    return null;
}
Also used : IntervalList(htsjdk.samtools.util.IntervalList) SAMFileHeader(htsjdk.samtools.SAMFileHeader) File(java.io.File) SAMProgramRecord(htsjdk.samtools.SAMProgramRecord) Interval(htsjdk.samtools.util.Interval)

Example 5 with SAMProgramRecord

use of htsjdk.samtools.SAMProgramRecord in project gatk by broadinstitute.

the class GATKTool method createProgramGroupID.

/**
     * Returns the program group ID that will be used in the SAM writer.
     * Starts with {@link #getToolName} and looks for the first available ID by appending consecutive integers.
     */
private String createProgramGroupID(final SAMFileHeader header) {
    final String toolName = getToolName();
    String pgID = toolName;
    SAMProgramRecord record = header.getProgramRecord(pgID);
    int count = 1;
    while (record != null) {
        pgID = toolName + "." + String.valueOf(count++);
        record = header.getProgramRecord(pgID);
    }
    return pgID;
}
Also used : SAMProgramRecord(htsjdk.samtools.SAMProgramRecord)

Aggregations

SAMProgramRecord (htsjdk.samtools.SAMProgramRecord)13 SAMFileHeader (htsjdk.samtools.SAMFileHeader)11 SAMFileWriter (htsjdk.samtools.SAMFileWriter)8 SAMRecord (htsjdk.samtools.SAMRecord)7 SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)6 SamReader (htsjdk.samtools.SamReader)6 File (java.io.File)5 SAMFileWriterFactory (htsjdk.samtools.SAMFileWriterFactory)4 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)4 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)4 Interval (htsjdk.samtools.util.Interval)4 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)3 Cigar (htsjdk.samtools.Cigar)3 CigarElement (htsjdk.samtools.CigarElement)3 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)3 SamReaderFactory (htsjdk.samtools.SamReaderFactory)3 IOException (java.io.IOException)3 ArrayList (java.util.ArrayList)3 HashSet (java.util.HashSet)3 FastqReader (htsjdk.samtools.fastq.FastqReader)2