Search in sources :

Example 21 with SAMFileWriter

use of htsjdk.samtools.SAMFileWriter in project jvarkit by lindenb.

From the class TestUtils, method sortBamOnQueryName.

/**
 * Copies the records of a BAM file into a new temporary BAM whose header
 * declares a {@code queryname} sort order.
 *
 * @param bamFile the BAM file to read
 * @param pred optional record filter; when {@code null} every record is kept
 * @return the temporary BAM file that received the retained records
 * @throws IOException if the temporary file cannot be created or a resource fails to close
 */
protected File sortBamOnQueryName(final Path bamFile, final Predicate<SAMRecord> pred) throws IOException {
    final File sortedBam = this.createTmpFile(".bam");
    // try-with-resources guarantees reader, writer and iterator are released
    // even when reading or writing a record throws.
    try (SamReader sr = SamReaderFactory.makeDefault().open(bamFile)) {
        final SAMFileHeader outHeader = sr.getFileHeader().clone();
        outHeader.setSortOrder(SortOrder.queryname);
        // Second argument is false: the input is not declared as already sorted
        // (presumably htsjdk's "presorted" flag, so the writer sorts — confirm against the htsjdk docs).
        try (SAMFileWriter w = new SAMFileWriterFactory().makeBAMWriter(outHeader, false, sortedBam);
                SAMRecordIterator iter = sr.iterator()) {
            while (iter.hasNext()) {
                final SAMRecord rec = iter.next();
                if (pred == null || pred.test(rec)) {
                    w.addAlignment(rec);
                }
            }
        }
    }
    return sortedBam;
}
Also used : Arrays(java.util.Arrays) AfterGroups(org.testng.annotations.AfterGroups) IOUtil(htsjdk.samtools.util.IOUtil) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VCFHeader(htsjdk.variant.vcf.VCFHeader) SamFiles(htsjdk.samtools.SamFiles) Random(java.util.Random) Test(org.testng.annotations.Test) JFXPanel(javafx.embed.swing.JFXPanel) SAMFileHeader(htsjdk.samtools.SAMFileHeader) SortOrder(htsjdk.samtools.SAMFileHeader.SortOrder) Application(javafx.application.Application) DefaultSAMRecordFactory(htsjdk.samtools.DefaultSAMRecordFactory) Vector(java.util.Vector) BeforeGroups(org.testng.annotations.BeforeGroups) ImageIO(javax.imageio.ImageIO) SAXParser(javax.xml.parsers.SAXParser) FastaSequenceIndexCreator(htsjdk.samtools.reference.FastaSequenceIndexCreator) Path(java.nio.file.Path) ZipEntry(java.util.zip.ZipEntry) CloserUtil(htsjdk.samtools.util.CloserUtil) PrintWriter(java.io.PrintWriter) Predicate(java.util.function.Predicate) JfxLauncher(com.github.lindenb.jvarkit.util.jcommander.JfxLauncher) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) BeforeClass(org.testng.annotations.BeforeClass) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMRecord(htsjdk.samtools.SAMRecord) AfterTest(org.testng.annotations.AfterTest) SAMSequenceDictionaryCodec(htsjdk.samtools.SAMSequenceDictionaryCodec) Platform(javafx.application.Platform) CountDownLatch(java.util.concurrent.CountDownLatch) ReferenceSequenceFileFactory(htsjdk.samtools.reference.ReferenceSequenceFileFactory) List(java.util.List) Stream(java.util.stream.Stream) VariantContext(htsjdk.variant.variantcontext.VariantContext) Pattern(java.util.regex.Pattern) SamReaderFactory(htsjdk.samtools.SamReaderFactory) BeforeSuite(org.testng.annotations.BeforeSuite) FilenameFilter(java.io.FilenameFilter) CloseableIterator(htsjdk.samtools.util.CloseableIterator) ZipInputStream(java.util.zip.ZipInputStream) DataProvider(org.testng.annotations.DataProvider) SAXParserFactory(javax.xml.parsers.SAXParserFactory) 
Function(java.util.function.Function) ValidationStringency(htsjdk.samtools.ValidationStringency) ArrayList(java.util.ArrayList) NcbiApiKey(com.github.lindenb.jvarkit.util.ncbi.NcbiApiKey) Interval(htsjdk.samtools.util.Interval) BeforeTest(org.testng.annotations.BeforeTest) Assert(org.testng.Assert) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) SAMRecordFactory(htsjdk.samtools.SAMRecordFactory) SAMSequenceDictionaryExtractor(htsjdk.variant.utils.SAMSequenceDictionaryExtractor) AfterClass(org.testng.annotations.AfterClass) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) Files(java.nio.file.Files) BufferedWriter(java.io.BufferedWriter) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) IOException(java.io.IOException) FileInputStream(java.io.FileInputStream) SamReader(htsjdk.samtools.SamReader) File(java.io.File) DefaultHandler(org.xml.sax.helpers.DefaultHandler) Stage(javafx.stage.Stage) Paths(java.nio.file.Paths) BAMIndex(htsjdk.samtools.BAMIndex) BufferedReader(java.io.BufferedReader) AfterSuite(org.testng.annotations.AfterSuite) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SamReader(htsjdk.samtools.SamReader) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) SAMFileHeader(htsjdk.samtools.SAMFileHeader) File(java.io.File)

Example 22 with SAMFileWriter

use of htsjdk.samtools.SAMFileWriter in project jvarkit by lindenb.

From the class TestUtils, method addClippingToBam.

/**
 * Writes a copy of a BAM file in which every eligible record gets a random
 * clip (hard or soft, 1 to 100 bases) added on both ends.
 *
 * <p>A record is copied unchanged when it is unmapped, has no cigar, is
 * already clipped, or carries no base qualities.
 *
 * @param bamFile the BAM file to read
 * @return the temporary BAM file containing the clipped records
 * @throws IOException if the temporary file cannot be created or a resource fails to close
 */
protected File addClippingToBam(final File bamFile) throws IOException {
    final String bases = "ATGC";
    final File clippedBam = this.createTmpFile(".bam");
    // try-with-resources releases reader, writer and iterator even if a record fails.
    try (SamReader sr = SamReaderFactory.makeDefault().open(bamFile)) {
        final SAMFileHeader inHeader = sr.getFileHeader();
        final boolean createIndex = sr.hasIndex() && inHeader.getSortOrder().equals(SortOrder.coordinate);
        if (createIndex) {
            // The index is expected next to the OUTPUT file, so register that
            // path (not one in the input's directory) for deletion.
            this.deleteFilesAtExit.add(new File(clippedBam.getParentFile(), IOUtil.basename(clippedBam) + BAMIndex.BAMIndexSuffix));
        }
        try (SAMFileWriter w = new SAMFileWriterFactory().setCreateIndex(createIndex).makeBAMWriter(inHeader, true, clippedBam);
                SAMRecordIterator iter = sr.iterator()) {
            while (iter.hasNext()) {
                final SAMRecord rec = iter.next();
                // Arrays.equals compares content; byte[].equals would only compare identity.
                final boolean keepAsIs = rec.getReadUnmappedFlag()
                        || rec.getCigar() == null
                        || rec.getCigar().isClipped()
                        || Arrays.equals(rec.getBaseQualities(), SAMRecord.NULL_QUALS)
                        || rec.getBaseQualityString().equals(SAMRecord.NULL_QUALS_STRING);
                if (!keepAsIs) {
                    // side 0 prepends the clip, side 1 appends it
                    for (int side = 0; side < 2; side++) {
                        final String cigar;
                        final boolean hard = this.random.nextBoolean();
                        final int clipLen = 1 + this.random.nextInt(100);
                        final StringBuilder seq = new StringBuilder();
                        final StringBuilder qual = new StringBuilder();
                        if (hard) {
                            // hard clip: cigar operator only, no bases or qualities added
                            cigar = String.valueOf(clipLen) + "H";
                        } else {
                            // soft clip: random bases with quality character '#'
                            cigar = String.valueOf(clipLen) + "S";
                            for (int x = 0; x < clipLen; ++x) {
                                seq.append(bases.charAt(this.random.nextInt(bases.length())));
                                qual.append("#");
                            }
                        }
                        if (side == 0) {
                            rec.setReadString(seq.toString() + rec.getReadString());
                            rec.setBaseQualityString(qual.toString() + rec.getBaseQualityString());
                            rec.setCigarString(cigar + rec.getCigarString());
                        } else {
                            rec.setCigarString(rec.getCigarString() + cigar);
                            rec.setReadString(rec.getReadString() + seq.toString());
                            rec.setBaseQualityString(rec.getBaseQualityString() + qual.toString());
                        }
                    }
                }
                w.addAlignment(rec);
            }
        }
    }
    // Validate and return the file that was just produced, not the input.
    assertIsValidBam(clippedBam);
    return clippedBam;
}
Also used : Arrays(java.util.Arrays) AfterGroups(org.testng.annotations.AfterGroups) IOUtil(htsjdk.samtools.util.IOUtil) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VCFHeader(htsjdk.variant.vcf.VCFHeader) SamFiles(htsjdk.samtools.SamFiles) Random(java.util.Random) Test(org.testng.annotations.Test) JFXPanel(javafx.embed.swing.JFXPanel) SAMFileHeader(htsjdk.samtools.SAMFileHeader) SortOrder(htsjdk.samtools.SAMFileHeader.SortOrder) Application(javafx.application.Application) DefaultSAMRecordFactory(htsjdk.samtools.DefaultSAMRecordFactory) Vector(java.util.Vector) BeforeGroups(org.testng.annotations.BeforeGroups) ImageIO(javax.imageio.ImageIO) SAXParser(javax.xml.parsers.SAXParser) FastaSequenceIndexCreator(htsjdk.samtools.reference.FastaSequenceIndexCreator) Path(java.nio.file.Path) ZipEntry(java.util.zip.ZipEntry) CloserUtil(htsjdk.samtools.util.CloserUtil) PrintWriter(java.io.PrintWriter) Predicate(java.util.function.Predicate) JfxLauncher(com.github.lindenb.jvarkit.util.jcommander.JfxLauncher) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) BeforeClass(org.testng.annotations.BeforeClass) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMRecord(htsjdk.samtools.SAMRecord) AfterTest(org.testng.annotations.AfterTest) SAMSequenceDictionaryCodec(htsjdk.samtools.SAMSequenceDictionaryCodec) Platform(javafx.application.Platform) CountDownLatch(java.util.concurrent.CountDownLatch) ReferenceSequenceFileFactory(htsjdk.samtools.reference.ReferenceSequenceFileFactory) List(java.util.List) Stream(java.util.stream.Stream) VariantContext(htsjdk.variant.variantcontext.VariantContext) Pattern(java.util.regex.Pattern) SamReaderFactory(htsjdk.samtools.SamReaderFactory) BeforeSuite(org.testng.annotations.BeforeSuite) FilenameFilter(java.io.FilenameFilter) CloseableIterator(htsjdk.samtools.util.CloseableIterator) ZipInputStream(java.util.zip.ZipInputStream) DataProvider(org.testng.annotations.DataProvider) SAXParserFactory(javax.xml.parsers.SAXParserFactory) 
Function(java.util.function.Function) ValidationStringency(htsjdk.samtools.ValidationStringency) ArrayList(java.util.ArrayList) NcbiApiKey(com.github.lindenb.jvarkit.util.ncbi.NcbiApiKey) Interval(htsjdk.samtools.util.Interval) BeforeTest(org.testng.annotations.BeforeTest) Assert(org.testng.Assert) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) SAMRecordFactory(htsjdk.samtools.SAMRecordFactory) SAMSequenceDictionaryExtractor(htsjdk.variant.utils.SAMSequenceDictionaryExtractor) AfterClass(org.testng.annotations.AfterClass) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) Files(java.nio.file.Files) BufferedWriter(java.io.BufferedWriter) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) IOException(java.io.IOException) FileInputStream(java.io.FileInputStream) SamReader(htsjdk.samtools.SamReader) File(java.io.File) DefaultHandler(org.xml.sax.helpers.DefaultHandler) Stage(javafx.stage.Stage) Paths(java.nio.file.Paths) BAMIndex(htsjdk.samtools.BAMIndex) BufferedReader(java.io.BufferedReader) AfterSuite(org.testng.annotations.AfterSuite) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SamReader(htsjdk.samtools.SamReader) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) SAMFileHeader(htsjdk.samtools.SAMFileHeader) File(java.io.File)

Example 23 with SAMFileWriter

use of htsjdk.samtools.SAMFileWriter in project jvarkit by lindenb.

From the class Biostar90204, method doWork.

/**
 * Splits the input alignments into consecutive BAM files holding at most
 * {@code record_per_file} retained records each.
 *
 * <p>Output files are named {@code [prefix.]NNN.bam}, where {@code NNN} is the
 * split number zero-padded to {@code suffix_length} digits. For each split,
 * one line "path, first read number, last read number" (tab-separated) is
 * written to the manifest; when no manifest file is configured the lines go
 * to a null output stream.
 *
 * @param args command-line arguments, handed to {@code oneFileOrNull} to select the input
 * @return 0 on success, -1 on invalid settings or on any exception
 */
@Override
public int doWork(final List<String> args) {
    if (this.suffix_length < 0) {
        LOG.error("Bad value of suffix_length:" + this.suffix_length);
        return -1;
    }
    if (this.record_per_file < 1L) {
        LOG.error("Bad value of record_per_file:" + this.record_per_file);
        return -1;
    }
    // "%00d" is rejected by java.util.Formatter, so a zero suffix length
    // (accepted by the check above) falls back to an unpadded number.
    final String splitNumberFormat = this.suffix_length > 0 ? "%0" + this.suffix_length + "d" : "%d";
    SAMFileWriter sfw = null;
    SAMRecordIterator iter = null;
    SamReader samFileReader = null;
    PrintWriter manifest = new PrintWriter(new NullOuputStream());
    try {
        samFileReader = super.openSamReader(oneFileOrNull(args));
        final SAMFileHeader header = samFileReader.getFileHeader();
        int split_file_number = 0;
        long nReads = 0L;
        iter = samFileReader.iterator();
        if (this.manifestFile != null) {
            manifest.close();
            manifest = new PrintWriter(manifestFile);
        }
        while (iter.hasNext()) {
            final SAMRecord rec = iter.next();
            if (this.samRecordFilter.filterOut(rec))
                continue;
            ++nReads;
            if (sfw == null) {
                // first retained record of a new split: open the next output file
                split_file_number++;
                final String pathname = (this.prefix.isEmpty() ? "" : this.prefix + ".") + String.format(splitNumberFormat, split_file_number) + ".bam";
                final File out = new File(pathname);
                manifest.write(pathname);
                manifest.write("\t" + (nReads) + "\t");
                final SAMFileHeader header2 = header.clone();
                header2.addComment("SPLIT:" + split_file_number);
                header2.addComment("SPLIT:Starting from Read" + nReads);
                sfw = this.writingBamArgs.openSAMFileWriter(out, header2, true);
            }
            sfw.addAlignment(rec);
            if (nReads % record_per_file == 0) {
                // split is full: close it and complete its manifest line
                sfw.close();
                manifest.write((nReads) + "\n");
                sfw = null;
            }
        }
        if (sfw != null) {
            // last, partially filled split
            sfw.close();
            sfw = null; // already closed; keeps the finally block from closing it again
            manifest.write((nReads) + "\n");
        }
        manifest.flush();
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(manifest);
        CloserUtil.close(sfw);
        CloserUtil.close(iter);
        CloserUtil.close(samFileReader);
    }
    return 0;
}
Also used : SamReader(htsjdk.samtools.SamReader) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMRecord(htsjdk.samtools.SAMRecord) NullOuputStream(com.github.lindenb.jvarkit.io.NullOuputStream) SAMFileHeader(htsjdk.samtools.SAMFileHeader) File(java.io.File) PrintWriter(java.io.PrintWriter)

Example 24 with SAMFileWriter

use of htsjdk.samtools.SAMFileWriter in project jvarkit by lindenb.

From the class Biostar76892, method doWork.

/**
 * Copies the input alignments to the output, re-flagging read pairs whose two
 * mates are mapped on the same reference and on the same strand.
 *
 * <p>Candidate records (paired, both mates mapped, not a proper pair, same
 * reference, same strand) are buffered until their mate shows up. A buffered
 * record is evicted when the current record is on another reference or starts
 * more than {@code distance} bases after the buffered record's end. When both
 * mates are found their strand flags are rewritten, they are marked as a
 * proper pair unless either fails the vendor quality check, and both receive
 * the attribute {@code rv=1}. Records that are not fixed are written only
 * when {@code onlySaveFixed} is false.
 *
 * @param args command-line arguments, handed to {@code oneFileOrNull} to select the input
 * @return {@code RETURN_OK} on success, -1 on any exception
 */
@Override
public int doWork(final List<String> args) {
    SamReader sfr = null;
    SAMFileWriter sfw = null;
    SAMRecordIterator iter = null;
    try {
        sfr = super.openSamReader(oneFileOrNull(args));
        sfw = writingBamArgs.openSAMFileWriter(this.outputFile, sfr.getFileHeader(), true);
        long nRecords = 0;
        final List<SAMRecord> buffer = new ArrayList<SAMRecord>();
        iter = sfr.iterator();
        for (; ; ) {
            SAMRecord rec = null;
            // get next record
            if (iter.hasNext()) {
                rec = iter.next();
                ++nRecords;
                if (nRecords % 1000000 == 0)
                    LOG.info("records: " + nRecords);
                // Reference indexes are compared by value: '!=' on boxed Integers
                // compares identity and is unreliable outside the small-integer cache.
                if (!rec.getReadPairedFlag() || rec.getReadUnmappedFlag() || rec.getMateUnmappedFlag() || rec.getProperPairFlag() || !java.util.Objects.equals(rec.getReferenceIndex(), rec.getMateReferenceIndex()) || rec.getReadNegativeStrandFlag() == !rec.getMateNegativeStrandFlag()) {
                    if (!onlySaveFixed)
                        sfw.addAlignment(rec);
                    continue;
                }
            }
            if (rec != null) {
                int i = 0;
                // cleanup buffer
                int mate_index = -1;
                while (i < buffer.size()) {
                    final SAMRecord prev = buffer.get(i);
                    if (!java.util.Objects.equals(prev.getReferenceIndex(), rec.getReferenceIndex()) || prev.getAlignmentEnd() + distance < rec.getAlignmentStart()) {
                        // too far behind the current record: its mate can no longer be paired here
                        if (!onlySaveFixed)
                            sfw.addAlignment(prev);
                        buffer.remove(i);
                    } else if (prev.getReadName().equals(rec.getReadName()) && ((prev.getFirstOfPairFlag() && rec.getSecondOfPairFlag()) || (rec.getFirstOfPairFlag() && prev.getSecondOfPairFlag()))) {
                        mate_index = i;
                        ++i;
                    } else {
                        ++i;
                    }
                }
                if (mate_index == -1) {
                    // mate not seen yet: wait for it
                    buffer.add(rec);
                } else {
                    final SAMRecord mate = buffer.get(mate_index);
                    buffer.remove(mate_index);
                    LOG.info("changing " + rec + " " + mate);
                    if (mate.getReadNegativeStrandFlag()) {
                        mate.setReadNegativeStrandFlag(false);
                        rec.setMateNegativeStrandFlag(mate.getReadNegativeStrandFlag());
                    } else {
                        rec.setReadNegativeStrandFlag(false);
                        mate.setMateNegativeStrandFlag(rec.getReadNegativeStrandFlag());
                    }
                    if (!mate.getReadFailsVendorQualityCheckFlag() && !rec.getReadFailsVendorQualityCheckFlag()) {
                        mate.setProperPairFlag(true);
                        rec.setProperPairFlag(true);
                    }
                    mate.setAttribute("rv", 1);
                    rec.setAttribute("rv", 1);
                    sfw.addAlignment(mate);
                    sfw.addAlignment(rec);
                }
            } else {
                // end of input: flush whatever is still waiting for a mate
                for (final SAMRecord r : buffer) {
                    if (!onlySaveFixed)
                        sfw.addAlignment(r);
                }
                break;
            }
        }
        LOG.info("done");
        sfw.close();
        sfw = null; // already closed; keeps the finally block from closing it again
        return RETURN_OK;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(iter);
        CloserUtil.close(sfw);
        CloserUtil.close(sfr);
    }
}
Also used : SamReader(htsjdk.samtools.SamReader) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMRecord(htsjdk.samtools.SAMRecord) ArrayList(java.util.ArrayList)

Example 25 with SAMFileWriter

use of htsjdk.samtools.SAMFileWriter in project jvarkit by lindenb.

From the class Biostar78400, method doWork.

/**
 * Assigns a read group to every input record, using the flowcell and lane
 * captured from the read name and a flowcell/lane/read-group description
 * loaded from {@code XML}.
 *
 * <p>The output header is a clone of the input header with an extra comment
 * and with its read groups replaced by those declared in the XML. The read
 * name must match {@code readNameSignatureStr}; group 1 is used as the
 * flowcell name and group 2 as the lane number.
 *
 * @param args command-line arguments, handed to {@code oneFileOrNull} to select the input
 * @return {@code RETURN_OK} on success, -1 on a missing or invalid XML description,
 *         an unparsable read name, or any exception
 */
@Override
public int doWork(List<String> args) {
    if (this.XML == null) {
        LOG.error("XML file missing");
        return -1;
    }
    final Map<String, Map<Integer, String>> flowcell2lane2id = new HashMap<String, Map<Integer, String>>();
    SamReader sfr = null;
    SAMFileWriter sfw = null;
    SAMRecordIterator iter = null;
    try {
        final Pattern readNameSignature = Pattern.compile(this.readNameSignatureStr);
        final JAXBContext context = JAXBContext.newInstance(ReadGroup.class, ReadGroupList.class);
        final Unmarshaller unmarshaller = context.createUnmarshaller();
        final ReadGroupList rgl = unmarshaller.unmarshal(new StreamSource(XML), ReadGroupList.class).getValue();
        if (rgl.flowcells.isEmpty()) {
            LOG.error("empty XML " + XML);
            return -1;
        }
        sfr = openSamReader(oneFileOrNull(args));
        final SAMFileHeader header = sfr.getFileHeader().clone();
        header.addComment("Processed with " + getProgramName());
        final Set<String> seenids = new HashSet<String>();
        final List<SAMReadGroupRecord> samReadGroupRecords = new ArrayList<SAMReadGroupRecord>();
        for (final FlowCell fc : rgl.flowcells) {
            final Map<Integer, String> lane2id = new HashMap<Integer, String>();
            for (final Lane lane : fc.lanes) {
                for (final ReadGroup rg : lane.readGroups) {
                    // Set.add returns false when the id was already present
                    if (!seenids.add(rg.id)) {
                        LOG.error("Group id " + rg.id + " defined twice");
                        return -1;
                    }
                    // create the read group we'll be using
                    final SAMReadGroupRecord rgrec = new SAMReadGroupRecord(rg.id);
                    rgrec.setLibrary(rg.library);
                    rgrec.setPlatform(rg.platform);
                    rgrec.setSample(rg.sample);
                    rgrec.setPlatformUnit(rg.platformunit);
                    if (rg.center != null)
                        rgrec.setSequencingCenter(rg.center);
                    if (rg.description != null)
                        rgrec.setDescription(rg.description);
                    lane2id.put(lane.id, rg.id);
                    samReadGroupRecords.add(rgrec);
                }
            }
            if (flowcell2lane2id.containsKey(fc.name)) {
                LOG.error("FlowCell id " + fc.name + " defined twice in XML");
                return -1;
            }
            flowcell2lane2id.put(fc.name, lane2id);
        }
        header.setReadGroups(samReadGroupRecords);
        sfw = this.writingBamArgs.openSAMFileWriter(this.outputFile, header, true);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header);
        iter = sfr.iterator();
        while (iter.hasNext()) {
            final SAMRecord rec = progress.watch(iter.next());
            final Matcher matcher = readNameSignature.matcher(rec.getReadName());
            final String flowcellStr;
            final String laneStr;
            if (matcher.matches()) {
                flowcellStr = matcher.group(1);
                laneStr = matcher.group(2);
            } else {
                LOG.error("Read name " + rec.getReadName() + " doesn't match regular expression " + readNameSignature.pattern() + ". please check options");
                return -1;
            }
            String RGID = null;
            final Map<Integer, String> lane2id = flowcell2lane2id.get(flowcellStr);
            if (lane2id == null)
                throw new RuntimeException("Cannot get flowcell/readgroup for " + rec.getReadName());
            try {
                RGID = lane2id.get(Integer.parseInt(laneStr));
            } catch (final NumberFormatException e) {
                // the lane captured from the read name is not a number
                LOG.error("bad lane-Id in " + rec.getReadName());
                return -1;
            }
            if (RGID == null) {
                throw new RuntimeException("Cannot get RGID for " + rec.getReadName() + " flowcell:" + flowcellStr + " lane2id:" + laneStr + " dict:" + lane2id);
            }
            rec.setAttribute(SAMTag.RG.name(), RGID);
            sfw.addAlignment(rec);
        }
        progress.finish();
        iter.close();
        iter = null; // already closed; keeps the finally block from closing it again
        // Close the writer inside the try so that a failure while finishing the
        // output is logged by the catch block and reflected in the return code.
        sfw.close();
        sfw = null;
        LOG.info("done");
        return RETURN_OK;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        // also reached on the early-return paths above, where the iterator is still open
        CloserUtil.close(iter);
        CloserUtil.close(sfw);
        CloserUtil.close(sfr);
    }
}
Also used : SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) HashMap(java.util.HashMap) Matcher(java.util.regex.Matcher) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) ArrayList(java.util.ArrayList) JAXBContext(javax.xml.bind.JAXBContext) SamReader(htsjdk.samtools.SamReader) Unmarshaller(javax.xml.bind.Unmarshaller) HashSet(java.util.HashSet) Pattern(java.util.regex.Pattern) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) SAMFileWriter(htsjdk.samtools.SAMFileWriter) StreamSource(javax.xml.transform.stream.StreamSource) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) HashMap(java.util.HashMap) Map(java.util.Map)

Aggregations

SAMFileWriter (htsjdk.samtools.SAMFileWriter)76 SAMRecord (htsjdk.samtools.SAMRecord)63 SAMFileHeader (htsjdk.samtools.SAMFileHeader)55 SamReader (htsjdk.samtools.SamReader)55 SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)46 File (java.io.File)40 SAMFileWriterFactory (htsjdk.samtools.SAMFileWriterFactory)27 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)25 IOException (java.io.IOException)22 ArrayList (java.util.ArrayList)20 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)14 Cigar (htsjdk.samtools.Cigar)13 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)13 CigarElement (htsjdk.samtools.CigarElement)12 SamReaderFactory (htsjdk.samtools.SamReaderFactory)12 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)10 Interval (htsjdk.samtools.util.Interval)9 PrintWriter (java.io.PrintWriter)9 List (java.util.List)9 HashMap (java.util.HashMap)8