Search in sources :

Example 31 with SAMFileWriterFactory

use of htsjdk.samtools.SAMFileWriterFactory in project gridss by PapenfussLab.

The method doWork of the class SoftClipsToSplitReads.

/**
 * Configures a {@link SplitReadRealigner} from the command-line options and asks it
 * to create supplementary alignments for INPUT, writing the result to OUTPUT.
 *
 * <p>When ALIGNER_STREAMING is set an {@link ExternalProcessStreamingAligner} is used;
 * otherwise an {@link ExternalProcessFastqAligner} is used.
 *
 * @return 0 on success, -1 if an {@link IOException} was raised while realigning
 */
@Override
protected int doWork() {
    log.debug("Setting language-neutral locale");
    java.util.Locale.setDefault(Locale.ROOT);
    validateParameters();

    // Processing context shared by the realigner.
    final GenomicProcessingContext context = new GenomicProcessingContext(getFileSystemContext(), REFERENCE_SEQUENCE, getReference());
    context.setCommandLineProgram(this);
    context.setFilterDuplicates(IGNORE_DUPLICATES);

    // Realigner settings are copied straight from the command-line options.
    final SplitReadRealigner splitReadRealigner = new SplitReadRealigner(context);
    splitReadRealigner.setMinSoftClipLength(MIN_CLIP_LENGTH);
    splitReadRealigner.setMinSoftClipQuality(MIN_CLIP_QUAL);
    splitReadRealigner.setProcessSecondaryAlignments(PROCESS_SECONDARY_ALIGNMENTS);
    splitReadRealigner.setWorkerThreads(WORKER_THREADS);

    try {
        final SamReaderFactory samReaders = SamReaderFactory.make();
        final SAMFileWriterFactory samWriters = new SAMFileWriterFactory();
        if (!ALIGNER_STREAMING) {
            final ExternalProcessFastqAligner fastqAligner = new ExternalProcessFastqAligner(samReaders, samWriters, ALIGNER_COMMAND_LINE);
            splitReadRealigner.createSupplementaryAlignments(fastqAligner, INPUT, OUTPUT);
        } else {
            final ExternalProcessStreamingAligner streamingAligner = new ExternalProcessStreamingAligner(samReaders, ALIGNER_COMMAND_LINE, REFERENCE_SEQUENCE, WORKER_THREADS);
            splitReadRealigner.createSupplementaryAlignments(streamingAligner, INPUT, OUTPUT);
        }
        return 0;
    } catch (IOException e) {
        log.error(e);
        return -1;
    }
}
Also used : ExternalProcessStreamingAligner(au.edu.wehi.idsv.alignment.ExternalProcessStreamingAligner) SplitReadRealigner(au.edu.wehi.idsv.SplitReadRealigner) SamReaderFactory(htsjdk.samtools.SamReaderFactory) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) IOException(java.io.IOException) ExternalProcessFastqAligner(au.edu.wehi.idsv.alignment.ExternalProcessFastqAligner) GenomicProcessingContext(au.edu.wehi.idsv.GenomicProcessingContext)

Example 32 with SAMFileWriterFactory

use of htsjdk.samtools.SAMFileWriterFactory in project gridss by PapenfussLab.

The method doWork of the class SubsetToMissing.

/**
 * Copies to OUTPUT every record of INPUT whose hash is not present in any LOOKUP file.
 *
 * <p>For each LOOKUP file the record hashes are either read from a sibling
 * {@code <file>.SubsetToMissing.cache} file (when it exists) or computed from the
 * records and written to that cache file. If STOP_AFTER is a positive value, at most
 * that many hashes are loaded per lookup file and at most that many input records
 * are examined.
 *
 * <p>Failures while reading or writing a cache are logged and processing continues
 * with whatever hashes were loaded.
 *
 * @return always 0
 */
@Override
protected int doWork() {
    long stop = Long.MAX_VALUE;
    if (STOP_AFTER != null && (long) STOP_AFTER > 0) {
        stop = STOP_AFTER;
    }
    log.debug("Setting language-neutral locale");
    java.util.Locale.setDefault(Locale.ROOT);
    if (TMP_DIR == null || TMP_DIR.size() == 0) {
        TMP_DIR = Lists.newArrayList(new File("."));
    }
    SamReaderFactory factory = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT);
    SamReader input = factory.open(INPUT);
    AsyncBufferedIterator<SAMRecord> asyncInput = new AsyncBufferedIterator<SAMRecord>(input.iterator(), 2, 16384);
    SAMFileWriter out = new SAMFileWriterFactory().makeSAMOrBAMWriter(input.getFileHeader(), true, OUTPUT);
    try {
        LongSet hashtable;
        if (PREALLOCATE != null) {
            log.info("Preallocating hash table");
            hashtable = new LongOpenHashBigSet(PREALLOCATE);
        } else {
            hashtable = new LongOpenHashBigSet();
        }
        for (File file : LOOKUP) {
            log.info("Loading lookup hashes for " + file.getAbsolutePath());
            SamReader lookup = factory.open(file);
            AsyncBufferedIterator<SAMRecord> it = new AsyncBufferedIterator<SAMRecord>(lookup.iterator(), 2, 16384);
            try {
                File cache = new File(file.getAbsolutePath() + ".SubsetToMissing.cache");
                if (cache.exists()) {
                    // NOTE(review): a cache left behind by a failed or STOP_AFTER-limited run
                    // is loaded as-is; there is no check that it covers the whole lookup file.
                    log.info("Loading lookup hashes from cache");
                    long n = stop;
                    long loadCount = 0;
                    // try-with-resources closes the stream on every exit path, not only on EOF.
                    try (DataInputStream dis = new DataInputStream(new BufferedInputStream(new FileInputStream(cache)))) {
                        while (n-- > 0) {
                            hashtable.add(dis.readLong());
                            // Count each loaded hash so progress is reported every 10M entries
                            // instead of on every single read.
                            if (++loadCount % 10000000 == 0) {
                                log.info(String.format("Loaded %d from cache", loadCount));
                            }
                        }
                    } catch (EOFException expected) {
                        // End of the cache file is the normal termination of the loop above.
                    } catch (IOException e) {
                        log.error(e);
                    }
                } else {
                    long n = stop;
                    ProgressLoggingSAMRecordIterator loggedit = new ProgressLoggingSAMRecordIterator(it, new ProgressLogger(log));
                    try (DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(cache)))) {
                        while (loggedit.hasNext() && n-- > 0) {
                            long recordhash = hash(loggedit.next());
                            hashtable.add(recordhash);
                            dos.writeLong(recordhash);
                        }
                    } catch (Exception e) {
                        // Deliberate best effort: keep the hashes gathered so far.
                        log.error(e, "Failed to load lookup. Running with partial results");
                    } finally {
                        loggedit.close();
                    }
                }
            } finally {
                it.close();
                try {
                    lookup.close();
                } catch (IOException e) {
                    log.error(e);
                }
            }
        }
        long filtered = 0;
        log.info("Processing input");
        ProgressLoggingSAMRecordIterator inputIt = new ProgressLoggingSAMRecordIterator(asyncInput, new ProgressLogger(log));
        long n = stop;
        while (inputIt.hasNext() && n-- > 0) {
            SAMRecord r = inputIt.next();
            if (!hashtable.contains(hash(r))) {
                out.addAlignment(r);
            } else {
                filtered++;
                if (filtered % 1000000 == 0) {
                    log.info(String.format("Filtered %d reads", filtered));
                }
            }
        }
        log.info("Closing output");
    } finally {
        // Release the writer first, then the input iterator and reader, even if processing failed.
        try {
            out.close();
        } finally {
            asyncInput.close();
            try {
                input.close();
            } catch (IOException e) {
                log.error(e);
            }
        }
    }
    return 0;
}
Also used : SamReaderFactory(htsjdk.samtools.SamReaderFactory) SAMFileWriter(htsjdk.samtools.SAMFileWriter) DataOutputStream(java.io.DataOutputStream) LongSet(it.unimi.dsi.fastutil.longs.LongSet) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) ProgressLogger(htsjdk.samtools.util.ProgressLogger) IOException(java.io.IOException) DataInputStream(java.io.DataInputStream) FileInputStream(java.io.FileInputStream) IOException(java.io.IOException) EOFException(java.io.EOFException) SamReader(htsjdk.samtools.SamReader) LongOpenHashBigSet(it.unimi.dsi.fastutil.longs.LongOpenHashBigSet) BufferedInputStream(java.io.BufferedInputStream) SAMRecord(htsjdk.samtools.SAMRecord) FileOutputStream(java.io.FileOutputStream) EOFException(java.io.EOFException) ProgressLoggingSAMRecordIterator(au.edu.wehi.idsv.ProgressLoggingSAMRecordIterator) File(java.io.File) BufferedOutputStream(java.io.BufferedOutputStream)

Example 33 with SAMFileWriterFactory

use of htsjdk.samtools.SAMFileWriterFactory in project jvarkit by lindenb.

The method test01 of the class Biostar78400Test.

/**
 * Runs Biostar78400 on a one-read unmapped BAM together with an XML read-group
 * description, then checks that the single output record was assigned read group
 * {@code X1} with sample {@code S1}.
 *
 * <p>The read name starts with the flowcell and lane that the XML maps to group X1.
 *
 * @throws IOException if a temporary file cannot be created, written or read
 */
@Test
public void test01() throws IOException {
    final String flowcell = "HS20001259127";
    final String lane = "1";

    // Input BAM: a single unmapped read named after the flowcell and lane.
    final File in = createTmpFile(".bam");
    final SAMFileHeader header = new SAMFileHeader();
    header.setSortOrder(SortOrder.unsorted);
    try (SAMFileWriter sfw = new SAMFileWriterFactory().makeBAMWriter(header, true, in)) {
        final SAMRecord unmapped = new DefaultSAMRecordFactory().createSAMRecord(header);
        unmapped.setReadName(flowcell + ":" + lane + ":1210:15640:52255");
        unmapped.setReadString("GAATTC");
        unmapped.setBaseQualityString("222222");
        SAMUtils.makeReadUnmapped(unmapped);
        sfw.addAlignment(unmapped);
    }
    assertIsValidBam(in);

    // Read-group definitions: two flowcells, only the first matches the read above.
    final File xml = createTmpFile(".xml");
    try (PrintWriter pw = new PrintWriter(xml)) {
        pw.println("<?xml version=\"1.0\"?><read-groups>" + "<flowcell name=\"" + flowcell + "\"><lane index=\"" + lane + "\">" + "<group ID=\"X1\"><library>L1</library><platform>P1</platform>" + "<sample>S1</sample><platformunit>PU1</platformunit>" + "<center>C1</center><description>blabla</description></group>" + "</lane></flowcell><flowcell name=\"HS20001259128\">" + "<lane index=\"2\"><group ID=\"x2\"><library>L2</library>" + "<platform>P2</platform><sample>S2</sample><platformunit>PU1</platformunit>" + "<center>C1</center><description>blabla</description></group></lane>" + "</flowcell></read-groups>");
        pw.flush();
    }
    assertIsXml(xml);

    final File out = createTmpFile(".bam");
    Assert.assertEquals(new Biostar78400().instanceMain(newCmd().add("-o").add(out).add("-x").add(xml).add(in).make()), 0);

    // try-with-resources so the reader and iterator are released even when an assertion fails.
    try (SamReader r = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.LENIENT).open(out)) {
        Assert.assertNotNull(r.getFileHeader());
        Assert.assertNotNull(r.getFileHeader().getReadGroups());
        Assert.assertFalse(r.getFileHeader().getReadGroups().isEmpty());
        try (SAMRecordIterator iter = r.iterator()) {
            Assert.assertTrue(iter.hasNext());
            final SAMRecord rec = iter.next();
            final SAMReadGroupRecord rg = rec.getReadGroup();
            Assert.assertNotNull(rg);
            Assert.assertEquals(rg.getId(), "X1");
            Assert.assertEquals(rg.getSample(), "S1");
            // Exactly one record is expected in the output.
            Assert.assertFalse(iter.hasNext());
        }
    }
}
Also used : SamReader(htsjdk.samtools.SamReader) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMRecord(htsjdk.samtools.SAMRecord) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) SAMFileHeader(htsjdk.samtools.SAMFileHeader) File(java.io.File) DefaultSAMRecordFactory(htsjdk.samtools.DefaultSAMRecordFactory) PrintWriter(java.io.PrintWriter) Test(org.testng.annotations.Test)

Aggregations

SAMFileWriterFactory (htsjdk.samtools.SAMFileWriterFactory)33 SAMFileWriter (htsjdk.samtools.SAMFileWriter)26 File (java.io.File)26 SAMRecord (htsjdk.samtools.SAMRecord)22 SAMFileHeader (htsjdk.samtools.SAMFileHeader)20 SamReader (htsjdk.samtools.SamReader)17 SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)14 IOException (java.io.IOException)14 SamReaderFactory (htsjdk.samtools.SamReaderFactory)12 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)8 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)7 ArrayList (java.util.ArrayList)7 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)5 Test (org.testng.annotations.Test)5 Interval (htsjdk.samtools.util.Interval)4 BufferedReader (java.io.BufferedReader)4 List (java.util.List)4 BAMIndex (htsjdk.samtools.BAMIndex)3 DefaultSAMRecordFactory (htsjdk.samtools.DefaultSAMRecordFactory)3 SAMProgramRecord (htsjdk.samtools.SAMProgramRecord)3