Search in sources :

Example 26 with SAMFileWriterFactory

use of htsjdk.samtools.SAMFileWriterFactory in project polyGembler by c-zhou.

the class TenXMoleculeStats method runBCSortedStats.

public void runBCSortedStats() {
    // TODO Auto-generated method stub
    final SamReader inputSam = factory.open(new File(this.in_bam));
    final SAMSequenceDictionary seqDic = inputSam.getFileHeader().getSequenceDictionary();
    mappedReadsOnChromosome = new long[seqDic.size()];
    /**
     *		try {
     *			oos = Utils.getGZIPBufferedWriter(out_stats);
     *		} catch (IOException e1) {
     *			// TODO Auto-generated catch block
     *			e1.printStackTrace();
     *		}
     */
    oos = Utils.getBufferedWriter(out_stats + ".txt");
    oob = new SAMFileWriterFactory().makeSAMOrBAMWriter(inputSam.getFileHeader(), true, new File(out_stats + ".bam"));
    SAMRecordIterator iter = inputSam.iterator();
    SAMRecord record = iter.next();
    String bc_str;
    this.initial_thread_pool();
    while (record != null) {
        // if(mappedReads%1000000==0) myLogger.info(mappedReads+" mapped records processed.");
        List<SAMRecord> bc_records = new ArrayList<SAMRecord>();
        bc_str = record.getStringAttribute("BX");
        this.processRecord(bc_records, record);
        while (bc_str == null) {
            record = iter.hasNext() ? iter.next() : null;
            if (record == null)
                break;
            this.processRecord(bc_records, record);
            bc_str = record.getStringAttribute("BX");
        }
        if (bc_str == null)
            continue;
        while (true) {
            record = iter.hasNext() ? iter.next() : null;
            if (record == null)
                break;
            if (bc_str.equals(record.getStringAttribute("BX"))) {
                this.processRecord(bc_records, record);
            } else
                break;
        }
        if (bc_records.isEmpty())
            continue;
        readsOnBarcode.put(bc_records.get(0).getStringAttribute("BX"), bc_records.size());
        executor.submit(new MoleculeConstructor(bc_records));
    }
    try {
        iter.close();
        inputSam.close();
        this.waitFor();
        oos.close();
        oob.close();
        BufferedWriter bw_summary = Utils.getBufferedWriter(this.out_stats + ".summary");
        bw_summary.write("##Reads     mapped: " + mappedReads + "\n");
        bw_summary.write("##Reads   unmapped: " + unmappedReads + "\n");
        bw_summary.write("##Reads duplicated: " + duplicatedReads + "\n");
        bw_summary.write("##Reads  bad pairs: " + badpaired + "\n");
        bw_summary.write("##Reads by pseudo-chromosomes:\n");
        for (int i = 0; i < mappedReadsOnChromosome.length; i++) {
            bw_summary.write(" #" + seqDic.getSequence(i).getSequenceName() + " " + mappedReadsOnChromosome[i] + "\n");
        }
        bw_summary.write("##Reads by barcodes:\n");
        for (Map.Entry<String, Integer> entry : readsOnBarcode.entrySet()) {
            bw_summary.write(" #" + entry.getKey() + " " + entry.getValue() + "\n");
        }
        bw_summary.close();
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
    myLogger.info("Completed. " + counter + " bam records processed.");
}
Also used : SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) ArrayList(java.util.ArrayList) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) IOException(java.io.IOException) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) BufferedWriter(java.io.BufferedWriter) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) File(java.io.File) HashMap(java.util.HashMap) Map(java.util.Map)

Example 27 with SAMFileWriterFactory

use of htsjdk.samtools.SAMFileWriterFactory in project polyGembler by c-zhou.

the class TenXSamtools method runSort.

private void runSort() {
    // TODO Auto-generated method stub
    final SamReaderFactory factory = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS).validationStringency(ValidationStringency.SILENT);
    final SamReader inputSam = factory.open(new File(this.bam_in));
    final SAMFileHeader sort_header = inputSam.getFileHeader();
    switch(this.sort_order) {
        case coordinate:
            sort_header.setSortOrder(SortOrder.coordinate);
            break;
        case queryname:
            sort_header.setSortOrder(SortOrder.queryname);
            break;
        case barcode:
            sort_header.setSortOrder(SortOrder.unknown);
            break;
    }
    SAMRecordIterator iter = inputSam.iterator();
    long record_inCount = 0;
    SAMRecord[] buff = new SAMRecord[this.batch_size];
    int k = 0;
    SAMRecord temp = iter.hasNext() ? iter.next() : null;
    this.initial_thread_pool();
    while (temp != null) {
        buff[k++] = temp;
        record_inCount++;
        temp = iter.hasNext() ? iter.next() : null;
        if (k == this.batch_size || temp == null) {
            executor.submit(new Runnable() {

                private SAMRecord[] records;

                @Override
                public void run() {
                    // TODO Auto-generated method stub
                    try {
                        Arrays.sort(records, comprator);
                        final SAMFileWriter outputSam;
                        synchronized (lock) {
                            outputSam = new SAMFileWriterFactory().makeSAMOrBAMWriter(sort_header, true, new File(bam_out + String.format("%08d", batch++)));
                        }
                        int count = 0;
                        for (SAMRecord record : records) {
                            if (record != null) {
                                count++;
                                outputSam.addAlignment(record);
                            }
                        }
                        outputSam.close();
                        synchronized (lock) {
                            record_count += count;
                        }
                        myLogger.info("[" + Thread.currentThread().getName() + "] " + record_count + " records processed.");
                    } catch (Exception e) {
                        Thread t = Thread.currentThread();
                        t.getUncaughtExceptionHandler().uncaughtException(t, e);
                        e.printStackTrace();
                        executor.shutdown();
                        System.exit(1);
                    }
                }

                public Runnable init(SAMRecord[] buff) {
                    // TODO Auto-generated method stub
                    this.records = buff;
                    return (this);
                }
            }.init(buff));
            k = 0;
            buff = new SAMRecord[this.batch_size];
        }
    }
    iter.close();
    try {
        inputSam.close();
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
    myLogger.info(record_inCount + " records read from " + this.bam_in);
    this.waitFor();
    // merge all batches
    myLogger.info("Merge " + batch + " files.");
    final SAMFileWriter outputSam = new SAMFileWriterFactory().makeSAMOrBAMWriter(sort_header, true, new File(this.bam_out));
    final SamReader[] batchSam = new SamReader[batch];
    final SAMRecordIterator[] iterSam = new SAMRecordIterator[batch];
    final boolean[] reachFileEnd = new boolean[batch];
    final TreeMap<SAMRecord, Integer> treeMap = new TreeMap<SAMRecord, Integer>(this.comprator);
    for (int i = 0; i != batch; i++) {
        batchSam[i] = factory.open(new File(this.bam_out + String.format("%08d", i)));
        iterSam[i] = batchSam[i].iterator();
        if (iterSam[i].hasNext())
            treeMap.put(iterSam[i].next(), i);
        reachFileEnd[i] = !iterSam[i].hasNext();
    }
    Entry<SAMRecord, Integer> firstEntry;
    int bch, nReachFileEnd = 0;
    for (boolean b : reachFileEnd) if (b)
        nReachFileEnd++;
    long record_outCount = 0;
    while (!treeMap.isEmpty()) {
        firstEntry = treeMap.pollFirstEntry();
        outputSam.addAlignment(firstEntry.getKey());
        record_outCount++;
        bch = firstEntry.getValue();
        if (!reachFileEnd[bch]) {
            treeMap.put(iterSam[bch].next(), bch);
            if (!iterSam[bch].hasNext()) {
                reachFileEnd[bch] = true;
                nReachFileEnd++;
            }
        }
        if (treeMap.isEmpty() && nReachFileEnd != batch) {
            for (int i = 0; i != batch; i++) {
                if (!reachFileEnd[i]) {
                    treeMap.put(iterSam[i].next(), i);
                    if (!iterSam[i].hasNext()) {
                        reachFileEnd[i] = true;
                        nReachFileEnd++;
                    }
                }
            }
        }
    }
    try {
        outputSam.close();
        for (int i = 0; i != batch; i++) {
            iterSam[i].close();
            batchSam[i].close();
            new File(this.bam_out + String.format("%08d", i)).delete();
        }
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
    myLogger.info(record_outCount + " records written to " + this.bam_out);
}
Also used : SamReaderFactory(htsjdk.samtools.SamReaderFactory) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) IOException(java.io.IOException) TreeMap(java.util.TreeMap) IOException(java.io.IOException) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) File(java.io.File)

Example 28 with SAMFileWriterFactory

use of htsjdk.samtools.SAMFileWriterFactory in project gridss by PapenfussLab.

the class IntermediateFilesTest method createBAM.

public void createBAM(File file, SAMFileHeader header, SAMRecord... data) {
    SAMFileWriter writer = new SAMFileWriterFactory().setCreateIndex(true).makeSAMOrBAMWriter(header, true, file);
    if (header.getSortOrder() == SortOrder.coordinate) {
        SortingCollection<SAMRecord> presort = SortingCollection.newInstance(SAMRecord.class, new BAMRecordCodec(header), new SAMRecordCoordinateComparator(), 100000000, testFolder.getRoot());
        for (SAMRecord r : data) {
            presort.add(r);
        }
        presort.doneAdding();
        for (SAMRecord r : presort) {
            writer.addAlignment(r);
        }
    } else {
        for (SAMRecord r : data) {
            writer.addAlignment(r);
        }
    }
    writer.close();
}
Also used : SAMRecordCoordinateComparator(htsjdk.samtools.SAMRecordCoordinateComparator) BAMRecordCodec(htsjdk.samtools.BAMRecordCodec) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory)

Example 29 with SAMFileWriterFactory

use of htsjdk.samtools.SAMFileWriterFactory in project gridss by PapenfussLab.

the class VcfBreakendToReadPair method writeVisualisationBam.

public void writeVisualisationBam(GenomicProcessingContext pc, File vcf, File bam, File bamFiltered) throws IOException {
    File working = FileSystemContext.getWorkingFileFor(bam);
    File workingFiltered = FileSystemContext.getWorkingFileFor(bamFiltered);
    VCFFileReader vcfReader = new VCFFileReader(vcf, false);
    CloseableIterator<VariantContext> it = vcfReader.iterator();
    SAMFileWriter writer = null;
    SAMFileWriter writerFiltered = null;
    try {
        SAMFileWriterFactory factory = pc.getSamFileWriterFactory(true);
        SAMFileHeader header = pc.getBasicSamHeader();
        writer = factory.makeSAMOrBAMWriter(header, false, working);
        writerFiltered = factory.makeSAMOrBAMWriter(header, false, workingFiltered);
        while (it.hasNext()) {
            IdsvVariantContext variant = IdsvVariantContext.create(pc, null, it.next());
            if (variant instanceof VariantContextDirectedBreakpoint) {
                VariantContextDirectedBreakpoint bp = (VariantContextDirectedBreakpoint) variant;
                if (bp.isFiltered()) {
                    writerFiltered.addAlignment(bp.asSamRecord(header));
                } else {
                    writer.addAlignment(bp.asSamRecord(header));
                }
            }
        }
        writer.close();
        writerFiltered.close();
        // Correct mate pairing since asSAMRecord() does not factor in mate anchor cigar
        new FixMate().fix(working, bam);
        new FixMate().fix(workingFiltered, bamFiltered);
    } finally {
        CloserUtil.close(writer);
        CloserUtil.close(writerFiltered);
        CloserUtil.close(it);
        CloserUtil.close(vcfReader);
        FileHelper.delete(working, true);
        FileHelper.delete(workingFiltered, true);
    }
}
Also used : SAMFileWriter(htsjdk.samtools.SAMFileWriter) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VariantContext(htsjdk.variant.variantcontext.VariantContext) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) SAMFileHeader(htsjdk.samtools.SAMFileHeader) File(java.io.File)

Example 30 with SAMFileWriterFactory

use of htsjdk.samtools.SAMFileWriterFactory in project gridss by PapenfussLab.

the class MultipleSamFileCommandLineProgram method ensureSequenceDictionary.

public static void ensureSequenceDictionary(File fa) throws IOException {
    File output = new File(fa.getAbsolutePath() + ".dict");
    try (ReferenceSequenceFile reference = ReferenceSequenceFileFactory.getReferenceSequenceFile(fa)) {
        SAMSequenceDictionary dictionary = reference.getSequenceDictionary();
        if (dictionary == null) {
            log.info("Attempting to generate missing sequence dictionary for ", fa);
            try {
                final SAMSequenceDictionary sequences = new CreateSequenceDictionary().makeSequenceDictionary(fa);
                final SAMFileHeader samHeader = new SAMFileHeader();
                samHeader.setSequenceDictionary(sequences);
                try (SAMFileWriter samWriter = new SAMFileWriterFactory().makeSAMWriter(samHeader, false, output)) {
                }
            } catch (Exception e) {
                log.error("Missing sequence dictionary for ", fa, " and creation of sequencing dictionary failed.", "Please run the Picard tools CreateSequenceDictionary utility to create", output);
                throw e;
            }
            log.info("Created sequence dictionary ", output);
        } else {
            log.debug("Found sequence dictionary for ", fa);
        }
    }
}
Also used : CreateSequenceDictionary(picard.sam.CreateSequenceDictionary) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) ReferenceSequenceFile(htsjdk.samtools.reference.ReferenceSequenceFile) SAMFileHeader(htsjdk.samtools.SAMFileHeader) ReferenceSequenceFile(htsjdk.samtools.reference.ReferenceSequenceFile) File(java.io.File) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) ConfigurationException(org.apache.commons.configuration.ConfigurationException)

Aggregations

SAMFileWriterFactory (htsjdk.samtools.SAMFileWriterFactory)33 SAMFileWriter (htsjdk.samtools.SAMFileWriter)26 File (java.io.File)26 SAMRecord (htsjdk.samtools.SAMRecord)22 SAMFileHeader (htsjdk.samtools.SAMFileHeader)20 SamReader (htsjdk.samtools.SamReader)17 SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)14 IOException (java.io.IOException)14 SamReaderFactory (htsjdk.samtools.SamReaderFactory)12 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)8 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)7 ArrayList (java.util.ArrayList)7 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)5 Test (org.testng.annotations.Test)5 Interval (htsjdk.samtools.util.Interval)4 BufferedReader (java.io.BufferedReader)4 List (java.util.List)4 BAMIndex (htsjdk.samtools.BAMIndex)3 DefaultSAMRecordFactory (htsjdk.samtools.DefaultSAMRecordFactory)3 SAMProgramRecord (htsjdk.samtools.SAMProgramRecord)3