Search in sources :

Example 46 with SAMFileWriter

use of htsjdk.samtools.SAMFileWriter in project polyGembler by c-zhou.

the class SamFileSplit method run.

/**
 * Splits every {@code .bam} file in the {@code bam_in} directory into one BAM per BED file
 * found in the {@code bed_in} directory.
 * <p>
 * For each BED file an output directory is created under {@code bam_out}, named after the BED
 * file with its {@code .bed} suffix removed. Each input BAM is processed by a task submitted to
 * {@code executor}: the first whitespace-delimited column of every BED line is taken as a
 * reference sequence name, a reduced header containing only those sequences is built for each
 * BED file, and every record whose reference name is listed is written to the matching output
 * (keeping the input file name). When a name appears in several BED files, the last BED file
 * read wins for record routing.
 * <p>
 * Any exception inside a task is reported and terminates the JVM with exit status 1.
 *
 * @throws IllegalStateException if {@code bed_in} or {@code bam_in} cannot be listed
 */
@Override
public void run() {
    Utils.makeOutputDir(bam_out);
    final File[] beds = new File(bed_in).listFiles();
    if (beds == null) {
        // listFiles() returns null when the path is not a readable directory
        throw new IllegalStateException("Cannot list files in BED directory: " + bed_in);
    }
    final String[] out_prefix = new String[beds.length];
    for (int i = 0; i < beds.length; i++) {
        // The dot must be escaped: an unescaped '.' would match any character before "bed"
        out_prefix[i] = bam_out + "/" + beds[i].getName().replaceAll("\\.bed$", "");
        Utils.makeOutputDir(out_prefix[i]);
    }
    final File[] bams = new File(bam_in).listFiles(new FilenameFilter() {

        @Override
        public boolean accept(File dir, String name) {
            return name.endsWith(".bam");
        }
    });
    if (bams == null) {
        throw new IllegalStateException("Cannot list files in BAM directory: " + bam_in);
    }
    this.initial_thread_pool();
    for (File bam : bams) {
        executor.submit(new Runnable() {

            // The BAM file handled by this task; set through init(File)
            private File bam;

            @Override
            public void run() {
                try {
                    final SamReaderFactory factory = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS).validationStringency(ValidationStringency.SILENT);
                    final SamReader inputSam = factory.open(bam);
                    final SAMFileHeader header = inputSam.getFileHeader();
                    final SAMRecordIterator iter = inputSam.iterator();
                    final SAMSequenceDictionary seqdic = header.getSequenceDictionary();
                    final SAMFileWriter[] outputSam = new SAMFileWriter[beds.length];
                    final SAMSequenceDictionary[] seqdics = new SAMSequenceDictionary[beds.length];
                    // reference sequence name -> index of the BED file (and output writer) it belongs to
                    final Map<String, Integer> outMap = new HashMap<String, Integer>();
                    final String out = bam.getName();
                    for (int i = 0; i < beds.length; i++) {
                        Set<String> bed_seq = new HashSet<String>();
                        // try-with-resources so the reader is released even if reading fails
                        try (BufferedReader br = new BufferedReader(new FileReader(beds[i]))) {
                            String line;
                            while ((line = br.readLine()) != null) {
                                String tmp = line.split("\\s+")[0];
                                bed_seq.add(tmp);
                                outMap.put(tmp, i);
                            }
                        }
                        // Build a reduced header: same VN/SO, read groups and program records,
                        // but only the sequences named in this BED file
                        final SAMFileHeader header_i = new SAMFileHeader();
                        final SAMSequenceDictionary seqdic_i = new SAMSequenceDictionary();
                        header_i.setAttribute("VN", header.getAttribute("VN"));
                        header_i.setAttribute("SO", header.getAttribute("SO"));
                        for (SAMSequenceRecord seq : seqdic.getSequences()) {
                            if (bed_seq.contains(seq.getSequenceName())) {
                                seqdic_i.addSequence(seq);
                            }
                        }
                        header_i.setSequenceDictionary(seqdic_i);
                        for (SAMReadGroupRecord rg : header.getReadGroups()) {
                            header_i.addReadGroup(rg);
                        }
                        for (SAMProgramRecord pg : header.getProgramRecords()) {
                            header_i.addProgramRecord(pg);
                        }
                        outputSam[i] = new SAMFileWriterFactory().makeSAMOrBAMWriter(header_i, true, new File(out_prefix[i] + "/" + out));
                        seqdics[i] = seqdic_i;
                    }
                    Set<String> refs = outMap.keySet();
                    String ref;
                    int f;
                    while (iter.hasNext()) {
                        SAMRecord rec = iter.next();
                        if (refs.contains(ref = rec.getReferenceName())) {
                            f = outMap.get(ref);
                            // Re-point the record at the sequence's index in the reduced dictionary
                            rec.setReferenceIndex(seqdics[f].getSequenceIndex(ref));
                            outputSam[f].addAlignment(rec);
                        }
                    }
                    iter.close();
                    inputSam.close();
                    for (int i = 0; i < outputSam.length; i++) {
                        outputSam[i].close();
                    }
                    myLogger.info(out + " return true");
                } catch (Exception e) {
                    // Report the failure and abort the whole program
                    Thread t = Thread.currentThread();
                    t.getUncaughtExceptionHandler().uncaughtException(t, e);
                    e.printStackTrace();
                    executor.shutdown();
                    System.exit(1);
                }
            }

            /**
             * Binds this task to the BAM file it should process.
             *
             * @param bam the input BAM file
             * @return this task, ready to be submitted
             */
            public Runnable init(File bam) {
                this.bam = bam;
                return (this);
            }
        }.init(bam));
    }
    this.waitFor();
}
Also used : SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) HashMap(java.util.HashMap) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) SAMProgramRecord(htsjdk.samtools.SAMProgramRecord) FilenameFilter(java.io.FilenameFilter) SamReader(htsjdk.samtools.SamReader) FileReader(java.io.FileReader) HashSet(java.util.HashSet) SamReaderFactory(htsjdk.samtools.SamReaderFactory) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) IOException(java.io.IOException) SAMRecord(htsjdk.samtools.SAMRecord) BufferedReader(java.io.BufferedReader) SAMFileHeader(htsjdk.samtools.SAMFileHeader) File(java.io.File)

Example 47 with SAMFileWriter

use of htsjdk.samtools.SAMFileWriter in project polyGembler by c-zhou.

the class TenXSamtools method runSort.

/**
 * Sorts the records of {@code bam_in} into {@code bam_out} using an external merge sort.
 * <p>
 * Records are read in batches of {@code batch_size}. Each batch is sorted with
 * {@code comprator} on the thread pool and written to a temporary file named
 * {@code bam_out} followed by an 8-digit batch number. After all batches are written, they are
 * merged into {@code bam_out} and the temporary files are deleted.
 * <p>
 * The header sort order is set to {@code coordinate} or {@code queryname} to match
 * {@code sort_order}; for {@code barcode} it is set to {@code unknown}.
 * <p>
 * The merge uses a priority queue of (record, batch index) pairs with ties broken on the batch
 * index. A map keyed by record alone would overwrite entries whenever the head records of two
 * batches compare equal, silently dropping records.
 */
private void runSort() {
    final SamReaderFactory factory = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS).validationStringency(ValidationStringency.SILENT);
    final SamReader inputSam = factory.open(new File(this.bam_in));
    final SAMFileHeader sort_header = inputSam.getFileHeader();
    switch(this.sort_order) {
        case coordinate:
            sort_header.setSortOrder(SortOrder.coordinate);
            break;
        case queryname:
            sort_header.setSortOrder(SortOrder.queryname);
            break;
        case barcode:
            sort_header.setSortOrder(SortOrder.unknown);
            break;
    }
    SAMRecordIterator iter = inputSam.iterator();
    long record_inCount = 0;
    SAMRecord[] buff = new SAMRecord[this.batch_size];
    int k = 0;
    // One-record look-ahead so the final, partially filled batch can be detected
    SAMRecord temp = iter.hasNext() ? iter.next() : null;
    this.initial_thread_pool();
    while (temp != null) {
        buff[k++] = temp;
        record_inCount++;
        temp = iter.hasNext() ? iter.next() : null;
        if (k == this.batch_size || temp == null) {
            executor.submit(new Runnable() {

                // The batch to sort; the last batch may have trailing null slots
                private SAMRecord[] records;

                @Override
                public void run() {
                    try {
                        Arrays.sort(records, comprator);
                        final SAMFileWriter outputSam;
                        synchronized (lock) {
                            // batch numbers are allocated under the lock so file names are unique
                            outputSam = new SAMFileWriterFactory().makeSAMOrBAMWriter(sort_header, true, new File(bam_out + String.format("%08d", batch++)));
                        }
                        int count = 0;
                        for (SAMRecord record : records) {
                            if (record != null) {
                                count++;
                                outputSam.addAlignment(record);
                            }
                        }
                        outputSam.close();
                        final String progress;
                        synchronized (lock) {
                            record_count += count;
                            // Read the shared counter while still holding the lock
                            progress = "[" + Thread.currentThread().getName() + "] " + record_count + " records processed.";
                        }
                        myLogger.info(progress);
                    } catch (Exception e) {
                        // Report the failure and abort the whole program
                        Thread t = Thread.currentThread();
                        t.getUncaughtExceptionHandler().uncaughtException(t, e);
                        e.printStackTrace();
                        executor.shutdown();
                        System.exit(1);
                    }
                }

                /**
                 * Binds this task to the batch it should sort and write.
                 *
                 * @param buff the batch buffer
                 * @return this task, ready to be submitted
                 */
                public Runnable init(SAMRecord[] buff) {
                    this.records = buff;
                    return (this);
                }
            }.init(buff));
            k = 0;
            // The submitted task owns the old buffer; start a fresh one
            buff = new SAMRecord[this.batch_size];
        }
    }
    iter.close();
    try {
        inputSam.close();
    } catch (IOException e) {
        e.printStackTrace();
    }
    myLogger.info(record_inCount + " records read from " + this.bam_in);
    this.waitFor();
    // merge all batches
    myLogger.info("Merge " + batch + " files.");
    final SAMFileWriter outputSam = new SAMFileWriterFactory().makeSAMOrBAMWriter(sort_header, true, new File(this.bam_out));
    final SamReader[] batchSam = new SamReader[batch];
    final SAMRecordIterator[] iterSam = new SAMRecordIterator[batch];
    // Heap of the current head record of every batch, paired with its batch index.
    // Ties under the sort comparator are broken on batch index so that equal records
    // from different batches are all retained.
    final java.util.PriorityQueue<Entry<SAMRecord, Integer>> heads = new java.util.PriorityQueue<Entry<SAMRecord, Integer>>(Math.max(1, batch), new java.util.Comparator<Entry<SAMRecord, Integer>>() {

        @Override
        public int compare(Entry<SAMRecord, Integer> a, Entry<SAMRecord, Integer> b) {
            final int c = comprator.compare(a.getKey(), b.getKey());
            return c != 0 ? c : Integer.compare(a.getValue(), b.getValue());
        }
    });
    for (int i = 0; i != batch; i++) {
        batchSam[i] = factory.open(new File(this.bam_out + String.format("%08d", i)));
        iterSam[i] = batchSam[i].iterator();
        if (iterSam[i].hasNext()) {
            heads.add(new java.util.AbstractMap.SimpleEntry<SAMRecord, Integer>(iterSam[i].next(), i));
        }
    }
    long record_outCount = 0;
    while (!heads.isEmpty()) {
        final Entry<SAMRecord, Integer> first = heads.poll();
        outputSam.addAlignment(first.getKey());
        record_outCount++;
        // Refill from the batch the emitted record came from
        final int bch = first.getValue();
        if (iterSam[bch].hasNext()) {
            heads.add(new java.util.AbstractMap.SimpleEntry<SAMRecord, Integer>(iterSam[bch].next(), bch));
        }
    }
    outputSam.close();
    for (int i = 0; i != batch; i++) {
        // Handle each batch separately so one failing close does not stop the cleanup of the rest
        try {
            iterSam[i].close();
            batchSam[i].close();
        } catch (IOException e) {
            e.printStackTrace();
        }
        new File(this.bam_out + String.format("%08d", i)).delete();
    }
    myLogger.info(record_outCount + " records written to " + this.bam_out);
}
Also used : SamReaderFactory(htsjdk.samtools.SamReaderFactory) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) IOException(java.io.IOException) TreeMap(java.util.TreeMap) IOException(java.io.IOException) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) File(java.io.File)

Example 48 with SAMFileWriter

use of htsjdk.samtools.SAMFileWriter in project gridss by PapenfussLab.

the class IntermediateFilesTest method createBAM.

/**
 * Writes the given records to {@code file} as a SAM/BAM with index creation enabled.
 * <p>
 * The writer is created with {@code presorted = true}. When the header declares coordinate
 * sort order, the records are first sorted through a {@link SortingCollection} (spilling to
 * the test folder if needed) so that they are written in coordinate order. For any other
 * sort order the records are written in the order given.
 *
 * @param file   destination file
 * @param header header to write; its sort order decides whether the records are pre-sorted
 * @param data   records to write
 */
public void createBAM(File file, SAMFileHeader header, SAMRecord... data) {
    // try-with-resources: the writer is closed even if sorting or writing throws
    try (SAMFileWriter writer = new SAMFileWriterFactory().setCreateIndex(true).makeSAMOrBAMWriter(header, true, file)) {
        if (header.getSortOrder() == SortOrder.coordinate) {
            SortingCollection<SAMRecord> presort = SortingCollection.newInstance(SAMRecord.class, new BAMRecordCodec(header), new SAMRecordCoordinateComparator(), 100000000, testFolder.getRoot());
            try {
                for (SAMRecord r : data) {
                    presort.add(r);
                }
                presort.doneAdding();
                for (SAMRecord r : presort) {
                    writer.addAlignment(r);
                }
            } finally {
                // Remove any temporary spill files created by the sorting collection
                presort.cleanup();
            }
        } else {
            for (SAMRecord r : data) {
                writer.addAlignment(r);
            }
        }
    }
}
Also used : SAMRecordCoordinateComparator(htsjdk.samtools.SAMRecordCoordinateComparator) BAMRecordCodec(htsjdk.samtools.BAMRecordCodec) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory)

Example 49 with SAMFileWriter

use of htsjdk.samtools.SAMFileWriter in project gridss by PapenfussLab.

the class SplitReadRealigner method mergeSupplementaryAlignment.

/**
 * Merges the records of {@code input} with the realigned split-read records in
 * {@code aligned} and writes the combined result to {@code output}.
 * <p>
 * Two working files are produced by the overloaded merge call: one written as pre-sorted and
 * one holding supplementary alignments written as not pre-sorted. If the input header
 * declares a sort order other than unsorted, the supplementary file is re-sorted to that
 * order. Both working files are then merged into {@code output}. All working files are
 * registered in {@code tmpFiles}.
 *
 * @param input   original input file
 * @param aligned files containing the realigned records
 * @param output  destination of the merged result
 * @throws IOException if reading, writing, or closing any of the files fails
 */
private void mergeSupplementaryAlignment(File input, List<File> aligned, File output) throws IOException {
    log.info("Merging split read alignments for ", output);
    final File supplementaryWork = FileSystemContext.getWorkingFileFor(output, "gridss.tmp.SplitReadAligner.sa.");
    final File primaryWork = FileSystemContext.getWorkingFileFor(output);
    tmpFiles.add(supplementaryWork);
    tmpFiles.add(primaryWork);
    final List<SamReader> alignedReaders = new ArrayList<>();
    final List<PeekingIterator<SAMRecord>> alignedIterators = new ArrayList<>();
    SAMFileHeader header;
    try (SamReader reader = readerFactory.open(input)) {
        header = reader.getFileHeader();
        // Open one reader and one buffered iterator per realigned file
        for (File alignedFile : aligned) {
            SamReader alignedReader = readerFactory.open(alignedFile);
            alignedReaders.add(alignedReader);
            NmTagIterator tagged = new NmTagIterator(alignedReader.iterator(), pc.getReference());
            alignedIterators.add(new AsyncBufferedIterator<>(tagged, alignedFile.getName()));
        }
        // Resources are closed in reverse order of declaration
        try (SAMFileWriter inputWriter = writerFactory.makeSAMOrBAMWriter(header, true, primaryWork);
             SAMFileWriter suppWriter = writerFactory.makeSAMOrBAMWriter(header, false, supplementaryWork);
             AsyncBufferedIterator<SAMRecord> bufferedIt = new AsyncBufferedIterator<>(new NmTagIterator(reader.iterator(), pc.getReference()), input.getName())) {
            mergeSupplementaryAlignment(bufferedIt, alignedIterators, inputWriter, suppWriter);
        }
    } finally {
        for (Iterator<SAMRecord> alignedIt : alignedIterators) {
            CloserUtil.close(alignedIt);
        }
        for (SamReader alignedReader : alignedReaders) {
            alignedReader.close();
        }
    }
    final boolean needsSort = header.getSortOrder() != null && header.getSortOrder() != SortOrder.unsorted;
    if (needsSort) {
        // Sort into a separate working file, then move it over the unsorted one
        final File supplementarySorted = FileSystemContext.getWorkingFileFor(output, "gridss.tmp.SplitReadAligner.sorted.sa.");
        tmpFiles.add(supplementarySorted);
        SAMFileUtil.sort(pc.getFileSystemContext(), supplementaryWork, supplementarySorted, header.getSortOrder());
        FileHelper.move(supplementarySorted, supplementaryWork, true);
    }
    SAMFileUtil.merge(ImmutableList.of(primaryWork, supplementaryWork), output);
}
Also used : NmTagIterator(au.edu.wehi.idsv.sam.NmTagIterator) SAMFileWriter(htsjdk.samtools.SAMFileWriter) ArrayList(java.util.ArrayList) AsyncBufferedIterator(au.edu.wehi.idsv.util.AsyncBufferedIterator) PeekingIterator(com.google.common.collect.PeekingIterator) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) File(java.io.File)

Example 50 with SAMFileWriter

use of htsjdk.samtools.SAMFileWriter in project gridss by PapenfussLab.

the class VcfBreakendToReadPair method writeVisualisationBam.

/**
 * Converts the directed breakpoints in a VCF into SAM records for visualisation.
 * <p>
 * Every variant that is a {@code VariantContextDirectedBreakpoint} is converted with
 * {@code asSamRecord}. Filtered breakpoints go to the working file for {@code bamFiltered}
 * and all others to the working file for {@code bam}. Both working files are then passed
 * through {@code FixMate} to produce the final outputs, and are deleted afterwards.
 *
 * @param pc          processing context supplying the writer factory and the basic SAM header
 * @param vcf         input VCF file
 * @param bam         output for breakpoints that are not filtered
 * @param bamFiltered output for filtered breakpoints
 * @throws IOException if reading or writing fails
 */
public void writeVisualisationBam(GenomicProcessingContext pc, File vcf, File bam, File bamFiltered) throws IOException {
    File working = FileSystemContext.getWorkingFileFor(bam);
    File workingFiltered = FileSystemContext.getWorkingFileFor(bamFiltered);
    VCFFileReader vcfReader = new VCFFileReader(vcf, false);
    CloseableIterator<VariantContext> it = vcfReader.iterator();
    SAMFileWriter writer = null;
    SAMFileWriter writerFiltered = null;
    try {
        SAMFileWriterFactory factory = pc.getSamFileWriterFactory(true);
        SAMFileHeader header = pc.getBasicSamHeader();
        writer = factory.makeSAMOrBAMWriter(header, false, working);
        writerFiltered = factory.makeSAMOrBAMWriter(header, false, workingFiltered);
        while (it.hasNext()) {
            IdsvVariantContext variant = IdsvVariantContext.create(pc, null, it.next());
            if (variant instanceof VariantContextDirectedBreakpoint) {
                VariantContextDirectedBreakpoint bp = (VariantContextDirectedBreakpoint) variant;
                if (bp.isFiltered()) {
                    writerFiltered.addAlignment(bp.asSamRecord(header));
                } else {
                    writer.addAlignment(bp.asSamRecord(header));
                }
            }
        }
        // The writers must be closed before FixMate reads the working files.
        // Null each reference after closing so the finally block does not close it a second time.
        writer.close();
        writer = null;
        writerFiltered.close();
        writerFiltered = null;
        // Correct mate pairing since asSAMRecord() does not factor in mate anchor cigar
        new FixMate().fix(working, bam);
        new FixMate().fix(workingFiltered, bamFiltered);
    } finally {
        // Only writers still open (i.e. a failure occurred before the explicit close) are non-null here
        CloserUtil.close(writer);
        CloserUtil.close(writerFiltered);
        CloserUtil.close(it);
        CloserUtil.close(vcfReader);
        FileHelper.delete(working, true);
        FileHelper.delete(workingFiltered, true);
    }
}
Also used : SAMFileWriter(htsjdk.samtools.SAMFileWriter) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VariantContext(htsjdk.variant.variantcontext.VariantContext) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) SAMFileHeader(htsjdk.samtools.SAMFileHeader) File(java.io.File)

Aggregations

SAMFileWriter (htsjdk.samtools.SAMFileWriter)76 SAMRecord (htsjdk.samtools.SAMRecord)63 SAMFileHeader (htsjdk.samtools.SAMFileHeader)55 SamReader (htsjdk.samtools.SamReader)55 SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)46 File (java.io.File)40 SAMFileWriterFactory (htsjdk.samtools.SAMFileWriterFactory)27 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)25 IOException (java.io.IOException)22 ArrayList (java.util.ArrayList)20 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)14 Cigar (htsjdk.samtools.Cigar)13 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)13 CigarElement (htsjdk.samtools.CigarElement)12 SamReaderFactory (htsjdk.samtools.SamReaderFactory)12 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)10 Interval (htsjdk.samtools.util.Interval)9 PrintWriter (java.io.PrintWriter)9 List (java.util.List)9 HashMap (java.util.HashMap)8