Search in sources :

Example 41 with SamReaderFactory

use of htsjdk.samtools.SamReaderFactory in project polyGembler by c-zhou.

the class TenXSamtools method runSort.

private void runSort() {
    // TODO Auto-generated method stub
    final SamReaderFactory factory = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS).validationStringency(ValidationStringency.SILENT);
    final SamReader inputSam = factory.open(new File(this.bam_in));
    final SAMFileHeader sort_header = inputSam.getFileHeader();
    switch(this.sort_order) {
        case coordinate:
            sort_header.setSortOrder(SortOrder.coordinate);
            break;
        case queryname:
            sort_header.setSortOrder(SortOrder.queryname);
            break;
        case barcode:
            sort_header.setSortOrder(SortOrder.unknown);
            break;
    }
    SAMRecordIterator iter = inputSam.iterator();
    long record_inCount = 0;
    SAMRecord[] buff = new SAMRecord[this.batch_size];
    int k = 0;
    SAMRecord temp = iter.hasNext() ? iter.next() : null;
    this.initial_thread_pool();
    while (temp != null) {
        buff[k++] = temp;
        record_inCount++;
        temp = iter.hasNext() ? iter.next() : null;
        if (k == this.batch_size || temp == null) {
            executor.submit(new Runnable() {

                private SAMRecord[] records;

                @Override
                public void run() {
                    // TODO Auto-generated method stub
                    try {
                        Arrays.sort(records, comprator);
                        final SAMFileWriter outputSam;
                        synchronized (lock) {
                            outputSam = new SAMFileWriterFactory().makeSAMOrBAMWriter(sort_header, true, new File(bam_out + String.format("%08d", batch++)));
                        }
                        int count = 0;
                        for (SAMRecord record : records) {
                            if (record != null) {
                                count++;
                                outputSam.addAlignment(record);
                            }
                        }
                        outputSam.close();
                        synchronized (lock) {
                            record_count += count;
                        }
                        myLogger.info("[" + Thread.currentThread().getName() + "] " + record_count + " records processed.");
                    } catch (Exception e) {
                        Thread t = Thread.currentThread();
                        t.getUncaughtExceptionHandler().uncaughtException(t, e);
                        e.printStackTrace();
                        executor.shutdown();
                        System.exit(1);
                    }
                }

                public Runnable init(SAMRecord[] buff) {
                    // TODO Auto-generated method stub
                    this.records = buff;
                    return (this);
                }
            }.init(buff));
            k = 0;
            buff = new SAMRecord[this.batch_size];
        }
    }
    iter.close();
    try {
        inputSam.close();
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
    myLogger.info(record_inCount + " records read from " + this.bam_in);
    this.waitFor();
    // merge all batches
    myLogger.info("Merge " + batch + " files.");
    final SAMFileWriter outputSam = new SAMFileWriterFactory().makeSAMOrBAMWriter(sort_header, true, new File(this.bam_out));
    final SamReader[] batchSam = new SamReader[batch];
    final SAMRecordIterator[] iterSam = new SAMRecordIterator[batch];
    final boolean[] reachFileEnd = new boolean[batch];
    final TreeMap<SAMRecord, Integer> treeMap = new TreeMap<SAMRecord, Integer>(this.comprator);
    for (int i = 0; i != batch; i++) {
        batchSam[i] = factory.open(new File(this.bam_out + String.format("%08d", i)));
        iterSam[i] = batchSam[i].iterator();
        if (iterSam[i].hasNext())
            treeMap.put(iterSam[i].next(), i);
        reachFileEnd[i] = !iterSam[i].hasNext();
    }
    Entry<SAMRecord, Integer> firstEntry;
    int bch, nReachFileEnd = 0;
    for (boolean b : reachFileEnd) if (b)
        nReachFileEnd++;
    long record_outCount = 0;
    while (!treeMap.isEmpty()) {
        firstEntry = treeMap.pollFirstEntry();
        outputSam.addAlignment(firstEntry.getKey());
        record_outCount++;
        bch = firstEntry.getValue();
        if (!reachFileEnd[bch]) {
            treeMap.put(iterSam[bch].next(), bch);
            if (!iterSam[bch].hasNext()) {
                reachFileEnd[bch] = true;
                nReachFileEnd++;
            }
        }
        if (treeMap.isEmpty() && nReachFileEnd != batch) {
            for (int i = 0; i != batch; i++) {
                if (!reachFileEnd[i]) {
                    treeMap.put(iterSam[i].next(), i);
                    if (!iterSam[i].hasNext()) {
                        reachFileEnd[i] = true;
                        nReachFileEnd++;
                    }
                }
            }
        }
    }
    try {
        outputSam.close();
        for (int i = 0; i != batch; i++) {
            iterSam[i].close();
            batchSam[i].close();
            new File(this.bam_out + String.format("%08d", i)).delete();
        }
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
    myLogger.info(record_outCount + " records written to " + this.bam_out);
}
Also used : SamReaderFactory(htsjdk.samtools.SamReaderFactory) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) IOException(java.io.IOException) TreeMap(java.util.TreeMap) IOException(java.io.IOException) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) File(java.io.File)

Example 42 with SamReaderFactory

use of htsjdk.samtools.SamReaderFactory in project polyGembler by c-zhou.

the class AssemblyGraph method validateAssemblyGraph.

private static void validateAssemblyGraph(String query_file, String bam_in) {
    final SamReaderFactory factory = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS).validationStringency(ValidationStringency.SILENT);
    qry_seqs = Sequence.parseFastaFileWithRevCmpAsMap(query_file);
    final SamReader in1 = factory.open(new File(bam_in));
    SAMRecordIterator iter1 = in1.iterator();
    final List<SAMRecord> buffered_records = new ArrayList<SAMRecord>();
    SAMRecord tmp_record1 = iter1.hasNext() ? iter1.next() : null;
    String record_id = tmp_record1.getReadName();
    SAMRecord r1, r2;
    int i1, i2;
    StringBuilder oos = new StringBuilder();
    String source, sink;
    final BFSShortestPath<Integer, DefaultWeightedEdge> bfs = new BFSShortestPath<Integer, DefaultWeightedEdge>(assembly_graph);
    while (tmp_record1 != null) {
        buffered_records.clear();
        buffered_records.add(tmp_record1);
        while ((tmp_record1 = iter1.hasNext() ? iter1.next() : null) != null && tmp_record1.getReadName().equals(record_id)) {
            buffered_records.add(tmp_record1);
        }
        if (buffered_records.size() == 1) {
            if (tmp_record1 != null)
                record_id = tmp_record1.getReadName();
            continue;
        }
        // sort records by head clipping size
        buffered_records.sort(new Comparator<SAMRecord>() {

            @Override
            public int compare(SAMRecord r0, SAMRecord r1) {
                // TODO Auto-generated method stub
                return Integer.compare(head_clip(r0), head_clip(r1));
            }

            private int head_clip(SAMRecord r) {
                CigarElement cigar = r.getReadNegativeStrandFlag() ? r.getCigar().getLastCigarElement() : r.getCigar().getFirstCigarElement();
                return cigar.getOperator().isClipping() ? cigar.getLength() : 0;
            }
        });
        // calculate distance for adjacent alignment record pairs
        r1 = buffered_records.get(0);
        // for writing
        oos.setLength(0);
        oos.append(record_id);
        oos.append("[");
        oos.append(buffered_records.size());
        oos.append(", ");
        oos.append(readLength(r1));
        // find reference index
        source = r1.getReferenceName() + (r1.getReadNegativeStrandFlag() ? "'" : "");
        i1 = contig_coordinate.getKey(source);
        oos.append(", ");
        oos.append(source);
        oos.append("] ");
        for (int i = 1; i < buffered_records.size(); i++) {
            r2 = buffered_records.get(i);
            sink = r2.getReferenceName() + (r2.getReadNegativeStrandFlag() ? "'" : "");
            i2 = contig_coordinate.getKey(sink);
            oos.append(", (");
            GraphPath<Integer, DefaultWeightedEdge> e = bfs.getPath(i1, i2);
            oos.append(sink);
            oos.append(",");
            oos.append(e == null ? "Inf" : e.getLength());
            oos.append(",");
            oos.append(pathLength(e));
            oos.append(")");
            i1 = i2;
        }
        oos.append("\n");
        System.out.println(oos.toString());
        if (tmp_record1 != null)
            record_id = tmp_record1.getReadName();
    }
    try {
        iter1.close();
        in1.close();
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
}
Also used : SamReaderFactory(htsjdk.samtools.SamReaderFactory) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) ArrayList(java.util.ArrayList) IOException(java.io.IOException) CigarElement(htsjdk.samtools.CigarElement) DefaultWeightedEdge(org.jgrapht.graph.DefaultWeightedEdge) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) BFSShortestPath(cz1.ngs.model.BFSShortestPath) File(java.io.File)

Example 43 with SamReaderFactory

use of htsjdk.samtools.SamReaderFactory in project gridss by PapenfussLab.

the class ReadsToBedpe method doWork.

@Override
protected int doWork() {
    log.debug("Setting language-neutral locale");
    java.util.Locale.setDefault(Locale.ROOT);
    validateParameters();
    SamReaderFactory readerFactory = SamReaderFactory.make();
    try {
        try (SamReader reader = readerFactory.open(INPUT)) {
            SAMFileHeader header = reader.getFileHeader();
            SAMSequenceDictionary dict = header.getSequenceDictionary();
            // ExecutorService threadpool = Executors.newFixedThreadPool(WORKER_THREADS, new ThreadFactoryBuilder().setDaemon(false).setNameFormat("Worker-%d").build());
            try (CloseableIterator<SAMRecord> rawit = new AsyncBufferedIterator<SAMRecord>(reader.iterator(), 3, 64)) {
                ProgressLoggingSAMRecordIterator logit = new ProgressLoggingSAMRecordIterator(rawit, new ProgressLogger(log));
                // ParallelTransformIterator<SAMRecord, List<String>> it = new ParallelTransformIterator<>(logit, r -> asBedPe(dict, r), 16 + 2 * WORKER_THREADS, threadpool);
                Iterator<List<String>> it = Iterators.transform(logit, r -> asBedPe(dict, r));
                int i = 0;
                try (BufferedWriter writer = new BufferedWriter(new FileWriter(OUTPUT))) {
                    while (it.hasNext()) {
                        for (String line : it.next()) {
                            if (line != null) {
                                writer.write(line);
                                writer.write('\n');
                            }
                        }
                        i++;
                    }
                    if (i % 1000 == 0) {
                        writer.flush();
                    }
                }
            }
        }
    } catch (IOException e) {
        log.error(e);
        return -1;
    }
    return 0;
}
Also used : SamReaderFactory(htsjdk.samtools.SamReaderFactory) FileWriter(java.io.FileWriter) AsyncBufferedIterator(au.edu.wehi.idsv.util.AsyncBufferedIterator) ProgressLogger(htsjdk.samtools.util.ProgressLogger) IOException(java.io.IOException) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) DirectedBreakpoint(au.edu.wehi.idsv.DirectedBreakpoint) BufferedWriter(java.io.BufferedWriter) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) ArrayList(java.util.ArrayList) List(java.util.List) ProgressLoggingSAMRecordIterator(au.edu.wehi.idsv.ProgressLoggingSAMRecordIterator) SAMFileHeader(htsjdk.samtools.SAMFileHeader)

Example 44 with SamReaderFactory

use of htsjdk.samtools.SamReaderFactory in project gridss by PapenfussLab.

the class SoftClipsToSplitReads method doWork.

@Override
protected int doWork() {
    log.debug("Setting language-neutral locale");
    java.util.Locale.setDefault(Locale.ROOT);
    validateParameters();
    GenomicProcessingContext pc = new GenomicProcessingContext(getFileSystemContext(), REFERENCE_SEQUENCE, getReference());
    pc.setCommandLineProgram(this);
    pc.setFilterDuplicates(IGNORE_DUPLICATES);
    SplitReadRealigner realigner = new SplitReadRealigner(pc);
    realigner.setMinSoftClipLength(MIN_CLIP_LENGTH);
    realigner.setMinSoftClipQuality(MIN_CLIP_QUAL);
    realigner.setProcessSecondaryAlignments(PROCESS_SECONDARY_ALIGNMENTS);
    realigner.setWorkerThreads(WORKER_THREADS);
    try {
        SamReaderFactory readerFactory = SamReaderFactory.make();
        SAMFileWriterFactory writerFactory = new SAMFileWriterFactory();
        if (ALIGNER_STREAMING) {
            ExternalProcessStreamingAligner aligner = new ExternalProcessStreamingAligner(readerFactory, ALIGNER_COMMAND_LINE, REFERENCE_SEQUENCE, WORKER_THREADS);
            realigner.createSupplementaryAlignments(aligner, INPUT, OUTPUT);
        } else {
            ExternalProcessFastqAligner aligner = new ExternalProcessFastqAligner(readerFactory, writerFactory, ALIGNER_COMMAND_LINE);
            realigner.createSupplementaryAlignments(aligner, INPUT, OUTPUT);
        }
    } catch (IOException e) {
        log.error(e);
        return -1;
    }
    return 0;
}
Also used : ExternalProcessStreamingAligner(au.edu.wehi.idsv.alignment.ExternalProcessStreamingAligner) SplitReadRealigner(au.edu.wehi.idsv.SplitReadRealigner) SamReaderFactory(htsjdk.samtools.SamReaderFactory) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) IOException(java.io.IOException) ExternalProcessFastqAligner(au.edu.wehi.idsv.alignment.ExternalProcessFastqAligner) GenomicProcessingContext(au.edu.wehi.idsv.GenomicProcessingContext)

Example 45 with SamReaderFactory

use of htsjdk.samtools.SamReaderFactory in project gridss by PapenfussLab.

the class SubsetToMissing method doWork.

@Override
protected int doWork() {
    long stop = Long.MAX_VALUE;
    if (STOP_AFTER != null && (long) STOP_AFTER > 0) {
        stop = STOP_AFTER;
    }
    log.debug("Setting language-neutral locale");
    java.util.Locale.setDefault(Locale.ROOT);
    if (TMP_DIR == null || TMP_DIR.size() == 0) {
        TMP_DIR = Lists.newArrayList(new File("."));
    }
    SamReaderFactory factory = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT);
    SamReader input = factory.open(INPUT);
    Iterator<SAMRecord> intputit = new AsyncBufferedIterator<SAMRecord>(input.iterator(), 2, 16384);
    SAMFileWriter out = new SAMFileWriterFactory().makeSAMOrBAMWriter(input.getFileHeader(), true, OUTPUT);
    LongSet hashtable;
    if (PREALLOCATE != null) {
        log.info("Preallocating hash table");
        hashtable = new LongOpenHashBigSet(PREALLOCATE);
    } else {
        hashtable = new LongOpenHashBigSet();
    }
    for (File file : LOOKUP) {
        log.info("Loading lookup hashes for " + file.getAbsolutePath());
        SamReader lookup = factory.open(file);
        AsyncBufferedIterator<SAMRecord> it = new AsyncBufferedIterator<SAMRecord>(lookup.iterator(), 2, 16384);
        File cache = new File(file.getAbsolutePath() + ".SubsetToMissing.cache");
        if (cache.exists()) {
            log.info("Loading lookup hashes from cache");
            long n = stop;
            DataInputStream dis = null;
            try {
                long loadCount = 0;
                dis = new DataInputStream(new BufferedInputStream(new FileInputStream(cache)));
                while (n-- > 0) {
                    hashtable.add(dis.readLong());
                    if (loadCount % 10000000 == 0) {
                        log.info(String.format("Loaded %d from cache", loadCount));
                    }
                }
            } catch (EOFException e) {
                try {
                    if (dis != null)
                        dis.close();
                } catch (IOException e1) {
                    log.error(e1);
                }
            } catch (IOException e) {
                log.error(e);
            }
        } else {
            long n = stop;
            ProgressLoggingSAMRecordIterator loggedit = new ProgressLoggingSAMRecordIterator(it, new ProgressLogger(log));
            try {
                DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(cache)));
                while (loggedit.hasNext() && n-- > 0) {
                    long recordhash = hash(loggedit.next());
                    hashtable.add(recordhash);
                    dos.writeLong(recordhash);
                }
                dos.close();
            } catch (Exception e) {
                log.error(e, "Failed to load lookup. Running with partial results");
            }
            loggedit.close();
        }
        it.close();
    }
    long filtered = 0;
    log.info("Processing input");
    intputit = new ProgressLoggingSAMRecordIterator(intputit, new ProgressLogger(log));
    long n = stop;
    while (intputit.hasNext() && n-- > 0) {
        SAMRecord r = intputit.next();
        if (!hashtable.contains(hash(r))) {
            out.addAlignment(r);
        } else {
            filtered++;
            if (filtered % 1000000 == 0) {
                log.info(String.format("Filtered %d reads", filtered));
            }
        }
    }
    log.info("Closing output");
    out.close();
    return 0;
}
Also used : SamReaderFactory(htsjdk.samtools.SamReaderFactory) SAMFileWriter(htsjdk.samtools.SAMFileWriter) DataOutputStream(java.io.DataOutputStream) LongSet(it.unimi.dsi.fastutil.longs.LongSet) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) ProgressLogger(htsjdk.samtools.util.ProgressLogger) IOException(java.io.IOException) DataInputStream(java.io.DataInputStream) FileInputStream(java.io.FileInputStream) IOException(java.io.IOException) EOFException(java.io.EOFException) SamReader(htsjdk.samtools.SamReader) LongOpenHashBigSet(it.unimi.dsi.fastutil.longs.LongOpenHashBigSet) BufferedInputStream(java.io.BufferedInputStream) SAMRecord(htsjdk.samtools.SAMRecord) FileOutputStream(java.io.FileOutputStream) EOFException(java.io.EOFException) ProgressLoggingSAMRecordIterator(au.edu.wehi.idsv.ProgressLoggingSAMRecordIterator) File(java.io.File) BufferedOutputStream(java.io.BufferedOutputStream)

Aggregations

SamReaderFactory (htsjdk.samtools.SamReaderFactory)57 SamReader (htsjdk.samtools.SamReader)51 File (java.io.File)43 SAMRecord (htsjdk.samtools.SAMRecord)27 IOException (java.io.IOException)26 SAMFileHeader (htsjdk.samtools.SAMFileHeader)18 SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)17 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)17 IndexedFastaSequenceFile (htsjdk.samtools.reference.IndexedFastaSequenceFile)14 ArrayList (java.util.ArrayList)13 List (java.util.List)11 SAMFileWriterFactory (htsjdk.samtools.SAMFileWriterFactory)10 HashSet (java.util.HashSet)10 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)9 SAMFileWriter (htsjdk.samtools.SAMFileWriter)8 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)8 URL (java.net.URL)8 HashMap (java.util.HashMap)8 BufferedReader (java.io.BufferedReader)7 PrintWriter (java.io.PrintWriter)7