Search in sources :

Example 31 with SamReader

use of htsjdk.samtools.SamReader in project hmftools by hartwigmedical.

the class BamSlicerApplication method sliceFromURLs.

private static void sliceFromURLs(@NotNull final URL indexUrl, @NotNull final URL bamUrl, @NotNull final CommandLine cmd) throws IOException {
    final File indexFile = downloadIndex(indexUrl);
    indexFile.deleteOnExit();
    final SamReader reader = SamReaderFactory.makeDefault().open(SamInputResource.of(bamUrl).index(indexFile));
    final SAMFileWriter writer = new SAMFileWriterFactory().setCreateIndex(true).makeBAMWriter(reader.getFileHeader(), true, new File(cmd.getOptionValue(OUTPUT)));
    final BAMIndex bamIndex = new DiskBasedBAMFileIndex(indexFile, reader.getFileHeader().getSequenceDictionary(), false);
    final Optional<Pair<QueryInterval[], BAMFileSpan>> queryIntervalsAndSpan = queryIntervalsAndSpan(reader, bamIndex, cmd);
    final Optional<Chunk> unmappedChunk = getUnmappedChunk(bamIndex, HttpUtils.getHeaderField(bamUrl, "Content-Length"), cmd);
    final List<Chunk> sliceChunks = sliceChunks(queryIntervalsAndSpan, unmappedChunk);
    final SamReader cachingReader = createCachingReader(indexFile, bamUrl, cmd, sliceChunks);
    queryIntervalsAndSpan.ifPresent(pair -> {
        LOGGER.info("Slicing bam on bed regions...");
        final CloseableIterator<SAMRecord> bedIterator = getIterator(cachingReader, pair.getKey(), pair.getValue().toCoordinateArray());
        writeToSlice(writer, bedIterator);
        LOGGER.info("Done writing bed slices.");
    });
    unmappedChunk.ifPresent(chunk -> {
        LOGGER.info("Slicing unmapped reads...");
        final CloseableIterator<SAMRecord> unmappedIterator = cachingReader.queryUnmapped();
        writeToSlice(writer, unmappedIterator);
        LOGGER.info("Done writing unmapped reads.");
    });
    reader.close();
    writer.close();
    cachingReader.close();
}
Also used : SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) QueryInterval(htsjdk.samtools.QueryInterval) Chunk(htsjdk.samtools.Chunk) SamReader(htsjdk.samtools.SamReader) DiskBasedBAMFileIndex(htsjdk.samtools.DiskBasedBAMFileIndex) SAMRecord(htsjdk.samtools.SAMRecord) BAMIndex(htsjdk.samtools.BAMIndex) File(java.io.File) Pair(org.apache.commons.lang3.tuple.Pair)

Example 32 with SamReader

use of htsjdk.samtools.SamReader in project polyGembler by c-zhou.

the class SamFileExtract method run.

@Override
public void run() {
    // TODO Auto-generated method stub
    final SamReaderFactory factory = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS).validationStringency(ValidationStringency.SILENT);
    final SamReader inputSam = factory.open(new File(bam_in));
    final SAMFileHeader header = inputSam.getFileHeader();
    final SAMSequenceDictionary seqdic = header.getSequenceDictionary();
    final SAMFileHeader header_out = new SAMFileHeader();
    final SAMSequenceDictionary seqdic_out = new SAMSequenceDictionary();
    SAMRecordIterator iter = inputSam.iterator();
    File bed_file = new File(bed_in);
    final Set<String> extract = new HashSet<String>();
    try (BufferedReader br = new BufferedReader(new FileReader(bed_file))) {
        String line;
        while ((line = br.readLine()) != null) extract.add(line.split("\\s+")[0]);
    } catch (IOException e) {
        e.printStackTrace();
        System.exit(1);
    }
    header_out.setAttribute("VN", header.getAttribute("VN"));
    header_out.setAttribute("SO", header.getAttribute("SO"));
    List<SAMSequenceRecord> seqs = seqdic.getSequences();
    for (SAMSequenceRecord seq : seqs) if (extract.contains(seq.getSequenceName()))
        seqdic_out.addSequence(seq);
    header_out.setSequenceDictionary(seqdic_out);
    for (SAMReadGroupRecord rg : header.getReadGroups()) header_out.addReadGroup(rg);
    for (SAMProgramRecord pg : header.getProgramRecords()) header_out.addProgramRecord(pg);
    final SAMFileWriter outputSam = new SAMFileWriterFactory().makeSAMOrBAMWriter(header_out, true, new File(bam_out));
    while (iter.hasNext()) {
        SAMRecord rec = iter.next();
        if (extract.contains(rec.getReferenceName()))
            outputSam.addAlignment(rec);
    }
    iter.close();
    try {
        inputSam.close();
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
    outputSam.close();
    System.err.println(bam_in + " return true");
}
Also used : SamReaderFactory(htsjdk.samtools.SamReaderFactory) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) IOException(java.io.IOException) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) SAMProgramRecord(htsjdk.samtools.SAMProgramRecord) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) BufferedReader(java.io.BufferedReader) FileReader(java.io.FileReader) SAMFileHeader(htsjdk.samtools.SAMFileHeader) File(java.io.File) HashSet(java.util.HashSet)

Example 33 with SamReader

use of htsjdk.samtools.SamReader in project polyGembler by c-zhou.

the class SamFileFilter method run.

@Override
public void run() {
    // TODO Auto-generated method stub
    File folder = new File(input_dir);
    File[] listOfFiles = folder.listFiles(new FilenameFilter() {

        @Override
        public boolean accept(File dir, String name) {
            return name.endsWith(".bam");
        }
    });
    int bam_n = listOfFiles.length;
    this.initial_thread_pool();
    for (int i = 0; i < bam_n; i++) {
        executor.submit(new Runnable() {

            private String bam_file;

            @Override
            public void run() {
                // TODO Auto-generated method stub
                try {
                    final SamReaderFactory factory = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS).validationStringency(ValidationStringency.SILENT);
                    final SamReader inputSam = factory.open(new File(input_dir + "/" + bam_file));
                    final SAMFileWriter outputSam = new SAMFileWriterFactory().makeSAMOrBAMWriter(inputSam.getFileHeader(), true, new File(output_dir + "/" + bam_file));
                    final BufferedReader filter_br = Utils.getBufferedReader(filter_file);
                    SAMRecordIterator iter = inputSam.iterator();
                    String line = filter_br.readLine();
                    if (line == null) {
                        filter_br.close();
                        iter.close();
                        inputSam.close();
                        outputSam.close();
                        return;
                    }
                    String[] s = line.split("\\s+");
                    String chr = s[0];
                    long end_pos = Long.parseLong(s[1]);
                    SAMRecord samr;
                    while (iter.hasNext()) {
                        samr = iter.next();
                        if (samr.getReferenceName().equals(chr) && samr.getAlignmentEnd() < end_pos) {
                            outputSam.addAlignment(samr);
                            continue;
                        }
                        if (!samr.getReferenceName().equals(chr) || samr.getAlignmentStart() > end_pos) {
                            line = null;
                            while ((line = filter_br.readLine()) != null && end_pos < samr.getAlignmentStart()) {
                                s = line.split("\\s");
                                chr = s[0];
                                end_pos = Long.parseLong(s[1]);
                            }
                            if (line != null) {
                                if (samr.getReferenceName().equals(chr) && samr.getAlignmentEnd() < end_pos)
                                    outputSam.addAlignment(samr);
                            } else {
                                outputSam.addAlignment(samr);
                                while (iter.hasNext()) outputSam.addAlignment(iter.next());
                                break;
                            }
                        }
                    }
                    filter_br.close();
                    iter.close();
                    inputSam.close();
                    outputSam.close();
                } catch (Exception e) {
                    Thread t = Thread.currentThread();
                    t.getUncaughtExceptionHandler().uncaughtException(t, e);
                    e.printStackTrace();
                    executor.shutdown();
                    System.exit(1);
                }
            }

            public Runnable init(String bam_file) {
                // TODO Auto-generated method stub
                this.bam_file = bam_file;
                return this;
            }
        }.init(listOfFiles[i].getName()));
        throw new RuntimeException("incorrect tools!!!!");
    }
    this.waitFor();
}
Also used : SamReaderFactory(htsjdk.samtools.SamReaderFactory) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) FilenameFilter(java.io.FilenameFilter) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) BufferedReader(java.io.BufferedReader) File(java.io.File)

Example 34 with SamReader

use of htsjdk.samtools.SamReader in project polyGembler by c-zhou.

the class Consensus method parseDataLibrary.

private void parseDataLibrary(String[] args) {
    // TODO Auto-generated method stub
    final Map<Integer, String> bamLib = new HashMap<Integer, String>();
    final Map<Integer, Integer> insLib = new HashMap<Integer, Integer>();
    final Map<Integer, Integer> wLib = new HashMap<Integer, Integer>();
    for (int i = 0; i < args.length; i++) {
        String arg = args[i];
        if (arg.matches(bam_lib)) {
            Matcher m = bam_pat.matcher(arg);
            m.find();
            int lib = Integer.parseInt(m.group(1));
            bamLib.put(lib, args[++i]);
        }
        if (arg.matches(ins_lib)) {
            Matcher m = ins_pat.matcher(arg);
            m.find();
            int lib = Integer.parseInt(m.group(1));
            insLib.put(lib, Integer.parseInt(args[++i]));
        }
        if (arg.matches(w_lib)) {
            Matcher m = w_pat.matcher(arg);
            m.find();
            int lib = Integer.parseInt(m.group(1));
            wLib.put(lib, Integer.parseInt(args[++i]));
        }
    }
    int n = bamLib.size();
    if (wLib.size() != n) {
        printUsage();
        throw new IllegalArgumentException("Library parameters do not match!!!");
    }
    this.bam_list = new String[n];
    this.ins_thres = new int[n];
    this.link_w = new double[n];
    this.read_paired = new boolean[n];
    int i = 0;
    for (Integer key : bamLib.keySet()) {
        bam_list[i] = bamLib.get(key);
        final SamReader in1 = factory.open(new File(bam_list[i]));
        SAMRecordIterator iter1 = in1.iterator();
        SAMRecord record;
        while (iter1.hasNext()) {
            record = iter1.next();
            if (!record.getReadUnmappedFlag()) {
                if (record.getReadPairedFlag())
                    read_paired[i] = true;
                break;
            }
        }
        try {
            iter1.close();
            in1.close();
        } catch (IOException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
        if (!insLib.containsKey(key) && read_paired[i] || !wLib.containsKey(key)) {
            printUsage();
            throw new IllegalArgumentException("Library parameters do not match!!!");
        }
        ins_thres[i] = read_paired[i] ? insLib.get(key) : Integer.MAX_VALUE;
        link_w[i] = 1.0 / wLib.get(key);
        i++;
    }
}
Also used : SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) HashMap(java.util.HashMap) Matcher(java.util.regex.Matcher) IOException(java.io.IOException) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) File(java.io.File)

Example 35 with SamReader

use of htsjdk.samtools.SamReader in project polyGembler by c-zhou.

the class SamToTaxa method distributeSortedBam.

private void distributeSortedBam() {
    // TODO Auto-generated method stub
    long start = System.currentTimeMillis();
    try {
        File indexFile = new File(myIndexFileName);
        myLogger.info("Using the following index file:");
        myLogger.info(indexFile.getAbsolutePath());
        File samFile = new File(mySamFileName);
        myLogger.info("Using the following BAM file:");
        myLogger.info(samFile.getAbsolutePath());
        final SamReaderFactory factory = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS).validationStringency(ValidationStringency.SILENT);
        final SamReader inputSam = factory.open(samFile);
        header_sam = inputSam.getFileHeader();
        header_sam.setSortOrder(SortOrder.unsorted);
        indexReader = Utils.getBufferedReader(indexFile, 65536);
        String line = indexReader.readLine();
        String[] samples = line.split("\\s+");
        for (int i = 1; i < samples.length; i++) {
            String taxa = samples[i];
            SAMFileHeader header = header_sam.clone();
            SAMReadGroupRecord rg = new SAMReadGroupRecord(taxa);
            rg.setSample(taxa);
            header.addReadGroup(rg);
            bam_writers.put(taxa, new SAMFileWriterFactory().makeSAMOrBAMWriter(header, true, new File(myOutputDir + System.getProperty("file.separator") + taxa + ".bam")));
            locks.put(taxa, new Object());
        }
        SAMRecordIterator iter = inputSam.iterator();
        cache();
        this.initial_thread_pool();
        final int block = 10000;
        int bS = 0;
        SAMRecord[] Qs = new SAMRecord[block];
        SAMRecord temp = iter.next();
        int k = 0;
        allReads = 0;
        long tag;
        while (temp != null) {
            allReads++;
            Qs[k] = temp;
            temp = iter.hasNext() ? iter.next() : null;
            k++;
            tag = temp == null ? 0 : Long.parseLong(temp.getReadName());
            if (k == block || temp == null || tag >= cursor) {
                executor.submit(new Runnable() {

                    private SAMRecord[] sam;

                    private int block_i;

                    @Override
                    public void run() {
                        // TODO Auto-generated method stub
                        long tag;
                        Tag tagObj;
                        String taxa;
                        for (int i = 0; i < sam.length; i++) {
                            try {
                                if (sam[i] == null)
                                    break;
                                tag = Long.parseLong(sam[i].getReadName());
                                tagObj = indexMap.get(tag);
                                int l = tagObj.taxa.length;
                                long cov = 0;
                                for (int t = 0; t < l; t++) cov += tagObj.count[t];
                                if (cov > maxCov) {
                                    myLogger.info("?tag id, " + tag + "::tag sequence, " + sam[i].getReadString() + "::aligned to " + sam[i].getReferenceName() + ":" + sam[i].getAlignmentStart() + "-" + sam[i].getAlignmentEnd() + "::omitted due to abnormal high coverage, " + cov);
                                    continue;
                                }
                                for (int t = 0; t < l; t++) {
                                    taxa = tagObj.taxa[t];
                                    sam[i].setAttribute("RG", taxa);
                                    int p = tagObj.count[t];
                                    synchronized (locks.get(taxa)) {
                                        for (int r = 0; r < p; r++) bam_writers.get(taxa).addAlignment(sam[i]);
                                    }
                                }
                            } catch (Exception e) {
                                Thread t = Thread.currentThread();
                                t.getUncaughtExceptionHandler().uncaughtException(t, e);
                                e.printStackTrace();
                                executor.shutdown();
                                System.exit(1);
                            }
                        }
                        if (block * (block_i + 1) % 1000000 == 0)
                            myLogger.info("Tag processed: " + block * (block_i + 1));
                    }

                    public Runnable init(SAMRecord[] sam, int bS) {
                        this.sam = sam;
                        this.block_i = bS;
                        return (this);
                    }
                }.init(Qs, bS));
                k = 0;
                Qs = new SAMRecord[block];
                bS++;
                if (temp == null || tag >= cursor) {
                    this.waitFor();
                    if (temp != null) {
                        cache();
                        this.initial_thread_pool();
                    }
                }
            }
        }
        iter.close();
        inputSam.close();
        indexReader.close();
        // executor.awaitTermination(365, TimeUnit.DAYS);
        for (String key : bam_writers.keySet()) bam_writers.get(key).close();
        myLogger.info("Total number of reads in lane=" + allReads);
        myLogger.info("Process took " + (System.currentTimeMillis() - start) / 1000 + " seconds.");
    } catch (Exception e) {
        e.printStackTrace();
    }
}
Also used : SamReaderFactory(htsjdk.samtools.SamReaderFactory) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) File(java.io.File)

Aggregations

SamReader (htsjdk.samtools.SamReader)211 SAMRecord (htsjdk.samtools.SAMRecord)137 File (java.io.File)111 SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)89 SAMFileHeader (htsjdk.samtools.SAMFileHeader)83 IOException (java.io.IOException)71 SamReaderFactory (htsjdk.samtools.SamReaderFactory)65 ArrayList (java.util.ArrayList)63 SAMFileWriter (htsjdk.samtools.SAMFileWriter)58 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)44 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)42 List (java.util.List)39 CigarElement (htsjdk.samtools.CigarElement)32 IndexedFastaSequenceFile (htsjdk.samtools.reference.IndexedFastaSequenceFile)32 HashMap (java.util.HashMap)31 Cigar (htsjdk.samtools.Cigar)30 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)30 PrintWriter (java.io.PrintWriter)27 Interval (htsjdk.samtools.util.Interval)26 HashSet (java.util.HashSet)26