Search in sources :

Example 6 with SAMRecordIterator

use of htsjdk.samtools.SAMRecordIterator in project polyGembler by c-zhou.

the class SamFileFilter method run.

@Override
public void run() {
    // TODO Auto-generated method stub
    File folder = new File(input_dir);
    File[] listOfFiles = folder.listFiles(new FilenameFilter() {

        @Override
        public boolean accept(File dir, String name) {
            return name.endsWith(".bam");
        }
    });
    int bam_n = listOfFiles.length;
    this.initial_thread_pool();
    for (int i = 0; i < bam_n; i++) {
        executor.submit(new Runnable() {

            private String bam_file;

            @Override
            public void run() {
                // TODO Auto-generated method stub
                try {
                    final SamReaderFactory factory = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS).validationStringency(ValidationStringency.SILENT);
                    final SamReader inputSam = factory.open(new File(input_dir + "/" + bam_file));
                    final SAMFileWriter outputSam = new SAMFileWriterFactory().makeSAMOrBAMWriter(inputSam.getFileHeader(), true, new File(output_dir + "/" + bam_file));
                    final BufferedReader filter_br = Utils.getBufferedReader(filter_file);
                    SAMRecordIterator iter = inputSam.iterator();
                    String line = filter_br.readLine();
                    if (line == null) {
                        filter_br.close();
                        iter.close();
                        inputSam.close();
                        outputSam.close();
                        return;
                    }
                    String[] s = line.split("\\s+");
                    String chr = s[0];
                    long end_pos = Long.parseLong(s[1]);
                    SAMRecord samr;
                    while (iter.hasNext()) {
                        samr = iter.next();
                        if (samr.getReferenceName().equals(chr) && samr.getAlignmentEnd() < end_pos) {
                            outputSam.addAlignment(samr);
                            continue;
                        }
                        if (!samr.getReferenceName().equals(chr) || samr.getAlignmentStart() > end_pos) {
                            line = null;
                            while ((line = filter_br.readLine()) != null && end_pos < samr.getAlignmentStart()) {
                                s = line.split("\\s");
                                chr = s[0];
                                end_pos = Long.parseLong(s[1]);
                            }
                            if (line != null) {
                                if (samr.getReferenceName().equals(chr) && samr.getAlignmentEnd() < end_pos)
                                    outputSam.addAlignment(samr);
                            } else {
                                outputSam.addAlignment(samr);
                                while (iter.hasNext()) outputSam.addAlignment(iter.next());
                                break;
                            }
                        }
                    }
                    filter_br.close();
                    iter.close();
                    inputSam.close();
                    outputSam.close();
                } catch (Exception e) {
                    Thread t = Thread.currentThread();
                    t.getUncaughtExceptionHandler().uncaughtException(t, e);
                    e.printStackTrace();
                    executor.shutdown();
                    System.exit(1);
                }
            }

            public Runnable init(String bam_file) {
                // TODO Auto-generated method stub
                this.bam_file = bam_file;
                return this;
            }
        }.init(listOfFiles[i].getName()));
        throw new RuntimeException("incorrect tools!!!!");
    }
    this.waitFor();
}
Also used : SamReaderFactory(htsjdk.samtools.SamReaderFactory) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) FilenameFilter(java.io.FilenameFilter) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) BufferedReader(java.io.BufferedReader) File(java.io.File)

Example 7 with SAMRecordIterator

use of htsjdk.samtools.SAMRecordIterator in project polyGembler by c-zhou.

the class Consensus method parseDataLibrary.

private void parseDataLibrary(String[] args) {
    // TODO Auto-generated method stub
    final Map<Integer, String> bamLib = new HashMap<Integer, String>();
    final Map<Integer, Integer> insLib = new HashMap<Integer, Integer>();
    final Map<Integer, Integer> wLib = new HashMap<Integer, Integer>();
    for (int i = 0; i < args.length; i++) {
        String arg = args[i];
        if (arg.matches(bam_lib)) {
            Matcher m = bam_pat.matcher(arg);
            m.find();
            int lib = Integer.parseInt(m.group(1));
            bamLib.put(lib, args[++i]);
        }
        if (arg.matches(ins_lib)) {
            Matcher m = ins_pat.matcher(arg);
            m.find();
            int lib = Integer.parseInt(m.group(1));
            insLib.put(lib, Integer.parseInt(args[++i]));
        }
        if (arg.matches(w_lib)) {
            Matcher m = w_pat.matcher(arg);
            m.find();
            int lib = Integer.parseInt(m.group(1));
            wLib.put(lib, Integer.parseInt(args[++i]));
        }
    }
    int n = bamLib.size();
    if (wLib.size() != n) {
        printUsage();
        throw new IllegalArgumentException("Library parameters do not match!!!");
    }
    this.bam_list = new String[n];
    this.ins_thres = new int[n];
    this.link_w = new double[n];
    this.read_paired = new boolean[n];
    int i = 0;
    for (Integer key : bamLib.keySet()) {
        bam_list[i] = bamLib.get(key);
        final SamReader in1 = factory.open(new File(bam_list[i]));
        SAMRecordIterator iter1 = in1.iterator();
        SAMRecord record;
        while (iter1.hasNext()) {
            record = iter1.next();
            if (!record.getReadUnmappedFlag()) {
                if (record.getReadPairedFlag())
                    read_paired[i] = true;
                break;
            }
        }
        try {
            iter1.close();
            in1.close();
        } catch (IOException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
        if (!insLib.containsKey(key) && read_paired[i] || !wLib.containsKey(key)) {
            printUsage();
            throw new IllegalArgumentException("Library parameters do not match!!!");
        }
        ins_thres[i] = read_paired[i] ? insLib.get(key) : Integer.MAX_VALUE;
        link_w[i] = 1.0 / wLib.get(key);
        i++;
    }
}
Also used : SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) HashMap(java.util.HashMap) Matcher(java.util.regex.Matcher) IOException(java.io.IOException) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) File(java.io.File)

Example 8 with SAMRecordIterator

use of htsjdk.samtools.SAMRecordIterator in project polyGembler by c-zhou.

the class SamToTaxa method distributeSortedBam.

private void distributeSortedBam() {
    // TODO Auto-generated method stub
    long start = System.currentTimeMillis();
    try {
        File indexFile = new File(myIndexFileName);
        myLogger.info("Using the following index file:");
        myLogger.info(indexFile.getAbsolutePath());
        File samFile = new File(mySamFileName);
        myLogger.info("Using the following BAM file:");
        myLogger.info(samFile.getAbsolutePath());
        final SamReaderFactory factory = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS).validationStringency(ValidationStringency.SILENT);
        final SamReader inputSam = factory.open(samFile);
        header_sam = inputSam.getFileHeader();
        header_sam.setSortOrder(SortOrder.unsorted);
        indexReader = Utils.getBufferedReader(indexFile, 65536);
        String line = indexReader.readLine();
        String[] samples = line.split("\\s+");
        for (int i = 1; i < samples.length; i++) {
            String taxa = samples[i];
            SAMFileHeader header = header_sam.clone();
            SAMReadGroupRecord rg = new SAMReadGroupRecord(taxa);
            rg.setSample(taxa);
            header.addReadGroup(rg);
            bam_writers.put(taxa, new SAMFileWriterFactory().makeSAMOrBAMWriter(header, true, new File(myOutputDir + System.getProperty("file.separator") + taxa + ".bam")));
            locks.put(taxa, new Object());
        }
        SAMRecordIterator iter = inputSam.iterator();
        cache();
        this.initial_thread_pool();
        final int block = 10000;
        int bS = 0;
        SAMRecord[] Qs = new SAMRecord[block];
        SAMRecord temp = iter.next();
        int k = 0;
        allReads = 0;
        long tag;
        while (temp != null) {
            allReads++;
            Qs[k] = temp;
            temp = iter.hasNext() ? iter.next() : null;
            k++;
            tag = temp == null ? 0 : Long.parseLong(temp.getReadName());
            if (k == block || temp == null || tag >= cursor) {
                executor.submit(new Runnable() {

                    private SAMRecord[] sam;

                    private int block_i;

                    @Override
                    public void run() {
                        // TODO Auto-generated method stub
                        long tag;
                        Tag tagObj;
                        String taxa;
                        for (int i = 0; i < sam.length; i++) {
                            try {
                                if (sam[i] == null)
                                    break;
                                tag = Long.parseLong(sam[i].getReadName());
                                tagObj = indexMap.get(tag);
                                int l = tagObj.taxa.length;
                                long cov = 0;
                                for (int t = 0; t < l; t++) cov += tagObj.count[t];
                                if (cov > maxCov) {
                                    myLogger.info("?tag id, " + tag + "::tag sequence, " + sam[i].getReadString() + "::aligned to " + sam[i].getReferenceName() + ":" + sam[i].getAlignmentStart() + "-" + sam[i].getAlignmentEnd() + "::omitted due to abnormal high coverage, " + cov);
                                    continue;
                                }
                                for (int t = 0; t < l; t++) {
                                    taxa = tagObj.taxa[t];
                                    sam[i].setAttribute("RG", taxa);
                                    int p = tagObj.count[t];
                                    synchronized (locks.get(taxa)) {
                                        for (int r = 0; r < p; r++) bam_writers.get(taxa).addAlignment(sam[i]);
                                    }
                                }
                            } catch (Exception e) {
                                Thread t = Thread.currentThread();
                                t.getUncaughtExceptionHandler().uncaughtException(t, e);
                                e.printStackTrace();
                                executor.shutdown();
                                System.exit(1);
                            }
                        }
                        if (block * (block_i + 1) % 1000000 == 0)
                            myLogger.info("Tag processed: " + block * (block_i + 1));
                    }

                    public Runnable init(SAMRecord[] sam, int bS) {
                        this.sam = sam;
                        this.block_i = bS;
                        return (this);
                    }
                }.init(Qs, bS));
                k = 0;
                Qs = new SAMRecord[block];
                bS++;
                if (temp == null || tag >= cursor) {
                    this.waitFor();
                    if (temp != null) {
                        cache();
                        this.initial_thread_pool();
                    }
                }
            }
        }
        iter.close();
        inputSam.close();
        indexReader.close();
        // executor.awaitTermination(365, TimeUnit.DAYS);
        for (String key : bam_writers.keySet()) bam_writers.get(key).close();
        myLogger.info("Total number of reads in lane=" + allReads);
        myLogger.info("Process took " + (System.currentTimeMillis() - start) / 1000 + " seconds.");
    } catch (Exception e) {
        e.printStackTrace();
    }
}
Also used : SamReaderFactory(htsjdk.samtools.SamReaderFactory) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) File(java.io.File)

Example 9 with SAMRecordIterator

use of htsjdk.samtools.SAMRecordIterator in project polyGembler by c-zhou.

the class GenomeComparison method run.

@Override
public void run() {
    // TODO Auto-generated method stub
    final SamReaderFactory factory = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS).validationStringency(ValidationStringency.SILENT);
    final SamReader inputSam1 = factory.open(new File(in_bam1));
    SAMRecordIterator iter1 = inputSam1.iterator();
    final SamReader inputSam2 = factory.open(new File(in_bam2));
    SAMRecordIterator iter2 = inputSam2.iterator();
    this.initial_thread_pool();
    try {
        bw_con = new BufferedWriter(new FileWriter(out_f + ".txt"));
        bw_1 = new BufferedWriter(new FileWriter(out_f + ".1txt"));
        bw_2 = new BufferedWriter(new FileWriter(out_f + ".2txt"));
        SAMRecord record1 = iter1.next(), record2 = iter2.next();
        List<SAMRecord> barcoded_records1 = new ArrayList<SAMRecord>(), barcoded_records2 = new ArrayList<SAMRecord>();
        barcoded_records1.add(record1);
        barcoded_records2.add(record2);
        String barcode1 = record1.getStringAttribute("BX"), barcode2 = record2.getStringAttribute("BX");
        jobs: while (true) {
            if (barcode1.compareTo(barcode2) < 0) {
                // output barcode 1 up to barcode 1
                while (iter1.hasNext()) {
                    while ((record1 = iter1.next()).getStringAttribute("BX").equals(barcode1)) {
                        barcoded_records1.add(record1);
                        if (!iter1.hasNext())
                            break;
                    }
                    // TODO barcoded records 1 output
                    Molecule[] mols = extractMoleculeFromList(barcoded_records1);
                    for (int i = 0; i != mols.length; i++) bw_1.write(mols[i].chr_id + ":" + mols[i].chr_start + "-" + mols[i].chr_end + "\n");
                    barcoded_records1.clear();
                    if (iter1.hasNext()) {
                        barcoded_records1.add(record1);
                        barcode1 = record1.getStringAttribute("BX");
                    } else
                        break jobs;
                    if (barcode1.compareTo(barcode2) >= 0)
                        break;
                }
                if (!barcode1.equals(barcode2))
                    continue;
            } else if (barcode1.compareTo(barcode2) > 0) {
                // output barcode 2 up to barcode 2
                while (iter2.hasNext()) {
                    while ((record2 = iter2.next()).getStringAttribute("BX").equals(barcode2)) {
                        barcoded_records2.add(record2);
                        if (!iter2.hasNext())
                            break;
                    }
                    // TODO barcoded records 2 output
                    Molecule[] mols = extractMoleculeFromList(barcoded_records2);
                    for (int i = 0; i != mols.length; i++) bw_2.write(mols[i].chr_id + ":" + mols[i].chr_start + "-" + mols[i].chr_end + "\n");
                    barcoded_records2.clear();
                    if (iter2.hasNext()) {
                        barcoded_records2.add(record2);
                        barcode2 = record2.getStringAttribute("BX");
                    } else
                        break jobs;
                    if (barcode2.compareTo(barcode1) >= 0)
                        break;
                }
                if (!barcode1.equals(barcode2))
                    continue;
            }
            while ((record1 = iter1.next()).getStringAttribute("BX").equals(barcode1)) {
                barcoded_records1.add(record1);
                if (!iter1.hasNext())
                    break;
            }
            while ((record2 = iter2.next()).getStringAttribute("BX").equals(barcode2)) {
                barcoded_records2.add(record2);
                if (!iter2.hasNext())
                    break;
            }
            executor.submit(new Runnable() {

                List<SAMRecord> list1;

                List<SAMRecord> list2;

                @Override
                public void run() {
                    // TODO Auto-generated method stub
                    try {
                        Molecule[] mols1 = extractMoleculeFromList(list1);
                        Molecule[] mols2 = extractMoleculeFromList(list2);
                        int n1 = mols1.length, n2 = mols2.length;
                        double insec;
                        Set<Integer> m1 = new HashSet<Integer>();
                        Set<Integer> m2 = new HashSet<Integer>();
                        for (int i = 0; i != n1; i++) {
                            for (int j = 0; j != n2; j++) {
                                if ((insec = intersect(mols1[i], mols2[j])) >= overlap_frac) {
                                    bw_con.write(mols1[i].chr_id + ":" + mols1[i].chr_start + "-" + mols1[i].chr_end + "\t" + mols2[j].chr_id + ":" + mols2[j].chr_start + "-" + mols2[j].chr_end + "\t" + insec + "\n");
                                    m1.add(i);
                                    m2.add(j);
                                }
                            }
                        }
                        for (int i = 0; i != n1; i++) {
                            if (!m1.contains(i))
                                bw_1.write(mols1[i].chr_id + ":" + mols1[i].chr_start + "-" + mols1[i].chr_end + "\n");
                        }
                        for (int i = 0; i != n2; i++) {
                            if (!m2.contains(i))
                                bw_2.write(mols2[i].chr_id + ":" + mols2[i].chr_start + "-" + mols2[i].chr_end + "\n");
                        }
                    } catch (Exception e) {
                        Thread t = Thread.currentThread();
                        t.getUncaughtExceptionHandler().uncaughtException(t, e);
                        e.printStackTrace();
                        executor.shutdown();
                        System.exit(1);
                    }
                }

                public Runnable init(List<SAMRecord> list1, List<SAMRecord> list2) {
                    // TODO Auto-generated method stub
                    this.list1 = list1;
                    this.list2 = list2;
                    return this;
                }
            }.init(new ArrayList<SAMRecord>(barcoded_records1), new ArrayList<SAMRecord>(barcoded_records2)));
            if (!iter1.hasNext() || !iter2.hasNext())
                break jobs;
            barcoded_records1.clear();
            barcoded_records2.clear();
            barcoded_records1.add(record1);
            barcoded_records2.add(record2);
            barcode1 = record1.getStringAttribute("BX");
            barcode2 = record2.getStringAttribute("BX");
        }
        if (!iter1.hasNext()) {
            // TODO output remaining iter2
            while (iter2.hasNext()) {
                while ((record2 = iter2.next()).getStringAttribute("BX").equals(barcode2)) {
                    barcoded_records2.add(record2);
                    if (!iter2.hasNext())
                        break;
                }
                Molecule[] mols = extractMoleculeFromList(barcoded_records2);
                for (int i = 0; i != mols.length; i++) bw_2.write(mols[i].chr_id + ":" + mols[i].chr_start + "-" + mols[i].chr_end + "\n");
                barcoded_records2.clear();
                if (iter2.hasNext()) {
                    barcoded_records2.add(record2);
                    barcode2 = record2.getStringAttribute("BX");
                }
            }
        }
        if (!iter2.hasNext()) {
            // TODO output remaining iter1
            while (iter1.hasNext()) {
                while ((record1 = iter1.next()).getStringAttribute("BX").equals(barcode1)) {
                    barcoded_records1.add(record1);
                    if (!iter1.hasNext())
                        break;
                }
                Molecule[] mols = extractMoleculeFromList(barcoded_records1);
                for (int i = 0; i != mols.length; i++) bw_1.write(mols[i].chr_id + ":" + mols[i].chr_start + "-" + mols[i].chr_end + "\n");
                barcoded_records1.clear();
                if (iter1.hasNext()) {
                    barcoded_records1.add(record1);
                    barcode1 = record1.getStringAttribute("BX");
                }
            }
        }
        this.waitFor();
        inputSam1.close();
        inputSam2.close();
        bw_con.close();
        bw_1.close();
        bw_2.close();
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
}
Also used : Set(java.util.Set) HashSet(java.util.HashSet) SamReaderFactory(htsjdk.samtools.SamReaderFactory) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) FileWriter(java.io.FileWriter) ArrayList(java.util.ArrayList) IOException(java.io.IOException) IOException(java.io.IOException) BufferedWriter(java.io.BufferedWriter) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) File(java.io.File)

Example 10 with SAMRecordIterator

use of htsjdk.samtools.SAMRecordIterator in project polyGembler by c-zhou.

the class TenXMoleculeStats method runSNPCaller.

private void runSNPCaller() {
    // TODO Auto-generated method stub
    final SamReader inputSam = factory.open(new File(this.in_bam));
    if (inputSam.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate)
        throw new RuntimeException("Sort BAM file before SNP calling!!!");
    final SAMSequenceDictionary seqDic = inputSam.getFileHeader().getSequenceDictionary();
    mappedReadsOnChromosome = new long[seqDic.size()];
    oos = Utils.getBufferedWriter(out_stats + ".txt");
    hap = Utils.getBufferedWriter(out_stats + ".haps");
    vcf = Utils.getBufferedWriter(out_stats + ".snps");
    SAMFileHeader header = inputSam.getFileHeader();
    header.setSortOrder(SortOrder.unknown);
    oob = new SAMFileWriterFactory().makeSAMOrBAMWriter(header, true, new File(out_stats + ".bam"));
    try {
        inputSam.close();
    } catch (IOException e1) {
        // TODO Auto-generated catch block
        e1.printStackTrace();
    }
    this.initial_thread_pool();
    for (int i = 0; i < seqDic.size(); i++) {
        executor.submit(new Runnable() {

            private int chr_num;

            @Override
            public void run() {
                // TODO Auto-generated method stub
                try {
                    final SAMSequenceRecord refSeq = seqDic.getSequence(chr_num);
                    final String seqnm = refSeq.getSequenceName();
                    final int seqln = refSeq.getSequenceLength();
                    final Set<Integer> snp_pos = readSNPs(in_vcf, seqnm);
                    final SamReader inputSam = factory.open(new File(in_bam));
                    final SAMRecordIterator iter = inputSam.queryOverlapping(seqnm, 0, seqln);
                    // do NOT call SNPs, read it from file instead
                    final Map<String, List<SAMRecord>> bc_record = new HashMap<String, List<SAMRecord>>();
                    final Map<String, Integer> bc_position = new HashMap<String, Integer>();
                    String bc;
                    SAMRecord tmp_record;
                    int pos;
                    while (iter.hasNext()) {
                        tmp_record = iter.next();
                        if (!processRecord(tmp_record))
                            continue;
                        bc = tmp_record.getStringAttribute("BX");
                        if (bc == null)
                            continue;
                        pos = tmp_record.getAlignmentStart();
                        if (mappedReads % 10000 == 0) {
                            Set<String> bc_removal = new HashSet<String>();
                            for (Map.Entry<String, List<SAMRecord>> entry : bc_record.entrySet()) {
                                String b = entry.getKey();
                                List<SAMRecord> records = entry.getValue();
                                if (bc_position.get(b) + max_gap < pos) {
                                    processMolecule(b, records, snp_pos);
                                    bc_removal.add(b);
                                }
                            }
                            for (String b : bc_removal) {
                                bc_record.remove(b);
                                bc_position.remove(b);
                            }
                        }
                        if (bc_record.containsKey(bc)) {
                            List<SAMRecord> records = bc_record.get(bc);
                            if (bc_position.get(bc) + max_gap < pos) {
                                processMolecule(bc, records, snp_pos);
                                records.clear();
                                bc_position.remove(bc);
                            }
                        } else
                            bc_record.put(bc, new ArrayList<SAMRecord>());
                        bc_record.get(bc).add(tmp_record);
                        bc_position.put(bc, Math.max(tmp_record.getAlignmentEnd(), bc_position.containsKey(bc) ? bc_position.get(bc) : 0));
                    }
                    if (!bc_record.isEmpty())
                        for (Map.Entry<String, List<SAMRecord>> entry : bc_record.entrySet()) processMolecule(entry.getKey(), entry.getValue(), snp_pos);
                    /**
                     *						// do NOT call SNPs, read it from file instead
                     *						int pos;
                     *						SAMRecord buffer = iter.hasNext() ? iter.next() : null;
                     *						int bufferS = buffer==null ? Integer.MAX_VALUE : buffer.getAlignmentStart();
                     *						final Set<SAMRecord> record_pool = new HashSet<SAMRecord>();
                     *						final Set<SAMRecord> tmp_removal = new HashSet<SAMRecord>();
                     *
                     *						final int[] allele_stats = new int[5];
                     *
                     *						final Map<String, TreeSet<SNP>> bc_snp = new HashMap<String, TreeSet<SNP>>();
                     *
                     *						String dna_seq, bc;
                     *						int tmp_int;
                     *						String snp_str;
                     *						char nucl;
                     *						for(int p=0; p!=seqln; p++) {
                     *							if(p%1000000==0) {
                     *								System.err.println(seqnm+":"+p);
                     *								// TODO
                     *								// process bc_snp
                     *								Set<String> bc_removal = new HashSet<String>();
                     *								for(Map.Entry<String, TreeSet<SNP>> entry : bc_snp.entrySet()) {
                     *									if(entry.getValue().last().position+max_gap<p) {
                     *										processMolecule(entry.getKey(), entry.getValue());
                     *										bc_removal.add(entry.getKey());
                     *									}
                     *								}
                     *								for(String b : bc_removal) bc_snp.remove(b);
                     *							}
                     *							pos = p+1; // BAM format is 1-based coordination
                     *
                     *							while(bufferS<=pos) {
                     *								// TODO
                     *								// buffer records upto this position
                     *								while( buffer!=null &&
                     *										buffer.getAlignmentStart()<=pos) {
                     *									if( processRecord(buffer) )
                     *										record_pool.add(buffer);
                     *									buffer=iter.hasNext()?iter.next():null;
                     *									bufferS = buffer==null ? Integer.MAX_VALUE :
                     *										buffer.getAlignmentStart();
                     *								}
                     *							}
                     *							if(record_pool.isEmpty()) continue;
                     *
                     *							Arrays.fill(allele_stats, 0);
                     *							tmp_removal.clear();
                     *							for(SAMRecord record : record_pool) {
                     *
                     *								if(record.getAlignmentEnd()<pos) {
                     *									tmp_removal.add(record);
                     *									continue;
                     *								}
                     *								dna_seq = record.getReadString();
                     *								tmp_int = record.getReadPositionAtReferencePosition(pos);
                     *								nucl = tmp_int==0?'D':Character.toUpperCase(dna_seq.charAt(tmp_int-1));
                     *								switch(nucl) {
                     *								case 'A':
                     *									allele_stats[0]++;
                     *									break;
                     *								case 'C':
                     *									allele_stats[1]++;
                     *									break;
                     *								case 'G':
                     *									allele_stats[2]++;
                     *									break;
                     *								case 'T':
                     *									allele_stats[3]++;
                     *									break;
                     *								case 'D':
                     *									allele_stats[4]++;
                     *									break;
                     *								case 'N':
                     *									// do nothing here
                     *									break;
                     *								default:
                     *									throw new RuntimeException("!!!");
                     *								}
                     *							}
                     *							record_pool.removeAll(tmp_removal);
                     *
                     *							snp_str = feasibleSNP(allele_stats);
                     *
                     *							// not two alleles or minor allele frequency is smaller than threshold
                     *							// don't call indels
                     *							if(snp_str==null) continue;
                     *
                     *							vcf.write(seqnm+"\t"+p+"\t"+snp_str+"\n");
                     *
                     *							for(SAMRecord record : record_pool) {
                     *								bc = record.getStringAttribute("BX");
                     *								if(bc==null) continue;
                     *								dna_seq = record.getReadString();
                     *								tmp_int = record.getReadPositionAtReferencePosition(pos);
                     *								if(tmp_int==0) continue;
                     *								nucl = Character.toUpperCase(dna_seq.charAt(tmp_int-1));
                     *								if(bc_snp.containsKey(bc)) {
                     *									TreeSet<SNP> bc_set = bc_snp.get(bc);
                     *									if(bc_set.last().position+max_gap<p) {
                     *										// TODO
                     *										// gap exceeds max_gap
                     *										// write molecule out
                     *										processMolecule(bc, bc_set);
                     *										bc_set.clear();
                     *									}
                     *								} else {
                     *									bc_snp.put(bc, new TreeSet<SNP>(new SNP.PositionComparator()));
                     *								}
                     *								bc_snp.get(bc).add(new SNP(p, nucl, record));
                     *
                     *							}
                     *						}
                     *
                     *						for(Map.Entry<String, TreeSet<SNP>> entry : bc_snp.entrySet())
                     *							processMolecule(entry.getKey(), entry.getValue());
                     */
                    iter.close();
                    inputSam.close();
                } catch (Exception e) {
                    Thread t = Thread.currentThread();
                    t.getUncaughtExceptionHandler().uncaughtException(t, e);
                    e.printStackTrace();
                    executor.shutdown();
                    System.exit(1);
                }
            }

            private void processMolecule(final String bc, final List<SAMRecord> records, final Set<Integer> snp_pos) {
                // TODO Auto-generated method stub
                MoleculeConstructor molCon = new MoleculeConstructor(records);
                if (molCon.getMolecularLength() < oo_thresh)
                    return;
                molCon.run();
            }

            /**
             *				private void processMolecule(final String bc, final TreeSet<SNP> snp_set)
             *						throws IOException {
             *					// TODO Auto-generated method stub
             *					// write molecule haplotypes
             *					Set<SAMRecord> records = new HashSet<SAMRecord>();
             *					List<String> snp_str = new ArrayList<String>();
             *					SNP snp;
             *					int tmp_position = -1;
             *					while(!snp_set.isEmpty()) {
             *						snp = snp_set.pollFirst();
             *						records.add(snp.alignment);
             *						if(tmp_position==snp.position) {
             *							snp_str.remove(snp_str.size()-1);
             *						} else {
             *							snp_str.add(snp.position+","+
             *									snp.allele);
             *							tmp_position = snp.position;
             *						}
             *					}
             *					// write molecule statistics
             *					MoleculeConstructor molCon  =
             *							new MoleculeConstructor(new ArrayList<SAMRecord>(records));
             *					if(molCon.getMolecularLength()<oo_thresh) return;
             *					molCon.run();
             *
             *					if(snp_str.size()<2) return;
             *					StringBuilder os = new StringBuilder();
             *					os.append(bc);
             *					for(String ss : snp_str) {
             *						os.append(" ");
             *						os.append( ss);
             *					}
             *					os.append("\n");
             *					hap.write(os.toString());
             *				}
             */
            public Runnable init(final int i) {
                // TODO Auto-generated method stub
                this.chr_num = i;
                return this;
            }

            private String feasibleSNP(int[] ints) {
                // TODO Auto-generated method stub
                if (ints[4] > 0)
                    return null;
                List<Integer> obs = new ArrayList<Integer>();
                for (int i = 0; i < 4; i++) if (ints[i] > 0)
                    obs.add(i);
                if (obs.size() != 2)
                    return null;
                int ref = obs.get(0), alt = obs.get(1);
                int d = ints[ref] + ints[alt];
                if (d < min_depth)
                    return null;
                double maf = (double) ints[ref] / d;
                if (maf > 0.5)
                    maf = 1 - maf;
                if (maf < min_maf)
                    return null;
                return Sequence.nucleotide[ref] + "," + Sequence.nucleotide[alt] + "\t" + ints[ref] + "," + ints[alt];
            }
        }.init(i));
    }
    try {
        inputSam.close();
        this.waitFor();
        oos.close();
        hap.close();
        vcf.close();
        oob.close();
        BufferedWriter bw_summary = Utils.getBufferedWriter(this.out_stats + ".summary");
        bw_summary.write("##Reads     mapped: " + mappedReads + "\n");
        bw_summary.write("##Reads   unmapped: " + unmappedReads + "\n");
        bw_summary.write("##Reads duplicated: " + duplicatedReads + "\n");
        bw_summary.write("##Reads  bad pairs: " + badpaired + "\n");
        bw_summary.write("##Reads by pseudo-chromosomes:\n");
        for (int i = 0; i < mappedReadsOnChromosome.length; i++) {
            bw_summary.write(" #" + seqDic.getSequence(i).getSequenceName() + " " + mappedReadsOnChromosome[i] + "\n");
        }
        bw_summary.write("##Reads by barcodes:\n");
        for (Map.Entry<String, Integer> entry : readsOnBarcode.entrySet()) {
            bw_summary.write(" #" + entry.getKey() + " " + entry.getValue() + "\n");
        }
        bw_summary.close();
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
}
Also used : TreeSet(java.util.TreeSet) HashSet(java.util.HashSet) Set(java.util.Set) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) BufferedWriter(java.io.BufferedWriter) SamReader(htsjdk.samtools.SamReader) ArrayList(java.util.ArrayList) List(java.util.List) HashSet(java.util.HashSet) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) IOException(java.io.IOException) IOException(java.io.IOException) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) File(java.io.File) HashMap(java.util.HashMap) Map(java.util.Map)

Aggregations

SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)107 SAMRecord (htsjdk.samtools.SAMRecord)92 SamReader (htsjdk.samtools.SamReader)83 SAMFileHeader (htsjdk.samtools.SAMFileHeader)49 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)47 File (java.io.File)47 SAMFileWriter (htsjdk.samtools.SAMFileWriter)45 IOException (java.io.IOException)41 ArrayList (java.util.ArrayList)34 CigarElement (htsjdk.samtools.CigarElement)30 Cigar (htsjdk.samtools.Cigar)26 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)24 SamReaderFactory (htsjdk.samtools.SamReaderFactory)21 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)18 CigarOperator (htsjdk.samtools.CigarOperator)16 Interval (htsjdk.samtools.util.Interval)16 PrintWriter (java.io.PrintWriter)15 HashMap (java.util.HashMap)15 SAMFileWriterFactory (htsjdk.samtools.SAMFileWriterFactory)14 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)14