Search in sources :

Example 61 with SAMRecordIterator

use of htsjdk.samtools.SAMRecordIterator in project polyGembler by c-zhou.

the class Consensus method buildReadPairGraph.

public void buildReadPairGraph(String bam_in, String sz_in, String out, // int max_gap,
int mapQ_thres, int max_NM) {
    try {
        /**
         *			BufferedReader br_tmp = new BufferedReader(new FileReader(new File(contig_file)));
         *			int contig_num = 0;
         *			String line;
         *			while( (line=br_tmp.readLine())!=null )
         *				if(line.startsWith(">")) contig_num++;
         *			br_tmp.close();
         *
         *			BufferedReader br_contig_file = new BufferedReader(new FileReader(new File(contig_file)));
         *			String[] contig_id = new String[contig_num];
         *			String[] contig_str = new String[contig_num];
         *			int[] contig_sz = new int[contig_num];
         *			int c = 0;
         *			line = br_contig_file.readLine();
         *			while( line!=null ) {
         *				if(line.startsWith(">")) {
         *					contig_id[c] = line.replace(">", "").trim();
         *					StringBuilder bs = new StringBuilder();
         *					while( (line=br_contig_file.readLine())!=null
         *							&& !line.startsWith(">")) {
         *						bs.append(line.trim());
         *					}
         *					contig_str[c] = bs.toString();
         *					contig_sz[c] = bs.length();
         *					c++;
         *				}
         *			}
         *			br_contig_file.close();
         */
        String[] bam_files = bam_in.trim().split(";");
        String[] s = sz_in.trim().split(";");
        final Map<String, Integer> szs = new HashMap<String, Integer>();
        for (int i = 0; i != bam_files.length; i++) szs.put(bam_files[i], Integer.parseInt(s[i]));
        final Map<Long, Integer> links = new HashMap<Long, Integer>();
        this.initial_thread_pool();
        for (String bam_file : bam_files) {
            this.executor.submit(new Runnable() {

                private String bam_file;

                @Override
                public void run() {
                    System.err.println("Process file ... " + bam_file);
                    SAMRecord tmp_record, sam_record, mate_record;
                    String sam_id;
                    final List<SAMRecord> record_pool = new ArrayList<SAMRecord>();
                    int sam_ref, mate_ref;
                    long key_ref;
                    final SamReader in1 = factory.open(new File(bam_file));
                    final List<SAMSequenceRecord> refSeq = in1.getFileHeader().getSequenceDictionary().getSequences();
                    final int[] refSz = new int[refSeq.size()];
                    for (int i = 0; i != refSz.length; i++) refSz[i] = refSeq.get(i).getSequenceLength();
                    SAMRecordIterator iter1 = in1.iterator();
                    tmp_record = iter1.next();
                    while (tmp_record != null) {
                        record_pool.clear();
                        record_pool.add(tmp_record);
                        sam_id = tmp_record.getReadName();
                        while ((tmp_record = iter1.hasNext() ? iter1.next() : null) != null && tmp_record.getReadName().equals(sam_id)) record_pool.add(tmp_record);
                        sam_record = null;
                        mate_record = null;
                        for (SAMRecord record : record_pool) {
                            if (record.getFirstOfPairFlag() && !record.getNotPrimaryAlignmentFlag())
                                sam_record = record;
                            if (record.getSecondOfPairFlag() && !record.getNotPrimaryAlignmentFlag())
                                mate_record = record;
                        }
                        if (sam_record == null || mate_record == null)
                            throw new RuntimeException(record_pool.get(0).getSAMString() + "!!!");
                        sam_ref = sam_record.getReferenceIndex();
                        mate_ref = mate_record.getReferenceIndex();
                        if (sam_record.getReadUnmappedFlag() || mate_record.getReadUnmappedFlag() || sam_ref == mate_ref || sam_record.getMappingQuality() < mapQ_thres || mate_record.getMappingQuality() < mapQ_thres || sam_record.getIntegerAttribute("NM") > max_NM || mate_record.getIntegerAttribute("NM") > max_NM)
                            continue;
                        if (sam_ref > mate_ref) {
                            int tmp_int = sam_ref;
                            sam_ref = mate_ref;
                            mate_ref = tmp_int;
                            SAMRecord tmp_rec = sam_record;
                            sam_record = mate_record;
                            mate_record = tmp_rec;
                        }
                        int infSz;
                        double maxSz = insz_thres * szs.get(bam_file);
                        key_ref = 0L;
                        /**
                         *FF*
                         * ---=>---		---=>--
                         */
                        if (!sam_record.getReadNegativeStrandFlag() && !mate_record.getReadNegativeStrandFlag()) {
                            infSz = refSz[sam_ref] - sam_record.getAlignmentStart() + refSz[mate_ref] - mate_record.getAlignmentStart();
                            if (infSz <= maxSz) {
                                // 1 - end
                                key_ref = 1;
                                key_ref <<= 31;
                                key_ref += sam_ref;
                                key_ref <<= 1;
                                // 1 - end
                                key_ref += 1;
                                key_ref <<= 31;
                                key_ref += mate_ref;
                            }
                        } else /**
                         *FR*
                         * ---=>---		---<=--
                         */
                        if (!sam_record.getReadNegativeStrandFlag() && mate_record.getReadNegativeStrandFlag()) {
                            infSz = refSz[sam_ref] - sam_record.getAlignmentStart() + mate_record.getAlignmentEnd();
                            if (infSz <= maxSz) {
                                // 1 - end
                                key_ref = 1;
                                key_ref <<= 31;
                                key_ref += sam_ref;
                                key_ref <<= 1;
                                // 0 - start
                                key_ref += 0;
                                key_ref <<= 31;
                                key_ref += mate_ref;
                            }
                        } else /**
                         *RF*
                         * ---<=---		---=>--
                         */
                        if (sam_record.getReadNegativeStrandFlag() && !mate_record.getReadNegativeStrandFlag()) {
                            infSz = sam_record.getAlignmentEnd() + refSz[mate_ref] - mate_record.getAlignmentStart();
                            if (infSz <= maxSz) {
                                // 0 - start
                                key_ref = 0;
                                key_ref <<= 31;
                                key_ref += sam_ref;
                                key_ref <<= 1;
                                // 1 - end
                                key_ref += 1;
                                key_ref <<= 31;
                                key_ref += mate_ref;
                            }
                        } else /**
                         *RR*
                         * ---<=---		---<=--
                         */
                        if (sam_record.getReadNegativeStrandFlag() && mate_record.getReadNegativeStrandFlag()) {
                            infSz = sam_record.getAlignmentEnd() + mate_record.getAlignmentEnd();
                            if (infSz <= maxSz) {
                                // 0 - start
                                key_ref = 0;
                                key_ref <<= 31;
                                key_ref += sam_ref;
                                key_ref <<= 1;
                                // 0 - start
                                key_ref += 0;
                                key_ref <<= 31;
                                key_ref += mate_ref;
                            }
                        }
                        if (key_ref == 0L)
                            continue;
                        synchronized (lock) {
                            if (links.containsKey(key_ref))
                                links.put(key_ref, links.get(key_ref) + 1);
                            else
                                links.put(key_ref, 1);
                        }
                    }
                    iter1.close();
                    try {
                        in1.close();
                    } catch (IOException e) {
                        // TODO Auto-generated catch block
                        e.printStackTrace();
                    }
                    System.err.println("Process file ... " + bam_file + " done.");
                }

                public Runnable init(final String bam_file) {
                    this.bam_file = bam_file;
                    return (this);
                }
            }.init(bam_file));
        }
        this.waitFor();
        int mate_ref, sam_ref, sam_dir, mate_dir;
        long key_ref;
        BufferedWriter bw = new BufferedWriter(new FileWriter(new File(out)));
        for (Map.Entry<Long, Integer> entry : links.entrySet()) {
            key_ref = entry.getKey();
            mate_ref = (int) (key_ref & refMask);
            key_ref >>= 31;
            mate_dir = (int) (key_ref & dirMask);
            key_ref >>= 1;
            sam_ref = (int) (key_ref & refMask);
            key_ref >>= 31;
            sam_dir = (int) (key_ref & dirMask);
            bw.write(sam_ref + "\t" + mate_ref + "\t" + sam_dir + "\t" + mate_dir + "\t" + entry.getValue() + "\n");
        }
        bw.close();
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
}
Also used : SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) HashMap(java.util.HashMap) FileWriter(java.io.FileWriter) ArrayList(java.util.ArrayList) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) BufferedWriter(java.io.BufferedWriter) SamReader(htsjdk.samtools.SamReader) IOException(java.io.IOException) SAMRecord(htsjdk.samtools.SAMRecord) File(java.io.File) Map(java.util.Map) HashMap(java.util.HashMap) TreeMap(java.util.TreeMap)

Example 62 with SAMRecordIterator

use of htsjdk.samtools.SAMRecordIterator in project polyGembler by c-zhou.

the class BAMstats method run.

@Override
public void run() {
    // TODO Auto-generated method stub
    final SamReaderFactory factory = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS).validationStringency(ValidationStringency.SILENT);
    try {
        final SamReader in1 = factory.open(new File(bam_in1));
        final SamReader in2 = factory.open(new File(bam_in2));
        SAMRecordIterator iter1 = in1.iterator();
        SAMRecordIterator iter2 = in2.iterator();
        SAMRecord tmp_record1 = iter1.next(), tmp_record2 = iter2.next();
        final SAMRecord[] sam_record1 = new SAMRecord[2], sam_record2 = new SAMRecord[2];
        final float[] sam_as1 = new float[2], sam_as2 = new float[2];
        final boolean[] sam_mapped1 = new boolean[2], sam_mapped2 = new boolean[2];
        String record_id1 = tmp_record1.getReadName(), record_id2 = tmp_record2.getReadName();
        int i_sz;
        float i_as;
        boolean b1, b2;
        while (true) {
            if (record_counter % 4000000 == 0)
                myLogger.info(record_counter + " processed.");
            while (record_id1 != null && record_id2 != null && !record_id1.equals(record_id2)) {
                while (record_id1 != null && record_id1.compareTo(record_id2) < 0) {
                    tmp_record1 = bufferRecord(iter1, tmp_record1, sam_record1, sam_as1, sam_mapped1, record_id1);
                    processBuffer(sam_record1, sam_as1, sam_mapped1, 0);
                    record_id1 = tmp_record1 == null ? null : tmp_record1.getReadName();
                }
                if (record_id1 == null)
                    break;
                while (record_id2 != null && record_id2.compareTo(record_id1) < 0) {
                    tmp_record2 = bufferRecord(iter2, tmp_record2, sam_record2, sam_as2, sam_mapped2, record_id2);
                    processBuffer(sam_record2, sam_as2, sam_mapped2, 1);
                    record_id2 = tmp_record2 == null ? null : tmp_record2.getReadName();
                }
            }
            if (record_id1 == null || record_id2 == null)
                break;
            tmp_record1 = bufferRecord(iter1, tmp_record1, sam_record1, sam_as1, sam_mapped1, record_id1);
            tmp_record2 = bufferRecord(iter2, tmp_record2, sam_record2, sam_as2, sam_mapped2, record_id2);
            record_id1 = tmp_record1 == null ? null : tmp_record1.getReadName();
            record_id2 = tmp_record2 == null ? null : tmp_record2.getReadName();
            if ((sam_record2[0] == null || sam_record2[1] == null) && (sam_record1[0] == null || sam_record1[1] == null))
                continue;
            if ((sam_record1[0] == null || sam_record1[1] == null) && sam_record2[0] != null && sam_record2[1] != null) {
                processBuffer(sam_record2, sam_as2, sam_mapped2, 1);
                continue;
            }
            if ((sam_record2[0] == null || sam_record2[1] == null) && sam_record1[0] != null && sam_record1[1] != null) {
                processBuffer(sam_record1, sam_as1, sam_mapped1, 0);
                continue;
            }
            if (sam_record1[0].getDuplicateReadFlag() || sam_record1[1].getDuplicateReadFlag() || sam_record2[0].getDuplicateReadFlag() || sam_record2[1].getDuplicateReadFlag() || sam_record1[0].getReadString().replaceAll("N+$", "").replaceAll("^N+", "").length() < min_seqLen || sam_record1[1].getReadString().replaceAll("N+$", "").replaceAll("^N+", "").length() < min_seqLen || sam_record2[0].getReadString().replaceAll("N+$", "").replaceAll("^N+", "").length() < min_seqLen || sam_record2[1].getReadString().replaceAll("N+$", "").replaceAll("^N+", "").length() < min_seqLen)
                // we need both reads longer than 36
                continue;
            record_counter += 2;
            if (sam_mapped1[0])
                record_mapped_asInf[0]++;
            if (sam_mapped1[1])
                record_mapped_asInf[0]++;
            if (sam_mapped2[0])
                record_mapped_asInf[1]++;
            if (sam_mapped2[1])
                record_mapped_asInf[1]++;
            if (sam_mapped1[0] && sam_mapped2[0])
                record_mapped_asInf[2]++;
            if (sam_mapped1[1] && sam_mapped2[1])
                record_mapped_asInf[2]++;
            if (b1 = (sam_mapped1[0] && sam_mapped1[1] && !sam_record1[0].getReferenceName().equals("Chr00") && !sam_record1[1].getReferenceName().equals("Chr00"))) {
                i_sz = Math.abs(sam_record1[0].getInferredInsertSize());
                if (!insert_size_c1.containsKey(i_sz))
                    insert_size_c1.put(i_sz, 1);
                else
                    insert_size_c1.put(i_sz, insert_size_c1.get(i_sz) + 1);
            }
            if (b2 = (sam_mapped2[0] && sam_mapped2[1] && !sam_record2[0].getReferenceName().equals("Chr00") && !sam_record2[1].getReferenceName().equals("Chr00"))) {
                i_sz = Math.abs(sam_record2[0].getInferredInsertSize());
                if (!insert_size_c2.containsKey(i_sz))
                    insert_size_c2.put(i_sz, 1);
                else
                    insert_size_c2.put(i_sz, insert_size_c2.get(i_sz) + 1);
            }
            if (!(b1 & b2))
                continue;
            i_sz = Math.abs(sam_record1[0].getInferredInsertSize()) - Math.abs(sam_record2[0].getInferredInsertSize());
            if (!insert_size_cdiff.containsKey(i_sz))
                insert_size_cdiff.put(i_sz, 1);
            else
                insert_size_cdiff.put(i_sz, insert_size_cdiff.get(i_sz) + 1);
            // alignment score for pairs always the same
            i_as = sam_as1[0] - sam_as2[0];
            if (!alignment_score_cdiff.containsKey(i_as))
                alignment_score_cdiff.put(i_as, 1);
            else
                alignment_score_cdiff.put(i_as, alignment_score_cdiff.get(i_as) + 1);
            int i = i_as > 0 ? 0 : (i_as < 0 ? 1 : 2);
            if (sam_mapped1[0] && !sam_mapped2[0])
                record_mapped_as0[0]++;
            if (!sam_mapped1[0] && sam_mapped2[0])
                record_mapped_as0[1]++;
            if (sam_mapped1[0] && sam_mapped2[0])
                record_mapped_as0[i]++;
            if (sam_mapped1[1] && !sam_mapped2[1])
                record_mapped_as0[0]++;
            if (!sam_mapped1[1] && sam_mapped2[1])
                record_mapped_as0[1]++;
            if (sam_mapped1[1] && sam_mapped2[1])
                record_mapped_as0[i]++;
        }
        if (record_id1 == null) {
            while (record_id2 != null) {
                tmp_record2 = bufferRecord(iter2, tmp_record2, sam_record2, sam_as2, sam_mapped2, record_id2);
                processBuffer(sam_record2, sam_as2, sam_mapped2, 1);
                record_id2 = tmp_record2 == null ? null : tmp_record2.getReadName();
            }
        }
        if (record_id2 == null) {
            while (record_id1 != null) {
                tmp_record1 = bufferRecord(iter1, tmp_record1, sam_record1, sam_as1, sam_mapped1, record_id1);
                processBuffer(sam_record1, sam_as1, sam_mapped1, 0);
                record_id1 = tmp_record1 == null ? null : tmp_record1.getReadName();
            }
        }
        iter1.close();
        iter2.close();
        in1.close();
        in2.close();
        BufferedWriter bw = new BufferedWriter(new FileWriter(out));
        bw.write("###record_total\n" + record_counter + "\n");
        bw.write("###record_mapped ( 0 )\n" + record_mapped_as0[0] + ", " + record_mapped_as0[1] + ", " + record_mapped_as0[2] + "\n");
        bw.write("###record_mapped (Inf)\n" + record_mapped_asInf[0] + ", " + record_mapped_asInf[1] + ", " + record_mapped_asInf[2] + "\n");
        bw.write("###alignment_score_c1\n");
        for (float i : alignment_score_c1.keySet()) bw.write(i + "\t" + alignment_score_c1.get(i) + "\n");
        bw.write("###alignment_score_c2\n");
        for (float i : alignment_score_c2.keySet()) bw.write(i + "\t" + alignment_score_c2.get(i) + "\n");
        bw.write("###alignment_score_cdiff\n");
        for (float i : alignment_score_cdiff.keySet()) bw.write(i + "\t" + alignment_score_cdiff.get(i) + "\n");
        bw.write("###insert_size_c1\n");
        for (int i : insert_size_c1.keySet()) bw.write(i + "\t" + insert_size_c1.get(i) + "\n");
        bw.write("###insert_size_c2\n");
        for (int i : insert_size_c2.keySet()) bw.write(i + "\t" + insert_size_c2.get(i) + "\n");
        bw.write("###insert_size_cdiff\n");
        for (int i : insert_size_cdiff.keySet()) bw.write(i + "\t" + insert_size_cdiff.get(i) + "\n");
        bw.close();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
Also used : SamReaderFactory(htsjdk.samtools.SamReaderFactory) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) FileWriter(java.io.FileWriter) IOException(java.io.IOException) BufferedWriter(java.io.BufferedWriter) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) File(java.io.File)

Example 63 with SAMRecordIterator

use of htsjdk.samtools.SAMRecordIterator in project polyGembler by c-zhou.

the class TenXMoleculeStats method runBCSortedStats.

public void runBCSortedStats() {
    // TODO Auto-generated method stub
    final SamReader inputSam = factory.open(new File(this.in_bam));
    final SAMSequenceDictionary seqDic = inputSam.getFileHeader().getSequenceDictionary();
    mappedReadsOnChromosome = new long[seqDic.size()];
    /**
     *		try {
     *			oos = Utils.getGZIPBufferedWriter(out_stats);
     *		} catch (IOException e1) {
     *			// TODO Auto-generated catch block
     *			e1.printStackTrace();
     *		}
     */
    oos = Utils.getBufferedWriter(out_stats + ".txt");
    oob = new SAMFileWriterFactory().makeSAMOrBAMWriter(inputSam.getFileHeader(), true, new File(out_stats + ".bam"));
    SAMRecordIterator iter = inputSam.iterator();
    SAMRecord record = iter.next();
    String bc_str;
    this.initial_thread_pool();
    while (record != null) {
        // if(mappedReads%1000000==0) myLogger.info(mappedReads+" mapped records processed.");
        List<SAMRecord> bc_records = new ArrayList<SAMRecord>();
        bc_str = record.getStringAttribute("BX");
        this.processRecord(bc_records, record);
        while (bc_str == null) {
            record = iter.hasNext() ? iter.next() : null;
            if (record == null)
                break;
            this.processRecord(bc_records, record);
            bc_str = record.getStringAttribute("BX");
        }
        if (bc_str == null)
            continue;
        while (true) {
            record = iter.hasNext() ? iter.next() : null;
            if (record == null)
                break;
            if (bc_str.equals(record.getStringAttribute("BX"))) {
                this.processRecord(bc_records, record);
            } else
                break;
        }
        if (bc_records.isEmpty())
            continue;
        readsOnBarcode.put(bc_records.get(0).getStringAttribute("BX"), bc_records.size());
        executor.submit(new MoleculeConstructor(bc_records));
    }
    try {
        iter.close();
        inputSam.close();
        this.waitFor();
        oos.close();
        oob.close();
        BufferedWriter bw_summary = Utils.getBufferedWriter(this.out_stats + ".summary");
        bw_summary.write("##Reads     mapped: " + mappedReads + "\n");
        bw_summary.write("##Reads   unmapped: " + unmappedReads + "\n");
        bw_summary.write("##Reads duplicated: " + duplicatedReads + "\n");
        bw_summary.write("##Reads  bad pairs: " + badpaired + "\n");
        bw_summary.write("##Reads by pseudo-chromosomes:\n");
        for (int i = 0; i < mappedReadsOnChromosome.length; i++) {
            bw_summary.write(" #" + seqDic.getSequence(i).getSequenceName() + " " + mappedReadsOnChromosome[i] + "\n");
        }
        bw_summary.write("##Reads by barcodes:\n");
        for (Map.Entry<String, Integer> entry : readsOnBarcode.entrySet()) {
            bw_summary.write(" #" + entry.getKey() + " " + entry.getValue() + "\n");
        }
        bw_summary.close();
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
    myLogger.info("Completed. " + counter + " bam records processed.");
}
Also used : SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) ArrayList(java.util.ArrayList) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) IOException(java.io.IOException) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) BufferedWriter(java.io.BufferedWriter) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) File(java.io.File) HashMap(java.util.HashMap) Map(java.util.Map)

Example 64 with SAMRecordIterator

use of htsjdk.samtools.SAMRecordIterator in project polyGembler by c-zhou.

the class TenXSamtools method runSort.

private void runSort() {
    // TODO Auto-generated method stub
    final SamReaderFactory factory = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS).validationStringency(ValidationStringency.SILENT);
    final SamReader inputSam = factory.open(new File(this.bam_in));
    final SAMFileHeader sort_header = inputSam.getFileHeader();
    switch(this.sort_order) {
        case coordinate:
            sort_header.setSortOrder(SortOrder.coordinate);
            break;
        case queryname:
            sort_header.setSortOrder(SortOrder.queryname);
            break;
        case barcode:
            sort_header.setSortOrder(SortOrder.unknown);
            break;
    }
    SAMRecordIterator iter = inputSam.iterator();
    long record_inCount = 0;
    SAMRecord[] buff = new SAMRecord[this.batch_size];
    int k = 0;
    SAMRecord temp = iter.hasNext() ? iter.next() : null;
    this.initial_thread_pool();
    while (temp != null) {
        buff[k++] = temp;
        record_inCount++;
        temp = iter.hasNext() ? iter.next() : null;
        if (k == this.batch_size || temp == null) {
            executor.submit(new Runnable() {

                private SAMRecord[] records;

                @Override
                public void run() {
                    // TODO Auto-generated method stub
                    try {
                        Arrays.sort(records, comprator);
                        final SAMFileWriter outputSam;
                        synchronized (lock) {
                            outputSam = new SAMFileWriterFactory().makeSAMOrBAMWriter(sort_header, true, new File(bam_out + String.format("%08d", batch++)));
                        }
                        int count = 0;
                        for (SAMRecord record : records) {
                            if (record != null) {
                                count++;
                                outputSam.addAlignment(record);
                            }
                        }
                        outputSam.close();
                        synchronized (lock) {
                            record_count += count;
                        }
                        myLogger.info("[" + Thread.currentThread().getName() + "] " + record_count + " records processed.");
                    } catch (Exception e) {
                        Thread t = Thread.currentThread();
                        t.getUncaughtExceptionHandler().uncaughtException(t, e);
                        e.printStackTrace();
                        executor.shutdown();
                        System.exit(1);
                    }
                }

                public Runnable init(SAMRecord[] buff) {
                    // TODO Auto-generated method stub
                    this.records = buff;
                    return (this);
                }
            }.init(buff));
            k = 0;
            buff = new SAMRecord[this.batch_size];
        }
    }
    iter.close();
    try {
        inputSam.close();
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
    myLogger.info(record_inCount + " records read from " + this.bam_in);
    this.waitFor();
    // merge all batches
    myLogger.info("Merge " + batch + " files.");
    final SAMFileWriter outputSam = new SAMFileWriterFactory().makeSAMOrBAMWriter(sort_header, true, new File(this.bam_out));
    final SamReader[] batchSam = new SamReader[batch];
    final SAMRecordIterator[] iterSam = new SAMRecordIterator[batch];
    final boolean[] reachFileEnd = new boolean[batch];
    final TreeMap<SAMRecord, Integer> treeMap = new TreeMap<SAMRecord, Integer>(this.comprator);
    for (int i = 0; i != batch; i++) {
        batchSam[i] = factory.open(new File(this.bam_out + String.format("%08d", i)));
        iterSam[i] = batchSam[i].iterator();
        if (iterSam[i].hasNext())
            treeMap.put(iterSam[i].next(), i);
        reachFileEnd[i] = !iterSam[i].hasNext();
    }
    Entry<SAMRecord, Integer> firstEntry;
    int bch, nReachFileEnd = 0;
    for (boolean b : reachFileEnd) if (b)
        nReachFileEnd++;
    long record_outCount = 0;
    while (!treeMap.isEmpty()) {
        firstEntry = treeMap.pollFirstEntry();
        outputSam.addAlignment(firstEntry.getKey());
        record_outCount++;
        bch = firstEntry.getValue();
        if (!reachFileEnd[bch]) {
            treeMap.put(iterSam[bch].next(), bch);
            if (!iterSam[bch].hasNext()) {
                reachFileEnd[bch] = true;
                nReachFileEnd++;
            }
        }
        if (treeMap.isEmpty() && nReachFileEnd != batch) {
            for (int i = 0; i != batch; i++) {
                if (!reachFileEnd[i]) {
                    treeMap.put(iterSam[i].next(), i);
                    if (!iterSam[i].hasNext()) {
                        reachFileEnd[i] = true;
                        nReachFileEnd++;
                    }
                }
            }
        }
    }
    try {
        outputSam.close();
        for (int i = 0; i != batch; i++) {
            iterSam[i].close();
            batchSam[i].close();
            new File(this.bam_out + String.format("%08d", i)).delete();
        }
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
    myLogger.info(record_outCount + " records written to " + this.bam_out);
}
Also used : SamReaderFactory(htsjdk.samtools.SamReaderFactory) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) IOException(java.io.IOException) TreeMap(java.util.TreeMap) IOException(java.io.IOException) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) File(java.io.File)

Example 65 with SAMRecordIterator

use of htsjdk.samtools.SAMRecordIterator in project polyGembler by c-zhou.

the class AssemblyGraph method validateAssemblyGraph.

private static void validateAssemblyGraph(String query_file, String bam_in) {
    final SamReaderFactory factory = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS).validationStringency(ValidationStringency.SILENT);
    qry_seqs = Sequence.parseFastaFileWithRevCmpAsMap(query_file);
    final SamReader in1 = factory.open(new File(bam_in));
    SAMRecordIterator iter1 = in1.iterator();
    final List<SAMRecord> buffered_records = new ArrayList<SAMRecord>();
    SAMRecord tmp_record1 = iter1.hasNext() ? iter1.next() : null;
    String record_id = tmp_record1.getReadName();
    SAMRecord r1, r2;
    int i1, i2;
    StringBuilder oos = new StringBuilder();
    String source, sink;
    final BFSShortestPath<Integer, DefaultWeightedEdge> bfs = new BFSShortestPath<Integer, DefaultWeightedEdge>(assembly_graph);
    while (tmp_record1 != null) {
        buffered_records.clear();
        buffered_records.add(tmp_record1);
        while ((tmp_record1 = iter1.hasNext() ? iter1.next() : null) != null && tmp_record1.getReadName().equals(record_id)) {
            buffered_records.add(tmp_record1);
        }
        if (buffered_records.size() == 1) {
            if (tmp_record1 != null)
                record_id = tmp_record1.getReadName();
            continue;
        }
        // sort records by head clipping size
        buffered_records.sort(new Comparator<SAMRecord>() {

            @Override
            public int compare(SAMRecord r0, SAMRecord r1) {
                // TODO Auto-generated method stub
                return Integer.compare(head_clip(r0), head_clip(r1));
            }

            private int head_clip(SAMRecord r) {
                CigarElement cigar = r.getReadNegativeStrandFlag() ? r.getCigar().getLastCigarElement() : r.getCigar().getFirstCigarElement();
                return cigar.getOperator().isClipping() ? cigar.getLength() : 0;
            }
        });
        // calculate distance for adjacent alignment record pairs
        r1 = buffered_records.get(0);
        // for writing
        oos.setLength(0);
        oos.append(record_id);
        oos.append("[");
        oos.append(buffered_records.size());
        oos.append(", ");
        oos.append(readLength(r1));
        // find reference index
        source = r1.getReferenceName() + (r1.getReadNegativeStrandFlag() ? "'" : "");
        i1 = contig_coordinate.getKey(source);
        oos.append(", ");
        oos.append(source);
        oos.append("] ");
        for (int i = 1; i < buffered_records.size(); i++) {
            r2 = buffered_records.get(i);
            sink = r2.getReferenceName() + (r2.getReadNegativeStrandFlag() ? "'" : "");
            i2 = contig_coordinate.getKey(sink);
            oos.append(", (");
            GraphPath<Integer, DefaultWeightedEdge> e = bfs.getPath(i1, i2);
            oos.append(sink);
            oos.append(",");
            oos.append(e == null ? "Inf" : e.getLength());
            oos.append(",");
            oos.append(pathLength(e));
            oos.append(")");
            i1 = i2;
        }
        oos.append("\n");
        System.out.println(oos.toString());
        if (tmp_record1 != null)
            record_id = tmp_record1.getReadName();
    }
    try {
        iter1.close();
        in1.close();
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
}
Also used : SamReaderFactory(htsjdk.samtools.SamReaderFactory) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) ArrayList(java.util.ArrayList) IOException(java.io.IOException) CigarElement(htsjdk.samtools.CigarElement) DefaultWeightedEdge(org.jgrapht.graph.DefaultWeightedEdge) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) BFSShortestPath(cz1.ngs.model.BFSShortestPath) File(java.io.File)

Aggregations

SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)107 SAMRecord (htsjdk.samtools.SAMRecord)92 SamReader (htsjdk.samtools.SamReader)83 SAMFileHeader (htsjdk.samtools.SAMFileHeader)49 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)47 File (java.io.File)47 SAMFileWriter (htsjdk.samtools.SAMFileWriter)45 IOException (java.io.IOException)41 ArrayList (java.util.ArrayList)34 CigarElement (htsjdk.samtools.CigarElement)30 Cigar (htsjdk.samtools.Cigar)26 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)24 SamReaderFactory (htsjdk.samtools.SamReaderFactory)21 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)18 CigarOperator (htsjdk.samtools.CigarOperator)16 Interval (htsjdk.samtools.util.Interval)16 PrintWriter (java.io.PrintWriter)15 HashMap (java.util.HashMap)15 SAMFileWriterFactory (htsjdk.samtools.SAMFileWriterFactory)14 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)14