Search in sources :

Example 56 with SAMRecordIterator

use of htsjdk.samtools.SAMRecordIterator in project ASCIIGenome by dariober.

From the class Utils, the method initRegionFromFile:

/**
 * Get the first chrom string from the first line of the input file. As you add support for more
 * filetypes you should update this function. This method is very dirty and shouldn't be trusted
 * 100%.
 *
 * @param x Local path or URL of the input file.
 * @return A region string, either a bare contig name or "chrom:start", usable to initialize
 *         the genomic view.
 * @throws IOException
 * @throws InvalidGenomicCoordsException if asked to initialize a bigWig/bigBed from a URL.
 * @throws SQLException
 * @throws InvalidRecordException
 * @throws InvalidCommandLineException
 * @throws ClassNotFoundException
 */
@SuppressWarnings("unused")
public static String initRegionFromFile(String x) throws IOException, InvalidGenomicCoordsException, ClassNotFoundException, InvalidCommandLineException, InvalidRecordException, SQLException {
    UrlValidator urlValidator = new UrlValidator();
    String region = "";
    TrackFormat fmt = Utils.getFileTypeFromName(x);
    if (fmt.equals(TrackFormat.BAM)) {
        SamReader samReader;
        if (urlValidator.isValid(x)) {
            samReader = SamReaderFactory.makeDefault().open(SamInputResource.of(new URL(x)));
        } else {
            SamReaderFactory srf = SamReaderFactory.make();
            srf.validationStringency(ValidationStringency.SILENT);
            samReader = srf.open(new File(x));
        }
        try {
            // Default: start from the first contig in the sequence dictionary.
            region = samReader.getFileHeader().getSequence(0).getSequenceName();
            SAMRecordIterator iter = samReader.iterator();
            if (iter.hasNext()) {
                // If there are records in this BAM, init from the first record instead.
                SAMRecord rec = iter.next();
                region = rec.getContig() + ":" + rec.getAlignmentStart();
            }
        } finally {
            // Close unconditionally: previously the reader leaked when the BAM had no records.
            samReader.close();
        }
        return region;
    } else if (fmt.equals(TrackFormat.BIGWIG) && !urlValidator.isValid(x)) {
        // Loading from URL is painfully slow so do not initialize from URL
        return initRegionFromBigWig(x);
    } else if (fmt.equals(TrackFormat.BIGBED) && !urlValidator.isValid(x)) {
        // Loading from URL is painfully slow so do not initialize from URL
        return initRegionFromBigBed(x);
    } else if (urlValidator.isValid(x) && (fmt.equals(TrackFormat.BIGWIG) || fmt.equals(TrackFormat.BIGBED))) {
        System.err.println("Refusing to initialize from URL");
        throw new InvalidGenomicCoordsException();
    } else if (fmt.equals(TrackFormat.TDF)) {
        // Use the first "real" chromosome; TDF files carry a synthetic "All" entry.
        Iterator<String> iter = TDFReader.getReader(x).getChromosomeNames().iterator();
        while (iter.hasNext()) {
            region = iter.next();
            if (!region.equals("All")) {
                return region;
            }
        }
        System.err.println("Cannot initialize from " + x);
        throw new RuntimeException();
    } else {
        // Input file appears to be a generic interval file. We expect chrom to be in column 1.
        // VCF files are also included here since they are either gzip or plain ASCII.
        BufferedReader br;
        if (x.toLowerCase().endsWith(".gz") || x.toLowerCase().endsWith(".bgz")) {
            InputStream rawStream;
            if (urlValidator.isValid(x)) {
                rawStream = new URL(x).openStream();
            } else {
                rawStream = new FileInputStream(x);
            }
            Reader decoder = new InputStreamReader(new GZIPInputStream(rawStream), "UTF-8");
            br = new BufferedReader(decoder);
        } else {
            if (urlValidator.isValid(x)) {
                InputStream instream = new URL(x).openStream();
                Reader decoder = new InputStreamReader(instream, "UTF-8");
                br = new BufferedReader(decoder);
            } else {
                br = new BufferedReader(new FileReader(x));
            }
        }
        try {
            String line;
            while ((line = br.readLine()) != null) {
                line = line.trim();
                // Skip headers, blank lines and track definition lines.
                if (line.startsWith("#") || line.isEmpty() || line.startsWith("track ")) {
                    continue;
                }
                if (fmt.equals(TrackFormat.VCF)) {
                    // CHROM and POS are the first two tab-separated VCF columns.
                    // Split once instead of twice as before.
                    String[] vcfFields = line.split("\t");
                    region = vcfFields[0] + ":" + vcfFields[1];
                } else {
                    IntervalFeature feature = new IntervalFeature(line, fmt, null);
                    region = feature.getChrom() + ":" + feature.getFrom();
                }
                return region;
            }
        } finally {
            // Close in all cases: previously the reader leaked when the file had no records.
            br.close();
        }
        // Reaching here means the input has no records.
        region = "Undefined_contig";
        if (fmt.equals(TrackFormat.VCF)) {
            // An empty VCF may still declare contigs in its header: use the first one.
            SAMSequenceDictionary seqdict = getVCFHeader(x).getSequenceDictionary();
            if (seqdict != null) {
                Iterator<SAMSequenceRecord> iter = seqdict.getSequences().iterator();
                if (iter.hasNext()) {
                    region = iter.next().getSequenceName();
                }
            }
        }
        return region;
    }
}
Also used : TrackFormat(tracks.TrackFormat) SamReaderFactory(htsjdk.samtools.SamReaderFactory) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) InputStreamReader(java.io.InputStreamReader) GZIPInputStream(java.util.zip.GZIPInputStream) ByteArrayInputStream(java.io.ByteArrayInputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) Reader(java.io.Reader) AbstractFeatureReader(htsjdk.tribble.AbstractFeatureReader) TabixReader(htsjdk.tribble.readers.TabixReader) InputStreamReader(java.io.InputStreamReader) BufferedReader(java.io.BufferedReader) TDFReader(org.broad.igv.tdf.TDFReader) BBFileReader(org.broad.igv.bbfile.BBFileReader) SamReader(htsjdk.samtools.SamReader) FileReader(java.io.FileReader) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) URL(java.net.URL) FileInputStream(java.io.FileInputStream) GZIPInputStream(java.util.zip.GZIPInputStream) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) UrlValidator(org.apache.commons.validator.routines.UrlValidator) InvalidGenomicCoordsException(exceptions.InvalidGenomicCoordsException) BufferedReader(java.io.BufferedReader) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) BBFileReader(org.broad.igv.bbfile.BBFileReader) FileReader(java.io.FileReader) File(java.io.File) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) IntervalFeature(tracks.IntervalFeature)

Example 57 with SAMRecordIterator

use of htsjdk.samtools.SAMRecordIterator in project polyGembler by c-zhou.

From the class SAMtools, the method run:

/**
 * Copies up to the first 100 alignment records from the input SAM/BAM into the output file,
 * adding read group "cz1" to the header and tagging every copied record with RG:cz1.
 * The header sort order is reset to {@code unsorted} before writing.
 */
@Override
public void run() {
    final SamReaderFactory factory = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS).validationStringency(ValidationStringency.SILENT);
    final SamReader inputSam = factory.open(new File(mySamFile));
    samHeader = inputSam.getFileHeader();
    samHeader.setSortOrder(SortOrder.unsorted);
    SAMRecordIterator iter = inputSam.iterator();
    // Register the read group in the header so the per-record RG tags are valid.
    SAMReadGroupRecord rg = new SAMReadGroupRecord("cz1");
    rg.setSample("cz1");
    samHeader.addReadGroup(rg);
    // samHeader.setAttribute("RG", "cz1");
    final SAMFileWriter outSam = new SAMFileWriterFactory().makeSAMOrBAMWriter(samHeader, true, new File(myOutput));
    // Guard with hasNext(): the previous version called iter.next() 100 times blindly and
    // threw NoSuchElementException on inputs with fewer than 100 records. The unused
    // locals (header attributes, read groups, before/after tag lists) were dropped.
    for (int i = 0; i < 100 && iter.hasNext(); i++) {
        SAMRecord record = iter.next();
        record.setAttribute("RG", "cz1");
        outSam.addAlignment(record);
    }
    myLogger.info("exit...");
    try {
        inputSam.close();
    } catch (IOException e) {
        e.printStackTrace();
    }
    outSam.close();
}
Also used : SamReaderFactory(htsjdk.samtools.SamReaderFactory) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) IOException(java.io.IOException) SamReader(htsjdk.samtools.SamReader) Entry(java.util.Map.Entry) SAMRecord(htsjdk.samtools.SAMRecord) File(java.io.File) SAMTagAndValue(htsjdk.samtools.SAMRecord.SAMTagAndValue)

Example 58 with SAMRecordIterator

use of htsjdk.samtools.SAMRecordIterator in project polyGembler by c-zhou.

From the class SamFileSplit, the method run:

/**
 * Splits each input BAM into per-BED output BAMs. For every BED file in {@code bed_in} an
 * output directory is created under {@code bam_out}; records whose reference sequence is
 * listed in that BED are written there with a header restricted to the BED's sequences.
 * One worker task is submitted to the thread pool per input BAM.
 */
@Override
public void run() {
    Utils.makeOutputDir(bam_out);
    // One output directory per BED file, named after the BED without its extension.
    // NOTE(review): ".bed$" is a regex where '.' matches any character, so a name ending
    // in e.g. "xbed" would also have its last four characters stripped — confirm intended.
    final File[] beds = new File(bed_in).listFiles();
    final String[] out_prefix = new String[beds.length];
    for (int i = 0; i < beds.length; i++) {
        out_prefix[i] = bam_out + "/" + beds[i].getName().replaceAll(".bed$", "");
        Utils.makeOutputDir(out_prefix[i]);
    }
    // Collect all .bam files from the input directory.
    final File[] bams = new File(bam_in).listFiles(new FilenameFilter() {

        @Override
        public boolean accept(File dir, String name) {
            return name.endsWith(".bam");
        }
    });
    this.initial_thread_pool();
    for (File bam : bams) {
        executor.submit(new Runnable() {

            // Input BAM handled by this task; set via init() because the loop variable
            // cannot be captured directly here.
            private File bam;

            @Override
            public void run() {
                try {
                    final SamReaderFactory factory = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS).validationStringency(ValidationStringency.SILENT);
                    final SamReader inputSam = factory.open(bam);
                    final SAMFileHeader header = inputSam.getFileHeader();
                    final SAMRecordIterator iter = inputSam.iterator();
                    final SAMSequenceDictionary seqdic = header.getSequenceDictionary();
                    final SAMFileWriter[] outputSam = new SAMFileWriter[beds.length];
                    final SAMSequenceDictionary[] seqdics = new SAMSequenceDictionary[beds.length];
                    // Maps a sequence name to the index of the BED (and output writer) it belongs to.
                    // NOTE(review): if the same sequence name appears in more than one BED, the
                    // last BED wins (the entry is overwritten) — confirm BEDs are disjoint.
                    final Map<String, Integer> outMap = new HashMap<String, Integer>();
                    final String out = bam.getName();
                    for (int i = 0; i < beds.length; i++) {
                        // Sequence names mentioned in column 1 of this BED.
                        Set<String> bed_seq = new HashSet<String>();
                        String tmp;
                        BufferedReader br = new BufferedReader(new FileReader(beds[i]));
                        String line;
                        while ((line = br.readLine()) != null) {
                            tmp = line.split("\\s+")[0];
                            bed_seq.add(tmp);
                            outMap.put(tmp, i);
                        }
                        br.close();
                        // Build a reduced header containing only this BED's sequences, while
                        // carrying over version/sort-order attributes, read groups and programs.
                        final SAMFileHeader header_i = new SAMFileHeader();
                        final SAMSequenceDictionary seqdic_i = new SAMSequenceDictionary();
                        header_i.setAttribute("VN", header.getAttribute("VN"));
                        header_i.setAttribute("SO", header.getAttribute("SO"));
                        List<SAMSequenceRecord> seqs = seqdic.getSequences();
                        for (SAMSequenceRecord seq : seqs) if (bed_seq.contains(seq.getSequenceName()))
                            seqdic_i.addSequence(seq);
                        header_i.setSequenceDictionary(seqdic_i);
                        for (SAMReadGroupRecord rg : header.getReadGroups()) header_i.addReadGroup(rg);
                        for (SAMProgramRecord pg : header.getProgramRecords()) header_i.addProgramRecord(pg);
                        outputSam[i] = new SAMFileWriterFactory().makeSAMOrBAMWriter(header_i, true, new File(out_prefix[i] + "/" + out));
                        seqdics[i] = seqdic_i;
                    }
                    Set<String> refs = outMap.keySet();
                    String ref;
                    int f;
                    // Dispatch every record to the writer of the BED that owns its reference,
                    // remapping the reference index into the reduced dictionary first.
                    while (iter.hasNext()) {
                        SAMRecord rec = iter.next();
                        if (refs.contains(ref = rec.getReferenceName())) {
                            f = outMap.get(ref);
                            rec.setReferenceIndex(seqdics[f].getSequenceIndex(ref));
                            outputSam[f].addAlignment(rec);
                        }
                    }
                    iter.close();
                    inputSam.close();
                    for (int i = 0; i < outputSam.length; i++) outputSam[i].close();
                    myLogger.info(out + " return true");
                } catch (Exception e) {
                    // Fail fast: report through the uncaught-exception handler and abort the JVM.
                    Thread t = Thread.currentThread();
                    t.getUncaughtExceptionHandler().uncaughtException(t, e);
                    e.printStackTrace();
                    executor.shutdown();
                    System.exit(1);
                }
            }

            // Binds the task to its input BAM and returns this runnable for submission.
            public Runnable init(File bam) {
                this.bam = bam;
                return (this);
            }
        }.init(bam));
    }
    this.waitFor();
}
Also used : SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) HashMap(java.util.HashMap) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) SAMProgramRecord(htsjdk.samtools.SAMProgramRecord) FilenameFilter(java.io.FilenameFilter) SamReader(htsjdk.samtools.SamReader) FileReader(java.io.FileReader) HashSet(java.util.HashSet) SamReaderFactory(htsjdk.samtools.SamReaderFactory) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) IOException(java.io.IOException) SAMRecord(htsjdk.samtools.SAMRecord) BufferedReader(java.io.BufferedReader) SAMFileHeader(htsjdk.samtools.SAMFileHeader) File(java.io.File)

Example 59 with SAMRecordIterator

use of htsjdk.samtools.SAMRecordIterator in project polyGembler by c-zhou.

From the class Anchor, the method run:

/**
 * Anchors query (assembly) sequences onto subject/reference sequences. The pipeline is:
 * (1) load the assembly graph (GFA) and the subject FASTA; (2) record buffered 'N'/'n' gap
 * ranges per subject sequence; (3) read the SAM alignments and group candidate placements
 * per query and per subject; (4) for each subject, grow a local directed graph ("razor")
 * of linked placements and trace the highest-scoring path through it with a pseudo-DFS.
 */
@Override
public void run() {
    // read assembly graph file
    final GFA gfa = new GFA(query_file, asm_graph);
    qry_seqs = gfa.getSequenceMap();
    sub_seqs = Sequence.parseFastaFileAsMap(subject_file);
    myLogger.info("  GFA vertices: " + gfa.vertexSet().size());
    myLogger.info("  GFA edges   : " + gfa.edgeSet().size());
    // myLogger.info("  GFA edges --- ");
    // for(OverlapEdge olap : gfa.edgeSet())
    // myLogger.info(olap.olapInfo().toString());
    // find 'N/n's in subject/reference sequences
    // which could have impact on parsing the blast records
    sub_gaps = new HashMap<String, TreeRangeSet<Integer>>();
    for (Map.Entry<String, Sequence> entry : sub_seqs.entrySet()) {
        String seq_sn = entry.getKey();
        String seq_str = entry.getValue().seq_str();
        // Collect every N/n position, then widen each run by gap_buff on both sides.
        final TreeRangeSet<Integer> tmp_rangeSet = TreeRangeSet.create();
        for (int j = 0; j < seq_str.length(); j++) {
            if (seq_str.charAt(j) == 'N' || seq_str.charAt(j) == 'n')
                // blast record is 1-based closed coordination
                tmp_rangeSet.add(Range.closed(j + 1, j + 1).canonical(DiscreteDomain.integers()));
        }
        int seq_ln = seq_str.length();
        final TreeRangeSet<Integer> range_set = TreeRangeSet.create();
        for (Range<Integer> range : tmp_rangeSet.asRanges()) {
            int lowerend = range.hasLowerBound() ? Math.max(0, range.lowerEndpoint() - gap_buff) : 0;
            int upperend = range.hasUpperBound() ? Math.min(seq_ln, range.upperEndpoint() + gap_buff - 1) : seq_ln;
            range_set.add(Range.closed(lowerend, upperend).canonical(DiscreteDomain.integers()));
        }
        sub_gaps.put(seq_sn, range_set);
    }
    // read alignment file and place the query sequences
    // initPlace: query id (forward, or id+"'" for reverse) -> candidate alignment segments.
    // initPseudoAssembly: subject id -> all alignment segments placed on it.
    final Map<String, Set<SAMSegment>> initPlace = new HashMap<String, Set<SAMSegment>>();
    final Map<String, List<SAMSegment>> initPseudoAssembly = new HashMap<String, List<SAMSegment>>();
    for (String sub_seq : sub_seqs.keySet()) initPseudoAssembly.put(sub_seq, new ArrayList<SAMSegment>());
    try {
        final SamReaderFactory factory = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS).validationStringency(ValidationStringency.SILENT);
        final SamReader in1 = factory.open(new File(align_file));
        final SAMRecordIterator iter1 = in1.iterator();
        String qry;
        int qry_ln;
        double min_aln;
        final List<SAMSegment> buff = new ArrayList<SAMSegment>();
        // NOTE(review): this loop assumes iter1.next() returns null when exhausted; htsjdk
        // iterators typically throw NoSuchElementException instead, and an empty SAM would
        // fail on this first call — confirm against the htsjdk version in use.
        SAMRecord rc = iter1.next();
        while (rc != null) {
            // Gather all consecutive records for the same query name into buff.
            qry = rc.getReadName();
            qry_ln = qry_seqs.get(qry).seq_ln();
            buff.clear();
            if (!rc.getReadUnmappedFlag())
                buff.add(SAMSegment.samRecord(rc, true, qry_ln));
            while ((rc = iter1.next()) != null && rc.getReadName().equals(qry)) {
                buff.add(SAMSegment.samRecord(rc, true, qry_ln));
            }
            if (buff.isEmpty())
                continue;
            min_aln = 0.9 * buff.get(0).qlength();
            // keep alignment fragment that has qual>0
            // Split candidates into forward (qseqid equals the query) and reverse placements.
            Set<SAMSegment> init_f = new HashSet<SAMSegment>();
            Set<SAMSegment> init_r = new HashSet<SAMSegment>();
            for (SAMSegment record : buff) {
                if (record.qual() == 0 && record.qlength() < min_aln)
                    continue;
                if (record.qseqid().equals(qry))
                    init_f.add(record);
                else
                    init_r.add(record);
                initPseudoAssembly.get(record.sseqid()).add(record);
            }
            if (!init_f.isEmpty())
                initPlace.put(qry, init_f);
            if (!init_r.isEmpty())
                initPlace.put(qry + "'", init_r);
        }
        iter1.close();
        in1.close();
    } catch (IOException e) {
        e.printStackTrace();
    }
    // Collections.sort(initPseudoAssembly.get("1_pilon"), new AlignmentSegment.SubjectCoordinationComparator());
    // if(debug) {
    // for(SAMSegment record : initPseudoAssembly.get("1_pilon")) {
    // System.out.println(record.qseqid()+":"+record.sstart()+"-"+record.send());
    // }
    // }
    final Set<SAMSegment> contained = new HashSet<SAMSegment>();
    final Set<SAMSegment> placed = new HashSet<SAMSegment>();
    // Maximum subject-coordinate distance for two placements to be linked.
    final int flank_size = 10000;
    int distance;
    for (String sub_seq : sub_seqs.keySet()) {
        // sub_seq = "Chr10";
        if (sub_seq.equals("Chr00"))
            continue;
        myLogger.info(">>>>>>>>>>>>>" + sub_seq + "<<<<<<<<<<<<<<<<");
        final List<SAMSegment> seq_by_sub = initPseudoAssembly.get(sub_seq);
        Collections.sort(seq_by_sub, new AlignmentSegment.SubjectCoordinationComparator());
        placed.clear();
        int nSeq = seq_by_sub.size();
        double edge_penalty, edge_score;
        SAMSegment root_seq, source_seq, target_seq;
        Set<SAMSegment> target_seqs;
        Set<OverlapEdge> outgoing;
        TraceableEdge edge;
        String root_seqid, source_seqid, target_seqid;
        TraceableVertex<String> root_vertex, source_vertex, target_vertex;
        Deque<SAMSegment> deque = new ArrayDeque<SAMSegment>();
        final List<TraceableVertex<String>> traceable = new ArrayList<TraceableVertex<String>>();
        for (int i = 0; i < nSeq; i++) {
            root_seq = seq_by_sub.get(i);
            root_seqid = root_seq.qseqid();
            if (placed.contains(root_seq))
                continue;
            // Grow the "razor" graph from this root by following GFA overlap edges whose
            // target has a nearby placement on the current subject sequence.
            final TraceableDirectedWeightedPseudograph<String> razor = new TraceableDirectedWeightedPseudograph<String>(TraceableEdge.class);
            // final ListenableDirectedWeightedGraph<TraceableVertex<String>, DefaultWeightedEdge> razor =
            // new ListenableDirectedWeightedGraph<TraceableVertex<String>, DefaultWeightedEdge>(DefaultWeightedEdge.class);
            // JGraphModelAdapter<TraceableVertex<String>, DefaultWeightedEdge> jgAdapter =
            // new JGraphModelAdapter<TraceableVertex<String>, DefaultWeightedEdge>(razor);
            // JGraph jgraph = new JGraph(jgAdapter);
            deque.clear();
            deque.push(root_seq);
            contained.clear();
            while (!deque.isEmpty()) {
                source_seq = deque.pop();
                source_seqid = source_seq.qseqid();
                if (contained.contains(source_seq))
                    continue;
                contained.add(source_seq);
                source_vertex = new TraceableVertex<String>(source_seqid);
                source_vertex.setSAMSegment(source_seq);
                if (!razor.containsVertex(source_vertex))
                    razor.addVertex(source_vertex);
                outgoing = gfa.outgoingEdgesOf(source_seqid);
                for (OverlapEdge out : outgoing) {
                    target_seqid = gfa.getEdgeTarget(out);
                    if (!initPlace.containsKey(target_seqid))
                        continue;
                    // Among the target's candidate placements, pick the one closest to the
                    // source placement in subject coordinates.
                    target_seqs = initPlace.get(target_seqid);
                    distance = Integer.MAX_VALUE;
                    target_seq = null;
                    for (SAMSegment seq : target_seqs) {
                        int d = AlignmentSegment.sdistance(source_seq, seq);
                        if (d < distance) {
                            distance = d;
                            target_seq = seq;
                        }
                    }
                    if (distance <= flank_size) {
                        target_vertex = new TraceableVertex<String>(target_seqid);
                        target_vertex.setSAMSegment(target_seq);
                        if (!razor.containsVertex(target_vertex))
                            razor.addVertex(target_vertex);
                        if (razor.containsEdge(source_vertex, target_vertex))
                            continue;
                        edge = razor.addEdge(source_vertex, target_vertex);
                        // calculate edge weight
                        // higher weight edges are those,
                        /**
                         **
                         *							//       1.  large/long alignment segments vertices
                         *							// TODO: 2*. small gaps on the reference
                         *							edge_weight = qry_seqs.get(source_seqid).seq_ln()+
                         *									qry_seqs.get(target_seqid).seq_ln()-
                         *									gfa.getEdge(source_seqid, target_seqid).olap();
                         */
                        // TODO: 1*. large/long alignment segments vertices
                        // 2.  small gaps on the reference
                        // Penalty = subject-coordinate gap; score = combined non-overlapping length.
                        edge_penalty = AlignmentSegment.sdistance(source_seq, target_seq);
                        edge.setPenalty(edge_penalty);
                        edge_score = qry_seqs.get(source_seqid).seq_ln() + qry_seqs.get(target_seqid).seq_ln() - gfa.getEdge(source_seqid, target_seqid).olap();
                        edge.setScore(edge_score);
                        deque.push(target_seq);
                    }
                }
            }
            if (ddebug)
                myLogger.info(root_seqid + " " + razor.vertexSet().size() + " " + razor.edgeSet().size() + " done");
            // JFrame frame = new JFrame();
            // frame.getContentPane().add(jgraph);
            // frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
            // frame.pack();
            // frame.setVisible(true);
            // "pseudo"-DFS to find the route with the highest score
            final Map<String, TraceableVertex<String>> razv_map = new HashMap<String, TraceableVertex<String>>();
            for (TraceableVertex<String> v : razor.vertexSet()) razv_map.put(v.getId(), v);
            // we use a bidirectional hashmap to simulate the deque
            // this is because we may need to do deletions
            // Deque<TraceableVertex<String>> queue = new ArrayDeque<TraceableVertex<String>>();
            final TreeBidiMap<Long, TraceableVertex<String>> bidiQ = new TreeBidiMap<Long, TraceableVertex<String>>();
            root_vertex = razv_map.get(root_seqid);
            root_vertex.setSAMSegment(root_seq);
            root_vertex.setScore(qry_seqs.get(root_seqid).seq_ln());
            root_vertex.setPenalty(0);
            root_vertex.setStatus(true);
            bidiQ.put(0L, root_vertex);
            double max_ws = Double.NEGATIVE_INFINITY, source_penalty, target_penalty, source_score, target_score, penalty, score, target_ws, source_ws, ws;
            int source_ln;
            Set<TraceableEdge> out_edges;
            TraceableVertex<String> opt_vertex = null;
            long sizeQ;
            boolean isLeaf;
            if (ddebug)
                for (TraceableEdge e : razor.edgeSet()) myLogger.info(e.toString() + "(" + razor.getEdgeSource(e).getSAMSegment().toString() + "|" + razor.getEdgeTarget(e).getSAMSegment().toString() + "|" + e.getScore() + "-" + e.getPenalty() + ")");
            // Relax edges from the queue, keeping the best (score - penalty) per vertex.
            // NOTE(review): bidiQ keys are reused via sizeQ++ starting from lastKey();
            // a put() on an existing key replaces that entry — confirm this cannot drop
            // a pending vertex from the queue.
            while (!bidiQ.isEmpty()) {
                sizeQ = bidiQ.lastKey();
                source_vertex = bidiQ.get(sizeQ);
                bidiQ.remove(sizeQ);
                source_ln = qry_seqs.get(source_vertex.getId()).seq_ln();
                source_score = source_vertex.getScore() - source_ln;
                source_penalty = source_vertex.getPenalty();
                source_ws = source_score - source_penalty;
                isLeaf = true;
                out_edges = razor.outgoingEdgesOf(source_vertex);
                for (TraceableEdge out : out_edges) {
                    // this is not right because graph edges are immutable?
                    // target_vertex = razor.getEdgeTarget(out);
                    target_vertex = razv_map.get(razor.getEdgeTarget(out).getId());
                    target_score = target_vertex.getScore();
                    target_penalty = target_vertex.getPenalty();
                    target_ws = target_score - target_penalty;
                    edge_penalty = out.getPenalty();
                    penalty = source_penalty + edge_penalty;
                    edge_score = out.getScore();
                    score = source_score + edge_score;
                    ws = score - penalty;
                    // Skip if the edge gap is too large, or the target already has an equal or
                    // better path, or following it would close a loop back into our own trace.
                    if (edge_penalty > flank_size || target_vertex.getStatus() && (ws <= target_ws || isLoopback(razor, source_vertex, target_vertex)))
                        continue;
                    isLeaf = false;
                    target_vertex.setBackTrace(source_vertex);
                    target_vertex.setScore(score);
                    target_vertex.setPenalty(penalty);
                    target_vertex.setStatus(true);
                    bidiQ.put(sizeQ++, target_vertex);
                }
                // A leaf (no extension made) competes for the best overall weighted score.
                if (isLeaf && source_ws > max_ws) {
                    penalty = source_vertex.getPenalty();
                    score = source_vertex.getScore();
                    max_ws = source_ws;
                    opt_vertex = source_vertex;
                    if (ddebug) {
                        String trace = opt_vertex.toString() + ":" + opt_vertex.getSAMSegment().sstart() + "-" + opt_vertex.getSAMSegment().send() + "(" + opt_vertex.getScore() + "-" + opt_vertex.getPenalty() + ")";
                        TraceableVertex<String> optx = opt_vertex;
                        while ((optx = optx.getBackTrace()) != null) {
                            trace += "," + optx.toString() + ":" + optx.getSAMSegment().sstart() + "-" + optx.getSAMSegment().send() + "(" + optx.getScore() + "-" + optx.getPenalty() + ")";
                        }
                        myLogger.info("trace back [" + score + ", " + penalty + "]: " + trace);
                    }
                }
            }
            // Mark every segment on the optimal path as placed so later roots skip them.
            traceable.add(opt_vertex);
            Set<TraceableVertex<String>> optx = new HashSet<TraceableVertex<String>>();
            optx.add(opt_vertex);
            while ((opt_vertex = opt_vertex.getBackTrace()) != null) optx.add(opt_vertex);
            for (TraceableVertex<String> v : optx) placed.add(v.getSAMSegment());
        }
        // sort traceable by size
        Collections.sort(traceable, new Comparator<TraceableVertex<String>>() {

            @Override
            public int compare(TraceableVertex<String> t0, TraceableVertex<String> t1) {
                // Descending by score.
                return Double.compare(t1.getScore(), t0.getScore());
            }
        });
        if (debug) {
            for (TraceableVertex<String> opt_vertex : traceable) {
                double score = opt_vertex.getScore();
                double penalty = opt_vertex.getPenalty();
                String trace = opt_vertex.toString() + ":" + opt_vertex.getSAMSegment().sstart() + "-" + opt_vertex.getSAMSegment().send() + "(" + opt_vertex.getScore() + "-" + opt_vertex.getPenalty() + ")";
                while ((opt_vertex = opt_vertex.getBackTrace()) != null) {
                    trace += "," + opt_vertex.toString() + ":" + opt_vertex.getSAMSegment().sstart() + "-" + opt_vertex.getSAMSegment().send() + "(" + opt_vertex.getScore() + "-" + opt_vertex.getPenalty() + ")";
                }
                myLogger.info("trace back [" + score + ", " + penalty + "]: " + trace);
            }
        }
        // we generate a compound alignment record for each traceable
        // NOTE(review): this loop body is empty — the compound-record generation appears
        // unimplemented here.
        for (TraceableVertex<String> opt_vertex : traceable) {
        }
    }
}
Also used : SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ArrayList(java.util.ArrayList) List(java.util.List) HashSet(java.util.HashSet) OverlapEdge(cz1.ngs.model.OverlapEdge) Sequence(cz1.ngs.model.Sequence) SAMSegment(cz1.ngs.model.SAMSegment) AlignmentSegment(cz1.ngs.model.AlignmentSegment) TraceableVertex(cz1.ngs.model.TraceableVertex) SAMRecord(htsjdk.samtools.SAMRecord) HashMap(java.util.HashMap) Map(java.util.Map) BidiMap(org.apache.commons.collections4.BidiMap) TreeBidiMap(org.apache.commons.collections4.bidimap.TreeBidiMap) File(java.io.File) GFA(cz1.ngs.model.GFA) TreeRangeSet(com.google.common.collect.TreeRangeSet) HashSet(java.util.HashSet) Set(java.util.Set) TreeRangeSet(com.google.common.collect.TreeRangeSet) TraceableEdge(cz1.ngs.model.TraceableEdge) SamReader(htsjdk.samtools.SamReader) TraceableDirectedWeightedPseudograph(cz1.ngs.model.TraceableDirectedWeightedPseudograph) TreeBidiMap(org.apache.commons.collections4.bidimap.TreeBidiMap) SamReaderFactory(htsjdk.samtools.SamReaderFactory) IOException(java.io.IOException) ArrayDeque(java.util.ArrayDeque)

Example 60 with SAMRecordIterator

use of htsjdk.samtools.SAMRecordIterator in project polyGembler by c-zhou.

From the class Consensus, the method buildReadPairGraph:

/**
 * Builds a read-pair linkage graph between reference contigs from one or more BAM files.
 * <p>
 * Each BAM file is scanned on a worker thread. For every read pair whose mates map to
 * two DIFFERENT contigs with sufficient mapping quality and a small enough edit distance,
 * the pair's orientation (FF/FR/RF/RR) is used to decide which contig ends it links, and
 * the link is counted. Counts are keyed by a 64-bit packed value:
 * [samDir:1][samRef:31][mateDir:1][mateRef:31], where dir 1 = contig end, 0 = contig start.
 * The aggregated counts are written as tab-separated lines:
 * samRef, mateRef, samDir, mateDir, count.
 * <p>
 * NOTE(review): the per-read-name grouping below assumes the BAM is sorted/grouped by
 * read name, and that every primary pair carries both mates — TODO confirm with callers.
 *
 * @param bam_in     semicolon-separated list of BAM file paths
 * @param sz_in      semicolon-separated list of library insert sizes, parallel to bam_in
 * @param out        output file path for the tab-separated link counts
 * @param mapQ_thres minimum mapping quality required of BOTH mates
 * @param max_NM     maximum edit distance (SAM "NM" tag) allowed for either mate
 */
public void buildReadPairGraph(String bam_in, String sz_in, String out, // int max_gap,
        int mapQ_thres, int max_NM) {
    try {
        // (Historical code that parsed the contig FASTA directly was removed;
        //  contig sizes are now taken from each BAM's sequence dictionary.)
        String[] bam_files = bam_in.trim().split(";");
        String[] s = sz_in.trim().split(";");
        // FIX: fail fast on mismatched argument lists instead of surfacing as an
        // ArrayIndexOutOfBoundsException inside the loop below.
        if (s.length < bam_files.length)
            throw new IllegalArgumentException("bam_in and sz_in must list the same number of entries");
        final Map<String, Integer> szs = new HashMap<String, Integer>();
        for (int i = 0; i != bam_files.length; i++) szs.put(bam_files[i], Integer.parseInt(s[i]));
        // Shared link-count accumulator; all writes are guarded by 'lock' (enclosing-class
        // field), and reads happen only after waitFor() joins the workers.
        final Map<Long, Integer> links = new HashMap<Long, Integer>();
        this.initial_thread_pool();
        for (String bam_file : bam_files) {
            this.executor.submit(new Runnable() {

                private String bam_file;

                @Override
                public void run() {
                    System.err.println("Process file ... " + bam_file);
                    SAMRecord tmp_record, sam_record, mate_record;
                    String sam_id;
                    final List<SAMRecord> record_pool = new ArrayList<SAMRecord>();
                    int sam_ref, mate_ref;
                    long key_ref;
                    final SamReader in1 = factory.open(new File(bam_file));
                    final List<SAMSequenceRecord> refSeq = in1.getFileHeader().getSequenceDictionary().getSequences();
                    final int[] refSz = new int[refSeq.size()];
                    for (int i = 0; i != refSz.length; i++) refSz[i] = refSeq.get(i).getSequenceLength();
                    // Loop-invariant: maximum plausible inferred insert size for this library
                    // (hoisted out of the per-pair loop; was recomputed for every pair).
                    final double maxSz = insz_thres * szs.get(bam_file);
                    SAMRecordIterator iter1 = in1.iterator();
                    // FIX: guard against an empty BAM — the original called iter1.next()
                    // unconditionally and would throw NoSuchElementException.
                    tmp_record = iter1.hasNext() ? iter1.next() : null;
                    while (tmp_record != null) {
                        // Collect all records sharing the same read name.
                        record_pool.clear();
                        record_pool.add(tmp_record);
                        sam_id = tmp_record.getReadName();
                        while ((tmp_record = iter1.hasNext() ? iter1.next() : null) != null && tmp_record.getReadName().equals(sam_id)) record_pool.add(tmp_record);
                        // Pick the primary alignment of each mate.
                        sam_record = null;
                        mate_record = null;
                        for (SAMRecord record : record_pool) {
                            if (record.getFirstOfPairFlag() && !record.getNotPrimaryAlignmentFlag())
                                sam_record = record;
                            if (record.getSecondOfPairFlag() && !record.getNotPrimaryAlignmentFlag())
                                mate_record = record;
                        }
                        if (sam_record == null || mate_record == null)
                            throw new RuntimeException(record_pool.get(0).getSAMString() + "!!!");
                        sam_ref = sam_record.getReferenceIndex();
                        mate_ref = mate_record.getReferenceIndex();
                        // FIX: getIntegerAttribute("NM") returns null when the tag is absent,
                        // which NPE'd on unboxing in the original comparison; a pair lacking
                        // an edit-distance tag is treated as unusable and skipped.
                        final Integer nm1 = sam_record.getIntegerAttribute("NM");
                        final Integer nm2 = mate_record.getIntegerAttribute("NM");
                        // Keep only well-mapped, inter-contig pairs.
                        if (sam_record.getReadUnmappedFlag() || mate_record.getReadUnmappedFlag() || sam_ref == mate_ref || sam_record.getMappingQuality() < mapQ_thres || mate_record.getMappingQuality() < mapQ_thres || nm1 == null || nm2 == null || nm1 > max_NM || nm2 > max_NM)
                            continue;
                        // Normalize so sam_ref < mate_ref: the packed key is canonical.
                        if (sam_ref > mate_ref) {
                            int tmp_int = sam_ref;
                            sam_ref = mate_ref;
                            mate_ref = tmp_int;
                            SAMRecord tmp_rec = sam_record;
                            sam_record = mate_record;
                            mate_record = tmp_rec;
                        }
                        int infSz;
                        // key_ref == 0 acts as a sentinel for "no usable orientation".
                        key_ref = 0L;
                        /**
                         *FF*
                         * ---=>---		---=>--
                         */
                        if (!sam_record.getReadNegativeStrandFlag() && !mate_record.getReadNegativeStrandFlag()) {
                            infSz = refSz[sam_ref] - sam_record.getAlignmentStart() + refSz[mate_ref] - mate_record.getAlignmentStart();
                            if (infSz <= maxSz) {
                                // 1 - end
                                key_ref = 1;
                                key_ref <<= 31;
                                key_ref += sam_ref;
                                key_ref <<= 1;
                                // 1 - end
                                key_ref += 1;
                                key_ref <<= 31;
                                key_ref += mate_ref;
                            }
                        } else /**
                         *FR*
                         * ---=>---		---<=--
                         */
                        if (!sam_record.getReadNegativeStrandFlag() && mate_record.getReadNegativeStrandFlag()) {
                            infSz = refSz[sam_ref] - sam_record.getAlignmentStart() + mate_record.getAlignmentEnd();
                            if (infSz <= maxSz) {
                                // 1 - end
                                key_ref = 1;
                                key_ref <<= 31;
                                key_ref += sam_ref;
                                key_ref <<= 1;
                                // 0 - start
                                key_ref += 0;
                                key_ref <<= 31;
                                key_ref += mate_ref;
                            }
                        } else /**
                         *RF*
                         * ---<=---		---=>--
                         */
                        if (sam_record.getReadNegativeStrandFlag() && !mate_record.getReadNegativeStrandFlag()) {
                            infSz = sam_record.getAlignmentEnd() + refSz[mate_ref] - mate_record.getAlignmentStart();
                            if (infSz <= maxSz) {
                                // 0 - start
                                key_ref = 0;
                                key_ref <<= 31;
                                key_ref += sam_ref;
                                key_ref <<= 1;
                                // 1 - end
                                key_ref += 1;
                                key_ref <<= 31;
                                key_ref += mate_ref;
                            }
                        } else /**
                         *RR*
                         * ---<=---		---<=--
                         */
                        if (sam_record.getReadNegativeStrandFlag() && mate_record.getReadNegativeStrandFlag()) {
                            infSz = sam_record.getAlignmentEnd() + mate_record.getAlignmentEnd();
                            if (infSz <= maxSz) {
                                // 0 - start
                                key_ref = 0;
                                key_ref <<= 31;
                                key_ref += sam_ref;
                                key_ref <<= 1;
                                // 0 - start
                                key_ref += 0;
                                key_ref <<= 31;
                                key_ref += mate_ref;
                            }
                        }
                        if (key_ref == 0L)
                            continue;
                        // Count the link; 'lock' serializes access to the shared HashMap.
                        synchronized (lock) {
                            if (links.containsKey(key_ref))
                                links.put(key_ref, links.get(key_ref) + 1);
                            else
                                links.put(key_ref, 1);
                        }
                    }
                    iter1.close();
                    try {
                        in1.close();
                    } catch (IOException e) {
                        e.printStackTrace();
                    }
                    System.err.println("Process file ... " + bam_file + " done.");
                }

                public Runnable init(final String bam_file) {
                    this.bam_file = bam_file;
                    return (this);
                }
            }.init(bam_file));
        }
        this.waitFor();
        // Unpack each 64-bit key in reverse of the packing order above
        // (refMask/dirMask are enclosing-class fields; presumably (1L<<31)-1 and 1 — confirm).
        int mate_ref, sam_ref, sam_dir, mate_dir;
        long key_ref;
        // FIX: try-with-resources ensures the writer is closed even if a write fails.
        try (BufferedWriter bw = new BufferedWriter(new FileWriter(new File(out)))) {
            for (Map.Entry<Long, Integer> entry : links.entrySet()) {
                key_ref = entry.getKey();
                mate_ref = (int) (key_ref & refMask);
                key_ref >>= 31;
                mate_dir = (int) (key_ref & dirMask);
                key_ref >>= 1;
                sam_ref = (int) (key_ref & refMask);
                key_ref >>= 31;
                sam_dir = (int) (key_ref & dirMask);
                bw.write(sam_ref + "\t" + mate_ref + "\t" + sam_dir + "\t" + mate_dir + "\t" + entry.getValue() + "\n");
            }
        }
    } catch (IOException e) {
        // best-effort reporting; callers receive no checked exception from this method
        e.printStackTrace();
    }
}
Also used : SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) HashMap(java.util.HashMap) FileWriter(java.io.FileWriter) ArrayList(java.util.ArrayList) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) BufferedWriter(java.io.BufferedWriter) SamReader(htsjdk.samtools.SamReader) IOException(java.io.IOException) SAMRecord(htsjdk.samtools.SAMRecord) File(java.io.File) Map(java.util.Map) HashMap(java.util.HashMap) TreeMap(java.util.TreeMap)

Aggregations

SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)107 SAMRecord (htsjdk.samtools.SAMRecord)92 SamReader (htsjdk.samtools.SamReader)83 SAMFileHeader (htsjdk.samtools.SAMFileHeader)49 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)47 File (java.io.File)47 SAMFileWriter (htsjdk.samtools.SAMFileWriter)45 IOException (java.io.IOException)41 ArrayList (java.util.ArrayList)34 CigarElement (htsjdk.samtools.CigarElement)30 Cigar (htsjdk.samtools.Cigar)26 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)24 SamReaderFactory (htsjdk.samtools.SamReaderFactory)21 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)18 CigarOperator (htsjdk.samtools.CigarOperator)16 Interval (htsjdk.samtools.util.Interval)16 PrintWriter (java.io.PrintWriter)15 HashMap (java.util.HashMap)15 SAMFileWriterFactory (htsjdk.samtools.SAMFileWriterFactory)14 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)14