Use of htsjdk.samtools.SAMRecordIterator in project ASCIIGenome by dariober.
The class Utils, method initRegionFromFile:
/**
* Get the first chromosome string from the first line of the input file. As support for
* more file types is added, this method should be updated. It is fairly crude and should
* not be trusted unconditionally.
* @throws InvalidGenomicCoordsException
* @throws SQLException
* @throws InvalidRecordException
* @throws InvalidCommandLineException
* @throws ClassNotFoundException
*/
@SuppressWarnings("unused")
public static String initRegionFromFile(String x) throws IOException, InvalidGenomicCoordsException, ClassNotFoundException, InvalidCommandLineException, InvalidRecordException, SQLException {
UrlValidator urlValidator = new UrlValidator();
String region = "";
TrackFormat fmt = Utils.getFileTypeFromName(x);
if (fmt.equals(TrackFormat.BAM)) {
SamReader samReader;
if (urlValidator.isValid(x)) {
samReader = SamReaderFactory.makeDefault().open(SamInputResource.of(new URL(x)));
} else {
SamReaderFactory srf = SamReaderFactory.make();
srf.validationStringency(ValidationStringency.SILENT);
samReader = srf.open(new File(x));
}
// Default: start from the first contig in the dictionary
region = samReader.getFileHeader().getSequence(0).getSequenceName();
SAMRecordIterator iter = samReader.iterator();
if (iter.hasNext()) {
// If there are records in this BAM, initialize from the first record
SAMRecord rec = iter.next();
region = rec.getContig() + ":" + rec.getAlignmentStart();
}
// Close the reader whether or not the BAM contained records
samReader.close();
return region;
} else if (fmt.equals(TrackFormat.BIGWIG) && !urlValidator.isValid(x)) {
// Loading from URL is painfully slow so do not initialize from URL
return initRegionFromBigWig(x);
} else if (fmt.equals(TrackFormat.BIGBED) && !urlValidator.isValid(x)) {
// Loading from URL is painfully slow so do not initialize from URL
return initRegionFromBigBed(x);
} else if (urlValidator.isValid(x) && (fmt.equals(TrackFormat.BIGWIG) || fmt.equals(TrackFormat.BIGBED))) {
System.err.println("Refusing to initialize from URL");
throw new InvalidGenomicCoordsException();
} else if (fmt.equals(TrackFormat.TDF)) {
Iterator<String> iter = TDFReader.getReader(x).getChromosomeNames().iterator();
while (iter.hasNext()) {
region = iter.next();
if (!region.equals("All")) {
return region;
}
}
System.err.println("Cannot initialize from " + x);
throw new RuntimeException();
} else {
// The input appears to be a generic interval file; the chromosome is expected in column 1.
// VCF files are also handled here, since they are either gzipped or plain ASCII.
BufferedReader br;
GZIPInputStream gzipStream;
if (x.toLowerCase().endsWith(".gz") || x.toLowerCase().endsWith(".bgz")) {
if (urlValidator.isValid(x)) {
gzipStream = new GZIPInputStream(new URL(x).openStream());
} else {
InputStream fileStream = new FileInputStream(x);
gzipStream = new GZIPInputStream(fileStream);
}
Reader decoder = new InputStreamReader(gzipStream, "UTF-8");
br = new BufferedReader(decoder);
} else {
if (urlValidator.isValid(x)) {
InputStream instream = new URL(x).openStream();
Reader decoder = new InputStreamReader(instream, "UTF-8");
br = new BufferedReader(decoder);
} else {
br = new BufferedReader(new FileReader(x));
}
}
String line;
while ((line = br.readLine()) != null) {
line = line.trim();
if (line.startsWith("#") || line.isEmpty() || line.startsWith("track ")) {
continue;
}
if (fmt.equals(TrackFormat.VCF)) {
String[] fields = line.split("\t");
region = fields[0] + ":" + fields[1];
} else {
IntervalFeature feature = new IntervalFeature(line, fmt, null);
region = feature.getChrom() + ":" + feature.getFrom();
}
br.close();
return region;
}
if (line == null) {
// The input has no records at all
br.close();
region = "Undefined_contig";
if (fmt.equals(TrackFormat.VCF)) {
SAMSequenceDictionary seqdict = getVCFHeader(x).getSequenceDictionary();
if (seqdict != null) {
Iterator<SAMSequenceRecord> iter = seqdict.getSequences().iterator();
if (iter.hasNext()) {
region = iter.next().getSequenceName();
}
}
}
return region;
}
}
System.err.println("Cannot initialize from " + x);
throw new RuntimeException();
}
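A minimal caller sketch, assuming a local file path; the file name below is made up, and the checked exceptions thrown by the method must be declared or handled by the caller:

    // Hypothetical usage: resolve a starting region for a track file.
    String region = Utils.initRegionFromFile("sample.bam");
    // For a BAM with records this yields something like "chr1:10468";
    // for an empty BAM it falls back to the first contig in the header.
    System.out.println("Initial region: " + region);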
Use of htsjdk.samtools.SAMRecordIterator in project polyGembler by c-zhou.
The class SAMtools, method run:
@Override
public void run() {
final SamReaderFactory factory = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS).validationStringency(ValidationStringency.SILENT);
final SamReader inputSam = factory.open(new File(mySamFile));
samHeader = inputSam.getFileHeader();
samHeader.setSortOrder(SortOrder.unsorted);
SAMRecordIterator iter = inputSam.iterator();
SAMReadGroupRecord rg = new SAMReadGroupRecord("cz1");
rg.setSample("cz1");
samHeader.addReadGroup(rg);
// samHeader.setAttribute("RG", "cz1");
final SAMFileWriter outSam = new SAMFileWriterFactory().makeSAMOrBAMWriter(samHeader, true, new File(myOutput));
// Tag (at most) the first 100 records with the new read group
for (int i = 0; i < 100 && iter.hasNext(); i++) {
SAMRecord record = iter.next();
record.setAttribute("RG", "cz1");
outSam.addAlignment(record);
}
myLogger.info("exit...");
try {
inputSam.close();
} catch (IOException e) {
e.printStackTrace();
}
outSam.close();
}
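The pattern above (register a read group in the header, then stamp each record's RG tag) can be condensed into a self-contained sketch; the file names and read-group id below are assumptions, not project code:

    // Sketch: copy a BAM while attaching a read group (hypothetical names).
    try (SamReader in = SamReaderFactory.makeDefault()
            .validationStringency(ValidationStringency.SILENT)
            .open(new File("in.bam"))) {
        SAMFileHeader header = in.getFileHeader();
        SAMReadGroupRecord rg = new SAMReadGroupRecord("rg1");
        rg.setSample("sample1");
        header.addReadGroup(rg);
        try (SAMFileWriter out = new SAMFileWriterFactory()
                .makeSAMOrBAMWriter(header, true, new File("out.bam"))) {
            for (SAMRecord rec : in) { // SamReader is Iterable<SAMRecord>
                rec.setAttribute("RG", "rg1"); // must match the read-group id added to the header
                out.addAlignment(rec);
            }
        }
    } // close() may throw IOException, to be handled by the enclosing method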
Use of htsjdk.samtools.SAMRecordIterator in project polyGembler by c-zhou.
The class SamFileSplit, method run:
@Override
public void run() {
Utils.makeOutputDir(bam_out);
final File[] beds = new File(bed_in).listFiles();
final String[] out_prefix = new String[beds.length];
for (int i = 0; i < beds.length; i++) {
out_prefix[i] = bam_out + "/" + beds[i].getName().replaceAll("\\.bed$", "");
Utils.makeOutputDir(out_prefix[i]);
}
final File[] bams = new File(bam_in).listFiles(new FilenameFilter() {
@Override
public boolean accept(File dir, String name) {
return name.endsWith(".bam");
}
});
this.initial_thread_pool();
for (File bam : bams) {
executor.submit(new Runnable() {
private File bam;
@Override
public void run() {
try {
final SamReaderFactory factory = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS).validationStringency(ValidationStringency.SILENT);
final SamReader inputSam = factory.open(bam);
final SAMFileHeader header = inputSam.getFileHeader();
final SAMRecordIterator iter = inputSam.iterator();
final SAMSequenceDictionary seqdic = header.getSequenceDictionary();
final SAMFileWriter[] outputSam = new SAMFileWriter[beds.length];
final SAMSequenceDictionary[] seqdics = new SAMSequenceDictionary[beds.length];
final Map<String, Integer> outMap = new HashMap<String, Integer>();
final String out = bam.getName();
for (int i = 0; i < beds.length; i++) {
Set<String> bed_seq = new HashSet<String>();
String tmp;
BufferedReader br = new BufferedReader(new FileReader(beds[i]));
String line;
while ((line = br.readLine()) != null) {
tmp = line.split("\\s+")[0];
bed_seq.add(tmp);
outMap.put(tmp, i);
}
br.close();
final SAMFileHeader header_i = new SAMFileHeader();
final SAMSequenceDictionary seqdic_i = new SAMSequenceDictionary();
header_i.setAttribute("VN", header.getAttribute("VN"));
header_i.setAttribute("SO", header.getAttribute("SO"));
List<SAMSequenceRecord> seqs = seqdic.getSequences();
for (SAMSequenceRecord seq : seqs) if (bed_seq.contains(seq.getSequenceName()))
seqdic_i.addSequence(seq);
header_i.setSequenceDictionary(seqdic_i);
for (SAMReadGroupRecord rg : header.getReadGroups()) header_i.addReadGroup(rg);
for (SAMProgramRecord pg : header.getProgramRecords()) header_i.addProgramRecord(pg);
outputSam[i] = new SAMFileWriterFactory().makeSAMOrBAMWriter(header_i, true, new File(out_prefix[i] + "/" + out));
seqdics[i] = seqdic_i;
}
Set<String> refs = outMap.keySet();
String ref;
int f;
while (iter.hasNext()) {
SAMRecord rec = iter.next();
if (refs.contains(ref = rec.getReferenceName())) {
f = outMap.get(ref);
rec.setReferenceIndex(seqdics[f].getSequenceIndex(ref));
outputSam[f].addAlignment(rec);
}
}
iter.close();
inputSam.close();
for (int i = 0; i < outputSam.length; i++) outputSam[i].close();
myLogger.info(out + " return true");
} catch (Exception e) {
Thread t = Thread.currentThread();
t.getUncaughtExceptionHandler().uncaughtException(t, e);
e.printStackTrace();
executor.shutdown();
System.exit(1);
}
}
public Runnable init(File bam) {
this.bam = bam;
return (this);
}
}.init(bam));
}
this.waitFor();
}
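The essential step in the splitter is remapping each record's reference index onto the per-subset dictionary before writing, since sequence indices differ between the full and subset dictionaries. A condensed sketch of just that step; the helper name and parameters are hypothetical:

    // Sketch: write only records whose contig exists in the subset header.
    static void writeSubset(SamReader in, SAMFileWriter out, SAMFileHeader subsetHeader) {
        SAMSequenceDictionary dict = subsetHeader.getSequenceDictionary();
        try (SAMRecordIterator iter = in.iterator()) {
            while (iter.hasNext()) {
                SAMRecord rec = iter.next();
                int idx = dict.getSequenceIndex(rec.getReferenceName());
                if (idx < 0)
                    continue; // contig absent from this subset (or read unmapped)
                rec.setReferenceIndex(idx); // indices differ between dictionaries
                out.addAlignment(rec);
            }
        }
    }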
Use of htsjdk.samtools.SAMRecordIterator in project polyGembler by c-zhou.
The class Anchor, method run:
@Override
public void run() {
// read assembly graph file
final GFA gfa = new GFA(query_file, asm_graph);
qry_seqs = gfa.getSequenceMap();
sub_seqs = Sequence.parseFastaFileAsMap(subject_file);
myLogger.info(" GFA vertices: " + gfa.vertexSet().size());
myLogger.info(" GFA edges : " + gfa.edgeSet().size());
// myLogger.info(" GFA edges --- ");
// for(OverlapEdge olap : gfa.edgeSet())
// myLogger.info(olap.olapInfo().toString());
// Find 'N'/'n' runs in the subject/reference sequences,
// which could affect parsing of the BLAST records
sub_gaps = new HashMap<String, TreeRangeSet<Integer>>();
for (Map.Entry<String, Sequence> entry : sub_seqs.entrySet()) {
String seq_sn = entry.getKey();
String seq_str = entry.getValue().seq_str();
final TreeRangeSet<Integer> tmp_rangeSet = TreeRangeSet.create();
for (int j = 0; j < seq_str.length(); j++) {
if (seq_str.charAt(j) == 'N' || seq_str.charAt(j) == 'n')
// BLAST records use 1-based closed coordinates
tmp_rangeSet.add(Range.closed(j + 1, j + 1).canonical(DiscreteDomain.integers()));
}
int seq_ln = seq_str.length();
final TreeRangeSet<Integer> range_set = TreeRangeSet.create();
for (Range<Integer> range : tmp_rangeSet.asRanges()) {
int lowerend = range.hasLowerBound() ? Math.max(0, range.lowerEndpoint() - gap_buff) : 0;
int upperend = range.hasUpperBound() ? Math.min(seq_ln, range.upperEndpoint() + gap_buff - 1) : seq_ln;
range_set.add(Range.closed(lowerend, upperend).canonical(DiscreteDomain.integers()));
}
sub_gaps.put(seq_sn, range_set);
}
// read alignment file and place the query sequences
final Map<String, Set<SAMSegment>> initPlace = new HashMap<String, Set<SAMSegment>>();
final Map<String, List<SAMSegment>> initPseudoAssembly = new HashMap<String, List<SAMSegment>>();
for (String sub_seq : sub_seqs.keySet()) initPseudoAssembly.put(sub_seq, new ArrayList<SAMSegment>());
try {
final SamReaderFactory factory = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS).validationStringency(ValidationStringency.SILENT);
final SamReader in1 = factory.open(new File(align_file));
final SAMRecordIterator iter1 = in1.iterator();
String qry;
int qry_ln;
double min_aln;
final List<SAMSegment> buff = new ArrayList<SAMSegment>();
// SAMRecordIterator.next() throws past the end of input, so guard every call with hasNext()
SAMRecord rc = iter1.hasNext() ? iter1.next() : null;
while (rc != null) {
qry = rc.getReadName();
qry_ln = qry_seqs.get(qry).seq_ln();
buff.clear();
if (!rc.getReadUnmappedFlag())
buff.add(SAMSegment.samRecord(rc, true, qry_ln));
// Collect all consecutive records that share this read name
while ((rc = iter1.hasNext() ? iter1.next() : null) != null && rc.getReadName().equals(qry)) {
buff.add(SAMSegment.samRecord(rc, true, qry_ln));
}
if (buff.isEmpty())
continue;
min_aln = 0.9 * buff.get(0).qlength();
// Keep alignments with qual > 0; qual == 0 alignments are kept only if long enough
Set<SAMSegment> init_f = new HashSet<SAMSegment>();
Set<SAMSegment> init_r = new HashSet<SAMSegment>();
for (SAMSegment record : buff) {
if (record.qual() == 0 && record.qlength() < min_aln)
continue;
if (record.qseqid().equals(qry))
init_f.add(record);
else
init_r.add(record);
initPseudoAssembly.get(record.sseqid()).add(record);
}
if (!init_f.isEmpty())
initPlace.put(qry, init_f);
if (!init_r.isEmpty())
initPlace.put(qry + "'", init_r);
}
iter1.close();
in1.close();
} catch (IOException e) {
e.printStackTrace();
}
// Collections.sort(initPseudoAssembly.get("1_pilon"), new AlignmentSegment.SubjectCoordinationComparator());
// if(debug) {
// for(SAMSegment record : initPseudoAssembly.get("1_pilon")) {
// System.out.println(record.qseqid()+":"+record.sstart()+"-"+record.send());
// }
// }
final Set<SAMSegment> contained = new HashSet<SAMSegment>();
final Set<SAMSegment> placed = new HashSet<SAMSegment>();
final int flank_size = 10000;
int distance;
for (String sub_seq : sub_seqs.keySet()) {
// sub_seq = "Chr10";
if (sub_seq.equals("Chr00"))
continue;
myLogger.info(">>>>>>>>>>>>>" + sub_seq + "<<<<<<<<<<<<<<<<");
final List<SAMSegment> seq_by_sub = initPseudoAssembly.get(sub_seq);
Collections.sort(seq_by_sub, new AlignmentSegment.SubjectCoordinationComparator());
placed.clear();
int nSeq = seq_by_sub.size();
double edge_penalty, edge_score;
SAMSegment root_seq, source_seq, target_seq;
Set<SAMSegment> target_seqs;
Set<OverlapEdge> outgoing;
TraceableEdge edge;
String root_seqid, source_seqid, target_seqid;
TraceableVertex<String> root_vertex, source_vertex, target_vertex;
Deque<SAMSegment> deque = new ArrayDeque<SAMSegment>();
final List<TraceableVertex<String>> traceable = new ArrayList<TraceableVertex<String>>();
for (int i = 0; i < nSeq; i++) {
root_seq = seq_by_sub.get(i);
root_seqid = root_seq.qseqid();
if (placed.contains(root_seq))
continue;
final TraceableDirectedWeightedPseudograph<String> razor = new TraceableDirectedWeightedPseudograph<String>(TraceableEdge.class);
// final ListenableDirectedWeightedGraph<TraceableVertex<String>, DefaultWeightedEdge> razor =
// new ListenableDirectedWeightedGraph<TraceableVertex<String>, DefaultWeightedEdge>(DefaultWeightedEdge.class);
// JGraphModelAdapter<TraceableVertex<String>, DefaultWeightedEdge> jgAdapter =
// new JGraphModelAdapter<TraceableVertex<String>, DefaultWeightedEdge>(razor);
// JGraph jgraph = new JGraph(jgAdapter);
deque.clear();
deque.push(root_seq);
contained.clear();
while (!deque.isEmpty()) {
source_seq = deque.pop();
source_seqid = source_seq.qseqid();
if (contained.contains(source_seq))
continue;
contained.add(source_seq);
source_vertex = new TraceableVertex<String>(source_seqid);
source_vertex.setSAMSegment(source_seq);
if (!razor.containsVertex(source_vertex))
razor.addVertex(source_vertex);
outgoing = gfa.outgoingEdgesOf(source_seqid);
for (OverlapEdge out : outgoing) {
target_seqid = gfa.getEdgeTarget(out);
if (!initPlace.containsKey(target_seqid))
continue;
target_seqs = initPlace.get(target_seqid);
distance = Integer.MAX_VALUE;
target_seq = null;
for (SAMSegment seq : target_seqs) {
int d = AlignmentSegment.sdistance(source_seq, seq);
if (d < distance) {
distance = d;
target_seq = seq;
}
}
if (distance <= flank_size) {
target_vertex = new TraceableVertex<String>(target_seqid);
target_vertex.setSAMSegment(target_seq);
if (!razor.containsVertex(target_vertex))
razor.addVertex(target_vertex);
if (razor.containsEdge(source_vertex, target_vertex))
continue;
edge = razor.addEdge(source_vertex, target_vertex);
// Calculate the edge weight. Preferred (higher-weight) edges have:
// 1. long alignment-segment vertices (captured by the edge score below), and
// 2. small gaps on the reference (captured by the edge penalty below).
edge_penalty = AlignmentSegment.sdistance(source_seq, target_seq);
edge.setPenalty(edge_penalty);
edge_score = qry_seqs.get(source_seqid).seq_ln() + qry_seqs.get(target_seqid).seq_ln() - gfa.getEdge(source_seqid, target_seqid).olap();
edge.setScore(edge_score);
deque.push(target_seq);
}
}
}
if (ddebug)
myLogger.info(root_seqid + " " + razor.vertexSet().size() + " " + razor.edgeSet().size() + " done");
// JFrame frame = new JFrame();
// frame.getContentPane().add(jgraph);
// frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
// frame.pack();
// frame.setVisible(true);
// "pseudo"-DFS to find the route with the highest score
final Map<String, TraceableVertex<String>> razv_map = new HashMap<String, TraceableVertex<String>>();
for (TraceableVertex<String> v : razor.vertexSet()) razv_map.put(v.getId(), v);
// we use a bidirectional hashmap to simulate the deque
// this is because we may need to do deletions
// Deque<TraceableVertex<String>> queue = new ArrayDeque<TraceableVertex<String>>();
final TreeBidiMap<Long, TraceableVertex<String>> bidiQ = new TreeBidiMap<Long, TraceableVertex<String>>();
root_vertex = razv_map.get(root_seqid);
root_vertex.setSAMSegment(root_seq);
root_vertex.setScore(qry_seqs.get(root_seqid).seq_ln());
root_vertex.setPenalty(0);
root_vertex.setStatus(true);
bidiQ.put(0L, root_vertex);
double max_ws = Double.NEGATIVE_INFINITY, source_penalty, target_penalty, source_score, target_score, penalty, score, target_ws, source_ws, ws;
int source_ln;
Set<TraceableEdge> out_edges;
TraceableVertex<String> opt_vertex = null;
long sizeQ;
boolean isLeaf;
if (ddebug)
for (TraceableEdge e : razor.edgeSet()) myLogger.info(e.toString() + "(" + razor.getEdgeSource(e).getSAMSegment().toString() + "|" + razor.getEdgeTarget(e).getSAMSegment().toString() + "|" + e.getScore() + "-" + e.getPenalty() + ")");
while (!bidiQ.isEmpty()) {
sizeQ = bidiQ.lastKey();
source_vertex = bidiQ.get(sizeQ);
bidiQ.remove(sizeQ);
source_ln = qry_seqs.get(source_vertex.getId()).seq_ln();
source_score = source_vertex.getScore() - source_ln;
source_penalty = source_vertex.getPenalty();
source_ws = source_score - source_penalty;
isLeaf = true;
out_edges = razor.outgoingEdgesOf(source_vertex);
for (TraceableEdge out : out_edges) {
// Look the target vertex up by id rather than using razor.getEdgeTarget(out)
// directly, so score/penalty updates are applied to the shared instance
target_vertex = razv_map.get(razor.getEdgeTarget(out).getId());
target_score = target_vertex.getScore();
target_penalty = target_vertex.getPenalty();
target_ws = target_score - target_penalty;
edge_penalty = out.getPenalty();
penalty = source_penalty + edge_penalty;
edge_score = out.getScore();
score = source_score + edge_score;
ws = score - penalty;
if (edge_penalty > flank_size || (target_vertex.getStatus() && (ws <= target_ws || isLoopback(razor, source_vertex, target_vertex))))
continue;
isLeaf = false;
target_vertex.setBackTrace(source_vertex);
target_vertex.setScore(score);
target_vertex.setPenalty(penalty);
target_vertex.setStatus(true);
bidiQ.put(sizeQ++, target_vertex);
}
if (isLeaf && source_ws > max_ws) {
penalty = source_vertex.getPenalty();
score = source_vertex.getScore();
max_ws = source_ws;
opt_vertex = source_vertex;
if (ddebug) {
String trace = opt_vertex.toString() + ":" + opt_vertex.getSAMSegment().sstart() + "-" + opt_vertex.getSAMSegment().send() + "(" + opt_vertex.getScore() + "-" + opt_vertex.getPenalty() + ")";
TraceableVertex<String> optx = opt_vertex;
while ((optx = optx.getBackTrace()) != null) {
trace += "," + optx.toString() + ":" + optx.getSAMSegment().sstart() + "-" + optx.getSAMSegment().send() + "(" + optx.getScore() + "-" + optx.getPenalty() + ")";
}
myLogger.info("trace back [" + score + ", " + penalty + "]: " + trace);
}
}
}
traceable.add(opt_vertex);
Set<TraceableVertex<String>> optx = new HashSet<TraceableVertex<String>>();
optx.add(opt_vertex);
while ((opt_vertex = opt_vertex.getBackTrace()) != null) optx.add(opt_vertex);
for (TraceableVertex<String> v : optx) placed.add(v.getSAMSegment());
}
// Sort traceable vertices by score, in descending order
Collections.sort(traceable, new Comparator<TraceableVertex<String>>() {
@Override
public int compare(TraceableVertex<String> t0, TraceableVertex<String> t1) {
return Double.compare(t1.getScore(), t0.getScore());
}
});
if (debug) {
for (TraceableVertex<String> opt_vertex : traceable) {
double score = opt_vertex.getScore();
double penalty = opt_vertex.getPenalty();
String trace = opt_vertex.toString() + ":" + opt_vertex.getSAMSegment().sstart() + "-" + opt_vertex.getSAMSegment().send() + "(" + opt_vertex.getScore() + "-" + opt_vertex.getPenalty() + ")";
while ((opt_vertex = opt_vertex.getBackTrace()) != null) {
trace += "," + opt_vertex.toString() + ":" + opt_vertex.getSAMSegment().sstart() + "-" + opt_vertex.getSAMSegment().send() + "(" + opt_vertex.getScore() + "-" + opt_vertex.getPenalty() + ")";
}
myLogger.info("trace back [" + score + ", " + penalty + "]: " + trace);
}
}
// we generate a compound alignment record for each traceable
for (TraceableVertex<String> opt_vertex : traceable) {
}
}
}
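The read-name grouping idiom used when loading the alignment file (carry one look-ahead record across loop iterations, guarding every next() with hasNext()) can also be written with htsjdk's PeekableIterator from htsjdk.samtools.util, which makes the name boundary explicit. The helper below is a hypothetical sketch, assuming input grouped by query name:

    // Sketch: collect the next run of consecutive records sharing a read name.
    static List<SAMRecord> nextReadGroup(PeekableIterator<SAMRecord> iter) {
        List<SAMRecord> group = new ArrayList<>();
        if (!iter.hasNext())
            return group; // empty list signals end of input
        String name = iter.peek().getReadName();
        while (iter.hasNext() && iter.peek().getReadName().equals(name))
            group.add(iter.next());
        return group;
    }
    // Usage: wrap the reader's iterator, e.g. new PeekableIterator<>(samReader.iterator())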
Use of htsjdk.samtools.SAMRecordIterator in project polyGembler by c-zhou.
The class Consensus, method buildReadPairGraph:
public void buildReadPairGraph(String bam_in, String sz_in, String out, // int max_gap,
int mapQ_thres, int max_NM) {
try {
/**
* BufferedReader br_tmp = new BufferedReader(new FileReader(new File(contig_file)));
* int contig_num = 0;
* String line;
* while( (line=br_tmp.readLine())!=null )
* if(line.startsWith(">")) contig_num++;
* br_tmp.close();
*
* BufferedReader br_contig_file = new BufferedReader(new FileReader(new File(contig_file)));
* String[] contig_id = new String[contig_num];
* String[] contig_str = new String[contig_num];
* int[] contig_sz = new int[contig_num];
* int c = 0;
* line = br_contig_file.readLine();
* while( line!=null ) {
* if(line.startsWith(">")) {
* contig_id[c] = line.replace(">", "").trim();
* StringBuilder bs = new StringBuilder();
* while( (line=br_contig_file.readLine())!=null
* && !line.startsWith(">")) {
* bs.append(line.trim());
* }
* contig_str[c] = bs.toString();
* contig_sz[c] = bs.length();
* c++;
* }
* }
* br_contig_file.close();
*/
String[] bam_files = bam_in.trim().split(";");
String[] s = sz_in.trim().split(";");
final Map<String, Integer> szs = new HashMap<String, Integer>();
for (int i = 0; i != bam_files.length; i++) szs.put(bam_files[i], Integer.parseInt(s[i]));
final Map<Long, Integer> links = new HashMap<Long, Integer>();
this.initial_thread_pool();
for (String bam_file : bam_files) {
this.executor.submit(new Runnable() {
private String bam_file;
@Override
public void run() {
System.err.println("Process file ... " + bam_file);
SAMRecord tmp_record, sam_record, mate_record;
String sam_id;
final List<SAMRecord> record_pool = new ArrayList<SAMRecord>();
int sam_ref, mate_ref;
long key_ref;
final SamReader in1 = factory.open(new File(bam_file));
final List<SAMSequenceRecord> refSeq = in1.getFileHeader().getSequenceDictionary().getSequences();
final int[] refSz = new int[refSeq.size()];
for (int i = 0; i != refSz.length; i++) refSz[i] = refSeq.get(i).getSequenceLength();
SAMRecordIterator iter1 = in1.iterator();
tmp_record = iter1.next();
while (tmp_record != null) {
record_pool.clear();
record_pool.add(tmp_record);
sam_id = tmp_record.getReadName();
while ((tmp_record = iter1.hasNext() ? iter1.next() : null) != null && tmp_record.getReadName().equals(sam_id)) record_pool.add(tmp_record);
sam_record = null;
mate_record = null;
for (SAMRecord record : record_pool) {
if (record.getFirstOfPairFlag() && !record.getNotPrimaryAlignmentFlag())
sam_record = record;
if (record.getSecondOfPairFlag() && !record.getNotPrimaryAlignmentFlag())
mate_record = record;
}
if (sam_record == null || mate_record == null)
throw new RuntimeException(record_pool.get(0).getSAMString() + "!!!");
sam_ref = sam_record.getReferenceIndex();
mate_ref = mate_record.getReferenceIndex();
if (sam_record.getReadUnmappedFlag() || mate_record.getReadUnmappedFlag() || sam_ref == mate_ref || sam_record.getMappingQuality() < mapQ_thres || mate_record.getMappingQuality() < mapQ_thres || sam_record.getIntegerAttribute("NM") > max_NM || mate_record.getIntegerAttribute("NM") > max_NM)
continue;
if (sam_ref > mate_ref) {
int tmp_int = sam_ref;
sam_ref = mate_ref;
mate_ref = tmp_int;
SAMRecord tmp_rec = sam_record;
sam_record = mate_record;
mate_record = tmp_rec;
}
int infSz;
double maxSz = insz_thres * szs.get(bam_file);
key_ref = 0L;
// FF:  ---=>---   ---=>---
if (!sam_record.getReadNegativeStrandFlag() && !mate_record.getReadNegativeStrandFlag()) {
infSz = refSz[sam_ref] - sam_record.getAlignmentStart() + refSz[mate_ref] - mate_record.getAlignmentStart();
if (infSz <= maxSz) {
// 1 - end
key_ref = 1;
key_ref <<= 31;
key_ref += sam_ref;
key_ref <<= 1;
// 1 - end
key_ref += 1;
key_ref <<= 31;
key_ref += mate_ref;
}
} else if (!sam_record.getReadNegativeStrandFlag() && mate_record.getReadNegativeStrandFlag()) {
// FR:  ---=>---   ---<=---
infSz = refSz[sam_ref] - sam_record.getAlignmentStart() + mate_record.getAlignmentEnd();
if (infSz <= maxSz) {
// 1 - end
key_ref = 1;
key_ref <<= 31;
key_ref += sam_ref;
key_ref <<= 1;
// 0 - start
key_ref += 0;
key_ref <<= 31;
key_ref += mate_ref;
}
} else if (sam_record.getReadNegativeStrandFlag() && !mate_record.getReadNegativeStrandFlag()) {
// RF:  ---<=---   ---=>---
infSz = sam_record.getAlignmentEnd() + refSz[mate_ref] - mate_record.getAlignmentStart();
if (infSz <= maxSz) {
// 0 - start
key_ref = 0;
key_ref <<= 31;
key_ref += sam_ref;
key_ref <<= 1;
// 1 - end
key_ref += 1;
key_ref <<= 31;
key_ref += mate_ref;
}
} else if (sam_record.getReadNegativeStrandFlag() && mate_record.getReadNegativeStrandFlag()) {
// RR:  ---<=---   ---<=---
infSz = sam_record.getAlignmentEnd() + mate_record.getAlignmentEnd();
if (infSz <= maxSz) {
// 0 - start
key_ref = 0;
key_ref <<= 31;
key_ref += sam_ref;
key_ref <<= 1;
// 0 - start
key_ref += 0;
key_ref <<= 31;
key_ref += mate_ref;
}
}
if (key_ref == 0L)
continue;
synchronized (lock) {
if (links.containsKey(key_ref))
links.put(key_ref, links.get(key_ref) + 1);
else
links.put(key_ref, 1);
}
}
iter1.close();
try {
in1.close();
} catch (IOException e) {
e.printStackTrace();
}
System.err.println("Process file ... " + bam_file + " done.");
}
public Runnable init(final String bam_file) {
this.bam_file = bam_file;
return (this);
}
}.init(bam_file));
}
this.waitFor();
int mate_ref, sam_ref, sam_dir, mate_dir;
long key_ref;
BufferedWriter bw = new BufferedWriter(new FileWriter(new File(out)));
for (Map.Entry<Long, Integer> entry : links.entrySet()) {
key_ref = entry.getKey();
mate_ref = (int) (key_ref & refMask);
key_ref >>= 31;
mate_dir = (int) (key_ref & dirMask);
key_ref >>= 1;
sam_ref = (int) (key_ref & refMask);
key_ref >>= 31;
sam_dir = (int) (key_ref & dirMask);
bw.write(sam_ref + "\t" + mate_ref + "\t" + sam_dir + "\t" + mate_dir + "\t" + entry.getValue() + "\n");
}
bw.close();
} catch (IOException e) {
e.printStackTrace();
}
}
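The link key packs two 31-bit reference indices and two 1-bit orientation flags (0 = start, 1 = end) into a single long, MSB-first as samDir | samRef | mateDir | mateRef, which is exactly what the decode loop above unpacks. A standalone round-trip sketch, with mask constants assumed to mirror the project's refMask and dirMask fields:

    // Assumed masks: low 31 bits for a reference index, 1 bit for a direction flag.
    static final long REF_MASK = 0x7FFFFFFFL;
    static final long DIR_MASK = 0x1L;

    static long encodeKey(int samDir, int samRef, int mateDir, int mateRef) {
        long key = samDir;
        key = (key << 31) + samRef;
        key = (key << 1) + mateDir;
        key = (key << 31) + mateRef; // layout: samDir(1) | samRef(31) | mateDir(1) | mateRef(31)
        return key;
    }

    static int[] decodeKey(long key) {
        int mateRef = (int) (key & REF_MASK);
        key >>= 31;
        int mateDir = (int) (key & DIR_MASK);
        key >>= 1;
        int samRef = (int) (key & REF_MASK);
        key >>= 31;
        int samDir = (int) (key & DIR_MASK);
        return new int[] { samDir, samRef, mateDir, mateRef };
    }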