Use of htsjdk.samtools.SAMRecordIterator in project polyGembler by c-zhou.
The class Consensus, method buildReadPairGraph.
public void buildReadPairGraph(String bam_in, String sz_in, String out, // int max_gap,
int mapQ_thres, int max_NM) {
try {
/**
* BufferedReader br_tmp = new BufferedReader(new FileReader(new File(contig_file)));
* int contig_num = 0;
* String line;
* while( (line=br_tmp.readLine())!=null )
* if(line.startsWith(">")) contig_num++;
* br_tmp.close();
*
* BufferedReader br_contig_file = new BufferedReader(new FileReader(new File(contig_file)));
* String[] contig_id = new String[contig_num];
* String[] contig_str = new String[contig_num];
* int[] contig_sz = new int[contig_num];
* int c = 0;
* line = br_contig_file.readLine();
* while( line!=null ) {
* if(line.startsWith(">")) {
* contig_id[c] = line.replace(">", "").trim();
* StringBuilder bs = new StringBuilder();
* while( (line=br_contig_file.readLine())!=null
* && !line.startsWith(">")) {
* bs.append(line.trim());
* }
* contig_str[c] = bs.toString();
* contig_sz[c] = bs.length();
* c++;
* }
* }
* br_contig_file.close();
*/
String[] bam_files = bam_in.trim().split(";");
String[] s = sz_in.trim().split(";");
final Map<String, Integer> szs = new HashMap<String, Integer>();
for (int i = 0; i != bam_files.length; i++) szs.put(bam_files[i], Integer.parseInt(s[i]));
final Map<Long, Integer> links = new HashMap<Long, Integer>();
this.initial_thread_pool();
for (String bam_file : bam_files) {
this.executor.submit(new Runnable() {
private String bam_file;
@Override
public void run() {
System.err.println("Process file ... " + bam_file);
SAMRecord tmp_record, sam_record, mate_record;
String sam_id;
final List<SAMRecord> record_pool = new ArrayList<SAMRecord>();
int sam_ref, mate_ref;
long key_ref;
final SamReader in1 = factory.open(new File(bam_file));
final List<SAMSequenceRecord> refSeq = in1.getFileHeader().getSequenceDictionary().getSequences();
final int[] refSz = new int[refSeq.size()];
for (int i = 0; i != refSz.length; i++) refSz[i] = refSeq.get(i).getSequenceLength();
SAMRecordIterator iter1 = in1.iterator();
tmp_record = iter1.next();
while (tmp_record != null) {
record_pool.clear();
record_pool.add(tmp_record);
sam_id = tmp_record.getReadName();
while ((tmp_record = iter1.hasNext() ? iter1.next() : null) != null
        && tmp_record.getReadName().equals(sam_id))
    record_pool.add(tmp_record);
sam_record = null;
mate_record = null;
for (SAMRecord record : record_pool) {
if (record.getFirstOfPairFlag() && !record.getNotPrimaryAlignmentFlag())
sam_record = record;
if (record.getSecondOfPairFlag() && !record.getNotPrimaryAlignmentFlag())
mate_record = record;
}
if (sam_record == null || mate_record == null)
throw new RuntimeException(record_pool.get(0).getSAMString() + "!!!");
sam_ref = sam_record.getReferenceIndex();
mate_ref = mate_record.getReferenceIndex();
if (sam_record.getReadUnmappedFlag() || mate_record.getReadUnmappedFlag()
        || sam_ref == mate_ref
        || sam_record.getMappingQuality() < mapQ_thres
        || mate_record.getMappingQuality() < mapQ_thres
        || sam_record.getIntegerAttribute("NM") > max_NM
        || mate_record.getIntegerAttribute("NM") > max_NM)
    continue;
if (sam_ref > mate_ref) {
int tmp_int = sam_ref;
sam_ref = mate_ref;
mate_ref = tmp_int;
SAMRecord tmp_rec = sam_record;
sam_record = mate_record;
mate_record = tmp_rec;
}
int infSz;
double maxSz = insz_thres * szs.get(bam_file);
key_ref = 0L;
/**
 * FF:
 * ---=>---   ---=>---
 */
if (!sam_record.getReadNegativeStrandFlag() && !mate_record.getReadNegativeStrandFlag()) {
infSz = refSz[sam_ref] - sam_record.getAlignmentStart() + refSz[mate_ref] - mate_record.getAlignmentStart();
if (infSz <= maxSz) {
// 1 - end
key_ref = 1;
key_ref <<= 31;
key_ref += sam_ref;
key_ref <<= 1;
// 1 - end
key_ref += 1;
key_ref <<= 31;
key_ref += mate_ref;
}
} else if (!sam_record.getReadNegativeStrandFlag() && mate_record.getReadNegativeStrandFlag()) {
    /**
     * FR:
     * ---=>---   ---<=---
     */
infSz = refSz[sam_ref] - sam_record.getAlignmentStart() + mate_record.getAlignmentEnd();
if (infSz <= maxSz) {
// 1 - end
key_ref = 1;
key_ref <<= 31;
key_ref += sam_ref;
key_ref <<= 1;
// 0 - start
key_ref += 0;
key_ref <<= 31;
key_ref += mate_ref;
}
} else if (sam_record.getReadNegativeStrandFlag() && !mate_record.getReadNegativeStrandFlag()) {
    /**
     * RF:
     * ---<=---   ---=>---
     */
infSz = sam_record.getAlignmentEnd() + refSz[mate_ref] - mate_record.getAlignmentStart();
if (infSz <= maxSz) {
// 0 - start
key_ref = 0;
key_ref <<= 31;
key_ref += sam_ref;
key_ref <<= 1;
// 1 - end
key_ref += 1;
key_ref <<= 31;
key_ref += mate_ref;
}
} else if (sam_record.getReadNegativeStrandFlag() && mate_record.getReadNegativeStrandFlag()) {
    /**
     * RR:
     * ---<=---   ---<=---
     */
infSz = sam_record.getAlignmentEnd() + mate_record.getAlignmentEnd();
if (infSz <= maxSz) {
// 0 - start
key_ref = 0;
key_ref <<= 31;
key_ref += sam_ref;
key_ref <<= 1;
// 0 - start
key_ref += 0;
key_ref <<= 31;
key_ref += mate_ref;
}
}
if (key_ref == 0L)
continue;
synchronized (lock) {
if (links.containsKey(key_ref))
links.put(key_ref, links.get(key_ref) + 1);
else
links.put(key_ref, 1);
}
}
iter1.close();
try {
in1.close();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
System.err.println("Process file ... " + bam_file + " done.");
}
public Runnable init(final String bam_file) {
this.bam_file = bam_file;
return (this);
}
}.init(bam_file));
}
this.waitFor();
int mate_ref, sam_ref, sam_dir, mate_dir;
long key_ref;
BufferedWriter bw = new BufferedWriter(new FileWriter(new File(out)));
for (Map.Entry<Long, Integer> entry : links.entrySet()) {
key_ref = entry.getKey();
mate_ref = (int) (key_ref & refMask);
key_ref >>= 31;
mate_dir = (int) (key_ref & dirMask);
key_ref >>= 1;
sam_ref = (int) (key_ref & refMask);
key_ref >>= 31;
sam_dir = (int) (key_ref & dirMask);
bw.write(sam_ref + "\t" + mate_ref + "\t" + sam_dir + "\t" + mate_dir + "\t" + entry.getValue() + "\n");
}
bw.close();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
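The 64-bit link key built above packs two directed contig ends into one long: reading the shifts, the layout is sam_dir (1 bit), sam_ref (31 bits), mate_dir (1 bit), mate_ref (31 bits), and the output loop unpacks it with refMask and dirMask. Those two masks are not shown in this excerpt; a minimal round-trip sketch, assuming refMask = (1L << 31) - 1 and dirMask = 1L, looks like this:

// Sketch only: REF_MASK/DIR_MASK values are assumed, not taken from the project source.
public final class LinkKey {

    private static final long REF_MASK = (1L << 31) - 1; // assumed 31-bit reference index
    private static final long DIR_MASK = 1L;             // assumed 1-bit end flag (0 = start, 1 = end)

    /** Pack (samDir, samRef, mateDir, mateRef) the same way buildReadPairGraph does. */
    public static long encode(int samDir, int samRef, int mateDir, int mateRef) {
        long key = samDir;
        key <<= 31;
        key += samRef;
        key <<= 1;
        key += mateDir;
        key <<= 31;
        key += mateRef;
        return key;
    }

    /** Unpack in the reverse order, mirroring the output loop at the end of the method. */
    public static int[] decode(long key) {
        int mateRef = (int) (key & REF_MASK);
        key >>= 31;
        int mateDir = (int) (key & DIR_MASK);
        key >>= 1;
        int samRef = (int) (key & REF_MASK);
        key >>= 31;
        int samDir = (int) (key & DIR_MASK);
        return new int[] { samDir, samRef, mateDir, mateRef };
    }
}

With 31 bits per reference index the key can address roughly 2.1 billion contigs, which is why a whole link fits into a single HashMap<Long, Integer> key.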
Use of htsjdk.samtools.SAMRecordIterator in project polyGembler by c-zhou.
The class BAMstats, method run.
@Override
public void run() {
// TODO Auto-generated method stub
final SamReaderFactory factory = SamReaderFactory.makeDefault()
        .enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS)
        .validationStringency(ValidationStringency.SILENT);
try {
final SamReader in1 = factory.open(new File(bam_in1));
final SamReader in2 = factory.open(new File(bam_in2));
SAMRecordIterator iter1 = in1.iterator();
SAMRecordIterator iter2 = in2.iterator();
SAMRecord tmp_record1 = iter1.next(), tmp_record2 = iter2.next();
final SAMRecord[] sam_record1 = new SAMRecord[2], sam_record2 = new SAMRecord[2];
final float[] sam_as1 = new float[2], sam_as2 = new float[2];
final boolean[] sam_mapped1 = new boolean[2], sam_mapped2 = new boolean[2];
String record_id1 = tmp_record1.getReadName(), record_id2 = tmp_record2.getReadName();
int i_sz;
float i_as;
boolean b1, b2;
while (true) {
if (record_counter % 4000000 == 0)
myLogger.info(record_counter + " processed.");
while (record_id1 != null && record_id2 != null && !record_id1.equals(record_id2)) {
while (record_id1 != null && record_id1.compareTo(record_id2) < 0) {
tmp_record1 = bufferRecord(iter1, tmp_record1, sam_record1, sam_as1, sam_mapped1, record_id1);
processBuffer(sam_record1, sam_as1, sam_mapped1, 0);
record_id1 = tmp_record1 == null ? null : tmp_record1.getReadName();
}
if (record_id1 == null)
break;
while (record_id2 != null && record_id2.compareTo(record_id1) < 0) {
tmp_record2 = bufferRecord(iter2, tmp_record2, sam_record2, sam_as2, sam_mapped2, record_id2);
processBuffer(sam_record2, sam_as2, sam_mapped2, 1);
record_id2 = tmp_record2 == null ? null : tmp_record2.getReadName();
}
}
if (record_id1 == null || record_id2 == null)
break;
tmp_record1 = bufferRecord(iter1, tmp_record1, sam_record1, sam_as1, sam_mapped1, record_id1);
tmp_record2 = bufferRecord(iter2, tmp_record2, sam_record2, sam_as2, sam_mapped2, record_id2);
record_id1 = tmp_record1 == null ? null : tmp_record1.getReadName();
record_id2 = tmp_record2 == null ? null : tmp_record2.getReadName();
if ((sam_record2[0] == null || sam_record2[1] == null) && (sam_record1[0] == null || sam_record1[1] == null))
continue;
if ((sam_record1[0] == null || sam_record1[1] == null) && sam_record2[0] != null && sam_record2[1] != null) {
processBuffer(sam_record2, sam_as2, sam_mapped2, 1);
continue;
}
if ((sam_record2[0] == null || sam_record2[1] == null) && sam_record1[0] != null && sam_record1[1] != null) {
processBuffer(sam_record1, sam_as1, sam_mapped1, 0);
continue;
}
if (sam_record1[0].getDuplicateReadFlag() || sam_record1[1].getDuplicateReadFlag()
        || sam_record2[0].getDuplicateReadFlag() || sam_record2[1].getDuplicateReadFlag()
        || sam_record1[0].getReadString().replaceAll("N+$", "").replaceAll("^N+", "").length() < min_seqLen
        || sam_record1[1].getReadString().replaceAll("N+$", "").replaceAll("^N+", "").length() < min_seqLen
        || sam_record2[0].getReadString().replaceAll("N+$", "").replaceAll("^N+", "").length() < min_seqLen
        || sam_record2[1].getReadString().replaceAll("N+$", "").replaceAll("^N+", "").length() < min_seqLen)
    // skip duplicates and pairs where either read, after trimming leading/trailing Ns, is shorter than min_seqLen
    continue;
record_counter += 2;
if (sam_mapped1[0])
record_mapped_asInf[0]++;
if (sam_mapped1[1])
record_mapped_asInf[0]++;
if (sam_mapped2[0])
record_mapped_asInf[1]++;
if (sam_mapped2[1])
record_mapped_asInf[1]++;
if (sam_mapped1[0] && sam_mapped2[0])
record_mapped_asInf[2]++;
if (sam_mapped1[1] && sam_mapped2[1])
record_mapped_asInf[2]++;
b1 = sam_mapped1[0] && sam_mapped1[1]
        && !sam_record1[0].getReferenceName().equals("Chr00")
        && !sam_record1[1].getReferenceName().equals("Chr00");
if (b1) {
i_sz = Math.abs(sam_record1[0].getInferredInsertSize());
if (!insert_size_c1.containsKey(i_sz))
insert_size_c1.put(i_sz, 1);
else
insert_size_c1.put(i_sz, insert_size_c1.get(i_sz) + 1);
}
b2 = sam_mapped2[0] && sam_mapped2[1]
        && !sam_record2[0].getReferenceName().equals("Chr00")
        && !sam_record2[1].getReferenceName().equals("Chr00");
if (b2) {
i_sz = Math.abs(sam_record2[0].getInferredInsertSize());
if (!insert_size_c2.containsKey(i_sz))
insert_size_c2.put(i_sz, 1);
else
insert_size_c2.put(i_sz, insert_size_c2.get(i_sz) + 1);
}
if (!(b1 & b2))
continue;
i_sz = Math.abs(sam_record1[0].getInferredInsertSize()) - Math.abs(sam_record2[0].getInferredInsertSize());
if (!insert_size_cdiff.containsKey(i_sz))
insert_size_cdiff.put(i_sz, 1);
else
insert_size_cdiff.put(i_sz, insert_size_cdiff.get(i_sz) + 1);
// the alignment score is the same for both reads of a pair, so only index 0 is compared
i_as = sam_as1[0] - sam_as2[0];
if (!alignment_score_cdiff.containsKey(i_as))
alignment_score_cdiff.put(i_as, 1);
else
alignment_score_cdiff.put(i_as, alignment_score_cdiff.get(i_as) + 1);
int i = i_as > 0 ? 0 : (i_as < 0 ? 1 : 2);
if (sam_mapped1[0] && !sam_mapped2[0])
record_mapped_as0[0]++;
if (!sam_mapped1[0] && sam_mapped2[0])
record_mapped_as0[1]++;
if (sam_mapped1[0] && sam_mapped2[0])
record_mapped_as0[i]++;
if (sam_mapped1[1] && !sam_mapped2[1])
record_mapped_as0[0]++;
if (!sam_mapped1[1] && sam_mapped2[1])
record_mapped_as0[1]++;
if (sam_mapped1[1] && sam_mapped2[1])
record_mapped_as0[i]++;
}
if (record_id1 == null) {
while (record_id2 != null) {
tmp_record2 = bufferRecord(iter2, tmp_record2, sam_record2, sam_as2, sam_mapped2, record_id2);
processBuffer(sam_record2, sam_as2, sam_mapped2, 1);
record_id2 = tmp_record2 == null ? null : tmp_record2.getReadName();
}
}
if (record_id2 == null) {
while (record_id1 != null) {
tmp_record1 = bufferRecord(iter1, tmp_record1, sam_record1, sam_as1, sam_mapped1, record_id1);
processBuffer(sam_record1, sam_as1, sam_mapped1, 0);
record_id1 = tmp_record1 == null ? null : tmp_record1.getReadName();
}
}
iter1.close();
iter2.close();
in1.close();
in2.close();
BufferedWriter bw = new BufferedWriter(new FileWriter(out));
bw.write("###record_total\n" + record_counter + "\n");
bw.write("###record_mapped ( 0 )\n" + record_mapped_as0[0] + ", " + record_mapped_as0[1] + ", " + record_mapped_as0[2] + "\n");
bw.write("###record_mapped (Inf)\n" + record_mapped_asInf[0] + ", " + record_mapped_asInf[1] + ", " + record_mapped_asInf[2] + "\n");
bw.write("###alignment_score_c1\n");
for (float i : alignment_score_c1.keySet()) bw.write(i + "\t" + alignment_score_c1.get(i) + "\n");
bw.write("###alignment_score_c2\n");
for (float i : alignment_score_c2.keySet()) bw.write(i + "\t" + alignment_score_c2.get(i) + "\n");
bw.write("###alignment_score_cdiff\n");
for (float i : alignment_score_cdiff.keySet()) bw.write(i + "\t" + alignment_score_cdiff.get(i) + "\n");
bw.write("###insert_size_c1\n");
for (int i : insert_size_c1.keySet()) bw.write(i + "\t" + insert_size_c1.get(i) + "\n");
bw.write("###insert_size_c2\n");
for (int i : insert_size_c2.keySet()) bw.write(i + "\t" + insert_size_c2.get(i) + "\n");
bw.write("###insert_size_cdiff\n");
for (int i : insert_size_cdiff.keySet()) bw.write(i + "\t" + insert_size_cdiff.get(i) + "\n");
bw.close();
} catch (IOException e) {
e.printStackTrace();
}
}
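bufferRecord and processBuffer are not shown in this excerpt, but the surrounding loop is a merge-join over two queryname-sorted BAMs: read names are compared lexicographically and whichever iterator is behind is advanced until the names match. A stripped-down sketch of that pattern, with a hypothetical consume() standing in for the project's buffering and statistics code:

import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMRecordIterator;

// Sketch only: consume() is a hypothetical stand-in for bufferRecord/processBuffer;
// both iterators are assumed to be queryname-sorted with the same collation.
final class QuerynameMergeJoin {

    /** Walk two queryname-sorted iterators in lock step, pairing records with equal read names. */
    static void mergeJoin(SAMRecordIterator it1, SAMRecordIterator it2) {
        SAMRecord r1 = it1.hasNext() ? it1.next() : null;
        SAMRecord r2 = it2.hasNext() ? it2.next() : null;
        while (r1 != null && r2 != null) {
            int cmp = r1.getReadName().compareTo(r2.getReadName());
            if (cmp < 0) {
                consume(r1, 0);                       // read only present in the first BAM
                r1 = it1.hasNext() ? it1.next() : null;
            } else if (cmp > 0) {
                consume(r2, 1);                       // read only present in the second BAM
                r2 = it2.hasNext() ? it2.next() : null;
            } else {
                consume(r1, 0);                       // same read aligned in both BAMs
                consume(r2, 1);
                r1 = it1.hasNext() ? it1.next() : null;
                r2 = it2.hasNext() ? it2.next() : null;
            }
        }
        // drain whichever iterator still has records
        for (; r1 != null; r1 = it1.hasNext() ? it1.next() : null) consume(r1, 0);
        for (; r2 != null; r2 = it2.hasNext() ? it2.next() : null) consume(r2, 1);
    }

    private static void consume(SAMRecord r, int which) {
        // hypothetical: accumulate statistics for BAM 'which', as processBuffer does in BAMstats
    }
}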
Use of htsjdk.samtools.SAMRecordIterator in project polyGembler by c-zhou.
The class TenXMoleculeStats, method runBCSortedStats.
public void runBCSortedStats() {
// TODO Auto-generated method stub
final SamReader inputSam = factory.open(new File(this.in_bam));
final SAMSequenceDictionary seqDic = inputSam.getFileHeader().getSequenceDictionary();
mappedReadsOnChromosome = new long[seqDic.size()];
/**
* try {
* oos = Utils.getGZIPBufferedWriter(out_stats);
* } catch (IOException e1) {
* // TODO Auto-generated catch block
* e1.printStackTrace();
* }
*/
oos = Utils.getBufferedWriter(out_stats + ".txt");
oob = new SAMFileWriterFactory().makeSAMOrBAMWriter(inputSam.getFileHeader(), true, new File(out_stats + ".bam"));
SAMRecordIterator iter = inputSam.iterator();
SAMRecord record = iter.next();
String bc_str;
this.initial_thread_pool();
while (record != null) {
// if(mappedReads%1000000==0) myLogger.info(mappedReads+" mapped records processed.");
List<SAMRecord> bc_records = new ArrayList<SAMRecord>();
bc_str = record.getStringAttribute("BX");
this.processRecord(bc_records, record);
while (bc_str == null) {
record = iter.hasNext() ? iter.next() : null;
if (record == null)
break;
this.processRecord(bc_records, record);
bc_str = record.getStringAttribute("BX");
}
if (bc_str == null)
continue;
while (true) {
record = iter.hasNext() ? iter.next() : null;
if (record == null)
break;
if (bc_str.equals(record.getStringAttribute("BX"))) {
this.processRecord(bc_records, record);
} else
break;
}
if (bc_records.isEmpty())
continue;
readsOnBarcode.put(bc_records.get(0).getStringAttribute("BX"), bc_records.size());
executor.submit(new MoleculeConstructor(bc_records));
}
try {
iter.close();
inputSam.close();
this.waitFor();
oos.close();
oob.close();
BufferedWriter bw_summary = Utils.getBufferedWriter(this.out_stats + ".summary");
bw_summary.write("##Reads mapped: " + mappedReads + "\n");
bw_summary.write("##Reads unmapped: " + unmappedReads + "\n");
bw_summary.write("##Reads duplicated: " + duplicatedReads + "\n");
bw_summary.write("##Reads bad pairs: " + badpaired + "\n");
bw_summary.write("##Reads by pseudo-chromosomes:\n");
for (int i = 0; i < mappedReadsOnChromosome.length; i++) {
bw_summary.write(" #" + seqDic.getSequence(i).getSequenceName() + " " + mappedReadsOnChromosome[i] + "\n");
}
bw_summary.write("##Reads by barcodes:\n");
for (Map.Entry<String, Integer> entry : readsOnBarcode.entrySet()) {
bw_summary.write(" #" + entry.getKey() + " " + entry.getValue() + "\n");
}
bw_summary.close();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
myLogger.info("Completed. " + counter + " bam records processed.");
}
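runBCSortedStats relies on the input BAM being grouped by the 10x BX barcode tag, so all records sharing a barcode arrive consecutively and can be handed to a MoleculeConstructor as one batch. A minimal sketch of that grouping pattern, with a hypothetical handleGroup() in place of the project's processRecord/MoleculeConstructor machinery (it also simply skips records without a BX tag, which the method above handles slightly differently):

import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMRecordIterator;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.ValidationStringency;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

// Sketch only: assumes the BAM is sorted (or at least grouped) by the BX tag.
final class BarcodeGroups {

    static void forEachBarcode(String bamPath) throws IOException {
        SamReaderFactory factory = SamReaderFactory.makeDefault()
                .validationStringency(ValidationStringency.SILENT);
        try (SamReader reader = factory.open(new File(bamPath))) {
            SAMRecordIterator iter = reader.iterator();
            List<SAMRecord> group = new ArrayList<>();
            String current = null;
            while (iter.hasNext()) {
                SAMRecord rec = iter.next();
                String bx = rec.getStringAttribute("BX");
                if (bx == null) continue;                  // records without a barcode are skipped here
                if (!bx.equals(current)) {
                    if (!group.isEmpty()) handleGroup(current, group);
                    group = new ArrayList<>();
                    current = bx;
                }
                group.add(rec);
            }
            if (!group.isEmpty()) handleGroup(current, group);
            iter.close();
        }
    }

    private static void handleGroup(String barcode, List<SAMRecord> records) {
        // hypothetical: TenXMoleculeStats submits each group to a MoleculeConstructor task
    }
}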
Use of htsjdk.samtools.SAMRecordIterator in project polyGembler by c-zhou.
The class TenXSamtools, method runSort.
private void runSort() {
// TODO Auto-generated method stub
final SamReaderFactory factory = SamReaderFactory.makeDefault()
        .enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS)
        .validationStringency(ValidationStringency.SILENT);
final SamReader inputSam = factory.open(new File(this.bam_in));
final SAMFileHeader sort_header = inputSam.getFileHeader();
switch(this.sort_order) {
case coordinate:
sort_header.setSortOrder(SortOrder.coordinate);
break;
case queryname:
sort_header.setSortOrder(SortOrder.queryname);
break;
case barcode:
sort_header.setSortOrder(SortOrder.unknown);
break;
}
SAMRecordIterator iter = inputSam.iterator();
long record_inCount = 0;
SAMRecord[] buff = new SAMRecord[this.batch_size];
int k = 0;
SAMRecord temp = iter.hasNext() ? iter.next() : null;
this.initial_thread_pool();
while (temp != null) {
buff[k++] = temp;
record_inCount++;
temp = iter.hasNext() ? iter.next() : null;
if (k == this.batch_size || temp == null) {
executor.submit(new Runnable() {
private SAMRecord[] records;
@Override
public void run() {
// TODO Auto-generated method stub
try {
Arrays.sort(records, comprator);
final SAMFileWriter outputSam;
synchronized (lock) {
outputSam = new SAMFileWriterFactory().makeSAMOrBAMWriter(sort_header, true, new File(bam_out + String.format("%08d", batch++)));
}
int count = 0;
for (SAMRecord record : records) {
if (record != null) {
count++;
outputSam.addAlignment(record);
}
}
outputSam.close();
synchronized (lock) {
record_count += count;
}
myLogger.info("[" + Thread.currentThread().getName() + "] " + record_count + " records processed.");
} catch (Exception e) {
Thread t = Thread.currentThread();
t.getUncaughtExceptionHandler().uncaughtException(t, e);
e.printStackTrace();
executor.shutdown();
System.exit(1);
}
}
public Runnable init(SAMRecord[] buff) {
// TODO Auto-generated method stub
this.records = buff;
return (this);
}
}.init(buff));
k = 0;
buff = new SAMRecord[this.batch_size];
}
}
iter.close();
try {
inputSam.close();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
myLogger.info(record_inCount + " records read from " + this.bam_in);
this.waitFor();
// merge all batches
myLogger.info("Merge " + batch + " files.");
final SAMFileWriter outputSam = new SAMFileWriterFactory().makeSAMOrBAMWriter(sort_header, true, new File(this.bam_out));
final SamReader[] batchSam = new SamReader[batch];
final SAMRecordIterator[] iterSam = new SAMRecordIterator[batch];
final boolean[] reachFileEnd = new boolean[batch];
final TreeMap<SAMRecord, Integer> treeMap = new TreeMap<SAMRecord, Integer>(this.comprator);
for (int i = 0; i != batch; i++) {
batchSam[i] = factory.open(new File(this.bam_out + String.format("%08d", i)));
iterSam[i] = batchSam[i].iterator();
if (iterSam[i].hasNext())
treeMap.put(iterSam[i].next(), i);
reachFileEnd[i] = !iterSam[i].hasNext();
}
Entry<SAMRecord, Integer> firstEntry;
int bch, nReachFileEnd = 0;
for (boolean b : reachFileEnd)
    if (b) nReachFileEnd++;
long record_outCount = 0;
while (!treeMap.isEmpty()) {
firstEntry = treeMap.pollFirstEntry();
outputSam.addAlignment(firstEntry.getKey());
record_outCount++;
bch = firstEntry.getValue();
if (!reachFileEnd[bch]) {
treeMap.put(iterSam[bch].next(), bch);
if (!iterSam[bch].hasNext()) {
reachFileEnd[bch] = true;
nReachFileEnd++;
}
}
if (treeMap.isEmpty() && nReachFileEnd != batch) {
for (int i = 0; i != batch; i++) {
if (!reachFileEnd[i]) {
treeMap.put(iterSam[i].next(), i);
if (!iterSam[i].hasNext()) {
reachFileEnd[i] = true;
nReachFileEnd++;
}
}
}
}
}
try {
outputSam.close();
for (int i = 0; i != batch; i++) {
iterSam[i].close();
batchSam[i].close();
new File(this.bam_out + String.format("%08d", i)).delete();
}
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
myLogger.info(record_outCount + " records written to " + this.bam_out);
}
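The merge phase above keeps one pending record per batch in a TreeMap keyed by the SAMRecord itself, so two batches whose pending records compare as equal under comprator would collide on the same key. A sketch of the same k-way merge written with a PriorityQueue of (record, batch-index) pairs, which tolerates such ties; the class and method names here are illustrative only:

import htsjdk.samtools.SAMFileWriter;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMRecordIterator;

import java.util.Comparator;
import java.util.PriorityQueue;

// Sketch only: assumes each batch iterator is already sorted with the same comparator
// used to sort the batch files.
final class KWayMerge {

    static long merge(SAMRecordIterator[] batchIters, Comparator<SAMRecord> comparator, SAMFileWriter out) {
        // queue entries are (record, batch index); ordering only looks at the record
        PriorityQueue<Object[]> queue =
                new PriorityQueue<>((a, b) -> comparator.compare((SAMRecord) a[0], (SAMRecord) b[0]));
        for (int i = 0; i < batchIters.length; i++)
            if (batchIters[i].hasNext())
                queue.add(new Object[] { batchIters[i].next(), i });
        long written = 0;
        while (!queue.isEmpty()) {
            Object[] head = queue.poll();
            out.addAlignment((SAMRecord) head[0]);
            written++;
            int batch = (Integer) head[1];
            if (batchIters[batch].hasNext())           // refill from the batch we just consumed
                queue.add(new Object[] { batchIters[batch].next(), batch });
        }
        return written;
    }
}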
Use of htsjdk.samtools.SAMRecordIterator in project polyGembler by c-zhou.
The class AssemblyGraph, method validateAssemblyGraph.
private static void validateAssemblyGraph(String query_file, String bam_in) {
final SamReaderFactory factory = SamReaderFactory.makeDefault()
        .enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS)
        .validationStringency(ValidationStringency.SILENT);
qry_seqs = Sequence.parseFastaFileWithRevCmpAsMap(query_file);
final SamReader in1 = factory.open(new File(bam_in));
SAMRecordIterator iter1 = in1.iterator();
final List<SAMRecord> buffered_records = new ArrayList<SAMRecord>();
SAMRecord tmp_record1 = iter1.hasNext() ? iter1.next() : null;
String record_id = tmp_record1.getReadName();
SAMRecord r1, r2;
int i1, i2;
StringBuilder oos = new StringBuilder();
String source, sink;
final BFSShortestPath<Integer, DefaultWeightedEdge> bfs = new BFSShortestPath<Integer, DefaultWeightedEdge>(assembly_graph);
while (tmp_record1 != null) {
buffered_records.clear();
buffered_records.add(tmp_record1);
while ((tmp_record1 = iter1.hasNext() ? iter1.next() : null) != null && tmp_record1.getReadName().equals(record_id)) {
buffered_records.add(tmp_record1);
}
if (buffered_records.size() == 1) {
if (tmp_record1 != null)
record_id = tmp_record1.getReadName();
continue;
}
// sort records by head clipping size
buffered_records.sort(new Comparator<SAMRecord>() {
@Override
public int compare(SAMRecord r0, SAMRecord r1) {
// TODO Auto-generated method stub
return Integer.compare(head_clip(r0), head_clip(r1));
}
private int head_clip(SAMRecord r) {
CigarElement cigar = r.getReadNegativeStrandFlag() ? r.getCigar().getLastCigarElement() : r.getCigar().getFirstCigarElement();
return cigar.getOperator().isClipping() ? cigar.getLength() : 0;
}
});
// calculate distance for adjacent alignment record pairs
r1 = buffered_records.get(0);
// for writing
oos.setLength(0);
oos.append(record_id);
oos.append("[");
oos.append(buffered_records.size());
oos.append(", ");
oos.append(readLength(r1));
// find reference index
source = r1.getReferenceName() + (r1.getReadNegativeStrandFlag() ? "'" : "");
i1 = contig_coordinate.getKey(source);
oos.append(", ");
oos.append(source);
oos.append("] ");
for (int i = 1; i < buffered_records.size(); i++) {
r2 = buffered_records.get(i);
sink = r2.getReferenceName() + (r2.getReadNegativeStrandFlag() ? "'" : "");
i2 = contig_coordinate.getKey(sink);
oos.append(", (");
GraphPath<Integer, DefaultWeightedEdge> e = bfs.getPath(i1, i2);
oos.append(sink);
oos.append(",");
oos.append(e == null ? "Inf" : e.getLength());
oos.append(",");
oos.append(pathLength(e));
oos.append(")");
i1 = i2;
}
oos.append("\n");
System.out.println(oos.toString());
if (tmp_record1 != null)
record_id = tmp_record1.getReadName();
}
try {
iter1.close();
in1.close();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
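The graph distance printed for each adjacent alignment pair comes from JGraphT's BFSShortestPath: getPath(i1, i2) returns null when the sink contig is unreachable (written as "Inf" above), and getLength() is the number of edges on the path. A self-contained toy example of that lookup (the graph, vertex ids, and edges are made up for illustration; the project's pathLength helper is not reproduced here):

import org.jgrapht.Graph;
import org.jgrapht.GraphPath;
import org.jgrapht.alg.shortestpath.BFSShortestPath;
import org.jgrapht.graph.DefaultWeightedEdge;
import org.jgrapht.graph.SimpleDirectedWeightedGraph;

// Sketch only: a toy assembly graph with three contig vertices, just to show the
// getPath(...)/getLength() calls used in validateAssemblyGraph.
final class GraphHopDistance {

    public static void main(String[] args) {
        Graph<Integer, DefaultWeightedEdge> g =
                new SimpleDirectedWeightedGraph<>(DefaultWeightedEdge.class);
        g.addVertex(0);
        g.addVertex(1);
        g.addVertex(2);
        g.addEdge(0, 1);
        g.addEdge(1, 2);

        BFSShortestPath<Integer, DefaultWeightedEdge> bfs = new BFSShortestPath<>(g);
        GraphPath<Integer, DefaultWeightedEdge> path = bfs.getPath(0, 2);
        // getLength() counts edges on the path; a null path means the sink is unreachable
        System.out.println(path == null ? "Inf" : path.getLength()); // prints 2
    }
}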