use of htsjdk.samtools.SAMRecordIterator in project polyGembler by c-zhou.
the class SamFileFilter method run.
@Override
public void run() {
// TODO Auto-generated method stub
File folder = new File(input_dir);
File[] listOfFiles = folder.listFiles(new FilenameFilter() {
@Override
public boolean accept(File dir, String name) {
return name.endsWith(".bam");
}
});
int bam_n = listOfFiles.length;
this.initial_thread_pool();
for (int i = 0; i < bam_n; i++) {
executor.submit(new Runnable() {
private String bam_file;
@Override
public void run() {
// TODO Auto-generated method stub
try {
final SamReaderFactory factory = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS).validationStringency(ValidationStringency.SILENT);
final SamReader inputSam = factory.open(new File(input_dir + "/" + bam_file));
final SAMFileWriter outputSam = new SAMFileWriterFactory().makeSAMOrBAMWriter(inputSam.getFileHeader(), true, new File(output_dir + "/" + bam_file));
final BufferedReader filter_br = Utils.getBufferedReader(filter_file);
SAMRecordIterator iter = inputSam.iterator();
String line = filter_br.readLine();
if (line == null) {
filter_br.close();
iter.close();
inputSam.close();
outputSam.close();
return;
}
String[] s = line.split("\\s+");
String chr = s[0];
long end_pos = Long.parseLong(s[1]);
SAMRecord samr;
while (iter.hasNext()) {
samr = iter.next();
if (samr.getReferenceName().equals(chr) && samr.getAlignmentEnd() < end_pos) {
outputSam.addAlignment(samr);
continue;
}
if (!samr.getReferenceName().equals(chr) || samr.getAlignmentStart() > end_pos) {
line = null;
while ((line = filter_br.readLine()) != null && end_pos < samr.getAlignmentStart()) {
s = line.split("\\s");
chr = s[0];
end_pos = Long.parseLong(s[1]);
}
if (line != null) {
if (samr.getReferenceName().equals(chr) && samr.getAlignmentEnd() < end_pos)
outputSam.addAlignment(samr);
} else {
outputSam.addAlignment(samr);
while (iter.hasNext()) outputSam.addAlignment(iter.next());
break;
}
}
}
filter_br.close();
iter.close();
inputSam.close();
outputSam.close();
} catch (Exception e) {
Thread t = Thread.currentThread();
t.getUncaughtExceptionHandler().uncaughtException(t, e);
e.printStackTrace();
executor.shutdown();
System.exit(1);
}
}
public Runnable init(String bam_file) {
// TODO Auto-generated method stub
this.bam_file = bam_file;
return this;
}
}.init(listOfFiles[i].getName()));
throw new RuntimeException("incorrect tools!!!!");
}
this.waitFor();
}
use of htsjdk.samtools.SAMRecordIterator in project polyGembler by c-zhou.
the class Consensus method parseDataLibrary.
private void parseDataLibrary(String[] args) {
// TODO Auto-generated method stub
final Map<Integer, String> bamLib = new HashMap<Integer, String>();
final Map<Integer, Integer> insLib = new HashMap<Integer, Integer>();
final Map<Integer, Integer> wLib = new HashMap<Integer, Integer>();
for (int i = 0; i < args.length; i++) {
String arg = args[i];
if (arg.matches(bam_lib)) {
Matcher m = bam_pat.matcher(arg);
m.find();
int lib = Integer.parseInt(m.group(1));
bamLib.put(lib, args[++i]);
}
if (arg.matches(ins_lib)) {
Matcher m = ins_pat.matcher(arg);
m.find();
int lib = Integer.parseInt(m.group(1));
insLib.put(lib, Integer.parseInt(args[++i]));
}
if (arg.matches(w_lib)) {
Matcher m = w_pat.matcher(arg);
m.find();
int lib = Integer.parseInt(m.group(1));
wLib.put(lib, Integer.parseInt(args[++i]));
}
}
int n = bamLib.size();
if (wLib.size() != n) {
printUsage();
throw new IllegalArgumentException("Library parameters do not match!!!");
}
this.bam_list = new String[n];
this.ins_thres = new int[n];
this.link_w = new double[n];
this.read_paired = new boolean[n];
int i = 0;
for (Integer key : bamLib.keySet()) {
bam_list[i] = bamLib.get(key);
final SamReader in1 = factory.open(new File(bam_list[i]));
SAMRecordIterator iter1 = in1.iterator();
SAMRecord record;
while (iter1.hasNext()) {
record = iter1.next();
if (!record.getReadUnmappedFlag()) {
if (record.getReadPairedFlag())
read_paired[i] = true;
break;
}
}
try {
iter1.close();
in1.close();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
if (!insLib.containsKey(key) && read_paired[i] || !wLib.containsKey(key)) {
printUsage();
throw new IllegalArgumentException("Library parameters do not match!!!");
}
ins_thres[i] = read_paired[i] ? insLib.get(key) : Integer.MAX_VALUE;
link_w[i] = 1.0 / wLib.get(key);
i++;
}
}
use of htsjdk.samtools.SAMRecordIterator in project polyGembler by c-zhou.
the class SamToTaxa method distributeSortedBam.
private void distributeSortedBam() {
// TODO Auto-generated method stub
long start = System.currentTimeMillis();
try {
File indexFile = new File(myIndexFileName);
myLogger.info("Using the following index file:");
myLogger.info(indexFile.getAbsolutePath());
File samFile = new File(mySamFileName);
myLogger.info("Using the following BAM file:");
myLogger.info(samFile.getAbsolutePath());
final SamReaderFactory factory = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS).validationStringency(ValidationStringency.SILENT);
final SamReader inputSam = factory.open(samFile);
header_sam = inputSam.getFileHeader();
header_sam.setSortOrder(SortOrder.unsorted);
indexReader = Utils.getBufferedReader(indexFile, 65536);
String line = indexReader.readLine();
String[] samples = line.split("\\s+");
for (int i = 1; i < samples.length; i++) {
String taxa = samples[i];
SAMFileHeader header = header_sam.clone();
SAMReadGroupRecord rg = new SAMReadGroupRecord(taxa);
rg.setSample(taxa);
header.addReadGroup(rg);
bam_writers.put(taxa, new SAMFileWriterFactory().makeSAMOrBAMWriter(header, true, new File(myOutputDir + System.getProperty("file.separator") + taxa + ".bam")));
locks.put(taxa, new Object());
}
SAMRecordIterator iter = inputSam.iterator();
cache();
this.initial_thread_pool();
final int block = 10000;
int bS = 0;
SAMRecord[] Qs = new SAMRecord[block];
SAMRecord temp = iter.next();
int k = 0;
allReads = 0;
long tag;
while (temp != null) {
allReads++;
Qs[k] = temp;
temp = iter.hasNext() ? iter.next() : null;
k++;
tag = temp == null ? 0 : Long.parseLong(temp.getReadName());
if (k == block || temp == null || tag >= cursor) {
executor.submit(new Runnable() {
private SAMRecord[] sam;
private int block_i;
@Override
public void run() {
// TODO Auto-generated method stub
long tag;
Tag tagObj;
String taxa;
for (int i = 0; i < sam.length; i++) {
try {
if (sam[i] == null)
break;
tag = Long.parseLong(sam[i].getReadName());
tagObj = indexMap.get(tag);
int l = tagObj.taxa.length;
long cov = 0;
for (int t = 0; t < l; t++) cov += tagObj.count[t];
if (cov > maxCov) {
myLogger.info("?tag id, " + tag + "::tag sequence, " + sam[i].getReadString() + "::aligned to " + sam[i].getReferenceName() + ":" + sam[i].getAlignmentStart() + "-" + sam[i].getAlignmentEnd() + "::omitted due to abnormal high coverage, " + cov);
continue;
}
for (int t = 0; t < l; t++) {
taxa = tagObj.taxa[t];
sam[i].setAttribute("RG", taxa);
int p = tagObj.count[t];
synchronized (locks.get(taxa)) {
for (int r = 0; r < p; r++) bam_writers.get(taxa).addAlignment(sam[i]);
}
}
} catch (Exception e) {
Thread t = Thread.currentThread();
t.getUncaughtExceptionHandler().uncaughtException(t, e);
e.printStackTrace();
executor.shutdown();
System.exit(1);
}
}
if (block * (block_i + 1) % 1000000 == 0)
myLogger.info("Tag processed: " + block * (block_i + 1));
}
public Runnable init(SAMRecord[] sam, int bS) {
this.sam = sam;
this.block_i = bS;
return (this);
}
}.init(Qs, bS));
k = 0;
Qs = new SAMRecord[block];
bS++;
if (temp == null || tag >= cursor) {
this.waitFor();
if (temp != null) {
cache();
this.initial_thread_pool();
}
}
}
}
iter.close();
inputSam.close();
indexReader.close();
// executor.awaitTermination(365, TimeUnit.DAYS);
for (String key : bam_writers.keySet()) bam_writers.get(key).close();
myLogger.info("Total number of reads in lane=" + allReads);
myLogger.info("Process took " + (System.currentTimeMillis() - start) / 1000 + " seconds.");
} catch (Exception e) {
e.printStackTrace();
}
}
use of htsjdk.samtools.SAMRecordIterator in project polyGembler by c-zhou.
the class GenomeComparison method run.
@Override
public void run() {
// TODO Auto-generated method stub
final SamReaderFactory factory = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS).validationStringency(ValidationStringency.SILENT);
final SamReader inputSam1 = factory.open(new File(in_bam1));
SAMRecordIterator iter1 = inputSam1.iterator();
final SamReader inputSam2 = factory.open(new File(in_bam2));
SAMRecordIterator iter2 = inputSam2.iterator();
this.initial_thread_pool();
try {
bw_con = new BufferedWriter(new FileWriter(out_f + ".txt"));
bw_1 = new BufferedWriter(new FileWriter(out_f + ".1txt"));
bw_2 = new BufferedWriter(new FileWriter(out_f + ".2txt"));
SAMRecord record1 = iter1.next(), record2 = iter2.next();
List<SAMRecord> barcoded_records1 = new ArrayList<SAMRecord>(), barcoded_records2 = new ArrayList<SAMRecord>();
barcoded_records1.add(record1);
barcoded_records2.add(record2);
String barcode1 = record1.getStringAttribute("BX"), barcode2 = record2.getStringAttribute("BX");
jobs: while (true) {
if (barcode1.compareTo(barcode2) < 0) {
// output barcode 1 up to barcode 1
while (iter1.hasNext()) {
while ((record1 = iter1.next()).getStringAttribute("BX").equals(barcode1)) {
barcoded_records1.add(record1);
if (!iter1.hasNext())
break;
}
// TODO barcoded records 1 output
Molecule[] mols = extractMoleculeFromList(barcoded_records1);
for (int i = 0; i != mols.length; i++) bw_1.write(mols[i].chr_id + ":" + mols[i].chr_start + "-" + mols[i].chr_end + "\n");
barcoded_records1.clear();
if (iter1.hasNext()) {
barcoded_records1.add(record1);
barcode1 = record1.getStringAttribute("BX");
} else
break jobs;
if (barcode1.compareTo(barcode2) >= 0)
break;
}
if (!barcode1.equals(barcode2))
continue;
} else if (barcode1.compareTo(barcode2) > 0) {
// output barcode 2 up to barcode 2
while (iter2.hasNext()) {
while ((record2 = iter2.next()).getStringAttribute("BX").equals(barcode2)) {
barcoded_records2.add(record2);
if (!iter2.hasNext())
break;
}
// TODO barcoded records 2 output
Molecule[] mols = extractMoleculeFromList(barcoded_records2);
for (int i = 0; i != mols.length; i++) bw_2.write(mols[i].chr_id + ":" + mols[i].chr_start + "-" + mols[i].chr_end + "\n");
barcoded_records2.clear();
if (iter2.hasNext()) {
barcoded_records2.add(record2);
barcode2 = record2.getStringAttribute("BX");
} else
break jobs;
if (barcode2.compareTo(barcode1) >= 0)
break;
}
if (!barcode1.equals(barcode2))
continue;
}
while ((record1 = iter1.next()).getStringAttribute("BX").equals(barcode1)) {
barcoded_records1.add(record1);
if (!iter1.hasNext())
break;
}
while ((record2 = iter2.next()).getStringAttribute("BX").equals(barcode2)) {
barcoded_records2.add(record2);
if (!iter2.hasNext())
break;
}
executor.submit(new Runnable() {
List<SAMRecord> list1;
List<SAMRecord> list2;
@Override
public void run() {
// TODO Auto-generated method stub
try {
Molecule[] mols1 = extractMoleculeFromList(list1);
Molecule[] mols2 = extractMoleculeFromList(list2);
int n1 = mols1.length, n2 = mols2.length;
double insec;
Set<Integer> m1 = new HashSet<Integer>();
Set<Integer> m2 = new HashSet<Integer>();
for (int i = 0; i != n1; i++) {
for (int j = 0; j != n2; j++) {
if ((insec = intersect(mols1[i], mols2[j])) >= overlap_frac) {
bw_con.write(mols1[i].chr_id + ":" + mols1[i].chr_start + "-" + mols1[i].chr_end + "\t" + mols2[j].chr_id + ":" + mols2[j].chr_start + "-" + mols2[j].chr_end + "\t" + insec + "\n");
m1.add(i);
m2.add(j);
}
}
}
for (int i = 0; i != n1; i++) {
if (!m1.contains(i))
bw_1.write(mols1[i].chr_id + ":" + mols1[i].chr_start + "-" + mols1[i].chr_end + "\n");
}
for (int i = 0; i != n2; i++) {
if (!m2.contains(i))
bw_2.write(mols2[i].chr_id + ":" + mols2[i].chr_start + "-" + mols2[i].chr_end + "\n");
}
} catch (Exception e) {
Thread t = Thread.currentThread();
t.getUncaughtExceptionHandler().uncaughtException(t, e);
e.printStackTrace();
executor.shutdown();
System.exit(1);
}
}
public Runnable init(List<SAMRecord> list1, List<SAMRecord> list2) {
// TODO Auto-generated method stub
this.list1 = list1;
this.list2 = list2;
return this;
}
}.init(new ArrayList<SAMRecord>(barcoded_records1), new ArrayList<SAMRecord>(barcoded_records2)));
if (!iter1.hasNext() || !iter2.hasNext())
break jobs;
barcoded_records1.clear();
barcoded_records2.clear();
barcoded_records1.add(record1);
barcoded_records2.add(record2);
barcode1 = record1.getStringAttribute("BX");
barcode2 = record2.getStringAttribute("BX");
}
if (!iter1.hasNext()) {
// TODO output remaining iter2
while (iter2.hasNext()) {
while ((record2 = iter2.next()).getStringAttribute("BX").equals(barcode2)) {
barcoded_records2.add(record2);
if (!iter2.hasNext())
break;
}
Molecule[] mols = extractMoleculeFromList(barcoded_records2);
for (int i = 0; i != mols.length; i++) bw_2.write(mols[i].chr_id + ":" + mols[i].chr_start + "-" + mols[i].chr_end + "\n");
barcoded_records2.clear();
if (iter2.hasNext()) {
barcoded_records2.add(record2);
barcode2 = record2.getStringAttribute("BX");
}
}
}
if (!iter2.hasNext()) {
// TODO output remaining iter1
while (iter1.hasNext()) {
while ((record1 = iter1.next()).getStringAttribute("BX").equals(barcode1)) {
barcoded_records1.add(record1);
if (!iter1.hasNext())
break;
}
Molecule[] mols = extractMoleculeFromList(barcoded_records1);
for (int i = 0; i != mols.length; i++) bw_1.write(mols[i].chr_id + ":" + mols[i].chr_start + "-" + mols[i].chr_end + "\n");
barcoded_records1.clear();
if (iter1.hasNext()) {
barcoded_records1.add(record1);
barcode1 = record1.getStringAttribute("BX");
}
}
}
this.waitFor();
inputSam1.close();
inputSam2.close();
bw_con.close();
bw_1.close();
bw_2.close();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
use of htsjdk.samtools.SAMRecordIterator in project polyGembler by c-zhou.
the class TenXMoleculeStats method runSNPCaller.
private void runSNPCaller() {
// TODO Auto-generated method stub
final SamReader inputSam = factory.open(new File(this.in_bam));
if (inputSam.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate)
throw new RuntimeException("Sort BAM file before SNP calling!!!");
final SAMSequenceDictionary seqDic = inputSam.getFileHeader().getSequenceDictionary();
mappedReadsOnChromosome = new long[seqDic.size()];
oos = Utils.getBufferedWriter(out_stats + ".txt");
hap = Utils.getBufferedWriter(out_stats + ".haps");
vcf = Utils.getBufferedWriter(out_stats + ".snps");
SAMFileHeader header = inputSam.getFileHeader();
header.setSortOrder(SortOrder.unknown);
oob = new SAMFileWriterFactory().makeSAMOrBAMWriter(header, true, new File(out_stats + ".bam"));
try {
inputSam.close();
} catch (IOException e1) {
// TODO Auto-generated catch block
e1.printStackTrace();
}
this.initial_thread_pool();
for (int i = 0; i < seqDic.size(); i++) {
executor.submit(new Runnable() {
private int chr_num;
@Override
public void run() {
// TODO Auto-generated method stub
try {
final SAMSequenceRecord refSeq = seqDic.getSequence(chr_num);
final String seqnm = refSeq.getSequenceName();
final int seqln = refSeq.getSequenceLength();
final Set<Integer> snp_pos = readSNPs(in_vcf, seqnm);
final SamReader inputSam = factory.open(new File(in_bam));
final SAMRecordIterator iter = inputSam.queryOverlapping(seqnm, 0, seqln);
// do NOT call SNPs, read it from file instead
final Map<String, List<SAMRecord>> bc_record = new HashMap<String, List<SAMRecord>>();
final Map<String, Integer> bc_position = new HashMap<String, Integer>();
String bc;
SAMRecord tmp_record;
int pos;
while (iter.hasNext()) {
tmp_record = iter.next();
if (!processRecord(tmp_record))
continue;
bc = tmp_record.getStringAttribute("BX");
if (bc == null)
continue;
pos = tmp_record.getAlignmentStart();
if (mappedReads % 10000 == 0) {
Set<String> bc_removal = new HashSet<String>();
for (Map.Entry<String, List<SAMRecord>> entry : bc_record.entrySet()) {
String b = entry.getKey();
List<SAMRecord> records = entry.getValue();
if (bc_position.get(b) + max_gap < pos) {
processMolecule(b, records, snp_pos);
bc_removal.add(b);
}
}
for (String b : bc_removal) {
bc_record.remove(b);
bc_position.remove(b);
}
}
if (bc_record.containsKey(bc)) {
List<SAMRecord> records = bc_record.get(bc);
if (bc_position.get(bc) + max_gap < pos) {
processMolecule(bc, records, snp_pos);
records.clear();
bc_position.remove(bc);
}
} else
bc_record.put(bc, new ArrayList<SAMRecord>());
bc_record.get(bc).add(tmp_record);
bc_position.put(bc, Math.max(tmp_record.getAlignmentEnd(), bc_position.containsKey(bc) ? bc_position.get(bc) : 0));
}
if (!bc_record.isEmpty())
for (Map.Entry<String, List<SAMRecord>> entry : bc_record.entrySet()) processMolecule(entry.getKey(), entry.getValue(), snp_pos);
/**
* // do NOT call SNPs, read it from file instead
* int pos;
* SAMRecord buffer = iter.hasNext() ? iter.next() : null;
* int bufferS = buffer==null ? Integer.MAX_VALUE : buffer.getAlignmentStart();
* final Set<SAMRecord> record_pool = new HashSet<SAMRecord>();
* final Set<SAMRecord> tmp_removal = new HashSet<SAMRecord>();
*
* final int[] allele_stats = new int[5];
*
* final Map<String, TreeSet<SNP>> bc_snp = new HashMap<String, TreeSet<SNP>>();
*
* String dna_seq, bc;
* int tmp_int;
* String snp_str;
* char nucl;
* for(int p=0; p!=seqln; p++) {
* if(p%1000000==0) {
* System.err.println(seqnm+":"+p);
* // TODO
* // process bc_snp
* Set<String> bc_removal = new HashSet<String>();
* for(Map.Entry<String, TreeSet<SNP>> entry : bc_snp.entrySet()) {
* if(entry.getValue().last().position+max_gap<p) {
* processMolecule(entry.getKey(), entry.getValue());
* bc_removal.add(entry.getKey());
* }
* }
* for(String b : bc_removal) bc_snp.remove(b);
* }
* pos = p+1; // BAM format is 1-based coordination
*
* while(bufferS<=pos) {
* // TODO
* // buffer records upto this position
* while( buffer!=null &&
* buffer.getAlignmentStart()<=pos) {
* if( processRecord(buffer) )
* record_pool.add(buffer);
* buffer=iter.hasNext()?iter.next():null;
* bufferS = buffer==null ? Integer.MAX_VALUE :
* buffer.getAlignmentStart();
* }
* }
* if(record_pool.isEmpty()) continue;
*
* Arrays.fill(allele_stats, 0);
* tmp_removal.clear();
* for(SAMRecord record : record_pool) {
*
* if(record.getAlignmentEnd()<pos) {
* tmp_removal.add(record);
* continue;
* }
* dna_seq = record.getReadString();
* tmp_int = record.getReadPositionAtReferencePosition(pos);
* nucl = tmp_int==0?'D':Character.toUpperCase(dna_seq.charAt(tmp_int-1));
* switch(nucl) {
* case 'A':
* allele_stats[0]++;
* break;
* case 'C':
* allele_stats[1]++;
* break;
* case 'G':
* allele_stats[2]++;
* break;
* case 'T':
* allele_stats[3]++;
* break;
* case 'D':
* allele_stats[4]++;
* break;
* case 'N':
* // do nothing here
* break;
* default:
* throw new RuntimeException("!!!");
* }
* }
* record_pool.removeAll(tmp_removal);
*
* snp_str = feasibleSNP(allele_stats);
*
* // not two alleles or minor allele frequency is smaller than threshold
* // don't call indels
* if(snp_str==null) continue;
*
* vcf.write(seqnm+"\t"+p+"\t"+snp_str+"\n");
*
* for(SAMRecord record : record_pool) {
* bc = record.getStringAttribute("BX");
* if(bc==null) continue;
* dna_seq = record.getReadString();
* tmp_int = record.getReadPositionAtReferencePosition(pos);
* if(tmp_int==0) continue;
* nucl = Character.toUpperCase(dna_seq.charAt(tmp_int-1));
* if(bc_snp.containsKey(bc)) {
* TreeSet<SNP> bc_set = bc_snp.get(bc);
* if(bc_set.last().position+max_gap<p) {
* // TODO
* // gap exceeds max_gap
* // write molecule out
* processMolecule(bc, bc_set);
* bc_set.clear();
* }
* } else {
* bc_snp.put(bc, new TreeSet<SNP>(new SNP.PositionComparator()));
* }
* bc_snp.get(bc).add(new SNP(p, nucl, record));
*
* }
* }
*
* for(Map.Entry<String, TreeSet<SNP>> entry : bc_snp.entrySet())
* processMolecule(entry.getKey(), entry.getValue());
*/
iter.close();
inputSam.close();
} catch (Exception e) {
Thread t = Thread.currentThread();
t.getUncaughtExceptionHandler().uncaughtException(t, e);
e.printStackTrace();
executor.shutdown();
System.exit(1);
}
}
private void processMolecule(final String bc, final List<SAMRecord> records, final Set<Integer> snp_pos) {
// TODO Auto-generated method stub
MoleculeConstructor molCon = new MoleculeConstructor(records);
if (molCon.getMolecularLength() < oo_thresh)
return;
molCon.run();
}
/**
* private void processMolecule(final String bc, final TreeSet<SNP> snp_set)
* throws IOException {
* // TODO Auto-generated method stub
* // write molecule haplotypes
* Set<SAMRecord> records = new HashSet<SAMRecord>();
* List<String> snp_str = new ArrayList<String>();
* SNP snp;
* int tmp_position = -1;
* while(!snp_set.isEmpty()) {
* snp = snp_set.pollFirst();
* records.add(snp.alignment);
* if(tmp_position==snp.position) {
* snp_str.remove(snp_str.size()-1);
* } else {
* snp_str.add(snp.position+","+
* snp.allele);
* tmp_position = snp.position;
* }
* }
* // write molecule statistics
* MoleculeConstructor molCon =
* new MoleculeConstructor(new ArrayList<SAMRecord>(records));
* if(molCon.getMolecularLength()<oo_thresh) return;
* molCon.run();
*
* if(snp_str.size()<2) return;
* StringBuilder os = new StringBuilder();
* os.append(bc);
* for(String ss : snp_str) {
* os.append(" ");
* os.append( ss);
* }
* os.append("\n");
* hap.write(os.toString());
* }
*/
public Runnable init(final int i) {
// TODO Auto-generated method stub
this.chr_num = i;
return this;
}
private String feasibleSNP(int[] ints) {
// TODO Auto-generated method stub
if (ints[4] > 0)
return null;
List<Integer> obs = new ArrayList<Integer>();
for (int i = 0; i < 4; i++) if (ints[i] > 0)
obs.add(i);
if (obs.size() != 2)
return null;
int ref = obs.get(0), alt = obs.get(1);
int d = ints[ref] + ints[alt];
if (d < min_depth)
return null;
double maf = (double) ints[ref] / d;
if (maf > 0.5)
maf = 1 - maf;
if (maf < min_maf)
return null;
return Sequence.nucleotide[ref] + "," + Sequence.nucleotide[alt] + "\t" + ints[ref] + "," + ints[alt];
}
}.init(i));
}
try {
inputSam.close();
this.waitFor();
oos.close();
hap.close();
vcf.close();
oob.close();
BufferedWriter bw_summary = Utils.getBufferedWriter(this.out_stats + ".summary");
bw_summary.write("##Reads mapped: " + mappedReads + "\n");
bw_summary.write("##Reads unmapped: " + unmappedReads + "\n");
bw_summary.write("##Reads duplicated: " + duplicatedReads + "\n");
bw_summary.write("##Reads bad pairs: " + badpaired + "\n");
bw_summary.write("##Reads by pseudo-chromosomes:\n");
for (int i = 0; i < mappedReadsOnChromosome.length; i++) {
bw_summary.write(" #" + seqDic.getSequence(i).getSequenceName() + " " + mappedReadsOnChromosome[i] + "\n");
}
bw_summary.write("##Reads by barcodes:\n");
for (Map.Entry<String, Integer> entry : readsOnBarcode.entrySet()) {
bw_summary.write(" #" + entry.getKey() + " " + entry.getValue() + "\n");
}
bw_summary.close();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
Aggregations