use of com.github.lindenb.jvarkit.util.Counter in project jvarkit by lindenb.
the class ExtendReferenceWithReads method scanRegion.
/**
* scanRegion
* @param contig Reference sequence of interest.
* @param chromStart 0-based, inclusive start of the interval of interest. Zero implies the start of the reference sequence.
* @param chromEnd 0-based, exclusive end of the interval of interest. Zero implies the end of the reference sequence.
* @param rescue which flank of the contig is being rescued (LEFT, RIGHT or CENTER).
*/
private Map<Integer, Counter<Byte>> scanRegion(final SAMSequenceRecord contig, final int chromStart, final int chromEnd, final Rescue rescue) {
final Map<Integer, Counter<Byte>> pos2bases = new HashMap<>(1 + chromEnd - chromStart);
LOG.info("Scanning :" + contig.getSequenceName() + ":" + chromStart + "-" + chromEnd);
for (int side = 0; side < 2; ++side) {
// 5' or 3' side
if (!rescue.equals(Rescue.CENTER) && side > 0) {
// already done
break;
}
for (final SamReader samReader : samReaders) {
final SAMSequenceDictionary dict2 = samReader.getFileHeader().getSequenceDictionary();
final SAMSequenceRecord ssr2 = dict2.getSequence(contig.getSequenceName());
if (ssr2 == null || ssr2.getSequenceLength() != contig.getSequenceLength()) {
LOG.info("No contig " + contig.getSequenceName() + " with L=" + contig.getSequenceLength() + " bases in " + samReader.getResourceDescription());
continue;
}
int mappedPos = -1;
switch(rescue) {
case LEFT:
mappedPos = 1;
break;
case RIGHT:
mappedPos = contig.getSequenceLength();
break;
case CENTER:
mappedPos = (side == 0 ? chromStart + 1 : chromEnd);
break;
default:
throw new IllegalStateException();
}
final SAMRecordIterator iter = samReader.query(contig.getSequenceName(), mappedPos, mappedPos, false);
while (iter.hasNext()) {
final SAMRecord rec = iter.next();
if (rec.getReadUnmappedFlag())
continue;
if (this.filter.filterOut(rec))
continue;
final Cigar cigar = rec.getCigar();
if (cigar == null)
continue;
final byte[] bases = rec.getReadBases();
if (bases == null || bases.length == 0)
continue;
int refPos1 = rec.getUnclippedStart();
int readpos = 0;
for (final CigarElement ce : cigar) {
final CigarOperator op = ce.getOperator();
for (int L = 0; L < ce.getLength(); ++L) {
if (op.consumesReadBases()) {
Counter<Byte> count = pos2bases.get(refPos1 - 1);
if (count == null) {
count = new Counter<>();
pos2bases.put(refPos1 - 1, count);
}
count.incr((byte) Character.toLowerCase(bases[readpos]));
readpos++;
}
if (op.consumesReferenceBases()) {
refPos1++;
}
}
}
}
iter.close();
}
}
return pos2bases;
}
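The doWork method below prints consensus(count) for each rescued position, but that helper is not part of this excerpt. A minimal sketch of what such a helper could look like, using only the Counter methods already shown on this page (keySet() and count()); the actual jvarkit implementation may differ.
// Hypothetical sketch, not the jvarkit source: return the most frequent base
// observed at a position, or 'n' when no read covered it.
private char consensus(final Counter<Byte> count) {
if (count == null || count.keySet().isEmpty()) return 'n';
byte best = 'n';
long bestCount = -1L;
for (final Byte base : count.keySet()) {
final long c = count.count(base);
if (c > bestCount) {
bestCount = c;
best = base.byteValue();
}
}
return (char) best;
}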
use of com.github.lindenb.jvarkit.util.Counter in project jvarkit by lindenb.
the class ExtendReferenceWithReads method doWork.
@Override
public int doWork(List<String> args) {
if (this.faidx == null) {
LOG.error("No REF defined");
return -1;
}
this.samReaders.clear();
PrintStream out = null;
try {
this.indexedFastaSequenceFile = new IndexedFastaSequenceFile(faidx);
SAMSequenceDictionary dict = this.indexedFastaSequenceFile.getSequenceDictionary();
if (dict == null) {
LOG.error("Reference file is missing a sequence dictionary (use picard)");
return -1;
}
final SamReaderFactory srf = super.createSamReaderFactory();
srf.setOption(Option.CACHE_FILE_BASED_INDEXES, true);
for (final String uri : IOUtils.unrollFiles(args)) {
LOG.info("opening BAM " + uri);
final SamReader sr = srf.open(SamInputResource.of(uri));
/* doesn't work with remote ?? */
if (!sr.hasIndex()) {
LOG.error("file " + uri + " is not indexed");
return -1;
}
final SAMFileHeader header = sr.getFileHeader();
if (!header.getSortOrder().equals(SortOrder.coordinate)) {
LOG.error("file " + uri + " not sorted on coordinate but " + header.getSortOrder());
return -1;
}
final SAMSequenceDictionary dict2 = header.getSequenceDictionary();
if (dict2 == null) {
LOG.error("file " + uri + " needs a sequence dictionary");
return -1;
}
samReaders.add(sr);
}
if (samReaders.isEmpty()) {
LOG.error("No BAM defined");
return -1;
}
out = super.openFileOrStdoutAsPrintStream(this.outputFile);
final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(dict);
for (final SAMSequenceRecord ssr : dict.getSequences()) {
final GenomicSequence genomic = new GenomicSequence(this.indexedFastaSequenceFile, ssr.getSequenceName());
int chromStart = 0;
int nPrinted = 0;
out.print(">");
out.print(ssr.getSequenceName());
for (final Rescue side : Rescue.values()) {
switch(side) {
case LEFT:
/* look before position 0 of chromosome */
{
final Map<Integer, Counter<Byte>> pos2bases = scanRegion(ssr, -1, -1, side);
int newstart = 0;
for (final Integer pos : pos2bases.keySet()) {
if (pos >= 0)
continue;
newstart = Math.min(newstart, pos);
}
while (newstart < 0) {
final Counter<Byte> count = pos2bases.get(newstart);
if (nPrinted % 60 == 0)
out.println();
out.print(consensus(count));
newstart++;
++nPrinted;
}
break;
}
case RIGHT:
/* look after position > length(chromosome) */
{
final Map<Integer, Counter<Byte>> pos2bases = this.scanRegion(ssr, -1, -1, side);
int newend = ssr.getSequenceLength();
for (final Integer pos : pos2bases.keySet()) {
if (pos < ssr.getSequenceLength())
continue;
newend = Math.max(newend, pos + 1);
}
for (int i = ssr.getSequenceLength(); i < newend; i++) {
final Counter<Byte> count = pos2bases.get(i);
if (nPrinted % 60 == 0)
out.println();
out.print(consensus(count));
++nPrinted;
}
break;
}
case CENTER:
/* 0 to chromosome-length */
{
chromStart = 0;
while (chromStart < genomic.length()) {
final char base = Character.toUpperCase(genomic.charAt(chromStart));
if (base != 'N') {
progress.watch(ssr.getSequenceName(), chromStart);
if (nPrinted % 60 == 0)
out.println();
out.print(base);
++chromStart;
++nPrinted;
continue;
}
int chromEnd = chromStart + 1;
while (chromEnd < genomic.length() && Character.toUpperCase(genomic.charAt(chromEnd)) == 'N') {
++chromEnd;
}
if (chromEnd - chromStart < this.minLenNNNNContig) {
while (chromStart < chromEnd) {
progress.watch(ssr.getSequenceName(), chromStart);
if (nPrinted % 60 == 0)
out.println();
out.print(base);
++chromStart;
++nPrinted;
}
continue;
}
final Map<Integer, Counter<Byte>> pos2bases = scanRegion(ssr, chromStart, chromEnd, side);
while (chromStart < chromEnd) {
final Counter<Byte> count = pos2bases.get(chromStart);
if (nPrinted % 60 == 0)
out.println();
if (count == null) {
out.print('N');
} else {
out.print(consensus(count));
}
++chromStart;
++nPrinted;
continue;
}
}
break;
}
}
// end switch type
}
out.println();
}
progress.finish();
out.flush();
out.close();
return RETURN_OK;
} catch (final Exception e) {
LOG.error(e);
return -1;
} finally {
CloserUtil.close(this.indexedFastaSequenceFile);
for (final SamReader r : samReaders) CloserUtil.close(r);
samReaders.clear();
}
}
use of com.github.lindenb.jvarkit.util.Counter in project jvarkit by lindenb.
the class VcfStage method makeGenotypePie.
private PieChart makeGenotypePie(final VariantContext ctx) {
final Counter<GenotypeType> countTypes = new Counter<>();
if (ctx != null) {
for (final Genotype g : ctx.getGenotypes()) {
// ignore genotype if not displayed
final SampleDef sampleDef = this.name2sampledef.get(g.getSampleName());
if (sampleDef == null || !sampleDef.isDisplayed())
continue;
countTypes.incr(g.getType());
}
}
final ObservableList<PieChart.Data> pieChartData = FXCollections.observableArrayList();
for (final GenotypeType t : GenotypeType.values()) {
int c = (int) countTypes.count(t);
if (c == 0)
continue;
pieChartData.add(new PieChart.Data(t.name() + " = " + c, c));
}
final PieChart chart = new PieChart(pieChartData);
chart.setLegendVisible(false);
return chart;
}
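The Counter class itself is not reproduced in these snippets. As a rough guide to the API they rely on, here is a minimal, hypothetical sketch of a Counter<T> supporting the calls seen above (incr, count, getCountCategories, keySet), assuming long-valued counts as the casts suggest; the real jvarkit class offers more than this.
// Hypothetical minimal Counter<T>, for illustration only; not the jvarkit implementation.
public class Counter<T> {
private final java.util.Map<T, Long> counts = new java.util.HashMap<>();
/** increment the count for a category by one */
public void incr(final T key) {
counts.merge(key, 1L, Long::sum);
}
/** number of times this category was observed (0 if never seen) */
public long count(final T key) {
return counts.getOrDefault(key, 0L);
}
/** number of distinct categories observed */
public int getCountCategories() {
return counts.size();
}
/** the distinct categories observed so far */
public java.util.Set<T> keySet() {
return counts.keySet();
}
}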
use of com.github.lindenb.jvarkit.util.Counter in project jvarkit by lindenb.
the class VCFComm method doWork.
@Override
public int doWork(final List<String> args) {
CloseableIterator<LineAndFile> iter = null;
SortingCollection<LineAndFile> variants = null;
VariantContextWriter w = null;
try {
if (args.isEmpty()) {
LOG.error("Illegal number of arguments");
return -1;
}
Set<VCFHeaderLine> metaData = new HashSet<VCFHeaderLine>();
variants = SortingCollection.newInstance(LineAndFile.class, new LineAndFileCodec(), new LineAndFileComparator(), super.sortingCollectionArgs.getMaxRecordsInRam(), super.sortingCollectionArgs.getTmpPaths());
variants.setDestructiveIteration(true);
/**
* new sample names in the output vcf: one sample per file
*/
final Map<Integer, String> fileid2sampleName = new TreeMap<>();
/**
* sample names as they appear in the original VCF headers
*/
final Counter<String> countInputSamples = new Counter<String>();
/**
* dicts
*/
final List<SAMSequenceDictionary> all_dictionaries = new ArrayList<>();
for (final String vcffilename : IOUtils.unrollFiles(args)) {
LOG.info("Reading from " + vcffilename);
final Input input = super.put(variants, vcffilename);
String sampleName = vcffilename;
if (sampleName.endsWith(".vcf.gz")) {
sampleName = sampleName.substring(0, sampleName.length() - 7);
} else if (sampleName.endsWith(".vcf")) {
sampleName = sampleName.substring(0, sampleName.length() - 4);
}
int slash = sampleName.lastIndexOf(File.separatorChar);
if (slash != -1)
sampleName = sampleName.substring(slash + 1);
int suffix = 1;
// loop until we find a unique name
for (; ; ) {
final String key = sampleName + (suffix == 1 ? "" : "_" + suffix);
if (fileid2sampleName.values().contains(key)) {
suffix++;
continue;
}
fileid2sampleName.put(input.file_id, key);
metaData.add(new VCFHeaderLine(key, vcffilename));
break;
}
for (final String sname : input.codecAndHeader.header.getSampleNamesInOrder()) {
countInputSamples.incr(sname);
}
all_dictionaries.add(input.codecAndHeader.header.getSequenceDictionary());
}
variants.doneAdding();
/**
* unique sample name, if the same single sample is present in every input VCF
*/
Optional<String> unqueSampleName = Optional.empty();
if (countInputSamples.getCountCategories() == 1 && countInputSamples.count(countInputSamples.keySet().iterator().next()) == fileid2sampleName.size()) {
unqueSampleName = Optional.of(countInputSamples.keySet().iterator().next());
LOG.info("Unique sample name is " + unqueSampleName.get());
}
VCFStandardHeaderLines.addStandardFormatLines(metaData, true, VCFConstants.DEPTH_KEY, VCFConstants.GENOTYPE_QUALITY_KEY, VCFConstants.GENOTYPE_KEY, VCFConstants.GENOTYPE_FILTER_KEY);
VCFStandardHeaderLines.addStandardInfoLines(metaData, true, VCFConstants.DEPTH_KEY, VCFConstants.ALLELE_COUNT_KEY, VCFConstants.ALLELE_NUMBER_KEY);
metaData.add(new VCFHeaderLine(getClass().getSimpleName(), "version:" + getVersion() + " command:" + getProgramCommandLine()));
final VCFFilterHeaderLine variantNotCalledInAllVcf = new VCFFilterHeaderLine("NotCalledEveryWhere", "Variant was NOT called in all input VCFs");
metaData.add(variantNotCalledInAllVcf);
final VCFFilterHeaderLine variantWasFiltered = new VCFFilterHeaderLine("VariantWasFiltered", "At least one variant was filtered");
metaData.add(variantWasFiltered);
final VCFFormatHeaderLine variantQUALFormat = new VCFFormatHeaderLine("VCQUAL", 1, VCFHeaderLineType.Float, "Variant Quality");
metaData.add(variantQUALFormat);
metaData.add(new VCFFormatHeaderLine(VCFConstants.ALLELE_NUMBER_KEY, 1, VCFHeaderLineType.Integer, "Number of alleles in the source VCF"));
metaData.add(new VCFFormatHeaderLine(VCFConstants.ALLELE_COUNT_KEY, 1, VCFHeaderLineType.Integer, "Number of ALT alleles"));
final VCFInfoHeaderLine foundInCountVcfInfo = new VCFInfoHeaderLine("NVCF", 1, VCFHeaderLineType.Integer, "Number of VCFs in which this variant was found");
metaData.add(foundInCountVcfInfo);
final VCFInfoHeaderLine variantTypesInfo = new VCFInfoHeaderLine("VTYPES", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Distinct Variants type");
metaData.add(variantTypesInfo);
final VCFFilterHeaderLine multipleTypeFilters = new VCFFilterHeaderLine("DiscordantTypes", "Discordant types at this position");
metaData.add(multipleTypeFilters);
final VCFFormatHeaderLine variantTypeFormat = new VCFFormatHeaderLine("VTYPE", 1, VCFHeaderLineType.String, "Variant Type");
metaData.add(variantTypeFormat);
final VCFFilterHeaderLine uniqueVariantDiscordantGTFilter;
if (unqueSampleName.isPresent()) {
metaData.add(new VCFHeaderLine("UniqSample", unqueSampleName.get()));
uniqueVariantDiscordantGTFilter = new VCFFilterHeaderLine("DiscordantGenotypeForUniqSample", "Genotype discordant for sample " + unqueSampleName.get());
metaData.add(uniqueVariantDiscordantGTFilter);
} else {
uniqueVariantDiscordantGTFilter = null;
}
final VCFHeader header = new VCFHeader(metaData, new ArrayList<>(fileid2sampleName.values()));
// all have a dict
if (!normalize_chr && !all_dictionaries.contains(null)) {
SAMSequenceDictionary thedict = null;
for (int x = 0; x < all_dictionaries.size(); ++x) {
SAMSequenceDictionary d = all_dictionaries.get(x);
if (thedict == null) {
thedict = d;
} else if (!SequenceUtil.areSequenceDictionariesEqual(d, thedict)) {
thedict = null;
break;
}
}
if (thedict != null)
header.setSequenceDictionary(thedict);
}
w = super.openVariantContextWriter(super.outputFile);
w.writeHeader(header);
final List<LineAndFile> row = new ArrayList<LineAndFile>(super.inputs.size());
final Comparator<LineAndFile> posCompare = (A, B) -> A.getContigPosRef().compareTo(B.getContigPosRef());
iter = variants.iterator();
for (; ; ) {
LineAndFile rec = null;
if (iter.hasNext()) {
rec = iter.next();
}
if (rec == null || (!row.isEmpty() && posCompare.compare(row.get(0), rec) != 0)) {
if (!row.isEmpty()) {
final VariantContext first = row.get(0).getContext();
/* in which file id we find this variant */
Set<Integer> fileids_for_variant = row.stream().map(LAF -> LAF.fileIdx).collect(Collectors.toSet());
// a single file may contribute several rows with the same chrom/pos/ref but different ALT alleles
if (row.size() != fileids_for_variant.size()) {
for (; ; ) {
boolean ok = true;
for (int x = 0; ok && x + 1 < row.size(); ++x) {
final VariantContext ctxx = row.get(x).getContext();
final List<Allele> altsx = ctxx.getAlternateAlleles();
for (int y = x + 1; ok && y < row.size(); ++y) {
if (row.get(x).fileIdx != row.get(y).fileIdx)
continue;
final VariantContext ctxy = row.get(y).getContext();
final List<Allele> altsy = ctxy.getAlternateAlleles();
if (altsx.equals(altsy))
continue;
if (!ctxx.isVariant() && ctxy.isVariant()) {
row.remove(x);
} else if (ctxx.isVariant() && !ctxy.isVariant()) {
row.remove(y);
} else if (!ctxx.isSNP() && ctxy.isSNP()) {
row.remove(x);
} else if (ctxx.isSNP() && !ctxy.isSNP()) {
row.remove(y);
} else if (altsx.size() > altsy.size()) {
row.remove(x);
} else if (altsx.size() < altsy.size()) {
row.remove(y);
} else {
row.remove(y);
}
ok = false;
break;
}
}
if (ok)
break;
}
fileids_for_variant = row.stream().map(LAF -> LAF.fileIdx).collect(Collectors.toSet());
}
if (row.size() != fileids_for_variant.size()) {
LOG.error("There are some duplicated variants at the position " + new ContigPosRef(first) + " in the same vcf file");
for (final LineAndFile laf : row) {
LOG.error("File [" + laf.fileIdx + "]" + fileid2sampleName.get(laf.fileIdx));
LOG.error("\t" + laf.getContigPosRef());
}
row.clear();
} else {
final Set<Allele> alleles = row.stream().flatMap(R -> R.getContext().getAlleles().stream()).collect(Collectors.toSet());
final VariantContextBuilder vcb = new VariantContextBuilder(getClass().getName(), first.getContig(), first.getStart(), first.getEnd(), alleles);
final Set<String> filters = new HashSet<>();
final Set<VariantContext.Type> variantContextTypes = new HashSet<>();
final List<Genotype> genotypes = new ArrayList<Genotype>();
for (final LineAndFile laf : row) {
if (laf.getContext().isFiltered())
filters.add(variantWasFiltered.getID());
variantContextTypes.add(laf.getContext().getType());
final GenotypeBuilder gbuilder = new GenotypeBuilder();
gbuilder.name(fileid2sampleName.get(laf.fileIdx));
if (unqueSampleName.isPresent()) {
final Genotype g0 = laf.getContext().getGenotype(unqueSampleName.get());
if (g0 == null) {
iter.close();
w.close();
throw new IllegalStateException("Cannot find genotype for " + unqueSampleName.get());
}
if (g0.hasDP())
gbuilder.DP(g0.getDP());
if (g0.hasGQ())
gbuilder.GQ(g0.getGQ());
gbuilder.alleles(g0.getAlleles());
} else {
gbuilder.alleles(Arrays.asList(first.getReference(), first.getReference()));
if (laf.getContext().hasAttribute(VCFConstants.DEPTH_KEY)) {
gbuilder.DP(laf.getContext().getAttributeAsInt(VCFConstants.DEPTH_KEY, 0));
}
}
if (laf.getContext().isFiltered()) {
gbuilder.filter("VCFFILTERED");
}
if (laf.getContext().hasLog10PError()) {
gbuilder.attribute(variantQUALFormat.getID(), laf.getContext().getPhredScaledQual());
}
gbuilder.attribute(VCFConstants.ALLELE_NUMBER_KEY, laf.getContext().getGenotypes().stream().flatMap(G -> G.getAlleles().stream()).filter(A -> !A.isNoCall()).count());
gbuilder.attribute(VCFConstants.ALLELE_COUNT_KEY, laf.getContext().getGenotypes().stream().flatMap(G -> G.getAlleles().stream()).filter(A -> !(A.isReference() || A.isNoCall())).count());
gbuilder.attribute(variantTypeFormat.getID(), laf.getContext().getType().name());
genotypes.add(gbuilder.make());
}
final String id = String.join(";", row.stream().map(LAF -> LAF.getContext()).filter(V -> V.hasID()).map(V -> V.getID()).collect(Collectors.toSet()));
if (!id.isEmpty())
vcb.id(id);
vcb.genotypes(genotypes);
if (unqueSampleName.isPresent()) {
boolean all_same = true;
for (int x = 0; all_same && x + 1 < genotypes.size(); ++x) {
if (!genotypes.get(x).isCalled())
continue;
for (int y = x + 1; all_same && y < genotypes.size(); ++y) {
if (!genotypes.get(y).isCalled())
continue;
if (!genotypes.get(x).sameGenotype(genotypes.get(y), true)) {
all_same = false;
break;
}
}
}
if (!all_same)
filters.add(uniqueVariantDiscordantGTFilter.getID());
}
// Add AN
vcb.attribute(VCFConstants.ALLELE_NUMBER_KEY, genotypes.stream().filter(G -> G.isCalled()).mapToInt(G -> G.getAlleles().size()).sum());
if (!variantContextTypes.isEmpty()) {
vcb.attribute(variantTypesInfo.getID(), new ArrayList<>(variantContextTypes.stream().map(T -> T.name()).collect(Collectors.toSet())));
if (variantContextTypes.size() > 1) {
filters.add(multipleTypeFilters.getID());
}
}
vcb.attribute(foundInCountVcfInfo.getID(), fileids_for_variant.size());
boolean print = true;
if (row.size() == super.inputs.size() && ignore_everywhere) {
print = false;
}
if (fileids_for_variant.size() != fileid2sampleName.size()) {
filters.add(variantNotCalledInAllVcf.getID());
if (only_everywhere) {
print = false;
}
}
vcb.filters(filters);
if (print) {
w.add(vcb.make());
}
}
row.clear();
}
if (rec == null)
break;
}
row.add(rec);
}
iter.close();
iter = null;
w.close();
w = null;
return 0;
} catch (Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(iter);
CloserUtil.close(w);
try {
if (variants != null)
variants.cleanup();
} catch (Exception err) {
}
}
}
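The "unique sample name" test near the top of this method packs the whole check into one condition. The same logic in isolation, as a hypothetical helper (the name and signature are illustrative, not part of VCFComm): a single sample name is unique when it is the only category in the Counter and was seen exactly once per input VCF.
// Hypothetical helper, for illustration only.
static Optional<String> findUniqueSampleName(final Counter<String> countInputSamples, final int numberOfInputVcfs) {
if (countInputSamples.getCountCategories() != 1) return Optional.empty();
final String name = countInputSamples.keySet().iterator().next();
return countInputSamples.count(name) == numberOfInputVcfs ? Optional.of(name) : Optional.empty();
}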
use of com.github.lindenb.jvarkit.util.Counter in project jvarkit by lindenb.
the class SamClipIndelFraction method doWork.
@Override
public int doWork(final List<String> args) {
SamReader sfr = null;
SAMRecordIterator iter = null;
PrintWriter pw = null;
try {
sfr = openSamReader(oneFileOrNull(args));
pw = super.openFileOrStdoutAsPrintWriter(outputFile);
long total_bases_count = 0L;
long count_clipped_reads = 0L;
long count_clipped_left_reads = 0L;
long count_clipped_right_reads = 0L;
long count_unclipped_reads = 0L;
long count_unmapped_reads = 0L;
SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(sfr.getFileHeader()).logger(LOG);
Counter<Integer> counter = new Counter<>();
iter = sfr.iterator();
while (iter.hasNext()) {
final SAMRecord record = progress.watch(iter.next());
if (record.getReadUnmappedFlag()) {
++count_unmapped_reads;
continue;
}
final Cigar cigar = record.getCigar();
int left_clip_length = 0;
int right_clip_length = 0;
int deletion_N_length = 0;
int deletion_D_length = 0;
int insertion_length = 0;
boolean onLeft = true;
for (int i = 0; i < cigar.numCigarElements(); ++i) {
final CigarElement ce = cigar.getCigarElement(i);
final CigarOperator op = ce.getOperator();
switch(op) {
case N:
{
onLeft = false;
deletion_N_length += ce.getLength();
total_bases_count += ce.getLength();
break;
}
case D:
{
onLeft = false;
deletion_D_length += ce.getLength();
total_bases_count += ce.getLength();
break;
}
}
case I:
{
onLeft = false;
insertion_length += ce.getLength();
total_bases_count += ce.getLength();
break;
}
case S:
case H:
{
if (onLeft) {
if (record.getReadNegativeStrandFlag()) {
right_clip_length += ce.getLength();
} else {
left_clip_length += ce.getLength();
}
} else {
if (record.getReadNegativeStrandFlag()) {
left_clip_length += ce.getLength();
} else {
right_clip_length += ce.getLength();
}
}
total_bases_count += ce.getLength();
break;
}
default:
{
onLeft = false;
if (op.consumesReadBases()) {
total_bases_count += ce.getLength();
}
break;
}
}
}
if (left_clip_length + right_clip_length == 0) {
count_unclipped_reads++;
} else {
if (left_clip_length > 0)
count_clipped_left_reads++;
if (right_clip_length > 0)
count_clipped_right_reads++;
count_clipped_reads++;
}
switch(type) {
case leftclip:
counter.incr(left_clip_length);
break;
case rightclip:
counter.incr(right_clip_length);
break;
case allclip:
counter.incr(left_clip_length + right_clip_length);
break;
case deletion:
counter.incr(deletion_D_length + deletion_N_length);
break;
case insert:
counter.incr(insertion_length);
break;
default:
LOG.error("Bad type: " + type);
return -1;
}
}
progress.finish();
pw.println("##UNMAPPED_READS=" + count_unmapped_reads);
pw.println("##MAPPED_READS=" + (count_clipped_reads + count_unclipped_reads));
pw.println("##CLIPPED_READS=" + count_clipped_reads);
pw.println("##CLIPPED_READS_5_PRIME=" + count_clipped_left_reads);
pw.println("##CLIPPED_READS_3_PRIME=" + count_clipped_right_reads);
pw.println("##UNCLIPPED_READS=" + count_unclipped_reads);
pw.println("##COUNT_BASES=" + total_bases_count);
pw.print("#");
switch(type) {
case leftclip:
pw.print("CLIP_5_PRIME");
break;
case rightclip:
pw.print("CLIP_3_PRIME");
break;
case allclip:
pw.print("CLIP");
break;
case deletion:
pw.print("DELETION");
break;
case insert:
pw.print("INSERTION");
break;
default:
LOG.error("Bad type: " + type);
return -1;
}
pw.println("\tCOUNT\tFRACTION_OF_MAPPED_READS");
for (final Integer size : new TreeSet<Integer>(counter.keySet())) {
pw.print(size);
pw.print('\t');
pw.print(counter.count(size));
pw.print('\t');
pw.println(counter.count(size) / (double) (count_clipped_reads + count_unclipped_reads));
}
pw.flush();
pw.close();
pw = null;
return 0;
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(iter);
CloserUtil.close(sfr);
CloserUtil.close(pw);
}
}
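The clip bookkeeping above attributes soft/hard clips to the 5' or 3' end of the read by swapping left and right for reads mapped on the negative strand, because the CIGAR is always expressed on the forward strand of the reference. A small, hypothetical helper stating that rule on its own (not part of the tool):
// Hypothetical illustration: a clip before the first aligned base of the CIGAR is the
// read's 5' end on the forward strand, but its 3' end when the read maps to the negative strand.
static boolean clipIsFivePrime(final boolean clipBeforeAlignedBases, final boolean negativeStrand) {
return clipBeforeAlignedBases != negativeStrand;
}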