Use of com.github.lindenb.jvarkit.samtools.util.SimpleInterval in project jvarkit by lindenb.
The class Transcript, method getCdsInterval.
/**
* return the genomic interval spanning from the start codon to the stop codon, if both are defined
*/
public default Optional<Locatable> getCdsInterval() {
if (!hasCodonStartDefined() || !hasCodonStopDefined())
return Optional.empty();
// seed the envelope with inverted transcript bounds; both codons lie inside
// the transcript, so the min/max calls below collapse onto the codon coordinates
int x1 = this.getEnd();
int x2 = this.getStart();
Locatable codon = getCodonStart().get();
x1 = Math.min(codon.getStart(), x1);
x2 = Math.max(codon.getEnd(), x2);
codon = getCodonStop().get();
x1 = Math.min(codon.getStart(), x1);
x2 = Math.max(codon.getEnd(), x2);
return Optional.of(new SimpleInterval(getContig(), x1, x2));
}
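The inverted seeding above (x1 starts at the transcript end, x2 at the transcript start) works because both codons lie inside the transcript, so the min/max calls collapse the envelope onto the codon bounds. Below is a minimal, self-contained sketch of that same envelope computation with made-up coordinates; it assumes only the SimpleInterval constructor already used on this page:

import com.github.lindenb.jvarkit.samtools.util.SimpleInterval;
import htsjdk.samtools.util.Locatable;

public class CdsEnvelopeDemo {
    public static void main(final String[] args) {
        // hypothetical transcript chr1:100-1000 and its start/stop codons
        final Locatable tx = new SimpleInterval("chr1", 100, 1000);
        final Locatable codonStart = new SimpleInterval("chr1", 150, 152);
        final Locatable codonStop = new SimpleInterval("chr1", 898, 900);
        // seed with inverted transcript bounds, exactly as getCdsInterval() does
        int x1 = tx.getEnd();
        int x2 = tx.getStart();
        for (final Locatable codon : new Locatable[] { codonStart, codonStop }) {
            x1 = Math.min(codon.getStart(), x1);
            x2 = Math.max(codon.getEnd(), x2);
        }
        final Locatable cds = new SimpleInterval(tx.getContig(), x1, x2);
        // prints chr1:150-900
        System.out.println(cds.getContig() + ":" + cds.getStart() + "-" + cds.getEnd());
    }
}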
Use of com.github.lindenb.jvarkit.samtools.util.SimpleInterval in project jvarkit by lindenb.
The class SlidingWindowIterator, method advance.
@Override
protected Entry<? extends Locatable, List<V>> advance() {
for (; ; ) {
if (!to_be_released.isEmpty()) {
final Window<V> w = to_be_released.pollFirst();
return new AbstractMap.SimpleEntry<>(w.interval, w.variants);
}
final V ctx = this.delegate.hasNext() ? this.delegate.next() : null;
// new contig?
if (ctx == null || !ctx.getContig().equals(prevContig)) {
to_be_released.addAll(this.buffer);
// EOF
if (ctx == null && to_be_released.isEmpty())
return null;
interval2win.clear();
buffer.clear();
}
// at EOF, loop again: to_be_released may still hold windows to emit
if (ctx == null)
continue;
this.prevContig = ctx.getContig();
// flush windows that end before the current variant
int i = 0;
while (i < buffer.size()) {
final Window<V> w = buffer.get(i);
if (w.getEnd() < ctx.getStart()) {
to_be_released.add(w);
buffer.remove(i);
interval2win.remove(w.interval);
} else {
i++;
}
}
// align x1 on the shift grid, then rewind to the first window that can still overlap ctx
int x1 = ctx.getStart() - ctx.getStart() % this.window_shift;
while (x1 - this.window_shift + this.window_size >= ctx.getStart()) {
x1 -= this.window_shift;
}
for (; ; ) {
final SimpleInterval r = new SimpleInterval(ctx.getContig(), Math.max(1, x1), Math.max(1, x1 + this.window_size));
if (r.getStart() > ctx.getEnd())
break;
if (r.overlaps(ctx)) {
Window<V> w = interval2win.get(r);
if (w == null) {
w = new Window<V>(r);
interval2win.put(r, w);
buffer.add(w);
}
w.variants.add(ctx);
}
x1 += this.window_shift;
}
}
}
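The window arithmetic is the subtle part of advance(): x1 is snapped down to the shift grid, rewound while the previous window could still reach the variant, then windows are enumerated until they begin past it. A standalone sketch of the same walk, with made-up window_size and window_shift values (note that, as in the code above, each window spans window_size + 1 positions in 1-based closed coordinates):

public class WindowGridDemo {
    public static void main(final String[] args) {
        final int window_size = 100;
        final int window_shift = 25;
        final int start = 237; // hypothetical variant span
        final int end = 241;
        // align on the shift grid, then rewind to the first window that can still overlap
        int x1 = start - start % window_shift;
        while (x1 - window_shift + window_size >= start) {
            x1 -= window_shift;
        }
        // enumerate grid windows until they begin past the variant
        for (; x1 <= end; x1 += window_shift) {
            final int winStart = Math.max(1, x1);
            final int winEnd = Math.max(1, x1 + window_size);
            if (winEnd >= start) {
                // prints [150-250] [175-275] [200-300] [225-325]
                System.out.println("[" + winStart + "-" + winEnd + "]");
            }
        }
    }
}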
Use of com.github.lindenb.jvarkit.samtools.util.SimpleInterval in project jvarkit by lindenb.
The class VcfFisherCombinatorics, method doWork.
@Override
public int doWork(final List<String> args) {
try {
final Pedigree pedigree = new PedigreeParser().parse(this.pedigreeFile);
final IntervalTreeMap<List<GeneInfo>> geneInfoMap = new IntervalTreeMap<>();
final Predicate<Genotype> isGtCnv = G -> G.isHet() || G.isHomVar();
try (VCFIterator vcfIter = super.openVCFIterator(super.oneFileOrNull(args))) {
final VCFHeader header = vcfIter.getHeader();
final ContigNameConverter ctgConverter = ContigNameConverter.fromOneDictionary(SequenceDictionaryUtils.extractRequired(header));
if (!header.hasGenotypingData()) {
LOG.error("No Genotype data in " + header);
return -1;
}
final Map<String, Integer> sample2index = header.getSampleNameToOffset();
final List<Integer> casesIdx = pedigree.getSamplesInVcfHeader(header).filter(S -> S.isAffected()).map(S -> S.getId()).map(S -> sample2index.get(S)).sorted().collect(Collectors.toList());
LOG.info("cases N=" + casesIdx.size());
if (casesIdx.isEmpty()) {
LOG.error("No affected/cases sample in the input VCF");
return -1;
}
final List<Integer> controlsIdx = pedigree.getSamplesInVcfHeader(header).filter(S -> S.isUnaffected()).map(S -> S.getId()).map(S -> sample2index.get(S)).sorted().collect(Collectors.toList());
LOG.info("controls N=" + controlsIdx.size());
if (controlsIdx.isEmpty()) {
LOG.error("No unaffected/controls sample in the input VCF");
return -1;
}
final Predicate<VariantContext> acceptCtx = V -> {
if (discard_bnd && V.getAttributeAsString(VCFConstants.SVTYPE, "").equals("BND"))
return false;
if (discard_filtered && V.isFiltered())
return false;
if (max_af < 1.0) {
final Iterator<Integer> it = Stream.concat(controlsIdx.stream(), casesIdx.stream()).iterator();
int ac = 0;
int an = 0;
while (it.hasNext()) {
switch(V.getGenotype(it.next()).getType()) {
case HOM_VAR:
ac += 2;
an += 2;
break;
case HOM_REF:
ac += 0;
an += 2;
break;
case HET:
ac += 1;
an += 2;
break;
default:
break;
}
}
if (an == 0)
return false;
if (ac / (double) an > max_af)
return false;
}
return true;
};
try (BedLineReader br = new BedLineReader(this.bedPath)) {
while (br.hasNext()) {
final BedLine bed = br.next();
final String ctg = ctgConverter.apply(bed.getContig());
if (StringUtil.isBlank(ctg))
continue;
final GeneInfo geneInfo = new GeneInfo();
geneInfo.casesflags = new BitSet(casesIdx.size());
geneInfo.controlsflags = new BitSet(controlsIdx.size());
geneInfo.contig = ctg;
geneInfo.name = bed.getOrDefault(3, "undefined");
geneInfo.parts.add(new SimpleInterval(bed).renameContig(ctg));
final Interval key = new Interval(geneInfo);
List<GeneInfo> L = geneInfoMap.get(key);
if (L == null) {
L = new ArrayList<>();
geneInfoMap.put(key, L);
}
L.add(geneInfo);
}
}
if (geneInfoMap.isEmpty()) {
LOG.error("no gene found in " + this.bedPath);
return -1;
}
LOG.info("reading variants...");
while (vcfIter.hasNext()) {
final VariantContext ctx = vcfIter.next();
if (!acceptCtx.test(ctx))
continue;
for (List<GeneInfo> gil : geneInfoMap.getOverlapping(ctx)) {
for (GeneInfo gi : gil) {
for (int y = 0; y < casesIdx.size(); ++y) {
final Genotype gt = ctx.getGenotype(casesIdx.get(y));
if (!isGtCnv.test(gt))
continue;
gi.casesflags.set(y);
}
for (int y = 0; y < controlsIdx.size(); ++y) {
final Genotype gt = ctx.getGenotype(controlsIdx.get(y));
if (!isGtCnv.test(gt))
continue;
gi.controlsflags.set(y);
}
}
}
}
/* remove genes without any variant in cases or controls */
for (List<GeneInfo> gil : geneInfoMap.values()) {
gil.removeIf(GI -> GI.casesflags.nextSetBit(0) == -1 && GI.controlsflags.nextSetBit(0) == -1);
}
/* collect the remaining non-empty genes, sorted */
final List<GeneInfo> geneList = geneInfoMap.values().stream().filter(L -> !L.isEmpty()).flatMap(L -> L.stream()).sorted().collect(Collectors.toList());
if (geneList.isEmpty()) {
LOG.error("no gene found in " + this.bedPath);
return -1;
}
LOG.info("N Genes=" + geneList.size());
Solution best = null;
for (int i = 2; i < Math.min(this.max_num_genes, geneList.size()); i++) {
LOG.info("sarting loop from " + i);
best = recursion(geneList, casesIdx, controlsIdx, new ArrayList<>(), i, best);
}
try (PrintWriter pw = super.openPathOrStdoutAsPrintWriter(this.outputFile)) {
pw.println(best);
pw.flush();
}
}
return 0;
} catch (final Throwable err) {
LOG.error(err);
return -1;
}
}
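For readability, the AC/AN arithmetic buried in the acceptCtx predicate can be factored into a helper. AfHelper and computeAf are hypothetical names for illustration, not jvarkit API; the switch mirrors the one above, where no-call and mixed genotypes contribute to neither AC nor AN:

import java.util.List;
import java.util.OptionalDouble;
import htsjdk.variant.variantcontext.VariantContext;

public class AfHelper {
    // hypothetical helper mirroring the AC/AN loop inside acceptCtx
    static OptionalDouble computeAf(final VariantContext V, final List<Integer> sampleIndexes) {
        int ac = 0;
        int an = 0;
        for (final Integer idx : sampleIndexes) {
            switch (V.getGenotype(idx).getType()) {
                case HOM_VAR: ac += 2; an += 2; break;
                case HET: ac += 1; an += 2; break;
                case HOM_REF: an += 2; break;
                default: break; // NO_CALL, MIXED, UNAVAILABLE: ignored
            }
        }
        // empty when no genotype was callable; acceptCtx rejects the variant in that case
        return an == 0 ? OptionalDouble.empty() : OptionalDouble.of(ac / (double) an);
    }
}

With such a helper, the max_af test reduces to rejecting the variant when the frequency is absent or greater than max_af.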
Use of com.github.lindenb.jvarkit.samtools.util.SimpleInterval in project jvarkit by lindenb.
The class FindGVCFsBlocks, method doWork.
@SuppressWarnings("resource")
@Override
public int doWork(final List<String> args) {
Path tmpBedFile0 = null;
Path tmpBedFile1 = null;
try {
if (this.bedPath != null) {
IOUtil.assertFileIsReadable(this.bedPath);
}
final List<Path> inputs = IOUtils.unrollPaths(args);
if (inputs.isEmpty()) {
LOG.error("input missing");
return -1;
}
if (this.outputFile != null) {
final String fname = this.outputFile.getFileName().toString();
if (!fname.endsWith(FileExtensions.INTERVAL_LIST) && !fname.endsWith(FileExtensions.COMPRESSED_INTERVAL_LIST)) {
LOG.error("Output should end with " + FileExtensions.INTERVAL_LIST + " or " + FileExtensions.COMPRESSED_INTERVAL_LIST);
return -1;
}
}
if (this.tmpDir == null && this.outputFile != null) {
this.tmpDir = this.outputFile.getParent();
}
if (this.tmpDir == null) {
this.tmpDir = IOUtils.getDefaultTempDir();
}
IOUtil.assertDirectoryIsWritable(this.tmpDir);
tmpBedFile0 = Files.createTempFile(this.tmpDir, "tmp.", ".bed");
tmpBedFile1 = Files.createTempFile(this.tmpDir, "tmp.", ".bed");
SAMSequenceDictionary dict = null;
final long initMilliSec = System.currentTimeMillis();
for (int i = 0; i < inputs.size(); i++) {
final long startMilliSec = System.currentTimeMillis();
LOG.info(inputs.get(i) + " " + (i + 1) + "/" + inputs.size());
try (GVCFOrIntervalIterator r0 = openInput(inputs.get(i))) {
if (dict != null) {
SequenceUtil.assertSequenceDictionariesEqual(dict, r0.getSAMSequenceDictionary());
} else {
dict = r0.getSAMSequenceDictionary();
}
long count_variants = 0L;
try (IntervalListWriter pw = new IntervalListWriter(tmpBedFile0, dict)) {
/* first VCF: just convert each record to an interval */
if (i == 0) {
while (r0.hasNext()) {
final Locatable loc = r0.next();
pw.add(loc);
count_variants++;
}
} else { /* merge previous bed with current VCF using INFO/END */
Locatable start0 = null;
Locatable start1 = null;
try (IntervalListIterator r1 = new IntervalListIterator(tmpBedFile1)) {
PeekableIterator<Locatable> peek0 = new PeekableIterator<>(r0);
PeekableIterator<Locatable> peek1 = new PeekableIterator<>(r1);
while (peek0.hasNext() && peek1.hasNext()) {
final Locatable loc0 = peek0.peek();
final Locatable loc1 = peek1.peek();
if (!loc0.contigsMatch(loc1)) {
throw new IllegalStateException("unexpected: not the same contigs " + loc0 + " " + loc1);
}
if (start0 == null)
start0 = loc0;
if (start1 == null)
start1 = loc1;
final int end0 = loc0.getEnd();
final int end1 = loc1.getEnd();
if (end0 < end1) {
peek0.next();
continue;
} else if (end0 > end1) {
peek1.next();
continue;
} else {
/* end0==end1 */
pw.add(new SimpleInterval(loc0.getContig(), (Math.min(start0.getStart(), start1.getStart())), loc0.getEnd()));
count_variants++;
// consume loc0
peek0.next();
// consume loc1
peek1.next();
start0 = null;
start1 = null;
}
}
if (lenient_discordant_end) {
while (peek0.hasNext()) {
final Locatable loc = peek0.next();
LOG.warn("extra interval 0 : " + loc);
}
while (peek1.hasNext()) {
final Locatable loc = peek1.next();
LOG.warn("extra interval 1 : " + loc);
}
} else {
if (peek0.hasNext())
throw new IllegalStateException("peek0 has Next ? " + peek0.next() + " use --lenient ?");
if (peek1.hasNext())
throw new IllegalStateException("peek1 has Next ? " + peek1.next() + " use --lenient ?");
}
peek0.close();
peek1.close();
}
}
final long millisecPerVcf = (System.currentTimeMillis() - initMilliSec) / (i + 1L);
LOG.info("N=" + count_variants + ". That took: " + StringUtils.niceDuration(System.currentTimeMillis() - startMilliSec) + " Remains: " + StringUtils.niceDuration((inputs.size() - (i + 1)) * millisecPerVcf));
}
// end writer
Files.deleteIfExists(tmpBedFile1);
Files.move(tmpBedFile0, tmpBedFile1);
}
}
final IntervalTreeMap<Boolean> intervalTreeMap;
if (this.bedPath != null) {
try (BedLineReader blr = new BedLineReader(this.bedPath)) {
intervalTreeMap = blr.toIntervalTreeMap(BED -> Boolean.TRUE);
}
} else {
intervalTreeMap = null;
}
try (IntervalListWriter w = new IntervalListWriter(this.outputFile, dict)) {
try (IntervalListIterator r1 = new IntervalListIterator(tmpBedFile1)) {
final PeekableIterator<Locatable> peek1 = new PeekableIterator<>(r1);
while (peek1.hasNext()) {
Locatable loc = peek1.next();
if (intervalTreeMap != null && !intervalTreeMap.containsOverlapping(loc)) {
continue;
}
while (this.min_block_size > 0 && peek1.hasNext()) {
final Locatable loc2 = peek1.peek();
if (!loc2.contigsMatch(loc))
break;
if (CoordMath.getLength(loc.getStart(), loc2.getEnd()) > this.min_block_size)
break;
loc = new SimpleInterval(loc.getContig(), loc.getStart(), loc2.getEnd());
// consume loc2
peek1.next();
}
w.add(loc);
}
peek1.close();
}
Files.deleteIfExists(tmpBedFile1);
}
return 0;
} catch (final Throwable err) {
LOG.error(err);
return -1;
} finally {
if (tmpBedFile0 != null)
try {
Files.deleteIfExists(tmpBedFile0);
} catch (Throwable err) {
}
if (tmpBedFile1 != null)
try {
Files.deleteIfExists(tmpBedFile1);
} catch (Throwable err) {
}
}
}
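The heart of the merge above: advance whichever stream has the smaller END, and emit an interval only when both ENDs coincide, starting at the leftmost remembered start. A stripped-down, self-contained sketch over two made-up block lists (EndMatchMergeDemo is hypothetical, not part of jvarkit):

import java.util.Arrays;
import java.util.List;
import htsjdk.samtools.util.Locatable;
import htsjdk.samtools.util.PeekableIterator;
import com.github.lindenb.jvarkit.samtools.util.SimpleInterval;

public class EndMatchMergeDemo {
    public static void main(final String[] args) {
        // two hypothetical per-sample GVCF block lists on the same contig
        final List<Locatable> a = Arrays.<Locatable>asList(new SimpleInterval("1", 1, 10), new SimpleInterval("1", 11, 30));
        final List<Locatable> b = Arrays.<Locatable>asList(new SimpleInterval("1", 1, 20), new SimpleInterval("1", 21, 30));
        final PeekableIterator<Locatable> peek0 = new PeekableIterator<>(a.iterator());
        final PeekableIterator<Locatable> peek1 = new PeekableIterator<>(b.iterator());
        Locatable start0 = null;
        Locatable start1 = null;
        while (peek0.hasNext() && peek1.hasNext()) {
            final Locatable loc0 = peek0.peek();
            final Locatable loc1 = peek1.peek();
            if (start0 == null) start0 = loc0;
            if (start1 == null) start1 = loc1;
            if (loc0.getEnd() < loc1.getEnd()) {
                peek0.next(); // stream 0 lags: advance it
            } else if (loc0.getEnd() > loc1.getEnd()) {
                peek1.next(); // stream 1 lags: advance it
            } else {
                // shared block boundary: emit everything since the last shared boundary
                System.out.println(loc0.getContig() + ":" + Math.min(start0.getStart(), start1.getStart()) + "-" + loc0.getEnd());
                peek0.next();
                peek1.next();
                start0 = null;
                start1 = null;
            }
        }
        peek0.close();
        peek1.close();
    }
}

This prints only 1:1-30, because 10 and 20 are not shared boundaries while 30 is; iterating the merge over every input GVCF therefore keeps only the block boundaries common to all of them.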
Use of com.github.lindenb.jvarkit.samtools.util.SimpleInterval in project jvarkit by lindenb.
The class ValidateCnv, method doWork.
@Override
public int doWork(final List<String> args) {
if (this.extendFactor <= 0) {
LOG.error("bad extend factor " + this.extendFactor);
return -1;
}
if (this.treshold < 0 || this.treshold >= 0.25) {
LOG.error("Bad treshold 0 < " + this.treshold + " >=0.25 ");
return -1;
}
final Map<String, BamInfo> sample2bam = new HashMap<>();
VariantContextWriter out = null;
Iterator<? extends Locatable> iterIn = null;
try {
final SAMSequenceDictionary dict = SequenceDictionaryUtils.extractRequired(this.rererencePath);
final CRAMReferenceSource cramReferenceSource = new ReferenceSource(this.rererencePath);
final List<Path> bamPaths = IOUtils.unrollPaths(this.bamFiles);
final String input = oneFileOrNull(args);
if (input == null) {
iterIn = IntervalListProvider.empty().dictionary(dict).skipUnknownContigs().fromInputStream(stdin(), "bed").iterator();
} else {
final IntervalListProvider ilp = IntervalListProvider.from(input).setVariantPredicate(CTX -> {
if (CTX.isSNP())
return false;
final String svType = CTX.getAttributeAsString(VCFConstants.SVTYPE, "");
if (svType != null && (svType.equals("INV") || svType.equals("BND")))
return false;
return true;
}).dictionary(dict).skipUnknownContigs();
iterIn = ilp.stream().iterator();
}
/* register each bam */
for (final Path p2 : bamPaths) {
final BamInfo bi = new BamInfo(p2, cramReferenceSource);
if (sample2bam.containsKey(bi.sampleName)) {
LOG.error("sample " + bi.sampleName + " specified twice.");
bi.close();
return -1;
}
sample2bam.put(bi.sampleName, bi);
}
if (sample2bam.isEmpty()) {
LOG.error("no bam was defined");
return -1;
}
final Set<VCFHeaderLine> metadata = new HashSet<>();
final VCFInfoHeaderLine infoSVSamples = new VCFInfoHeaderLine("N_SAMPLES", 1, VCFHeaderLineType.Integer, "Number of Samples that could carry a SV");
metadata.add(infoSVSamples);
final VCFInfoHeaderLine infoSvLen = new VCFInfoHeaderLine("SVLEN", 1, VCFHeaderLineType.Integer, "SV length");
metadata.add(infoSvLen);
final BiFunction<String, String, VCFFormatHeaderLine> makeFmt = (TAG, DESC) -> new VCFFormatHeaderLine(TAG, 1, VCFHeaderLineType.Integer, DESC);
final VCFFormatHeaderLine formatCN = new VCFFormatHeaderLine("CN", 1, VCFHeaderLineType.Float, "normalized copy-number. Threshold was " + this.treshold);
metadata.add(formatCN);
final VCFFormatHeaderLine nReadsSupportingSv = makeFmt.apply("RSD", "number of split reads supporting SV.");
metadata.add(nReadsSupportingSv);
final VCFFilterHeaderLine filterAllDel = new VCFFilterHeaderLine("ALL_DEL", "number of samples greater than 1 and all are deletions");
metadata.add(filterAllDel);
final VCFFilterHeaderLine filterAllDup = new VCFFilterHeaderLine("ALL_DUP", "number of samples greater than 1 and all are duplication");
metadata.add(filterAllDup);
final VCFFilterHeaderLine filterNoSV = new VCFFilterHeaderLine("NO_SV", "There is no DUP or DEL in this variant");
metadata.add(filterNoSV);
final VCFFilterHeaderLine filterHomDel = new VCFFilterHeaderLine("HOM_DEL", "There is one Homozygous deletion.");
metadata.add(filterHomDel);
final VCFFilterHeaderLine filterHomDup = new VCFFilterHeaderLine("HOM_DUP", "There is one Homozygous duplication.");
metadata.add(filterHomDup);
VCFStandardHeaderLines.addStandardFormatLines(metadata, true, VCFConstants.DEPTH_KEY, VCFConstants.GENOTYPE_KEY, VCFConstants.GENOTYPE_FILTER_KEY, VCFConstants.GENOTYPE_QUALITY_KEY);
VCFStandardHeaderLines.addStandardInfoLines(metadata, true, VCFConstants.DEPTH_KEY, VCFConstants.END_KEY, VCFConstants.ALLELE_COUNT_KEY, VCFConstants.ALLELE_FREQUENCY_KEY, VCFConstants.ALLELE_NUMBER_KEY);
final VCFHeader header = new VCFHeader(metadata, sample2bam.keySet());
if (dict != null)
header.setSequenceDictionary(dict);
JVarkitVersion.getInstance().addMetaData(this, header);
final ProgressFactory.Watcher<VariantContext> progress = ProgressFactory.newInstance().dictionary(dict).logger(LOG).build();
out = this.writingVariantsDelegate.dictionary(dict).open(this.outputFile);
out.writeHeader(header);
final Allele DUP_ALLELE = Allele.create("<DUP>", false);
final Allele DEL_ALLELE = Allele.create("<DEL>", false);
final Allele REF_ALLELE = Allele.create("N", true);
while (iterIn.hasNext()) {
final Locatable ctx = iterIn.next();
if (ctx == null)
continue;
final SAMSequenceRecord ssr = dict.getSequence(ctx.getContig());
if (ssr == null || ctx.getStart() >= ssr.getSequenceLength())
continue;
final int svLen = ctx.getLengthOnReference();
if (svLen < this.min_abs_sv_size)
continue;
if (svLen > this.max_abs_sv_size)
continue;
int n_samples_with_cnv = 0;
final SimplePosition breakPointLeft = new SimplePosition(ctx.getContig(), ctx.getStart());
final SimplePosition breakPointRight = new SimplePosition(ctx.getContig(), ctx.getEnd());
final int extend = 1 + (int) (svLen * this.extendFactor);
final int leftPos = Math.max(1, breakPointLeft.getPosition() - extend);
final int array_mid_start = breakPointLeft.getPosition() - leftPos;
final int array_mid_end = breakPointRight.getPosition() - leftPos;
final int rightPos = Math.min(breakPointRight.getPosition() + extend, ssr.getSequenceLength());
final VariantContextBuilder vcb = new VariantContextBuilder();
vcb.chr(ctx.getContig());
vcb.start(ctx.getStart());
vcb.stop(ctx.getEnd());
vcb.attribute(VCFConstants.END_KEY, ctx.getEnd());
final Set<Allele> alleles = new HashSet<>();
alleles.add(REF_ALLELE);
int count_dup = 0;
int count_del = 0;
int an = 0;
final Counter<Allele> countAlleles = new Counter<>();
final List<Genotype> genotypes = new ArrayList<>(sample2bam.size());
// worst (lowest) GQ seen across samples
Double badestGQ = null;
final double[] raw_coverage = new double[CoordMath.getLength(leftPos, rightPos)];
for (final String sampleName : sample2bam.keySet()) {
final BamInfo bi = sample2bam.get(sampleName);
Arrays.fill(raw_coverage, 0.0);
int n_reads_supporting_sv = 0;
try (CloseableIterator<SAMRecord> iter2 = bi.samReader.queryOverlapping(ctx.getContig(), leftPos, rightPos)) {
while (iter2.hasNext()) {
final SAMRecord rec = iter2.next();
if (!SAMRecordDefaultFilter.accept(rec, this.min_mapq))
continue;
final Cigar cigar = rec.getCigar();
if (cigar == null || cigar.isEmpty())
continue;
// any clipped segment supporting the CNV breakpoints?
boolean read_supports_cnv = false;
final int breakpoint_distance = 10;
// any clip on left ?
if (cigar.isLeftClipped() && rec.getUnclippedStart() < rec.getAlignmentStart() && new SimpleInterval(ctx.getContig(), rec.getUnclippedStart(), rec.getAlignmentStart()).withinDistanceOf(breakPointLeft, breakpoint_distance)) {
read_supports_cnv = true;
}
// any clip on right ?
if (!read_supports_cnv && cigar.isRightClipped() && rec.getAlignmentEnd() < rec.getUnclippedEnd() && new SimpleInterval(ctx.getContig(), rec.getAlignmentEnd(), rec.getUnclippedEnd()).withinDistanceOf(breakPointRight, breakpoint_distance)) {
read_supports_cnv = true;
}
if (read_supports_cnv) {
n_reads_supporting_sv++;
}
int ref = rec.getStart();
for (final CigarElement ce : cigar) {
final CigarOperator op = ce.getOperator();
if (op.consumesReferenceBases()) {
if (op.consumesReadBases()) {
for (int x = 0; x < ce.getLength() && ref + x - leftPos < raw_coverage.length; ++x) {
final int p = ref + x - leftPos;
if (p < 0 || p >= raw_coverage.length)
continue;
raw_coverage[p]++;
}
}
ref += ce.getLength();
}
}
}
// end while iter record
}
// end try query for iterator
// test for a large difference between left and right flanking depth
final OptionalDouble medianDepthLeft = Percentile.median().evaluate(raw_coverage, 0, array_mid_start);
final OptionalDouble medianDepthRight = Percentile.median().evaluate(raw_coverage, array_mid_end, raw_coverage.length - array_mid_end);
// either flanking depth is too low
if (!medianDepthLeft.isPresent() || medianDepthLeft.getAsDouble() < this.min_depth || !medianDepthRight.isPresent() || medianDepthRight.getAsDouble() < this.min_depth) {
final Genotype gt2 = new GenotypeBuilder(sampleName, Arrays.asList(Allele.NO_CALL, Allele.NO_CALL)).filter("LowDp").make();
genotypes.add(gt2);
continue;
}
final double difference_factor = 2.0;
// flanks differ too much: one side stays higher even after division by the factor
if (medianDepthLeft.getAsDouble() / difference_factor > medianDepthRight.getAsDouble() || medianDepthRight.getAsDouble() / difference_factor > medianDepthLeft.getAsDouble()) {
final Genotype gt2 = new GenotypeBuilder(sampleName, Arrays.asList(Allele.NO_CALL, Allele.NO_CALL)).filter("DiffLR").make();
genotypes.add(gt2);
continue;
}
// running median to smooth the raw coverage
final double[] smoothed_cov = new RunMedian(RunMedian.getTurlachSize(raw_coverage.length)).apply(raw_coverage);
final double[] bounds_cov = IntStream.concat(IntStream.range(0, array_mid_start), IntStream.range(array_mid_end, smoothed_cov.length)).mapToDouble(IDX -> raw_coverage[IDX]).toArray();
final OptionalDouble optMedianBound = Percentile.median().evaluate(bounds_cov);
if (!optMedianBound.isPresent() || optMedianBound.getAsDouble() == 0) {
final Genotype gt2 = new GenotypeBuilder(sampleName, Arrays.asList(Allele.NO_CALL, Allele.NO_CALL)).filter("MedZero").make();
genotypes.add(gt2);
continue;
}
final double medianBound = optMedianBound.getAsDouble();
// normalize the middle coverage by the flanking median
final double[] normalized_mid_coverage = new double[array_mid_end - array_mid_start];
for (int i = 0; i < normalized_mid_coverage.length; ++i) {
normalized_mid_coverage[i] = smoothed_cov[array_mid_start + i] / medianBound;
}
final double normDepth = Percentile.median().evaluate(normalized_mid_coverage).getAsDouble();
final boolean is_sv;
final boolean is_hom_deletion = Math.abs(normDepth - 0.0) <= this.treshold;
final boolean is_het_deletion = Math.abs(normDepth - 0.5) <= this.treshold || (!is_hom_deletion && normDepth <= 0.5);
final boolean is_hom_dup = Math.abs(normDepth - 2.0) <= this.treshold || normDepth > 2.0;
final boolean is_het_dup = Math.abs(normDepth - 1.5) <= this.treshold || (!is_hom_dup && normDepth >= 1.5);
final boolean is_ref = Math.abs(normDepth - 1.0) <= this.treshold;
final double theoritical_depth;
final GenotypeBuilder gb;
if (is_ref) {
gb = new GenotypeBuilder(sampleName, Arrays.asList(REF_ALLELE, REF_ALLELE));
is_sv = false;
theoritical_depth = 1.0;
an += 2;
} else if (is_het_deletion) {
gb = new GenotypeBuilder(sampleName, Arrays.asList(REF_ALLELE, DEL_ALLELE));
alleles.add(DEL_ALLELE);
is_sv = true;
theoritical_depth = 0.5;
count_del++;
an += 2;
countAlleles.incr(DEL_ALLELE);
} else if (is_hom_deletion) {
gb = new GenotypeBuilder(sampleName, Arrays.asList(DEL_ALLELE, DEL_ALLELE));
alleles.add(DEL_ALLELE);
vcb.filter(filterHomDel.getID());
is_sv = true;
theoritical_depth = 0.0;
count_del++;
an += 2;
countAlleles.incr(DEL_ALLELE, 2);
} else if (is_het_dup) {
gb = new GenotypeBuilder(sampleName, Arrays.asList(REF_ALLELE, DUP_ALLELE));
alleles.add(DUP_ALLELE);
is_sv = true;
theoritical_depth = 1.5;
count_dup++;
an += 2;
countAlleles.incr(DUP_ALLELE);
} else if (is_hom_dup) {
gb = new GenotypeBuilder(sampleName, Arrays.asList(DUP_ALLELE, DUP_ALLELE));
alleles.add(DUP_ALLELE);
vcb.filter(filterHomDup.getID());
is_sv = true;
theoritical_depth = 2.0;
count_dup++;
an += 2;
countAlleles.incr(DUP_ALLELE, 2);
} else {
gb = new GenotypeBuilder(sampleName, Arrays.asList(Allele.NO_CALL, Allele.NO_CALL)).filter("Ambigous");
is_sv = false;
theoritical_depth = 1.0;
}
if (is_sv) {
n_samples_with_cnv++;
}
// map the residual |theoretical - observed| onto a GQ in [0,99]
double gq = Math.abs(theoritical_depth - normDepth);
gq = Math.min(0.5, gq);
gq = gq * gq;
gq = gq / 0.25;
gq = 99 * (1.0 - gq);
gb.GQ((int) gq);
if (badestGQ == null || badestGQ.compareTo(gq) > 0) {
badestGQ = gq;
}
gb.attribute(formatCN.getID(), normDepth);
gb.attribute(nReadsSupportingSv.getID(), n_reads_supporting_sv);
genotypes.add(gb.make());
}
vcb.attribute(VCFConstants.ALLELE_NUMBER_KEY, an);
final List<Allele> orderedAlleles = new ArrayList<>(alleles);
Collections.sort(orderedAlleles);
if (orderedAlleles.size() > 1) {
final List<Integer> acL = new ArrayList<>();
final List<Double> afL = new ArrayList<>();
for (int i = 1; i < orderedAlleles.size(); i++) {
final Allele a = orderedAlleles.get(i);
final int c = (int) countAlleles.count(a);
acL.add(c);
if (an > 0)
afL.add(c / (double) an);
}
vcb.attribute(VCFConstants.ALLELE_COUNT_KEY, acL);
if (an > 0)
vcb.attribute(VCFConstants.ALLELE_FREQUENCY_KEY, afL);
}
// if(alleles.size()<=1) continue;
vcb.alleles(orderedAlleles);
vcb.noID();
vcb.genotypes(genotypes);
vcb.attribute(infoSVSamples.getID(), n_samples_with_cnv);
vcb.attribute(infoSvLen.getID(), svLen);
if (count_dup == sample2bam.size() && sample2bam.size() != 1) {
vcb.filter(filterAllDup.getID());
}
if (count_del == sample2bam.size() && sample2bam.size() != 1) {
vcb.filter(filterAllDel.getID());
}
if (n_samples_with_cnv == 0) {
vcb.filter(filterNoSV.getID());
}
if (badestGQ != null) {
vcb.log10PError(badestGQ / -10.0);
}
out.add(vcb.make());
}
progress.close();
out.close();
return 0;
} catch (final Throwable err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(iterIn);
CloserUtil.close(out);
sample2bam.values().forEach(F -> CloserUtil.close(F));
}
}
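The genotyping block above amounts to banding the normalized depth around 0, 0.5, 1, 1.5 and 2, then mapping the residual quadratically onto GQ. A compact sketch of just those two pieces; CnvBandingDemo and its method names are mine, and the bands are reordered but equivalent to the booleans above because the tool enforces treshold < 0.25, which keeps them disjoint:

public class CnvBandingDemo {
    // classify a normalized copy-number into the same bands as ValidateCnv
    static String call(final double normDepth, final double treshold) {
        if (Math.abs(normDepth - 1.0) <= treshold) return "REF";
        if (Math.abs(normDepth - 0.0) <= treshold) return "HOM_DEL";
        if (Math.abs(normDepth - 2.0) <= treshold || normDepth > 2.0) return "HOM_DUP";
        if (Math.abs(normDepth - 0.5) <= treshold || normDepth <= 0.5) return "HET_DEL";
        if (Math.abs(normDepth - 1.5) <= treshold || normDepth >= 1.5) return "HET_DUP";
        return "AMBIGUOUS"; // no band matched: no-call
    }

    // same arithmetic as the gq block above: 99 for a perfect fit, 0 at half a copy away
    static int gq(final double theoreticalDepth, final double normDepth) {
        double gq = Math.min(0.5, Math.abs(theoreticalDepth - normDepth));
        gq = (gq * gq) / 0.25;
        return (int) (99 * (1.0 - gq));
    }

    public static void main(final String[] args) {
        final double treshold = 0.2; // hypothetical; the tool requires 0 <= treshold < 0.25
        for (final double d : new double[] { 0.05, 0.48, 1.02, 1.55, 2.4 }) {
            System.out.println(d + " -> " + call(d, treshold) + " GQ(vs 1.0)=" + gq(1.0, d));
        }
    }
}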