Use of htsjdk.samtools.util.CloseableIterator in project jvarkit by lindenb.
The class VCFBedSetFilter, method doVcfToVcf:
@Override
protected int doVcfToVcf(final String inputName, final VCFIterator r, final VariantContextWriter w) {
final Set<String> contigs_not_found = new HashSet<>();
final VCFHeader h2 = new VCFHeader(r.getHeader());
final VCFFilterHeaderLine filter;
if (!StringUtil.isBlank(this.filterName)) {
filter = new VCFFilterHeaderLine(this.filterName, "Variant " + (this.tabixBlackFile != null ? "overlapping any bed record of " : "not overlapping any bed record of ") + tabixFile);
h2.addMetaDataLine(filter);
} else {
filter = null;
}
JVarkitVersion.getInstance().addMetaData(this, h2);
final ContigNameConverter ctgNameConverter;
if (this.bedReader != null) {
ctgNameConverter = ContigNameConverter.fromContigSet(this.bedReader.getContigs());
} else {
ctgNameConverter = ContigNameConverter.fromIntervalTreeMap(this.intervalTreeMap);
}
final SAMSequenceDictionary dict = r.getHeader().getSequenceDictionary();
w.writeHeader(h2);
while (r.hasNext()) {
final VariantContext ctx = r.next();
boolean set_filter = false;
final String convert_contig = ctgNameConverter.apply(ctx.getContig());
final int ctx_start = Math.max(1, ctx.getStart() - this.extend_bases);
final int ctx_end;
if (dict == null) {
ctx_end = ctx.getEnd() + this.extend_bases;
} else {
final SAMSequenceRecord ssr = dict.getSequence(ctx.getContig());
if (ssr == null)
throw new JvarkitException.ContigNotFoundInDictionary(ctx.getContig(), dict);
ctx_end = Math.min(ssr.getSequenceLength(), ctx.getEnd() + this.extend_bases);
}
if (StringUtil.isBlank(convert_contig)) {
if (debug)
LOG.warn("Cannot convert contig " + ctx.getContig());
if (contigs_not_found.size() < 100) {
if (contigs_not_found.add(ctx.getContig())) {
LOG.warn("Cannot convert variant contig " + ctx.getContig() + " to bed file. (Contig is not in BED file)");
}
}
set_filter = false;
} else if (this.intervalTreeMap != null) {
if (this.intervalTreeMap.getOverlapping(new SimpleInterval(convert_contig, ctx_start, ctx_end)).stream().anyMatch(LOC -> testOverlap(ctx_start, ctx_end, LOC))) {
if (debug)
LOG.warn("treemap.overlap=true set Filter=true for " + ctx.getContig() + ":" + ctx.getStart());
set_filter = true;
}
} else {
try (CloseableIterator<BedLine> iter = this.bedReader.iterator(convert_contig, ctx_start - 1, ctx_end + 1)) {
while (iter.hasNext()) {
final BedLine bed = iter.next();
if (bed == null)
continue;
if (!testOverlap(ctx_start, ctx_end, bed))
continue;
if (debug)
LOG.warn("tabix=true set Filter=true for " + ctx.getContig() + ":" + ctx.getStart());
set_filter = true;
break;
}
} catch (IOException err) {
throw new RuntimeIOException(err);
}
}
/* variant is in whitelist, we want to keep it */
if (this.tabixWhiteFile != null) {
set_filter = !set_filter;
if (debug)
LOG.warn("inverse. Now Filter=" + set_filter + " for " + ctx.getContig() + ":" + ctx.getStart());
}
if (!set_filter) {
if (debug)
LOG.warn("no filter. Writing " + ctx.getContig() + ":" + ctx.getStart());
w.add(ctx);
continue;
}
if (filter != null) {
if (debug)
LOG.warn("adding filter. Writing " + ctx.getContig() + ":" + ctx.getStart());
final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
vcb.filter(filter.getID());
w.add(vcb.make());
} else {
if (debug)
LOG.warn("Ignoring " + ctx.getContig() + ":" + ctx.getStart());
}
}
if (!contigs_not_found.isEmpty()) {
LOG.warn("The following contigs were not found: " + String.join(" ", contigs_not_found) + "...");
}
return 0;
}
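Worth noting in the tabix branch above: the try-with-resources around bedReader.iterator(...) works because CloseableIterator extends both Iterator and Closeable, so the underlying stream is released even when the loop breaks on the first overlap. Below is a minimal sketch of the same early-exit idiom, using htsjdk's VCFFileReader.query in place of jvarkit's BED reader; the file name and the helper method are hypothetical.

import htsjdk.samtools.util.CloseableIterator;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.vcf.VCFFileReader;
import java.io.File;

public class OverlapProbe {
    /** true if any record of the indexed VCF overlaps contig:start-end (1-based, closed) */
    static boolean overlapsAny(final File indexedVcf, final String contig, final int start, final int end) {
        try (VCFFileReader reader = new VCFFileReader(indexedVcf, true);
             CloseableIterator<VariantContext> iter = reader.query(contig, start, end)) {
            while (iter.hasNext()) {
                final VariantContext rec = iter.next();
                // re-check coordinates, as the filter above does with testOverlap()
                if (rec.getEnd() >= start && rec.getStart() <= end) {
                    return true; // early exit: try-with-resources still closes the iterator
                }
            }
            return false;
        }
    }

    public static void main(final String[] args) {
        // "mask.vcf.gz" is a hypothetical tabix-indexed file
        System.out.println(overlapsAny(new File("mask.vcf.gz"), "chr1", 10_000, 10_500));
    }
}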
Use of htsjdk.samtools.util.CloseableIterator in project jvarkit by lindenb.
The class MantaMerger, method doWork:
@Override
public int doWork(final List<String> args) {
VariantContextWriter out = null;
try {
final Map<String, VcfInput> sample2inputs = new TreeMap<>();
SAMSequenceDictionary dict = null;
final List<String> lines;
if (args.size() == 1 && args.get(0).endsWith(".list")) {
    // Files.lines returns a Stream that holds the file open; close it with try-with-resources
    try (Stream<String> st = Files.lines(Paths.get(args.get(0)))) {
        lines = st.filter(L -> !(StringUtils.isBlank(L) || L.startsWith("#"))).collect(Collectors.toList());
    }
} else {
    lines = args;
}
for (final String line : lines) {
final String[] tokens = CharSplitter.TAB.split(line);
final VcfInput vcfInput = new VcfInput();
vcfInput.vcfPath = Paths.get(tokens[0]);
IOUtil.assertFileIsReadable(vcfInput.vcfPath);
final SAMSequenceDictionary dict1 = SequenceDictionaryUtils.extractRequired(vcfInput.vcfPath);
if (dict == null) {
dict = dict1;
} else if (!SequenceUtil.areSequenceDictionariesEqual(dict, dict1)) {
throw new JvarkitException.DictionariesAreNotTheSame(dict1, dict);
}
if (tokens.length < 2 || StringUtils.isBlank(tokens[1])) {
try (VCFReader r = VCFReaderFactory.makeDefault().open(vcfInput.vcfPath, false)) {
List<String> snl = r.getHeader().getSampleNamesInOrder();
if (snl.size() == 1) {
vcfInput.sample = snl.get(0);
} else {
vcfInput.sample = vcfInput.vcfPath.toString();
}
}
} else {
vcfInput.sample = tokens[1];
}
if (sample2inputs.containsKey(vcfInput.sample)) {
LOG.error("duplicate sample " + vcfInput.sample);
return -1;
}
sample2inputs.put(vcfInput.sample, vcfInput);
}
if (sample2inputs.isEmpty()) {
LOG.error("no input found");
return -1;
}
if (!StringUtils.isBlank(this.limitContig) && dict.getSequence(this.limitContig) == null) {
LOG.error(JvarkitException.ContigNotFoundInDictionary.getMessage(this.limitContig, dict));
return -1;
}
final Predicate<VariantContext> bedPredicate;
if (this.excludeBedPath != null) {
final BedLineCodec codec = new BedLineCodec();
final ContigNameConverter converter = ContigNameConverter.fromOneDictionary(dict);
final IntervalTreeMap<Boolean> map = new IntervalTreeMap<>();
try (BufferedReader br = IOUtils.openPathForBufferedReading(this.excludeBedPath)) {
br.lines()
    .filter(L -> !BedLine.isBedHeader(L))
    .map(L -> codec.decode(L))
    .filter(L -> L != null)
    .filter(B -> !StringUtils.isBlank(converter.apply(B.getContig())))
    .map(B -> new Interval(converter.apply(B.getContig()), B.getStart(), B.getEnd()))
    .forEach(R -> map.put(R, true));
}
bedPredicate = V -> !map.containsOverlapping(V);
} else {
bedPredicate = V -> true;
}
final Set<VCFHeaderLine> metaData = new HashSet<>();
metaData.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.END_KEY, true));
metaData.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_COUNT_KEY, true));
metaData.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_NUMBER_KEY, true));
metaData.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_FREQUENCY_KEY, true));
metaData.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_KEY, true));
metaData.add(new VCFInfoHeaderLine(VCFConstants.SVTYPE, 1, VCFHeaderLineType.String, "Variation type"));
final VCFInfoHeaderLine infoSvLen = new VCFInfoHeaderLine("SVLEN", 1, VCFHeaderLineType.Integer, "Variation length");
metaData.add(infoSvLen);
final VCFInfoHeaderLine infoNSamples = new VCFInfoHeaderLine("NSAMPLES", 1, VCFHeaderLineType.Integer, "Number of samples");
metaData.add(infoNSamples);
final VCFInfoHeaderLine infoSamples = new VCFInfoHeaderLine("SAMPLES", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Samples");
metaData.add(infoSamples);
final VCFFilterHeaderLine filterSameNext = new VCFFilterHeaderLine("NEXT", "next variant in VCF is the same.");
metaData.add(filterSameNext);
final VCFFilterHeaderLine filterSamePrev = new VCFFilterHeaderLine("PREV", "previous variant in VCF is the same.");
metaData.add(filterSamePrev);
final VCFHeader header = new VCFHeader(metaData, sample2inputs.keySet());
header.setSequenceDictionary(dict);
JVarkitVersion.getInstance().addMetaData(this, header);
out = VCFUtils.createVariantContextWriterToPath(this.outputFile);
out.writeHeader(header);
final Decoy decoy = Decoy.getDefaultInstance();
for (final SAMSequenceRecord ssr : dict.getSequences()) {
if (!StringUtils.isBlank(this.limitContig)) {
if (!ssr.getSequenceName().equals(this.limitContig))
continue;
}
LOG.info("contig " + ssr.getSequenceName());
if (decoy.isDecoy(ssr.getSequenceName()))
continue;
final Map<SVKey, Set<String>> variants2samples = new HashMap<>();
for (final VcfInput vcfinput : sample2inputs.values()) {
// reset count for this contig
vcfinput.contigCount = 0;
try (VCFReader vcfFileReader = VCFReaderFactory.makeDefault().open(vcfinput.vcfPath, true)) {
vcfFileReader.query(ssr.getSequenceName(), 1, ssr.getSequenceLength()).stream()
    .filter(V -> !discard_bnd || !V.getAttributeAsString(VCFConstants.SVTYPE, "").equals("BND"))
    .filter(bedPredicate)
    .map(V -> new VariantContextBuilder(V)
        .unfiltered()
        .noID()
        .noGenotypes()
        .rmAttribute("EVENT")
        .rmAttribute("HOMSEQ")
        .rmAttribute("HOMLEN")
        .rmAttribute("SVINSSEQ")
        .rmAttribute("SVINSLEN")
        .rmAttribute("MATEID")
        .rmAttribute("LEFT_SVINSSEQ")
        .rmAttribute("RIGHT_SVINSSEQ")
        .rmAttribute("BND_DEPTH")
        .rmAttribute("MATE_BND_DEPTH")
        .rmAttribute("JUNCTION_QUAL")
        .rmAttribute("CIGAR")
        .make())
    .forEach(V -> {
final SVKey key1 = new SVKey(V);
if (!svComparator.test(V, V))
throw new RuntimeException("compare to self failed ! " + V);
variants2samples.put(key1, new HashSet<>());
vcfinput.contigCount++;
});
}
}
if (variants2samples.isEmpty())
continue;
// build an interval tree for a faster access
final IntervalTree<SVKey> intervalTree = new IntervalTree<>();
for (final SVKey key : variants2samples.keySet()) {
final SimpleInterval r = new SimpleInterval(key.archetype).extend(this.svComparator.getBndDistance() + 1);
intervalTree.put(r.getStart(), r.getEnd(), key);
// paranoid check: the extended interval must be able to find its own archetype
boolean found = false;
final Iterator<IntervalTree.Node<SVKey>> nodeIter = intervalTree.overlappers(r.getStart(), r.getEnd());
while (nodeIter.hasNext()) {
final SVKey key1 = nodeIter.next().getValue();
if (this.svComparator.test(key1.archetype, key.archetype)) {
found = true;
break;
}
}
if (!found) {
out.close();
throw new RuntimeException("cannot find self " + key.archetype + " in " + r);
}
}
for (final VcfInput vcfinput : sample2inputs.values()) {
try (VCFReader vcfFileReader = VCFReaderFactory.makeDefault().open(vcfinput.vcfPath, true)) {
final CloseableIterator<VariantContext> iter = vcfFileReader.query(ssr.getSequenceName(), 1, ssr.getSequenceLength());
while (iter.hasNext()) {
final VariantContext ctx = iter.next();
if (this.discard_bnd && ctx.getAttributeAsString(VCFConstants.SVTYPE, "").equals("BND"))
continue;
if (!bedPredicate.test(ctx))
continue;
final SimpleInterval r = new SimpleInterval(ctx).extend(this.svComparator.getBndDistance() + 1);
final Iterator<IntervalTree.Node<SVKey>> nodeIter = intervalTree.overlappers(r.getStart(), r.getEnd());
while (nodeIter.hasNext()) {
final SVKey key1 = nodeIter.next().getValue();
if (!this.svComparator.test(key1.archetype, ctx))
continue;
final Set<String> samples = variants2samples.get(key1);
samples.add(vcfinput.sample);
}
}
iter.close();
}
}
final Comparator<VariantContext> sorter = new ContigDictComparator(dict).createLocatableComparator();
final List<SVKey> orderedKeys = variants2samples.keySet().stream()
    .filter(K -> !variants2samples.get(K).isEmpty()) // no samples for this key ??!
    .sorted((A, B) -> sorter.compare(A.archetype, B.archetype))
    .collect(Collectors.toCollection(ArrayList::new));
// remove duplicates
int i = 0;
while (i + 1 < orderedKeys.size()) {
final SVKey key1 = orderedKeys.get(i);
final SVKey key2 = orderedKeys.get(i + 1);
if (svComparator.test(key1.archetype, key2.archetype)
    && variants2samples.get(key1).equals(variants2samples.get(key2))) { // same variant and same samples
orderedKeys.remove(i + 1);
} else {
i++;
}
}
for (int key_index = 0; key_index < orderedKeys.size(); key_index++) {
final SVKey key = orderedKeys.get(key_index);
final Set<String> samples = variants2samples.get(key);
final Allele refAllele = key.archetype.getReference();
final Allele altAllele = Allele.create("<SV>", false);
final Object svType = key.archetype.getAttribute(VCFConstants.SVTYPE, ".");
final VariantContextBuilder vcb = new VariantContextBuilder();
vcb.chr(key.archetype.getContig());
vcb.start(key.archetype.getStart());
vcb.stop(key.archetype.getEnd());
vcb.log10PError(key.archetype.getLog10PError());
vcb.alleles(Arrays.asList(refAllele, altAllele));
vcb.attribute(VCFConstants.END_KEY, key.archetype.getEnd());
vcb.attribute(VCFConstants.SVTYPE, svType);
vcb.attribute(infoSvLen.getID(), (svType.equals("DEL") ? -1 : 1) * key.archetype.getLengthOnReference());
vcb.attribute(infoNSamples.getID(), samples.size());
vcb.attribute(infoSamples.getID(), samples.stream().sorted().collect(Collectors.toList()));
int ac = 0;
final List<Genotype> genotypes = new ArrayList<>(sample2inputs.size());
for (final String sn : sample2inputs.keySet()) {
List<Allele> gta;
if (samples.contains(sn)) {
ac++;
gta = Arrays.asList(refAllele, altAllele);
} else {
gta = Arrays.asList(refAllele, refAllele);
}
genotypes.add(new GenotypeBuilder(sn, gta).make());
}
vcb.attribute(VCFConstants.ALLELE_COUNT_KEY, ac);
vcb.attribute(VCFConstants.ALLELE_NUMBER_KEY, sample2inputs.size() * 2);
if (ac > 0) {
vcb.attribute(VCFConstants.ALLELE_FREQUENCY_KEY, ac / (2.0f * sample2inputs.size())); // AF = AC / AN, with AN = 2 * n(samples)
}
if (key_index > 0 && svComparator.test(key.archetype, orderedKeys.get(key_index - 1).archetype)) {
vcb.filter(filterSamePrev.getID());
}
if (key_index + 1 < orderedKeys.size() && svComparator.test(key.archetype, orderedKeys.get(key_index + 1).archetype)) {
System.err.println("SAME\n" + key.archetype + "\n" + orderedKeys.get(key_index + 1).archetype);
vcb.filter(filterSameNext.getID());
}
vcb.genotypes(genotypes);
out.add(vcb.make());
}
}
out.close();
out = null;
return 0;
} catch (final Throwable err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(out);
}
}
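To avoid an all-vs-all scan, the method above indexes each contig's SV keys in an htsjdk IntervalTree and pulls candidate matches with overlappers(). A minimal sketch of that lookup, with plain String payloads standing in for SVKey (the intervals and labels are made up):

import htsjdk.samtools.util.IntervalTree;
import java.util.Iterator;

public class IntervalTreeSketch {
    public static void main(final String[] args) {
        // keys are 1-based closed intervals [start, end] mapped to a payload
        final IntervalTree<String> tree = new IntervalTree<>();
        tree.put(100, 200, "sv1");
        tree.put(150, 400, "sv2");
        tree.put(900, 950, "sv3");
        // overlappers() is the call MantaMerger uses to fetch candidate keys
        final Iterator<IntervalTree.Node<String>> it = tree.overlappers(180, 220);
        while (it.hasNext()) {
            final IntervalTree.Node<String> node = it.next();
            // prints sv1 [100-200] and sv2 [150-400]
            System.out.println(node.getValue() + " [" + node.getStart() + "-" + node.getEnd() + "]");
        }
    }
}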
Use of htsjdk.samtools.util.CloseableIterator in project jvarkit by lindenb.
The class GridssPostProcessVcf, method doVcfToVcf:
@Override
protected int doVcfToVcf(String inputName, VCFIterator iterin, VariantContextWriter out) {
SortingCollection<VariantContext> sorter1 = null;
SortingCollection<VariantContext> sorter2 = null;
PrintWriter debug = null;
try {
debug = this.debugFile == null ? new PrintWriter(new NullOuputStream()) : new PrintWriter(Files.newBufferedWriter(this.debugFile));
final VCFHeader header = iterin.getHeader();
LOG.info("reading input.");
final Comparator<VariantContext> comparator = (A, B) -> A.getAttributeAsString(EVENT_KEY, "").compareTo(B.getAttributeAsString(EVENT_KEY, ""));
sorter1 = SortingCollection.newInstance(VariantContext.class, new VCFRecordCodec(header, true), comparator, this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpPaths());
while (iterin.hasNext()) {
final VariantContext ctx = iterin.next();
if (!ctx.hasAttribute(EVENT_KEY)) {
LOG.warn("skipping variant without INFO/" + EVENT_KEY + " at " + toString(ctx));
continue;
}
sorter1.add(ctx);
}
sorter1.doneAdding();
sorter1.setDestructiveIteration(true);
LOG.info("done adding");
CloseableIterator<VariantContext> iter2 = sorter1.iterator();
@SuppressWarnings("resource") final EqualRangeIterator<VariantContext> equal_range = new EqualRangeIterator<>(iter2, comparator);
sorter2 = SortingCollection.newInstance(VariantContext.class, new VCFRecordCodec(header, true), header.getVCFRecordComparator(), this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpPaths());
while (equal_range.hasNext()) {
final List<VariantContext> variants = equal_range.next();
if (variants.size() == 1) {
final VariantContext ctx = variants.get(0);
final Allele alt = ctx.getAlleles().get(1);
// insertion-like, e.g. chr22:1234 REF=A ALT=AAAACAAGGAG
if (isATGC(ctx.getReference()) && isATGC(alt) && length(ctx.getReference()) < length(alt)) {
final VariantContextBuilder vcb1 = new VariantContextBuilder(ctx);
vcb1.attribute(VCFConstants.SVTYPE, "INS");
vcb1.attribute("SVLEN", length(alt) - length(ctx.getReference()));
sorter2.add(vcb1.make());
} else if (isATGC(ctx.getReference()) && isATGC(alt) && length(ctx.getReference()) == 1 && length(alt) == 1) {
    // strange case: a plain SNV carrying an EVENT id; keep it unchanged
sorter2.add(ctx);
} else {
sorter2.add(ctx);
debug.println("ALONE " + toString(ctx));
}
} else if (variants.size() != 2) {
for (final VariantContext ctx : variants) {
debug.println("SIZE>2 " + toString(ctx));
sorter2.add(ctx);
}
} else {
final VariantContext ctx1 = variants.get(0);
final VariantContext ctx2 = variants.get(1);
final Breakend brk1 = Breakend.parse(ctx1.getAlleles().get(1)).orElse(null);
final Breakend brk2 = Breakend.parse(ctx2.getAlleles().get(1)).orElse(null);
if (brk1 == null || brk2 == null) {
debug.println("expected two bnd " + toString(ctx1) + " " + toString(ctx2));
sorter2.add(ctx1);
sorter2.add(ctx2);
return -1;
// continue;
}
/* should be the same breakend */
if (!ctx1.getContig().equals(brk2.getContig()) || !ctx2.getContig().equals(brk1.getContig()) || ctx1.getStart() != brk2.getStart() || ctx2.getStart() != brk1.getStart()) {
debug.println("expected concordant bnd " + toString(ctx1) + " " + toString(ctx2));
sorter2.add(ctx1);
sorter2.add(ctx2);
return -1;
// continue;
}
final VariantContextBuilder vcb1 = new VariantContextBuilder(ctx1);
final VariantContextBuilder vcb2 = new VariantContextBuilder(ctx2);
if (!ctx1.contigsMatch(ctx2)) {
vcb1.attribute(VCFConstants.SVTYPE, "CTX");
vcb2.attribute(VCFConstants.SVTYPE, "CTX");
sorter2.add(vcb1.make());
sorter2.add(vcb2.make());
continue;
}
if (ctx1.getStart() > ctx2.getStart()) {
debug.println("CTX1>CTX2?" + toString(ctx1) + " " + toString(ctx2));
sorter2.add(vcb1.make());
sorter2.add(vcb2.make());
continue;
}
final BndType bndType1 = getBndType(brk1);
final BndType bndType2 = getBndType(brk2);
if (bndType1.equals(BndType.DNA_OPEN) && bndType2.equals(BndType.CLOSE_DNA)) {
final Allele ctx1_alt = ctx1.getAlleles().get(1);
final List<Allele> alleles = Arrays.asList(ctx1.getReference(), Allele.create("<DEL>", false));
vcb1.attribute(VCFConstants.SVTYPE, "DEL");
vcb1.alleles(alleles);
vcb1.attribute("SVLEN", ctx1.getEnd() - ctx2.getStart() + 1);
vcb1.genotypes(ctx1.getGenotypes().stream()
    .map(GT -> new GenotypeBuilder(GT)
        .alleles(GT.getAlleles().stream()
            .map(A -> A.equals(ctx1_alt) ? alleles.get(1) : A)
            .collect(Collectors.toList()))
        .make())
    .collect(Collectors.toList()));
sorter2.add(vcb1.make());
} else if (bndType1.equals(BndType.CLOSE_DNA) && bndType2.equals(BndType.DNA_OPEN)) {
final Allele ctx1_alt = ctx1.getAlleles().get(1);
final List<Allele> alleles = Arrays.asList(ctx1.getReference(), Allele.create("<DUP>", false));
vcb1.attribute(VCFConstants.SVTYPE, "DUP");
vcb1.alleles(alleles);
vcb1.attribute("SVLEN", ctx2.getEnd() - ctx1.getStart() + 1);
vcb1.genotypes(ctx1.getGenotypes().stream()
    .map(GT -> new GenotypeBuilder(GT)
        .alleles(GT.getAlleles().stream()
            .map(A -> A.equals(ctx1_alt) ? alleles.get(1) : A)
            .collect(Collectors.toList()))
        .make())
    .collect(Collectors.toList()));
sorter2.add(vcb1.make());
} else if (bndType1.equals(BndType.OPEN_DNA) && bndType2.equals(BndType.OPEN_DNA)) {
final Allele ctx1_alt = ctx1.getAlleles().get(1);
final List<Allele> alleles = Arrays.asList(ctx1.getReference(), Allele.create("<INV>", false));
vcb1.attribute(VCFConstants.SVTYPE, "INV");
vcb1.alleles(alleles);
vcb1.attribute("SVLEN", ctx2.getEnd() - ctx1.getStart() + 1);
vcb1.genotypes(ctx1.getGenotypes().stream()
    .map(GT -> new GenotypeBuilder(GT)
        .alleles(GT.getAlleles().stream()
            .map(A -> A.equals(ctx1_alt) ? alleles.get(1) : A)
            .collect(Collectors.toList()))
        .make())
    .collect(Collectors.toList()));
sorter2.add(vcb1.make());
} else if (bndType1.equals(BndType.DNA_CLOSE) && bndType2.equals(BndType.DNA_CLOSE)) {
final Allele ctx1_alt = ctx1.getAlleles().get(1);
final List<Allele> alleles = Arrays.asList(ctx1.getReference(), Allele.create("<SV>", false));
vcb1.attribute(VCFConstants.SVTYPE, "SV");
vcb1.alleles(alleles);
vcb1.attribute("SVLEN", ctx2.getEnd() - ctx1.getStart() + 1);
vcb1.genotypes(ctx1.getGenotypes().stream()
    .map(GT -> new GenotypeBuilder(GT)
        .alleles(GT.getAlleles().stream()
            .map(A -> A.equals(ctx1_alt) ? alleles.get(1) : A)
            .collect(Collectors.toList()))
        .make())
    .collect(Collectors.toList()));
sorter2.add(vcb1.make());
} else {
debug.println("How to handle " + toString(ctx1) + " " + toString(ctx2));
sorter2.add(ctx1);
sorter2.add(ctx2);
}
}
}
equal_range.close();
iter2.close();
sorter1.cleanup();
sorter1 = null;
sorter2.doneAdding();
sorter2.setDestructiveIteration(true);
iter2 = sorter2.iterator();
final VCFHeader header2 = new VCFHeader(header);
JVarkitVersion.getInstance().addMetaData(this, header2);
final VCFInfoHeaderLine originalAlt = new VCFInfoHeaderLine("BND", 1, VCFHeaderLineType.String, "Original ALT allele.");
// the new meta-data lines must go into header2, the header that is actually written below
header2.addMetaDataLine(originalAlt);
header2.addMetaDataLine(new VCFInfoHeaderLine("SVLEN", 1, VCFHeaderLineType.Integer, "SV length"));
header2.addMetaDataLine(VCFStandardHeaderLines.getInfoLine(VCFConstants.END_KEY, true));
out.writeHeader(header2);
while (iter2.hasNext()) {
out.add(iter2.next());
}
iter2.close();
sorter2.cleanup();
sorter2 = null;
debug.flush();
debug.close();
debug = null;
return 0;
} catch (final Throwable err) {
err.printStackTrace();
LOG.error(err);
return -1;
} finally {
if (sorter1 != null)
sorter1.cleanup();
if (sorter2 != null)
sorter2.cleanup();
if (debug != null) {
    try {
        debug.close();
    } catch (final Throwable err2) {
        // ignore
    }
}
}
}
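Both sorting passes above follow the usual htsjdk SortingCollection lifecycle: add(), doneAdding(), optionally setDestructiveIteration(true), read back through a CloseableIterator, then cleanup(). A minimal sketch of a single pass that, like this method, orders records by their INFO/EVENT attribute; the input path and the 50,000-records-in-RAM limit are assumptions:

import htsjdk.samtools.util.CloseableIterator;
import htsjdk.samtools.util.SortingCollection;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.vcf.VCFFileReader;
import htsjdk.variant.vcf.VCFHeader;
import htsjdk.variant.vcf.VCFRecordCodec;
import java.io.File;

public class EventSortSketch {
    public static void main(final String[] args) {
        // args[0] is a hypothetical VCF path; 'false' = no index needed for a linear scan
        try (VCFFileReader reader = new VCFFileReader(new File(args[0]), false)) {
            final VCFHeader header = reader.getFileHeader();
            final SortingCollection<VariantContext> sorter = SortingCollection.newInstance(
                    VariantContext.class,
                    new VCFRecordCodec(header, true),
                    (a, b) -> a.getAttributeAsString("EVENT", "").compareTo(b.getAttributeAsString("EVENT", "")),
                    50_000 /* assumed max records in RAM before spilling to disk */);
            try (CloseableIterator<VariantContext> in = reader.iterator()) {
                while (in.hasNext()) sorter.add(in.next());
            }
            sorter.doneAdding();
            sorter.setDestructiveIteration(true); // spill files are freed as they are consumed
            try (CloseableIterator<VariantContext> sorted = sorter.iterator()) {
                sorted.forEachRemaining(v -> System.out.println(
                        v.getAttributeAsString("EVENT", "") + "\t" + v.getContig() + ":" + v.getStart()));
            } finally {
                sorter.cleanup(); // delete any remaining temporary files
            }
        }
    }
}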
Use of htsjdk.samtools.util.CloseableIterator in project jvarkit by lindenb.
The class Bam2Wig, method doWork:
@Override
public int doWork(final List<String> args) {
if (this.win_shift <= 0) {
LOG.error("window shift<=0");
return -1;
}
if (this.window_span <= 0) {
LOG.error("window size<=0");
return -1;
}
final SimpleInterval interval;
PrintWriter pw = null;
CloseableIterator<SAMRecord> samRecordIterator = null;
final List<SamReader> samReaders = new ArrayList<>();
final List<CloseableIterator<SAMRecord>> mergingIterators = new ArrayList<>();
try {
final SamReaderFactory srf = SamReaderFactory.makeDefault().validationStringency(htsjdk.samtools.ValidationStringency.LENIENT);
if (args.isEmpty()) {
if (!StringUtil.isBlank(region_str)) {
LOG.error("cannot specify region for stdin");
return -1;
}
interval = null;
samReaders.add(srf.open(SamInputResource.of(stdin())));
samRecordIterator = samReaders.get(0).iterator();
} else if (args.size() == 1 && !args.get(0).endsWith(".list")) {
samReaders.add(srf.open(SamInputResource.of(new File(args.get(0)))));
if (StringUtil.isBlank(this.region_str)) {
samRecordIterator = samReaders.get(0).iterator();
interval = null;
} else {
interval = IntervalParserFactory.newInstance()
    .dictionary(samReaders.get(0).getFileHeader().getSequenceDictionary())
    .enableWholeContig()
    .make()
    .apply(region_str)
    .orElseThrow(IntervalParserFactory.exception(this.region_str));
samRecordIterator = samReaders.get(0).query(interval.getContig(), interval.getStart(), interval.getEnd(), false);
}
} else {
final List<Path> samFiles = IOUtils.unrollPaths(args);
if (samFiles.isEmpty()) {
LOG.error("No Input SAM file");
return -1;
}
final SAMSequenceDictionary dict0 = SequenceDictionaryUtils.extractRequired(samFiles.get(0));
samFiles.stream().forEach(F -> {
final SAMSequenceDictionary dicti = SequenceDictionaryUtils.extractRequired(F);
if (!SequenceUtil.areSequenceDictionariesEqual(dicti, dict0)) {
throw new JvarkitException.DictionariesAreNotTheSame(dict0, dicti);
}
});
for (final Path bamFile : samFiles) {
LOG.info("opening " + bamFile);
samReaders.add(srf.open(bamFile));
}
final SamFileHeaderMerger mergedheader = new SamFileHeaderMerger(
    SAMFileHeader.SortOrder.coordinate,
    samReaders.stream().map(SR -> SR.getFileHeader()).collect(Collectors.toList()),
    false);
final Map<SamReader, CloseableIterator<SAMRecord>> reader2iter = new HashMap<>();
if (StringUtil.isBlank(this.region_str)) {
for (final SamReader sr : samReaders) {
reader2iter.put(sr, sr.iterator());
}
interval = null;
} else {
interval = IntervalParserFactory.newInstance()
    .dictionary(dict0)
    .enableWholeContig()
    .make()
    .apply(region_str)
    .orElseThrow(IntervalParserFactory.exception(region_str));
LOG.info("interval :" + interval);
for (final SamReader sr : samReaders) {
reader2iter.put(sr, sr.query(interval.getContig(), interval.getStart(), interval.getEnd(), false));
}
}
mergingIterators.addAll(reader2iter.values());
samRecordIterator = new MergingSamRecordIterator(mergedheader, reader2iter, true);
}
for (final SamReader sr : samReaders) {
if (sr.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
LOG.error("one of your bam input is not sorted on coordinate");
return -1;
}
}
pw = openPathOrStdoutAsPrintWriter(this.outputFile);
run(pw, samRecordIterator, samReaders.get(0).getFileHeader().getSequenceDictionary(), interval);
samRecordIterator.close();
samRecordIterator = null;
CloserUtil.close(samReaders);
samReaders.clear();
pw.flush();
return 0;
} catch (final Throwable err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(mergingIterators);
CloserUtil.close(samRecordIterator);
CloserUtil.close(samReaders);
CloserUtil.close(pw);
pw = null;
}
}
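When a region is supplied, the method swaps the whole-file iterator for SamReader.query(...), whose SAMRecordIterator is itself a CloseableIterator<SAMRecord> and therefore works in try-with-resources. A minimal sketch of such an indexed region query; the BAM name and region are hypothetical, and the file must be coordinate-sorted and indexed:

import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMRecordIterator;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.ValidationStringency;
import java.io.File;
import java.io.IOException;

public class RegionIteratorSketch {
    public static void main(final String[] args) throws IOException {
        final SamReaderFactory srf = SamReaderFactory.makeDefault()
                .validationStringency(ValidationStringency.LENIENT);
        // "sample.bam" is a hypothetical coordinate-sorted, indexed BAM
        try (SamReader reader = srf.open(new File("sample.bam"));
             // SAMRecordIterator is a CloseableIterator<SAMRecord>
             SAMRecordIterator iter = reader.queryOverlapping("chr1", 1_000, 2_000)) {
            long n = 0;
            while (iter.hasNext()) {
                final SAMRecord rec = iter.next();
                if (!rec.getReadUnmappedFlag()) n++;
            }
            System.out.println(n + " mapped reads overlap chr1:1000-2000");
        }
    }
}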
Use of htsjdk.samtools.util.CloseableIterator in project jvarkit by lindenb.
The class BamAlleleBalance, method doWork:
@Override
public int doWork(List<String> args) {
try {
final List<Path> bamPaths = IOUtils.unrollPaths(args);
if (bamPaths.isEmpty()) {
LOG.error("Bam list is empty");
return -1;
}
final List<Snp> snps = new ArrayList<>(10_000);
try (VCFReader vr = VCFReaderFactory.makeDefault().open(this.variantsPath, false)) {
try (CloseableIterator<VariantContext> iter = vr.iterator()) {
while (iter.hasNext()) {
final VariantContext ctx = iter.next();
if (!ctx.isBiallelic() || ctx.isFiltered() || !ctx.isSNP())
continue;
snps.add(new Snp(ctx));
}
}
}
if (snps.isEmpty()) {
LOG.error("snp list is empty");
return -1;
}
LOG.info(String.valueOf(snps.size()) + " snp(s)");
final Map<String, Counter<RangeOfDoubles.Range>> sample2count = new HashMap<>(bamPaths.size());
final SamReaderFactory srf = super.createSamReaderFactory();
if (this.faidx != null)
srf.referenceSequence(this.faidx);
for (final Path bamPath : bamPaths) {
try (SamReader samReader = srf.open(bamPath)) {
final String defaultPartition = IOUtils.getFilenameWithoutCommonSuffixes(bamPath);
final SAMFileHeader header = samReader.getFileHeader();
final Set<String> samples = header.getReadGroups().stream().map(RG -> this.groupBy.apply(RG, defaultPartition)).collect(Collectors.toSet());
samples.stream().forEach(SN -> sample2count.putIfAbsent(SN, new Counter<>()));
final SAMSequenceDictionary dict = SequenceDictionaryUtils.extractRequired(header);
final ContigNameConverter converter = ContigNameConverter.fromOneDictionary(dict);
for (final Snp snp : snps) {
final String bamContig = converter.apply(snp.contigSrc);
if (StringUtils.isBlank(bamContig))
continue;
final Map<String, SiteInfo> sample2siteinfo = new HashMap<>(samples.size());
for (final String sn : samples) {
sample2siteinfo.put(sn, new SiteInfo());
}
try (CloseableIterator<SAMRecord> iter = samReader.queryOverlapping(bamContig, snp.pos1, snp.pos1)) {
while (iter.hasNext()) {
final SAMRecord rec = iter.next();
if (!SAMRecordDefaultFilter.accept(rec, this.mapq))
continue;
final SAMReadGroupRecord rg = rec.getReadGroup();
if (rg == null)
continue;
String sample = this.groupBy.apply(rg, defaultPartition);
if (StringUtils.isBlank(sample) || !sample2siteinfo.containsKey(sample))
continue;
final Cigar cigar = rec.getCigar();
if (cigar == null || cigar.isEmpty())
continue;
byte[] bases = rec.getReadBases();
if (bases == null || bases == SAMRecord.NULL_SEQUENCE)
continue;
final int readpos1 = rec.getReadPositionAtReferencePosition(snp.pos1);
if (readpos1 < 1)
continue;
final int readpos0 = readpos1 - 1;
if (readpos0 < 0 || readpos0 >= bases.length)
continue;
final char base = (char) Character.toUpperCase(bases[readpos0]);
final SiteInfo si = sample2siteinfo.get(sample);
if (si == null)
continue;
if (base == snp.ref) {
si.countRef++;
} else if (base == snp.alt) {
si.countAlt++;
}
}
for (final String sample : sample2siteinfo.keySet()) {
final SiteInfo si = sample2siteinfo.get(sample);
final int depth = si.countAlt + si.countRef;
if (depth < this.min_depth)
continue;
// must be het
if (si.countAlt == 0 || si.countRef == 0)
continue;
final double ratio = si.countAlt / (double) depth;
final RangeOfDoubles.Range range = this.ratios.getRange(ratio);
sample2count.get(sample).incr(range);
}
}
// end of iter
}
// end of loop over snps
}
}
// print report
try (PrintWriter pw = super.openPathOrStdoutAsPrintWriter(this.outputFile)) {
final List<String> sortedSamples = sample2count.keySet().stream()
    .sorted((A, B) -> Long.compare(sample2count.get(A).getTotal(), sample2count.get(B).getTotal()))
    .collect(Collectors.toList());
pw.print("RANGE");
for (final String sn : sortedSamples) {
pw.print("\t");
pw.print(sn);
}
pw.println();
for (final RangeOfDoubles.Range r : this.ratios.getRanges()) {
pw.print(r.toString());
for (final String sn : sortedSamples) {
pw.print("\t");
pw.print(sample2count.get(sn).count(r));
}
pw.println();
}
pw.flush();
}
return 0;
} catch (final Throwable err) {
LOG.error(err);
return -1;
}
}
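The SNP-loading block at the top of this method is the simplest CloseableIterator pattern of all: stream a whole VCF once, keep the usable sites, and let try-with-resources close reader and iterator together. A self-contained sketch under the same filters, with a hypothetical input path:

import htsjdk.samtools.util.CloseableIterator;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.vcf.VCFFileReader;
import java.io.File;
import java.util.ArrayList;
import java.util.List;

public class SnpLoaderSketch {
    public static void main(final String[] args) {
        final List<VariantContext> snps = new ArrayList<>(10_000);
        // "sites.vcf.gz" is a hypothetical path; 'false' = no index required for a linear scan
        try (VCFFileReader reader = new VCFFileReader(new File("sites.vcf.gz"), false);
             CloseableIterator<VariantContext> iter = reader.iterator()) {
            while (iter.hasNext()) {
                final VariantContext ctx = iter.next();
                // same criteria as BamAlleleBalance: biallelic, unfiltered SNVs only
                if (!ctx.isBiallelic() || ctx.isFiltered() || !ctx.isSNP())
                    continue;
                snps.add(ctx);
            }
        }
        System.out.println(snps.size() + " usable snp(s)");
    }
}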