use of htsjdk.variant.vcf.VCFIterator in project jvarkit by lindenb.
the class LumpyMoreSamples method doWork.
@Override
public int doWork(final List<String> args) {
VCFIterator r = null;
VariantContextWriter vcw = null;
final Map<String, SamReader> sample2samreaders = new HashMap<>();
try {
r = super.openVCFIterator(oneFileOrNull(args));
final VCFHeader headerIn = r.getHeader();
final SAMSequenceDictionary dict = headerIn.getSequenceDictionary();
if (dict == null) {
LOG.error(JvarkitException.VcfDictionaryMissing.getMessage("input vcf"));
return -1;
}
final SamReaderFactory samReaderFactory = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.LENIENT);
IOUtil.slurpLines(this.bamFileList).stream().forEach(F -> {
if (F.trim().isEmpty())
return;
final SamReader sr = samReaderFactory.open(SamInputResource.of(F));
final SAMFileHeader samHeader = sr.getFileHeader();
final SAMSequenceDictionary dict2 = samHeader.getSequenceDictionary();
if (dict2 == null) {
throw new JvarkitException.BamDictionaryMissing(F);
}
if (!SequenceUtil.areSequenceDictionariesEqual(dict, dict2)) {
throw new JvarkitException.DictionariesAreNotTheSame(dict, dict2);
}
for (final SAMReadGroupRecord rg : samHeader.getReadGroups()) {
final String sample = rg.getSample();
if (StringUtil.isBlank(sample))
continue;
final SamReader reader = sample2samreaders.get(sample);
if (reader == null) {
sample2samreaders.put(sample, sr);
} else if (reader == sr) {
continue;
} else {
throw new JvarkitException.UserError("sample " + sample + " found in more than one bam: " + F);
}
}
});
final Set<String> inVcfSampleNames = new HashSet<>(headerIn.getSampleNamesInOrder());
final Set<String> outVcfSampleNames = new HashSet<>(inVcfSampleNames);
outVcfSampleNames.addAll(sample2samreaders.keySet());
final VCFHeader headerOut = new VCFHeader(headerIn.getMetaDataInInputOrder(), outVcfSampleNames);
final VCFFormatHeaderLine SU2 = new VCFFormatHeaderLine("SU2", 1, VCFHeaderLineType.Integer, "Number of pieces of evidence supporting the variant");
final VCFFormatHeaderLine PE2 = new VCFFormatHeaderLine("PE2", 1, VCFHeaderLineType.Integer, "Number of paired-end reads supporting the variant");
final VCFFormatHeaderLine SR2 = new VCFFormatHeaderLine("SR2", 1, VCFHeaderLineType.Integer, "Number of split reads supporting the variant");
headerOut.addMetaDataLine(SU2);
headerOut.addMetaDataLine(PE2);
headerOut.addMetaDataLine(SR2);
vcw = super.openVariantContextWriter(this.outputFile);
vcw.writeHeader(headerOut);
while (r.hasNext()) {
final VariantContext ctx = r.next();
final StructuralVariantType sttype = ctx.getStructuralVariantType();
if (sttype == null)
continue;
final int tid = dict.getSequenceIndex(ctx.getContig());
final Map<String, Genotype> genotypeMap = new HashMap<>();
ctx.getGenotypes().stream().forEach(G -> genotypeMap.put(G.getSampleName(), G));
for (final String sample : sample2samreaders.keySet()) {
final SamReader samReader = sample2samreaders.get(sample);
final SupportingReads sr = new SupportingReads();
switch(sttype) {
case DEL:
{
int pos = ctx.getStart();
int[] ci = confidenceIntervalPos(ctx);
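// uncertainty window around the deletion start (the VCF CIPOS interval)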
final QueryInterval left = new QueryInterval(tid, pos - ci[0], pos + ci[1]);
int end = ctx.getEnd();
ci = confidenceIntervalEnd(ctx);
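// uncertainty window around the deletion end (the VCF CIEND interval)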
final QueryInterval right = new QueryInterval(tid, end - ci[0], end + ci[1]);
for (final SAMRecord rec : extractSupportingReads(ctx, sample, samReader, new QueryInterval[] { left, right })) {
final Cigar cigar = rec.getCigar();
if (cigar.isLeftClipped()) {
final QueryInterval qi = new QueryInterval(tid, rec.getUnclippedStart(), rec.getStart() - 1);
if (qi.overlaps(left)) {
sr.splitReads++;
if (rec.getReadPairedFlag())
sr.pairedReads++;
}
}
if (cigar.isRightClipped()) {
final QueryInterval qi = new QueryInterval(tid, rec.getEnd() + 1, rec.getUnclippedEnd());
if (qi.overlaps(right)) {
sr.splitReads++;
if (rec.getReadPairedFlag())
sr.pairedReads++;
}
}
}
break;
}
default:
break;
}
final GenotypeBuilder gb;
if (genotypeMap.containsKey(sample)) {
gb = new GenotypeBuilder(genotypeMap.get(sample));
} else {
gb = new GenotypeBuilder(sample, Arrays.asList(Allele.NO_CALL, Allele.NO_CALL));
}
gb.attribute(SR2.getID(), sr.splitReads);
gb.attribute(PE2.getID(), sr.pairedReads);
gb.attribute(SU2.getID(), 0);
genotypeMap.put(sample, gb.make());
}
final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
// add missing samples.
for (final String sampleName : outVcfSampleNames) {
if (genotypeMap.containsKey(sampleName))
continue;
genotypeMap.put(sampleName, new GenotypeBuilder(sampleName, Arrays.asList(Allele.NO_CALL, Allele.NO_CALL)).make());
}
vcb.genotypes(genotypeMap.values());
vcw.add(vcb.make());
}
r.close();
r = null;
sample2samreaders.values().stream().forEach(R -> CloserUtil.close(R));
LOG.info("done");
return 0;
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(r);
}
}
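The DEL branch above boils down to one test: does a read's clipped end land on a breakpoint interval? A minimal sketch of that test in isolation; the class and method names are hypothetical, and only the htsjdk calls already used in the snippet are assumed.

import htsjdk.samtools.Cigar;
import htsjdk.samtools.QueryInterval;
import htsjdk.samtools.SAMRecord;

final class ClipOverlap {
    /** true if a soft/hard-clipped end of 'rec' falls on a breakpoint interval */
    static boolean supportsDeletion(final SAMRecord rec, final QueryInterval left, final QueryInterval right) {
        final Cigar cigar = rec.getCigar();
        final int tid = rec.getReferenceIndex();
        if (cigar.isLeftClipped()) {
            // the clipped prefix spans [unclippedStart, alignmentStart - 1]
            final QueryInterval clip = new QueryInterval(tid, rec.getUnclippedStart(), rec.getStart() - 1);
            if (clip.overlaps(left)) return true;
        }
        if (cigar.isRightClipped()) {
            // the clipped suffix spans [alignmentEnd + 1, unclippedEnd]
            final QueryInterval clip = new QueryInterval(tid, rec.getEnd() + 1, rec.getUnclippedEnd());
            if (clip.overlaps(right)) return true;
        }
        return false;
    }
}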
use of htsjdk.variant.vcf.VCFIterator in project jvarkit by lindenb.
the class AlleleFrequencyCalculator method doWork.
@Override
public int doWork(final List<String> args) {
PrintStream out = null;
VCFIterator in = null;
try {
in = super.openVCFIterator(oneAndOnlyOneFile(args));
out = openFileOrStdoutAsPrintStream(outputFile);
out.println("CHR\tPOS\tID\tREF\tALT\tTOTAL_CNT\tALT_CNT\tFRQ");
while (in.hasNext() && !out.checkError()) {
final VariantContext ctx = in.next();
final Allele ref = ctx.getReference();
if (ref == null)
continue;
if (ctx.getNSamples() == 0 || ctx.getAlternateAlleles().isEmpty())
continue;
final Allele alt = ctx.getAltAlleleWithHighestAlleleCount();
if (alt == null)
continue;
final GenotypesContext genotypes = ctx.getGenotypes();
if (genotypes == null)
continue;
int total_ctn = 0;
int alt_ctn = 0;
for (int i = 0; i < genotypes.size(); ++i) {
final Genotype g = genotypes.get(i);
for (final Allele allele : g.getAlleles()) {
if (allele.equals(ref)) {
total_ctn++;
} else if (allele.equals(alt)) {
total_ctn++;
alt_ctn++;
}
}
}
out.print(ctx.getContig());
out.print("\t");
out.print(ctx.getStart());
out.print("\t");
out.print(ctx.hasID() ? ctx.getID() : ".");
out.print("\t");
out.print(ref.getBaseString());
out.print("\t");
out.print(alt.getBaseString());
out.print("\t");
out.print(total_ctn);
out.print("\t");
out.print(alt_ctn);
out.print("\t");
out.print(total_ctn == 0 ? 0f : alt_ctn / (float) total_ctn);
out.println();
}
out.flush();
out.close();
out = null;
return RETURN_OK;
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(out);
CloserUtil.close(in);
}
}
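For comparison, htsjdk can derive the same counters without the hand-rolled loop. A hedged sketch (the helper class is hypothetical; getCalledChrCount() is a real VariantContext method). Note the loop above deliberately ignores alleles that are neither REF nor the chosen ALT, which this version reproduces by querying each allele explicitly rather than using the no-argument getCalledChrCount().

import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.VariantContext;

final class AltFreq {
    /** frequency of 'alt' among called REF/ALT alleles; 0 when nothing is called */
    static double frequency(final VariantContext ctx, final Allele ref, final Allele alt) {
        final int altCnt = ctx.getCalledChrCount(alt);            // called copies of ALT
        final int totalCnt = ctx.getCalledChrCount(ref) + altCnt; // REF + ALT copies only
        return totalCnt == 0 ? 0.0 : altCnt / (double) totalCnt;
    }
}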
use of htsjdk.variant.vcf.VCFIterator in project jvarkit by lindenb.
the class ConvertVcfChromosomes method doWork.
public int doWork(final List<String> args) {
if (this.mappingFile == null) {
throw new JvarkitException.CommandLineError("undefined mapping file");
}
VCFIterator iterin = null;
VariantContextWriter out = null;
try {
final ContigNameConverter customMapping = ContigNameConverter.fromPathOrOneDictionary(mappingFile);
final Set<String> unseen = new HashSet<>();
iterin = super.openVCFIterator(oneFileOrNull(args));
out = this.output == null ? VCFUtils.createVariantContextWriterToOutputStream(stdout()) : VCFUtils.createVariantContextWriterToPath(this.output);
final VCFHeader header1 = iterin.getHeader();
final VCFHeader header2 = new VCFHeader(header1.getMetaDataInInputOrder().stream().filter(L -> !L.getKey().equals(VCFHeader.CONTIG_KEY)).collect(Collectors.toSet()), header1.getSampleNamesInOrder());
JVarkitVersion.getInstance().addMetaData(this, header2);
if (header1.getSequenceDictionary() != null) {
header2.setSequenceDictionary(customMapping.convertDictionary(header1.getSequenceDictionary()));
}
out.writeHeader(header2);
final Function<String, String> converter = (S) -> {
final String newName = customMapping.apply(S);
if (StringUtils.isBlank(newName)) {
if (exitOnFailure)
throw new IllegalArgumentException("Cannot convert contig " + S);
if (unseen.size() < 1000 && !unseen.contains(S)) {
LOG.warn("Cannot find contig for " + S);
unseen.add(S);
}
return null;
}
return newName;
};
while (iterin.hasNext()) {
final VariantContext ctx = iterin.next();
final String contig = converter.apply(ctx.getContig());
if (StringUtils.isBlank(contig))
continue;
final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
// if it's a breakend (BND), rewrite the mate contig inside each symbolic ALT allele
if (StructuralVariantType.BND.equals(ctx.getStructuralVariantType())) {
boolean ok_convert = true;
final List<Genotype> genotypes = new ArrayList<>(ctx.getGenotypes());
final List<Allele> alleles = new ArrayList<>(ctx.getAlleles());
for (int i = 1; i < ctx.getAlleles().size(); i++) {
Allele alt = ctx.getAlleles().get(i);
if (!alt.isSymbolic())
continue;
if (!alt.isBreakpoint())
continue;
String display = alt.getDisplayString();
int colon = display.indexOf(':');
if (colon == -1)
continue;
final char delim = display.contains("[") ? '[' : ']';
final int i1 = display.indexOf(delim);
if (i1 > colon)
continue;
final int i2 = colon + 1 == display.length() ? -1 : display.indexOf(delim, colon + 1);
if (!(i1 + 1 < colon && colon + 1 < i2)) {
continue;
}
final String bndCtg = converter.apply(display.substring(i1 + 1, colon));
if (StringUtils.isBlank(bndCtg)) {
ok_convert = false;
break;
}
Allele newAlt = Allele.create(display.substring(0, i1 + 1) + bndCtg + display.substring(colon), false);
alleles.set(i, newAlt);
for (int x = 0; x < genotypes.size(); ++x) {
final Genotype gt = genotypes.get(x);
final List<Allele> gtAlleles = new ArrayList<>(gt.getAlleles());
gtAlleles.replaceAll(A -> A.equals(alt) ? newAlt : A);
genotypes.set(x, new GenotypeBuilder(gt).alleles(gtAlleles).make());
}
}
if (!ok_convert)
continue;
vcb.alleles(alleles);
vcb.genotypes(genotypes);
}
vcb.chr(contig);
out.add(vcb.make());
}
iterin.close();
iterin = null;
out.close();
out = null;
return 0;
} catch (final Throwable err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(iterin);
CloserUtil.close(out);
}
}
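The breakend string surgery above is easiest to follow on a concrete value. A self-contained, hypothetical walk-through (the ALT string and the chr2-to-2 mapping are invented for illustration):

public class BndRenameSketch {
    public static void main(final String[] args) {
        final String display = "A[chr2:321682[";   // hypothetical BND ALT allele
        final char delim = display.contains("[") ? '[' : ']';
        final int i1 = display.indexOf(delim);     // opening bracket, here index 1
        final int colon = display.indexOf(':');    // separates mate contig from position
        final String mateContig = display.substring(i1 + 1, colon);  // "chr2"
        // rename only the contig, keeping the prefix and the ":position[" suffix intact
        final String renamed = display.substring(0, i1 + 1) + "2" + display.substring(colon);
        System.out.println(mateContig + " -> " + renamed);  // prints: chr2 -> A[2:321682[
    }
}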
use of htsjdk.variant.vcf.VCFIterator in project jvarkit by lindenb.
the class VcfROH method doWork.
@Override
public int doWork(final List<String> args) {
try {
final String input = oneFileOrNull(args);
try (VCFIterator iter = input == null ? new VCFIteratorBuilder().open(stdin()) : new VCFIteratorBuilder().open(input)) {
final VCFHeader header = iter.getHeader();
final SAMSequenceDictionary dict = SequenceDictionaryUtils.extractRequired(header);
final OrderChecker<VariantContext> checker = new OrderChecker<VariantContext>(dict, false);
final List<Sample> samples = new ArrayList<>(header.getNGenotypeSamples());
for (String sn : header.getSampleNamesInOrder()) {
final Sample sample = new Sample(sn, header.getSampleNameToOffset().get(sn));
samples.add(sample);
}
try (PrintWriter w = super.openPathOrStdoutAsPrintWriter(this.output)) {
if (!this.output_as_bed) {
final SAMFileHeader samHeader = new SAMFileHeader(dict);
samHeader.setSortOrder(SAMFileHeader.SortOrder.coordinate);
final SAMTextHeaderCodec codec = new SAMTextHeaderCodec();
for (Sample sn : samples) {
final SAMReadGroupRecord rg = new SAMReadGroupRecord(sn.name);
rg.setSample(sn.name);
samHeader.addReadGroup(rg);
}
JVarkitVersion.getInstance().addMetaData(this, samHeader);
codec.encode(w, samHeader);
}
while (iter.hasNext()) {
final VariantContext ctx = checker.apply(iter.next());
for (Sample sample : samples) sample.visit(w, ctx);
}
for (Sample sample : samples) sample.finish(w);
w.flush();
}
}
return 0;
} catch (final Throwable err) {
LOG.error(err);
return -1;
}
}
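The try-with-resources idiom at the top of this method is the canonical way to consume a VCF with htsjdk. A minimal, standalone sketch, assuming a hypothetical input path:

import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.vcf.VCFHeader;
import htsjdk.variant.vcf.VCFIterator;
import htsjdk.variant.vcf.VCFIteratorBuilder;

public class ReadVcfSketch {
    public static void main(final String[] args) throws Exception {
        // VCFIteratorBuilder detects plain vs gzipped input; the iterator closes automatically
        try (VCFIterator iter = new VCFIteratorBuilder().open("input.vcf.gz")) {
            final VCFHeader header = iter.getHeader();
            System.out.println(header.getNGenotypeSamples() + " samples");
            while (iter.hasNext()) {
                final VariantContext ctx = iter.next();
                System.out.println(ctx.getContig() + ":" + ctx.getStart());
            }
        }
    }
}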
use of htsjdk.variant.vcf.VCFIterator in project jvarkit by lindenb.
the class VcfSkatSlidingWindow method doWork.
@Override
public int doWork(final List<String> args) {
if (this.nJobs < 1) {
this.nJobs = Math.max(1, Runtime.getRuntime().availableProcessors());
LOG.info("setting njobs to " + this.nJobs);
}
try {
final VCFHeader header;
final SAMSequenceDictionary dict;
final File vcfFile = new File(oneAndOnlyOneFile(args));
try (final VCFReader vr = VCFReaderFactory.makeDefault().open(vcfFile, true)) {
header = vr.getHeader();
dict = header.getSequenceDictionary();
}
if (dict == null || dict.isEmpty()) {
throw new JvarkitException.VcfDictionaryMissing(vcfFile);
}
if (!this.limit_contigs.isEmpty()) {
if (this.limit_contigs.stream().anyMatch(C -> dict.getSequence(C) == null)) {
LOG.error("user contig missing in vcf dictionary.");
return -1;
}
}
final Pedigree pedigree;
if (this.pedigreeFile != null) {
pedigree = new Pedigree.Parser().parse(this.pedigreeFile);
} else {
pedigree = new Pedigree.Parser().parse(header);
}
final Set<Pedigree.Person> samples = new HashSet<>(pedigree.getPersons());
samples.removeIf(I -> !(I.isAffected() || I.isUnaffected()) || !header.getSampleNamesInOrder().contains(I.getId()));
this.writer = super.openFileOrStdoutAsPrintWriter(this.outputFile);
final Consumer<SkatCallerResult> writeResult = (R) -> {
synchronized (this.writer) {
this.writer.println(R.toString());
}
};
for (final SAMSequenceRecord ssr : dict.getSequences()) {
if (!this.limit_contigs.isEmpty() && !this.limit_contigs.contains(ssr.getSequenceName())) {
LOG.warning("skipping contig " + ssr.getSequenceName());
continue;
}
LOG.info("contig " + ssr.getSequenceName());
final ExecutorService executorService = new ThreadPoolExecutor(this.nJobs, this.nJobs, 0L, TimeUnit.MILLISECONDS, new LinkedBlockingDeque<>(this.nJobs));
final List<Future<Integer>> results = new ArrayList<>(this.nJobs);
final int windowLen = Math.max(1, ssr.getSequenceLength() / this.nJobs);
for (int i = 0; i < this.nJobs; i++) {
// 1-based, non-overlapping windows; the last worker takes the tail that integer division would otherwise drop
final int winStart = i * windowLen + 1;
final int winEnd = (i + 1 == this.nJobs) ? ssr.getSequenceLength() : (i + 1) * windowLen;
final SkatWorker caller = new SkatWorker(vcfFile, new Interval(ssr.getSequenceName(), winStart, winEnd), samples, this.skat.build(), writeResult);
results.add(executorService.submit(caller));
}
executorService.shutdown();
executorService.awaitTermination(365, TimeUnit.DAYS);
for (final Future<Integer> fl : results) {
try {
if (fl.get() != 0) {
LOG.error("An error occured");
return -1;
}
} catch (final Exception err) {
LOG.error(err);
return -1;
}
}
}
this.writer.flush();
this.writer.close();
this.writer = null;
return 0;
} catch (final Throwable err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(this.writer);
}
}
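The surrounding scaffolding is a standard fan-out/collect pattern: submit one Callable per window, shut the pool down, then inspect every Future. A minimal self-contained sketch, with SkatWorker replaced by a trivial Callable and all values hypothetical:

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;

public class FanOutSketch {
    public static void main(final String[] args) throws Exception {
        final int nJobs = 4;
        final ExecutorService pool = Executors.newFixedThreadPool(nJobs);
        final List<Future<Integer>> results = new ArrayList<>(nJobs);
        for (int i = 0; i < nJobs; i++) {
            final int window = i;
            results.add(pool.submit(() -> {
                // a real worker would scan its genomic window here
                System.out.println("scanning window " + window);
                return 0; // 0 means success, mirroring the convention above
            }));
        }
        pool.shutdown();
        pool.awaitTermination(1, TimeUnit.HOURS);
        for (final Future<Integer> f : results) {
            if (f.get() != 0) throw new IllegalStateException("worker failed");
        }
    }
}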