Use of htsjdk.variant.variantcontext.writer.VariantContextWriter in project jvarkit by lindenb.
The class VcfBurdenSplitter, method doVcfToVcf:
@Override
protected int doVcfToVcf(String inputName, File outorNull) {
    SortingCollection<KeyAndLine> sortingcollection = null;
    BufferedReader in = null;
    CloseableIterator<KeyAndLine> iter = null;
    PrintStream pw = null;
    PrintWriter allDiscardedLog = null;
    try {
        in = inputName == null ? IOUtils.openStreamForBufferedReader(stdin()) : IOUtils.openURIForBufferedReading(inputName);
        if (this.allFilteredFileOut != null) {
            allDiscardedLog = IOUtils.openFileForPrintWriter(this.allFilteredFileOut);
        }
        final VCFUtils.CodecAndHeader cah = VCFUtils.parseHeader(in);
        /* find the splitter by name */
        Splitter splitter = null;
        for (final Splitter s : this.splitters) {
            if (this.splitterName.equals(s.getName())) {
                splitter = s;
                break;
            }
        }
        if (splitter == null) {
            return wrapException("Cannot find a splitter named " + this.splitterName);
        }
        splitter.initialize(cah.header);
        LOG.info("splitter is " + splitter);
        pw = super.openFileOrStdoutAsPrintStream(outorNull);
        // read variants
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(cah.header);
        String prev_contig = null;
        for (;;) {
            final String line = in.readLine();
            final VariantContext variant = (line == null ? null : progress.watch(cah.codec.decode(line)));
            if (variant == null || !variant.getContig().equals(prev_contig)) {
                if (sortingcollection != null) {
                    sortingcollection.doneAdding();
                    iter = sortingcollection.iterator();
                    LOG.info("dumping data for CONTIG: \"" + prev_contig + "\"");
                    final EqualRangeIterator<KeyAndLine> eqiter = new EqualRangeIterator<>(iter, new Comparator<KeyAndLine>() {
                        @Override
                        public int compare(final KeyAndLine o1, final KeyAndLine o2) {
                            return o1.key.compareTo(o2.key);
                        }
                    });
                    while (eqiter.hasNext()) {
                        final List<KeyAndLine> buffer = eqiter.next();
                        final KeyAndLine first = buffer.get(0);
                        LOG.info(first.key);
                        final List<VariantContext> variants = new ArrayList<>(buffer.size());
                        boolean has_only_filtered = true;
                        for (final KeyAndLine kal : buffer) {
                            final VariantContext ctx = cah.codec.decode(kal.ctx);
                            variants.add(ctx);
                            if (isDebuggingVariant(ctx)) {
                                LOG.info("Adding variant to list for key " + kal.key + " " + shortName(ctx));
                            }
                            if (!ctx.getContig().equals(prev_contig)) {
                                eqiter.close();
                                return wrapException("illegal state");
                            }
                            if (!ctx.isFiltered() || this.acceptFiltered) {
                                has_only_filtered = false;
                                // do NOT break here: every variant in the buffer must still be decoded into 'variants'
                            }
                        }
                        // all ctx are filtered
                        if (has_only_filtered) {
                            LOG.warn("ALL IS FILTERED in " + first.key);
                            if (allDiscardedLog != null) {
                                for (final VariantContext ctx : variants) {
                                    if (isDebuggingVariant(ctx)) {
                                        LOG.info("Variant " + shortName(ctx) + " is part of never filtered for " + first.key);
                                    }
                                    allDiscardedLog.println(String.join("\t", first.key, ctx.getContig(), String.valueOf(ctx.getStart()), ctx.getReference().getDisplayString(), ctx.getAlternateAllele(0).getDisplayString(), String.valueOf(ctx.getFilters())));
                                }
                            }
                            continue;
                        }
                        // save vcf file
                        final VariantContextWriter out = VCFUtils.createVariantContextWriterToOutputStream(IOUtils.uncloseableOutputStream(pw));
                        final VCFHeader header2 = addMetaData(new VCFHeader(cah.header));
                        header2.addMetaDataLine(new VCFHeaderLine(VCF_HEADER_SPLITKEY, first.key));
                        out.writeHeader(header2);
                        for (final VariantContext ctx : variants) {
                            if (isDebuggingVariant(ctx)) {
                                LOG.info("saving variant " + shortName(ctx) + " to final output with key=" + first.key);
                            }
                            out.add(ctx);
                        }
                        // safe to close: 'pw' was wrapped with IOUtils.uncloseableOutputStream, so the shared stream stays open
                        out.close();
                        pw.flush();
                    }
                    eqiter.close();
                    iter.close();
                    iter = null;
                    // dispose sorting collection
                    sortingcollection.cleanup();
                    sortingcollection = null;
                }
                // EOF met
                if (variant == null)
                    break;
                prev_contig = variant.getContig();
            }
            if (sortingcollection == null) {
                /* create sorting collection for new contig */
                sortingcollection = SortingCollection.newInstance(KeyAndLine.class, new KeyAndLineCodec(), new KeyAndLineComparator(), this.writingSortingCollection.maxRecordsInRam, this.writingSortingCollection.getTmpPaths());
                sortingcollection.setDestructiveIteration(true);
            }
            if (variant.getAlternateAlleles().size() != 1) {
                return wrapException("Expected only one allele per variant. Please use VcfMultiToOneAllele https://github.com/lindenb/jvarkit/wiki/VcfMultiToOneAllele.");
            }
            // no check for ctx.isFiltered() here, this is done later
            for (final String key : splitter.keys(variant)) {
                if (isDebuggingVariant(variant)) {
                    LOG.info("Adding variant with key " + key + " " + shortName(variant));
                }
                sortingcollection.add(new KeyAndLine(key, line));
            }
        }
        progress.finish();
        pw.flush();
        pw.close();
        pw = null;
        if (allDiscardedLog != null) {
            allDiscardedLog.flush();
            allDiscardedLog.close();
            allDiscardedLog = null;
        }
        return RETURN_OK;
    } catch (final Exception err) {
        return wrapException(err);
    } finally {
        CloserUtil.close(iter);
        if (sortingcollection != null)
            sortingcollection.cleanup();
        CloserUtil.close(in);
        CloserUtil.close(pw);
        CloserUtil.close(allDiscardedLog);
    }
}
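The detail that makes the concatenated output work is that each per-key VariantContextWriter is opened on the same shared PrintStream through IOUtils.uncloseableOutputStream, so out.close() finishes the current chunk without closing pw. A minimal sketch of that pattern with plain htsjdk follows; the wrapper class and method names are illustrative, not the jvarkit helpers.

import java.io.FilterOutputStream;
import java.io.IOException;
import java.io.OutputStream;

import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.writer.Options;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder;
import htsjdk.variant.vcf.VCFHeader;

// Hypothetical sketch: write one complete VCF (header + variants) per key into a shared stream.
class PerKeyVcfWriterSketch {
    // Wrapper whose close() only flushes, so the shared stream survives writer.close().
    private static OutputStream uncloseable(final OutputStream delegate) {
        return new FilterOutputStream(delegate) {
            @Override
            public void close() throws IOException {
                flush();
            }
        };
    }

    static void writeChunk(final OutputStream sharedOut, final VCFHeader header, final Iterable<VariantContext> variants) {
        final VariantContextWriter out = new VariantContextWriterBuilder()
            .setOutputVCFStream(uncloseable(sharedOut))
            .unsetOption(Options.INDEX_ON_THE_FLY) // plain stream output, no index
            .build();
        out.writeHeader(header);
        for (final VariantContext ctx : variants) {
            out.add(ctx);
        }
        out.close(); // closes this chunk's writer; sharedOut stays open for the next key
    }
}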
Use of htsjdk.variant.variantcontext.writer.VariantContextWriter in project jvarkit by lindenb.
The class VcfFilterNotInPedigree, method doVcfToVcf:
@Override
protected int doVcfToVcf(final String inputName, final VcfIterator in, final VariantContextWriter delegate) {
    final VariantContextWriter out = this.component.open(delegate);
    final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(in.getHeader()).logger(LOG);
    out.writeHeader(in.getHeader());
    while (in.hasNext()) {
        out.add(progress.watch(in.next()));
    }
    progress.finish();
    out.close();
    return 0;
}
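The loop body is a plain copy because the real work happens in the writer returned by this.component.open(delegate), which decorates the downstream VariantContextWriter; the same pattern appears in VcfInjectPedigree just below. A minimal, hypothetical decorator of this kind might look like the sketch below (the class name and predicate are illustrative, not the jvarkit implementation; setHeader assumes a reasonably recent htsjdk).

import java.util.function.Predicate;

import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.vcf.VCFHeader;

// Hypothetical decorator: forwards only the variants accepted by a predicate.
class FilteringVariantContextWriter implements VariantContextWriter {
    private final VariantContextWriter delegate;
    private final Predicate<VariantContext> accept;

    FilteringVariantContextWriter(final VariantContextWriter delegate, final Predicate<VariantContext> accept) {
        this.delegate = delegate;
        this.accept = accept;
    }

    @Override
    public void writeHeader(final VCFHeader header) {
        this.delegate.writeHeader(header);
    }

    @Override
    public void setHeader(final VCFHeader header) {
        this.delegate.setHeader(header);
    }

    @Override
    public void add(final VariantContext ctx) {
        if (this.accept.test(ctx)) this.delegate.add(ctx);
    }

    @Override
    public boolean checkError() {
        return this.delegate.checkError();
    }

    @Override
    public void close() {
        this.delegate.close();
    }
}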
Use of htsjdk.variant.variantcontext.writer.VariantContextWriter in project jvarkit by lindenb.
The class VcfInjectPedigree, method doVcfToVcf:
@Override
protected int doVcfToVcf(final String inputName, final VcfIterator in, final VariantContextWriter delegate) {
    final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(in.getHeader()).logger(LOG);
    final VariantContextWriter out = this.component.open(delegate);
    out.writeHeader(in.getHeader());
    while (in.hasNext()) {
        out.add(progress.watch(in.next()));
    }
    progress.finish();
    out.close();
    return 0;
}
Use of htsjdk.variant.variantcontext.writer.VariantContextWriter in project jvarkit by lindenb.
The class CaseControlPlot, method doWork:
@Override
public int doWork(final List<String> args) {
    ArchiveFactory archiveFactory = null;
    VcfIterator in = null;
    VariantContextWriter teeVariantWriter = null;
    final List<CaseControlExtractor> extractors = new ArrayList<>();
    try {
        in = super.openVcfIterator(oneFileOrNull(args));
        final VCFHeader header = in.getHeader();
        extractors.addAll(parseConfigFile(header));
        final Pedigree pedigree;
        if (this.pedigreeFile != null) {
            pedigree = Pedigree.newParser().parse(this.pedigreeFile);
        } else {
            pedigree = Pedigree.newParser().parse(header);
        }
        if (pedigree == null || pedigree.isEmpty()) {
            LOG.error("No pedigree defined, or it is empty");
            return -1;
        }
        final Set<Pedigree.Person> casepersons = pedigree.getPersons().stream().filter(F -> F.isAffected() && header.getSampleNamesInOrder().indexOf(F.getId()) != -1).collect(Collectors.toSet());
        if (casepersons.isEmpty()) {
            LOG.error("No Affected individuals in pedigree/header");
            return -1;
        }
        final Set<Pedigree.Person> controlpersons = pedigree.getPersons().stream().filter(F -> F.isUnaffected() && header.getSampleNamesInOrder().indexOf(F.getId()) != -1).collect(Collectors.toSet());
        if (controlpersons.isEmpty()) {
            LOG.error("No Unaffected individuals in pedigree/header");
            return -1;
        }
        if (this.teeToStdout) {
            teeVariantWriter = super.openVariantContextWriter(null);
            teeVariantWriter.writeHeader(header);
        }
        archiveFactory = ArchiveFactory.open(this.outputDirOrZip);
        for (final CaseControlExtractor extractor : extractors) {
            extractor.pw = archiveFactory.openWriter(extractor.name);
        }
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header);
        while (in.hasNext()) {
            final VariantContext ctx = progress.watch(in.next());
            if (teeVariantWriter != null)
                teeVariantWriter.add(ctx);
            for (final CaseControlExtractor handler : extractors) {
                handler.visit(ctx, casepersons, controlpersons);
            }
        }
        for (final CaseControlExtractor handler : extractors) {
            handler.close();
        }
        progress.finish();
        if (teeVariantWriter != null) {
            teeVariantWriter.close();
            teeVariantWriter = null;
        }
        in.close();
        in = null;
        archiveFactory.close();
        archiveFactory = null;
        return RETURN_OK;
    } catch (final Exception e) {
        LOG.error(e);
        return -1;
    } finally {
        CloserUtil.close(in);
        CloserUtil.close(archiveFactory);
        CloserUtil.close(teeVariantWriter);
        for (final CaseControlExtractor handler : extractors) {
            handler.close();
        }
    }
}
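Each CaseControlExtractor is handed the variant together with the case and control Pedigree.Person sets, whose IDs were checked above against the VCF sample names. As a purely hypothetical illustration of the kind of value such an extractor could derive (the real extractors come from parseConfigFile and are not shown here), the sketch below computes the alternate-allele frequency over one group of samples; a case/control plot would then typically use the case value and the control value as the two coordinates of a point.

import java.util.Set;

import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.VariantContext;

// Hypothetical helper, not part of jvarkit: alternate-allele frequency in a set of samples.
class AltFreqSketch {
    static double altAlleleFrequency(final VariantContext ctx, final Set<String> sampleNames) {
        final Allele alt = ctx.getAlternateAllele(0); // assumes a biallelic site
        int altCount = 0;
        int calledAlleles = 0;
        for (final String sample : sampleNames) {
            final Genotype g = ctx.getGenotype(sample);
            if (g == null || !g.isCalled()) continue; // sample missing from the VCF or genotype not called
            altCount += g.countAllele(alt);
            calledAlleles += g.getPloidy();
        }
        return calledAlleles == 0 ? Double.NaN : altCount / (double) calledAlleles;
    }
}

// Example use with the sets built in doWork():
//   final Set<String> caseSamples = casepersons.stream().map(P -> P.getId()).collect(Collectors.toSet());
//   final double caseFreq = AltFreqSketch.altAlleleFrequency(ctx, caseSamples);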
Use of htsjdk.variant.variantcontext.writer.VariantContextWriter in project jvarkit by lindenb.
The class VcfBurdenFilterGenes, method doVcfToVcf:
@Override
protected int doVcfToVcf(final String inputName, final VcfIterator in, final VariantContextWriter out) {
    final VCFHeader header = in.getHeader();
    try {
        final VCFHeader h2 = addMetaData(new VCFHeader(header));
        final VCFFilterHeaderLine filterControlsHeader;
        if (!StringUtil.isBlank(this.filterTag)) {
            filterControlsHeader = new VCFFilterHeaderLine(this.filterTag.trim(), "Genes not in list " + this.geneFile);
            h2.addMetaDataLine(filterControlsHeader);
        } else {
            filterControlsHeader = null;
        }
        final List<String> lookColumns = Arrays.asList("CCDS", "Feature", "ENSP", "Gene", "HGNC", "HGNC_ID", "SYMBOL", "RefSeq");
        final VepPredictionParser vepParser = new VepPredictionParserFactory(header).get();
        final AnnPredictionParser annParser = new AnnPredictionParserFactory(header).get();
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header.getSequenceDictionary()).logger(LOG);
        out.writeHeader(h2);
        while (in.hasNext() && !out.checkError()) {
            final VariantContext ctx = progress.watch(in.next());
            boolean keep = false;
            final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
            // hard-filtering mode (no FILTER tag): drop the prediction attributes, they are rebuilt below
            if (filterControlsHeader == null) {
                vcb.rmAttribute(vepParser.getTag());
                vcb.rmAttribute(annParser.getTag());
            }
            final List<String> newVepList = new ArrayList<>();
            for (final String predStr : ctx.getAttributeAsList(vepParser.getTag()).stream().map(O -> String.class.cast(O)).collect(Collectors.toList())) {
                final VepPredictionParser.VepPrediction pred = vepParser.parseOnePrediction(ctx, predStr);
                for (final String col : lookColumns) {
                    final String token = pred.getByCol(col);
                    if (!StringUtil.isBlank(token) && this.geneNames.contains(token)) {
                        newVepList.add(predStr);
                        keep = true;
                        break;
                    }
                }
            }
            final List<String> newEffList = new ArrayList<>();
            for (final String predStr : ctx.getAttributeAsList(annParser.getTag()).stream().map(O -> String.class.cast(O)).collect(Collectors.toList())) {
                final AnnPredictionParser.AnnPrediction pred = annParser.parseOnePrediction(predStr);
                final String token = pred.getGeneName();
                if (!StringUtil.isBlank(token) && this.geneNames.contains(token)) {
                    newEffList.add(predStr);
                    keep = true;
                    break;
                }
            }
            // hard-filtering mode: keep only the predictions that matched the gene list
            if (filterControlsHeader == null) {
                if (!newVepList.isEmpty())
                    vcb.attribute(vepParser.getTag(), newVepList);
                if (!newEffList.isEmpty())
                    vcb.attribute(annParser.getTag(), newEffList);
            }
            if (filterControlsHeader != null) {
                if (!keep) {
                    vcb.filter(filterControlsHeader.getID());
                } else if (!ctx.isFiltered()) {
                    vcb.passFilters();
                }
                out.add(vcb.make());
            } else {
                if (keep)
                    out.add(vcb.make());
            }
        }
        progress.finish();
        return RETURN_OK;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(in);
    }
}
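The branch at the end is the classic soft-filter versus hard-filter choice: when a filter tag is configured, every variant is written and non-matching records merely receive the FILTER id declared by the VCFFilterHeaderLine; otherwise non-matching records are dropped entirely. A condensed, hypothetical restatement of that decision with plain htsjdk is sketched below (the class name and predicate are illustrative, not the jvarkit implementation).

import java.util.function.Predicate;

import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextBuilder;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.vcf.VCFFilterHeaderLine;

// Hypothetical helper: soft-filter (tag FILTER) vs hard-filter (drop the record).
class GeneFilterSketch {
    static void apply(final VariantContext ctx, final Predicate<VariantContext> keep,
            final VCFFilterHeaderLine softFilterOrNull, final VariantContextWriter out) {
        final boolean matched = keep.test(ctx);
        if (softFilterOrNull != null) {
            // soft mode: every variant is written; non-matching ones get the declared FILTER id
            final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
            if (!matched) {
                vcb.filter(softFilterOrNull.getID());
            } else if (!ctx.isFiltered()) {
                vcb.passFilters();
            }
            out.add(vcb.make());
        } else if (matched) {
            // hard mode: only matching variants reach the output
            out.add(ctx);
        }
    }
}

As in the snippet above, the VCFFilterHeaderLine has to be registered on the output header with addMetaDataLine before writeHeader is called.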