use of htsjdk.variant.variantcontext.writer.VariantContextWriter in project jvarkit by lindenb.
the class VCFUtils method convertVCFHeaderToList.
/**
* convert a VCF header to List of String. Created for serialization
*/
public static List<String> convertVCFHeaderToList(final VCFHeader header) {
final ByteArrayOutputStream baos = new ByteArrayOutputStream(1000);
final VariantContextWriter vcw = createVariantContextWriterToOutputStream(baos);
vcw.writeHeader(header);
vcw.close();
return Arrays.asList(new String(baos.toByteArray()).split("\n"));
}
use of htsjdk.variant.variantcontext.writer.VariantContextWriter in project jvarkit by lindenb.
the class VcfMultiToOneAlleleTest method createVcf.
private List<VariantContext> createVcf(final String params, final List<Genotype> genotypes) throws IOException {
Set<VCFHeaderLine> metaData = new HashSet<>();
VCFStandardHeaderLines.addStandardFormatLines(metaData, true, "GT", "DP");
VCFStandardHeaderLines.addStandardInfoLines(metaData, true, "AF", "AN", "AC", "DP");
final VCFHeader vcfheader = new VCFHeader(metaData, genotypes.stream().map(G -> G.getSampleName()).sorted().collect(Collectors.toSet()));
final VariantAttributesRecalculator calc = new VariantAttributesRecalculator();
calc.setHeader(vcfheader);
final File vcfOut = super.createTmpFile(".vcf");
final File vcfOut2 = super.createTmpFile(".vcf");
VariantContextBuilder vcb = new VariantContextBuilder();
vcb.chr("1");
vcb.start(1);
vcb.stop(1);
vcb.alleles(genotypes.stream().flatMap(G -> G.getAlleles().stream()).collect(Collectors.toSet()));
vcb.genotypes(genotypes);
final VariantContextWriter w = VCFUtils.createVariantContextWriter(vcfOut);
w.writeHeader(vcfheader);
w.add(calc.apply(vcb.make()));
w.close();
Assert.assertEquals(new VcfMultiToOneAllele().instanceMain(newCmd().add("-o").add(vcfOut2).split(params).add(vcfOut).make()), 0);
super.assertIsVcf(vcfOut2);
return super.variantStream(vcfOut2).collect(Collectors.toList());
}
use of htsjdk.variant.variantcontext.writer.VariantContextWriter in project jvarkit by lindenb.
the class VcfBurdenFisherHTest method test01.
@Test(dataProvider = "all-vcf-files")
public void test01(final String inputFile) throws IOException {
final VCFFileReader r0 = new VCFFileReader(new File(inputFile), false);
final VCFHeader vcfheader = r0.getFileHeader();
if (vcfheader.getNGenotypeSamples() < 2) {
r0.close();
return;
}
final CloseableIterator<VariantContext> iter = r0.iterator();
final File inputVcf = super.createTmpFile(".vcf");
final VariantContextWriter w = VCFUtils.createVariantContextWriter(inputVcf);
w.writeHeader(vcfheader);
iter.stream().filter(V -> V.getAlleles().size() == 2).forEach(V -> w.add(V));
iter.close();
w.close();
r0.close();
final File ped = super.createRandomPedigreeFromFile(inputVcf.getPath());
if (ped == null) {
Reporter.getCurrentTestResult().setAttribute("warn", "No Pedigree for " + inputFile);
return;
}
final File output = super.createTmpFile(".vcf");
Assert.assertEquals(new VcfBurdenFisherH().instanceMain(newCmd().add("-o", output, "--pedigree", ped, inputFile).make()), 0);
assertIsVcf(output);
}
use of htsjdk.variant.variantcontext.writer.VariantContextWriter in project jvarkit by lindenb.
the class VcfCalledWithAnotherMethod method doVcfToVcf.
protected int doVcfToVcf(final String inputName, final VcfIterator in, final VariantContextWriter out) {
final List<ExternalVcf> externalVcfs = new ArrayList<>();
try {
final VCFHeader header = in.getHeader();
this.dictionary = header.getSequenceDictionary();
/**
* open primitive input
*/
if (this.dictionary == null) {
LOG.error("no dictionary in input");
return -1;
}
final Set<File> samtoolsFiles = new HashSet<>();
this.externalVcfStrs.stream().filter(S -> S.endsWith(".list")).map(S -> Paths.get(S)).forEach(P -> {
try {
samtoolsFiles.addAll(Files.readAllLines(P).stream().filter(L -> !(L.startsWith("#") || L.trim().isEmpty())).map(S -> new File(S)).collect(Collectors.toSet()));
} catch (final Exception err) {
throw new RuntimeIOException(err);
}
});
samtoolsFiles.addAll(this.externalVcfStrs.stream().filter(S -> !S.endsWith(".list")).map(S -> new File(S)).collect(Collectors.toSet()));
externalVcfs.addAll(samtoolsFiles.stream().map(F -> new ExternalVcf(F)).collect(Collectors.toList()));
/**
* check for uniq keys
*/
final Set<String> uniqKeys = new HashSet<>();
for (final ExternalVcf extvcf : externalVcfs) {
int n = 0;
for (; ; ) {
final String newkey = extvcf.key + (n == 0 ? "" : "_" + n);
if (!uniqKeys.contains(newkey)) {
extvcf.key = newkey;
uniqKeys.add(newkey);
break;
}
++n;
}
}
final VCFHeader h2 = new VCFHeader(header);
for (final ExternalVcf extvcf : externalVcfs) {
h2.addMetaDataLine(new VCFHeaderLine("otherVcf:" + extvcf.key, extvcf.vcfFile.getPath()));
}
final VCFFilterHeaderLine variantNotFoundElsewhereFILTER = (filterNameVariantNotFoundElseWhere.isEmpty() ? null : new VCFFilterHeaderLine(filterNameVariantNotFoundElseWhere, "Variant Was not found in other VCFs: " + externalVcfs.stream().map(S -> S.vcfFile.getPath()).collect(Collectors.joining(", "))));
if (variantNotFoundElsewhereFILTER != null) {
h2.addMetaDataLine(variantNotFoundElsewhereFILTER);
}
final VCFInfoHeaderLine variantFoundElseWhereKeys = new VCFInfoHeaderLine(this.infoFoundKey, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Variant was found in the VCFs designed by those keys");
h2.addMetaDataLine(variantFoundElseWhereKeys);
final VCFInfoHeaderLine variantFoundElseWhereCount = new VCFInfoHeaderLine(this.infoFoundKey, 1, VCFHeaderLineType.Integer, "Number of times the Variant was found in the VCFs");
h2.addMetaDataLine(variantFoundElseWhereCount);
final VCFFormatHeaderLine genotypeCountSame = new VCFFormatHeaderLine(this.formatCountSame, 1, VCFHeaderLineType.Integer, "Number of times the Genotype was found the same in other VCFs");
h2.addMetaDataLine(genotypeCountSame);
final VCFFormatHeaderLine genotypeCountDiscordant = new VCFFormatHeaderLine(this.formatCountDiscordant, 1, VCFHeaderLineType.Integer, "Number of times the Genotype was found dicordant in other VCFs");
h2.addMetaDataLine(genotypeCountDiscordant);
super.addMetaData(h2);
final VariantContextWriter w = super.openVariantContextWriter(outputFile);
w.writeHeader(h2);
final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(this.dictionary);
while (in.hasNext()) {
final VariantContext ctx = progress.watch(in.next());
final List<GenotypeCount> genotypeCounts = ctx.getGenotypes().stream().map(G -> new GenotypeCount(G)).collect(Collectors.toList());
final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
final Set<String> variantFoundInOtherVcfs = new HashSet<>();
for (final ExternalVcf extvcf : externalVcfs) {
final List<VariantContext> otherVariants = extvcf.get(ctx);
if (otherVariants.stream().filter(CTX2 -> {
if (ctx.getAlternateAlleles().isEmpty())
return true;
for (final Allele a : ctx.getAlternateAlleles()) {
if (CTX2.hasAllele(a))
return true;
}
return false;
}).findAny().isPresent()) {
variantFoundInOtherVcfs.add(extvcf.key);
}
for (final GenotypeCount gt : genotypeCounts) {
for (final VariantContext otherVariant : otherVariants) {
final Genotype otherGt = otherVariant.getGenotype(gt.gt.getSampleName());
if (otherGt == null)
continue;
if (gt.gt.sameGenotype(otherGt) || (this.noCallSameAsHomRef && ((gt.gt.isNoCall() && otherGt.isHomRef()) || (gt.gt.isHomRef() && otherGt.isNoCall())))) {
gt.countSame++;
} else {
gt.countDiscordant++;
}
}
}
}
vcb.genotypes(genotypeCounts.stream().map(G -> {
final GenotypeBuilder gb = new GenotypeBuilder(G.gt);
gb.attribute(genotypeCountSame.getID(), G.countSame);
gb.attribute(genotypeCountDiscordant.getID(), G.countDiscordant);
return gb.make();
}).collect(Collectors.toList()));
vcb.attribute(variantFoundElseWhereCount.getID(), variantFoundInOtherVcfs.size());
if (variantFoundInOtherVcfs.isEmpty()) {
if (variantNotFoundElsewhereFILTER != null) {
vcb.filter(variantNotFoundElsewhereFILTER.getID());
}
} else {
if (variantNotFoundElsewhereFILTER != null && !ctx.isFiltered()) {
vcb.passFilters();
}
vcb.attribute(variantFoundElseWhereKeys.getID(), new ArrayList<>(variantFoundInOtherVcfs));
}
w.add(vcb.make());
}
progress.finish();
while (!externalVcfs.isEmpty()) externalVcfs.remove(0).close();
return RETURN_OK;
} catch (Exception err) {
LOG.error(err);
return -1;
} finally {
while (!externalVcfs.isEmpty()) externalVcfs.remove(0).close();
}
}
use of htsjdk.variant.variantcontext.writer.VariantContextWriter in project jvarkit by lindenb.
the class VcfGetVariantByIndex method doWork.
public int doWork(List<String> args) {
if (this.fileListOfIndexes == null) {
LOG.error("undefined list of indexes");
return -1;
}
if (args.size() != 1) {
LOG.error("Expected only one vcf file on input");
return -1;
}
File vcfFile = new File(args.get(0));
VariantContextWriter w = null;
IndexFile indexFile = null;
BufferedReader r = null;
String line;
try {
LOG.info("Opening " + vcfFile);
if (vcfFile.getName().endsWith(".vcf.gz")) {
indexFile = new BGZIndexFile(vcfFile);
} else if (vcfFile.getName().endsWith(".vcf")) {
indexFile = new RandomAccessIndexFile(vcfFile);
} else {
LOG.error("Not a .vcf or .vcf.gz file: " + vcfFile);
return -1;
}
indexFile.open();
w = super.openVariantContextWriter(outputFile);
w.writeHeader(indexFile.getHeader());
r = IOUtils.openFileForBufferedReading(fileListOfIndexes);
while ((line = r.readLine()) != null) {
if (line.isEmpty() || line.startsWith("#"))
continue;
long ith;
try {
ith = Long.parseLong(line);
} catch (Exception e) {
LOG.error("Bad index in " + line + " ignoring");
continue;
}
// 0-based index
ith--;
if (ith < 0 || ith >= indexFile.size()) {
LOG.error("Index out of bound in " + line + " ignoring");
continue;
}
String varStr = indexFile.getLine(ith);
w.add(indexFile.getCodec().decode(varStr));
}
} catch (Exception e) {
LOG.error(e);
return -1;
} finally {
CloserUtil.close(indexFile);
CloserUtil.close(w);
CloserUtil.close(r);
}
return 0;
}
Aggregations