use of htsjdk.variant.vcf.VCFReader in project jvarkit by lindenb.
the class VcfVcf method doVcfToVcf.
@Override
protected int doVcfToVcf(String inputName, VCFIterator r, VariantContextWriter w) {
try {
CloseableIterator<VariantContext> iter = null;
LOG.info("opening file: " + this.TABIX);
VCFReader tabix = VCFReaderFactory.makeDefault().open(this.TABIX, true);
VCFHeader header3 = tabix.getHeader();
VCFHeader header1 = r.getHeader();
VCFHeader h2 = new VCFHeader(header1.getMetaDataInInputOrder(), header1.getSampleNamesInOrder());
for (String infoId : this.INFO_IDS) {
VCFInfoHeaderLine vihl = header3.getInfoHeaderLine(infoId);
if (vihl == null) {
LOG.warn("Not INFO=" + infoId + " in " + TABIX);
continue;
}
if (h2.getInfoHeaderLine(infoId) != null) {
LOG.warn("Input already contains INFO=" + vihl);
}
h2.addMetaDataLine(vihl);
}
if (ALT_CONFLICT_FLAG != null) {
h2.addMetaDataLine(new VCFInfoHeaderLine(ALT_CONFLICT_FLAG, 1, VCFHeaderLineType.Flag, "conflict ALT allele with " + this.TABIX));
}
w.writeHeader(h2);
while (r.hasNext()) {
VariantContext ctx1 = r.next();
VariantContextBuilder vcb = new VariantContextBuilder(ctx1);
String BEST_ID = null;
boolean best_id_match_alt = false;
List<VariantContext> variantsList = new ArrayList<VariantContext>();
iter = tabix.query(ctx1.getContig(), Math.max(0, ctx1.getStart() - 1), (ctx1.getEnd() + 1));
while (iter.hasNext()) {
VariantContext ctx3 = iter.next();
if (!ctx3.getContig().equals(ctx1.getContig()))
continue;
if (ctx3.getStart() != ctx1.getStart())
continue;
if (ctx3.getEnd() != ctx1.getEnd())
continue;
if (ctx1.getReference().equals(ctx3.getReference()) && ctx1.getAlternateAlleles().equals(ctx3.getAlternateAlleles())) {
variantsList.clear();
variantsList.add(ctx3);
break;
} else {
variantsList.add(ctx3);
}
}
CloserUtil.close(iter);
iter = null;
for (VariantContext ctx3 : variantsList) {
if (this.REF_ALLELE_MATTERS && !ctx1.getReference().equals(ctx3.getReference())) {
continue;
}
if (this.ALT_ALLELES_MATTERS && !ctx1.getAlternateAlleles().equals(ctx3.getAlternateAlleles())) {
continue;
}
if (ctx3.getID() != null && this.REPLACE_ID) {
if (BEST_ID != null && best_id_match_alt) {
// nothing
} else {
BEST_ID = ctx3.getID();
best_id_match_alt = ctx1.getAlternateAlleles().equals(ctx3.getAlternateAlleles());
}
}
for (String id : this.INFO_IDS) {
Object info3 = ctx3.getAttribute(id);
if (info3 == null) {
continue;
}
Object info1 = ctx1.getAttribute(id);
if (info1 != null && !this.REPLACE_INFO_FIELD) {
continue;
}
vcb.attribute(id, info3);
}
if (ALT_CONFLICT_FLAG != null && !ctx1.getAlternateAlleles().equals(ctx3.getAlternateAlleles())) {
vcb.attribute(ALT_CONFLICT_FLAG, true);
}
}
if (BEST_ID != null) {
vcb.id(BEST_ID);
}
w.add(vcb.make());
}
tabix.close();
return 0;
} catch (Exception err) {
LOG.error(err);
return -1;
}
}
use of htsjdk.variant.vcf.VCFReader in project jvarkit by lindenb.
the class VcfPeekAf method beforeVcf.
@Override
protected int beforeVcf() {
final List<AFPeeker> all_peekers = new ArrayList<>();
all_peekers.add(new InfoAcAnPeeker());
all_peekers.add(new InfoAfPeeker());
all_peekers.add(new GtPeeker());
all_peekers.add(new CustomInfoPeeker());
this.indexedVcfFileReader = null;
if (this.buffer_size < 1) {
LOG.error("bad buffer-size");
return -1;
}
try {
if (this.list_peekers) {
try (PrintWriter out = super.openPathOrStdoutAsPrintWriter(this.outputFile)) {
for (final AFPeeker p : all_peekers) out.println(p.getName() + "\n\t" + p.getDescription());
out.flush();
}
System.exit(0);
}
if (StringUtils.isBlank(this.peekerName)) {
LOG.error("peeker name is empty");
return -1;
}
this.peeker = all_peekers.stream().filter(P -> P.getName().equals(this.peekerName)).findFirst().orElse(null);
if (this.peeker == null) {
LOG.error("peeker " + this.peekerName + " not found in " + all_peekers.stream().map(P -> P.getName()).collect(Collectors.joining(";")));
return -1;
}
final VCFReader reader0 = VCFReaderFactory.makeDefault().open(this.resourceVcfFile, true);
this.indexedVcfFileReader = new BufferedVCFReader(reader0, this.buffer_size);
this.peeker.initialize(this.indexedVcfFileReader.getHeader());
this.indexedVcfFileReader.setSimplifier(peeker::sanitize);
return 0;
} catch (final Throwable err) {
LOG.error(err);
return -1;
}
}
use of htsjdk.variant.vcf.VCFReader in project jvarkit by lindenb.
the class FixVcfMissingGenotypesTest method test01.
@Test
public void test01() throws Exception {
final TestSupport support = new TestSupport();
try {
Path nocallvcffile = support.createTmpPath(".vcf");
VCFReader r1 = VCFReaderFactory.makeDefault().open(Paths.get(support.resource("rotavirus_rf.vcf.gz")), false);
final VariantContextWriter vcw = VCFUtils.createVariantContextWriter(nocallvcffile.toFile());
vcw.writeHeader(r1.getHeader());
r1.iterator().stream().forEach(V -> {
VariantContextBuilder vcb = new VariantContextBuilder(V);
vcb.genotypes(V.getGenotypes().stream().map(G -> GenotypeBuilder.createMissing(G.getSampleName(), 2)).collect(Collectors.toList()));
vcw.add(vcb.make());
});
vcw.close();
r1.close();
final Path output = support.createTmpPath(".vcf");
final FixVcfMissingGenotypes cmd = new FixVcfMissingGenotypes();
Assert.assertEquals(0, cmd.instanceMain(new String[] { "-B", support.resource("S1.bam"), "-B", support.resource("S2.bam"), "-B", support.resource("S3.bam"), "-B", support.resource("S4.bam"), "-B", support.resource("S5.bam"), "-o", output.toString(), nocallvcffile.toString() }));
support.assertIsVcf(output);
Assert.assertFalse(support.variantStream(output).flatMap(V -> V.getGenotypes().stream()).noneMatch(G -> G.isHomRef()), "at least one GT should be 0/0");
} finally {
support.removeTmpFiles();
}
}
use of htsjdk.variant.vcf.VCFReader in project jvarkit by lindenb.
the class VcfBurdenMAFTest method test01.
@Test
public void test01() throws IOException {
try {
final String inputFile = support.resource("rotavirus_rf.vcf.gz");
try (final VCFReader r0 = VCFReaderFactory.makeDefault().open(Paths.get(inputFile), false)) {
final VCFHeader vcfheader = r0.getHeader();
if (vcfheader.getNGenotypeSamples() < 1) {
return;
}
}
final Path ped = support.createRandomPedigreeFromFile(inputFile);
if (ped == null) {
Reporter.getCurrentTestResult().setAttribute("warn", "No Pedigree for " + inputFile);
return;
}
final Path output = support.createTmpPath(".vcf");
Assert.assertEquals(new VcfBurdenMAF().instanceMain(new String[] { "-o", output.toString(), "--max-af", "5/100", "--pedigree", ped.toString(), inputFile.toString() }), 0);
support.assertIsVcf(output);
} finally {
support.removeTmpFiles();
}
}
use of htsjdk.variant.vcf.VCFReader in project jvarkit by lindenb.
the class VcfCutSamplesTest method test01.
@Test(dataProvider = "src01")
public void test01(final String vcfpath) throws IOException {
try {
VCFReader r = VCFReaderFactory.makeDefault().open(Paths.get(vcfpath), false);
final List<String> samples = new ArrayList<>(r.getHeader().getSampleNamesInOrder());
r.close();
if (samples.isEmpty())
return;
Collections.shuffle(samples, support.random);
final List<String> keep = samples.subList(0, support.random.nextInt(samples.size()));
final Path vcfOut = support.createTmpPath(".vcf");
List<String> args = new ArrayList<>();
args.add("-o");
args.add(vcfOut.toString());
for (String s : keep) {
args.add("-S");
args.add(s);
}
args.add(vcfpath);
Assert.assertEquals(new VcfCutSamples().instanceMain(args), 0);
support.assertIsVcf(vcfOut);
r = VCFReaderFactory.makeDefault().open(vcfOut, false);
Assert.assertEquals(new TreeSet<>(keep), new TreeSet<>(r.getHeader().getSampleNamesInOrder()));
r.close();
} finally {
support.removeTmpFiles();
}
}
Aggregations