use of htsjdk.variant.vcf.VCFHeader in project jvarkit by lindenb.
the class ConvertVcfChromosomes method doVcfToVcf.
/**
 * Copies the input VCF to {@code out} while renaming the contig of every variant
 * through the mapping loaded from {@code mappingFile}.
 *
 * The output header is rebuilt from the input header without its contig lines;
 * when the input carries a sequence dictionary, a converted dictionary is set instead.
 * A variant whose contig has no mapping (the converter returns {@code null}) is dropped,
 * and a warning is logged the first time such a contig is met, as long as fewer than
 * 1000 distinct unmapped contigs have been recorded.
 *
 * @param inputName name of the input (not used here)
 * @param iterin    source of the header and the variants
 * @param out       writer receiving the new header and the renamed variants
 * @return always 0
 */
@Override
protected int doVcfToVcf(String inputName, VcfIterator iterin, VariantContextWriter out) {
    final ContigNameConverter converter = ContigNameConverter.fromFile(mappingFile);
    converter.setOnNotFound(this.onNotFound);

    // contigs already reported as unmapped, so each one is logged only once
    final Set<String> reported = new HashSet<>();

    final VCFHeader srcHeader = iterin.getHeader();
    final VCFHeader dstHeader = new VCFHeader(
            srcHeader.getMetaDataInInputOrder()
                    .stream()
                    .filter(line -> !line.getKey().equals(VCFHeader.CONTIG_KEY))
                    .collect(Collectors.toSet()),
            srcHeader.getSampleNamesInOrder());
    if (srcHeader.getSequenceDictionary() != null) {
        dstHeader.setSequenceDictionary(converter.convertDictionary(srcHeader.getSequenceDictionary()));
    }
    out.writeHeader(dstHeader);

    while (iterin.hasNext()) {
        final VariantContext variant = iterin.next();
        final String oldContig = variant.getContig();
        final String mappedContig = converter.apply(oldContig);

        if (mappedContig != null) {
            out.add(new VariantContextBuilder(variant).chr(mappedContig).make());
            continue;
        }

        // unknown chromosome: the variant is skipped, warn once per contig (bounded)
        if (reported.size() < 1000 && reported.add(oldContig)) {
            LOG.warn("Cannot find contig for " + oldContig);
        }
    }
    return 0;
}
use of htsjdk.variant.vcf.VCFHeader in project jvarkit by lindenb.
the class DownSampleVcf method doVcfToVcf.
/**
 * Writes a uniform random sample of at most {@code reservoir_size} variants
 * from {@code in} to {@code out}.
 *
 * Sampling uses reservoir sampling (Algorithm R): the first {@code reservoir_size}
 * variants fill the buffer; after that the i-th variant read (1-based) replaces a
 * random buffered variant with probability {@code reservoir_size / i}. Every input
 * variant therefore has the same chance of being kept. The previous implementation
 * overwrote a slot for every new variant, which strongly favoured variants near the
 * end of the input.
 *
 * The random generator is seeded with {@code seed}, or with the current time when
 * {@code seed} is -1. Variants are written in buffer order, not re-sorted.
 *
 * @param inputName name of the input (not used here)
 * @param in        source of the header and the variants
 * @param out       writer receiving the header and the sampled variants
 * @return always 0
 */
@Override
protected int doVcfToVcf(final String inputName, final VcfIterator in, final VariantContextWriter out) {
    final Random rand = new Random(this.seed == -1L ? System.currentTimeMillis() : this.seed);
    final List<VariantContext> buffer = new ArrayList<>(this.reservoir_size);
    final VCFHeader h2 = new VCFHeader(in.getHeader());
    super.addMetaData(h2);
    final SAMSequenceDictionaryProgress progess = new SAMSequenceDictionaryProgress(in.getHeader()).logger(LOG);
    out.writeHeader(h2);
    if (this.reservoir_size != 0) {
        // number of variants read so far (long: the input may exceed Integer.MAX_VALUE records)
        long seen = 0L;
        while (in.hasNext()) {
            final VariantContext ctx = progess.watch(in.next());
            ++seen;
            if (buffer.size() < this.reservoir_size) {
                // reservoir not full yet: keep everything
                buffer.add(ctx);
                continue;
            }
            // draw a slot in [0, seen); the variant is kept only if the slot falls inside the reservoir
            final long slot = seen <= Integer.MAX_VALUE
                    ? rand.nextInt((int) seen)
                    : (long) (rand.nextDouble() * seen);
            if (slot < buffer.size()) {
                buffer.set((int) slot, ctx);
            }
        }
    }
    for (final VariantContext kept : buffer) {
        out.add(kept);
    }
    progess.finish();
    return 0;
}
use of htsjdk.variant.vcf.VCFHeader in project jvarkit by lindenb.
the class VcfJaspar method doVcfToVcf.
/**
 * Annotates each variant with the JASPAR matrices whose window overlaps the variant start.
 *
 * For every variant and every matrix in {@code jasparDb}, each window of the matrix
 * length that begins at an offset in {@code [max(0, start - length), start)} and fits
 * in the contig sequence is scored on the forward sequence and, if that is not a hit,
 * on its reverse complement. A window is a hit when its score is positive and at least
 * {@code fraction_of_max} times the matrix maximum; only the first hitting strand of a
 * window is reported. Hits are stored in the {@code JASPAR} INFO attribute as
 * {@code (Name|length|Score/1000|pos|strand)}. Variants without a hit are written unchanged.
 *
 * @param inputName name of the input (not used here)
 * @param in        source of the header and the variants
 * @param out       writer receiving the extended header and the variants
 * @return {@code RETURN_OK}
 */
@Override
protected int doVcfToVcf(String inputName, VcfIterator in, VariantContextWriter out) {
    final String infoTag = "JASPAR";

    final VCFHeader outHeader = new VCFHeader(in.getHeader());
    addMetaData(outHeader);
    outHeader.addMetaDataLine(new VCFInfoHeaderLine(
            infoTag,
            VCFHeaderLineCount.UNBOUNDED,
            VCFHeaderLineType.String,
            "Jaspar pattern overlapping: Format: (Name|length|Score/1000|pos|strand)"));
    out.writeHeader(outHeader);

    // sequence of the contig currently being processed; reloaded whenever the contig changes
    GenomicSequence contigSequence = null;

    while (in.hasNext()) {
        final VariantContext variant = in.next();
        final String contig = variant.getContig();

        final boolean sameContig = contigSequence != null && contigSequence.getChrom().equals(contig);
        if (!sameContig) {
            LOG.info("Loading sequence " + contig);
            contigSequence = new GenomicSequence(this.indexedFastaSequenceFile, contig);
        }

        final Set<String> matches = new HashSet<String>();
        for (final Matrix pwm : this.jasparDb) {
            final int firstOffset = Math.max(0, variant.getStart() - pwm.length());
            for (int offset = firstOffset;
                    offset < variant.getStart() && offset + pwm.length() <= contigSequence.length();
                    ++offset) {
                final CharSequence plusStrand = new SubSequence(contigSequence, offset, offset + pwm.length());
                final CharSequence minusStrand = new RevCompCharSequence(plusStrand);

                // forward strand first, then reverse complement; stop at the first strand that hits
                for (int strand = 0; strand < 2; ++strand) {
                    final boolean forward = (strand == 0);
                    final double score = pwm.score(forward ? plusStrand : minusStrand);
                    if (score > 0 && score >= pwm.max() * this.fraction_of_max) {
                        final String hit = new StringBuilder("(")
                                .append(pwm.getName().replaceAll("[ \t;=]+", "/"))
                                .append("|")
                                .append(pwm.length())
                                .append("|")
                                .append((int) (1000.0 * (score / pwm.max())))
                                .append("|")
                                .append(offset + 1)
                                .append("|")
                                .append(forward ? '+' : '-')
                                .append(")")
                                .toString();
                        matches.add(hit);
                        break;
                    }
                }
            }
        }

        if (matches.isEmpty()) {
            out.add(variant);
        } else {
            final VariantContextBuilder annotated = new VariantContextBuilder(variant);
            annotated.attribute(infoTag, matches.toArray(new String[matches.size()]));
            out.add(annotated.make());
        }
    }
    return RETURN_OK;
}
use of htsjdk.variant.vcf.VCFHeader in project jvarkit by lindenb.
the class AnnPredictionParserFactory method createDefaultParser.
/**
 * Creates a parser when no real VCF header is available.
 *
 * A throw-away header is built containing a single INFO line declared under
 * {@code AnnPredictionParser.getDefaultTag()} (unbounded count, String type,
 * empty description); the parser is then created from that header and {@code getTag()}.
 *
 * @return a parser backed by the minimal temporary header
 */
public AnnPredictionParser createDefaultParser() {
    final VCFHeader stubHeader = new VCFHeader();
    stubHeader.addMetaDataLine(new VCFInfoHeaderLine(
            AnnPredictionParser.getDefaultTag(),
            VCFHeaderLineCount.UNBOUNDED,
            VCFHeaderLineType.String,
            ""));
    return new AnnPredictionParser(stubHeader, getTag());
}
use of htsjdk.variant.vcf.VCFHeader in project jvarkit by lindenb.
the class VcfMultiToOneAlleleTest method createVcf.
/**
 * Test helper: writes a one-variant VCF built from {@code genotypes}, runs
 * {@code VcfMultiToOneAllele} on it and returns the variants of the resulting file.
 *
 * The input variant sits on contig "1" at position 1, its alleles are the union of the
 * genotype alleles, and its attributes are filled in by a
 * {@code VariantAttributesRecalculator} before it is written.
 *
 * @param params    extra command-line arguments, split and passed to the tool
 * @param genotypes genotypes of the single input variant; their sample names become the header samples
 * @return the variants read back from the tool's output file
 * @throws IOException if a temporary file cannot be created or accessed
 */
private List<VariantContext> createVcf(final String params, final List<Genotype> genotypes) throws IOException {
    final Set<VCFHeaderLine> metaData = new HashSet<>();
    VCFStandardHeaderLines.addStandardFormatLines(metaData, true, "GT", "DP");
    VCFStandardHeaderLines.addStandardInfoLines(metaData, true, "AF", "AN", "AC", "DP");

    // A TreeSet keeps the names sorted; sorting a stream and then collecting it
    // into a plain (hash) set would discard that order again.
    final Set<String> sampleNames = genotypes.stream()
            .map(G -> G.getSampleName())
            .collect(Collectors.toCollection(java.util.TreeSet::new));
    final VCFHeader vcfheader = new VCFHeader(metaData, sampleNames);

    final VariantAttributesRecalculator calc = new VariantAttributesRecalculator();
    calc.setHeader(vcfheader);
    final File vcfOut = super.createTmpFile(".vcf");
    final File vcfOut2 = super.createTmpFile(".vcf");

    final VariantContextBuilder vcb = new VariantContextBuilder();
    vcb.chr("1");
    vcb.start(1);
    vcb.stop(1);
    vcb.alleles(genotypes.stream().flatMap(G -> G.getAlleles().stream()).collect(Collectors.toSet()));
    vcb.genotypes(genotypes);

    // try-with-resources: the writer is closed even if writing the header or the variant fails
    try (final VariantContextWriter w = VCFUtils.createVariantContextWriter(vcfOut)) {
        w.writeHeader(vcfheader);
        w.add(calc.apply(vcb.make()));
    }

    Assert.assertEquals(new VcfMultiToOneAllele().instanceMain(newCmd().add("-o").add(vcfOut2).split(params).add(vcfOut).make()), 0);
    super.assertIsVcf(vcfOut2);
    return super.variantStream(vcfOut2).collect(Collectors.toList());
}
Aggregations