use of htsjdk.variant.vcf.VCFHeader in project jvarkit by lindenb.
the class VcfRegulomeDB method doVcfToVcf.
@Override
protected int doVcfToVcf(String inputName, VcfIterator in, VariantContextWriter out) {
VCFHeader header = in.getHeader();
SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header.getSequenceDictionary());
header.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "CmdLine", String.valueOf(getProgramCommandLine())));
header.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "Version", String.valueOf(getVersion())));
header.addMetaDataLine(new VCFInfoHeaderLine(this.infoTag, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Format: Position|Distance|Rank"));
out.writeHeader(header);
while (in.hasNext()) {
List<String> regDataList = new ArrayList<String>();
VariantContext ctx = in.next();
progress.watch(ctx.getContig(), ctx.getStart());
int start = Math.max(0, ctx.getStart() - this.extend);
int end = ctx.getEnd() + this.extend;
for (Iterator<RegData> iter = this.regDataTabixFileReader.iterator(ctx.getContig(), start, end); iter.hasNext(); ) {
RegData curr = iter.next();
if (this.acceptRegex != null && !this.acceptRegex.matcher(curr.rank).matches()) {
continue;
}
String str = String.valueOf(curr.chromSart) + "|" + String.valueOf(Math.abs(curr.chromSart - (ctx.getStart() - 1))) + "|" + curr.rank;
regDataList.add(str);
}
if (regDataList.isEmpty()) {
out.add(ctx);
continue;
}
VariantContextBuilder vcb = new VariantContextBuilder(ctx);
vcb.attribute(this.infoTag, regDataList.toArray());
out.add(vcb.make());
}
progress.finish();
return 0;
}
use of htsjdk.variant.vcf.VCFHeader in project jvarkit by lindenb.
the class VcfRemoveGenotypeJs method doVcfToVcf.
@Override
protected int doVcfToVcf(String inputName, VcfIterator in, VariantContextWriter out) {
try {
this.script = super.compileJavascript(scriptExpr, scriptFile);
final VCFHeader h2 = new VCFHeader(in.getHeader());
if (!this.filterName.isEmpty()) {
h2.addMetaDataLine(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_FILTER_KEY, 1, VCFHeaderLineType.String, "Genotype-level filter"));
}
addMetaData(h2);
out.writeHeader(h2);
final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(in.getHeader());
final Bindings bindings = this.script.getEngine().createBindings();
bindings.put("header", in.getHeader());
while (in.hasNext()) {
final VariantContext ctx = progress.watch(in.next());
bindings.put("variant", ctx);
final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
List<Genotype> genotypes = new ArrayList<>();
int countCalled = ctx.getNSamples();
for (int i = 0; i < ctx.getNSamples(); ++i) {
Genotype genotype = ctx.getGenotype(i);
bindings.put("genotype", genotype);
if (!genotype.isCalled() || genotype.isNoCall() || !genotype.isAvailable()) {
countCalled--;
} else if (genotype.isCalled() && !super.evalJavaScriptBoolean(this.script, bindings)) {
if (!this.filterName.isEmpty()) {
if (!genotype.isFiltered()) {
genotype = new GenotypeBuilder(genotype).filters(this.filterName).make();
}
} else if (this.replaceByHomRef) {
List<Allele> homRefList = new ArrayList<>(genotype.getPloidy());
for (int p = 0; p < genotype.getPloidy(); ++p) {
homRefList.add(ctx.getReference());
}
genotype = new GenotypeBuilder(genotype).alleles(homRefList).make();
} else {
genotype = GenotypeBuilder.createMissing(genotype.getSampleName(), genotype.getPloidy());
}
countCalled--;
}
genotypes.add(genotype);
}
if (countCalled == 0 && this.removeCtxNoGenotype) {
continue;
}
vcb.genotypes(genotypes);
out.add(vcb.make());
}
progress.finish();
return RETURN_OK;
} catch (Exception err) {
LOG.error(err);
return -1;
} finally {
this.script = null;
}
}
use of htsjdk.variant.vcf.VCFHeader in project jvarkit by lindenb.
the class VcfSkatSlidingWindow method doWork.
@Override
public int doWork(final List<String> args) {
if (this.nJobs < 1) {
this.nJobs = Math.max(1, Runtime.getRuntime().availableProcessors());
LOG.info("setting njobs to " + this.nJobs);
}
VcfIterator r = null;
try {
final VCFHeader header;
final SAMSequenceDictionary dict;
final File vcfFile = new File(oneAndOnlyOneFile(args));
try (final VCFFileReader vr = new VCFFileReader(vcfFile, true)) {
header = vr.getFileHeader();
dict = header.getSequenceDictionary();
}
if (dict == null || dict.isEmpty()) {
throw new JvarkitException.VcfDictionaryMissing(vcfFile);
}
if (!this.limit_contigs.isEmpty()) {
if (this.limit_contigs.stream().anyMatch(C -> dict.getSequence(C) == null)) {
LOG.error("user contig missing in vcf dictionary.");
return -1;
}
}
final Pedigree pedigree;
if (this.pedigreeFile != null) {
pedigree = new Pedigree.Parser().parse(this.pedigreeFile);
} else {
pedigree = new Pedigree.Parser().parse(header);
}
final Set<Pedigree.Person> samples = new HashSet<>(pedigree.getPersons());
samples.removeIf(I -> !(I.isAffected() || I.isUnaffected()) || !header.getSampleNamesInOrder().contains(I.getId()));
this.writer = super.openFileOrStdoutAsPrintWriter(this.outputFile);
final Consumer<SkatCallerResult> writeResult = (R) -> {
synchronized (this.writer) {
this.writer.println(R.toString());
}
};
for (final SAMSequenceRecord ssr : dict.getSequences()) {
if (!this.limit_contigs.isEmpty() && !this.limit_contigs.contains(ssr.getSequenceName())) {
LOG.warning("skipping contig " + ssr.getSequenceName());
continue;
}
LOG.info("contig " + ssr.getSequenceName());
final ExecutorService executorService = new ThreadPoolExecutor(this.nJobs, this.nJobs, 0L, TimeUnit.MILLISECONDS, new LinkedBlockingDeque<>(this.nJobs));
final List<Future<Integer>> results = new ArrayList<>(this.nJobs);
for (int i = 0; i < this.nJobs; i++) {
final int windowLen = Math.max(1, ssr.getSequenceLength() / this.nJobs);
final SkatWorker caller = new SkatWorker(vcfFile, new Interval(ssr.getSequenceName(), i * windowLen, Math.min(ssr.getSequenceLength(), (i + 1) * windowLen)), samples, this.skat.build(), writeResult);
results.add(executorService.submit(caller));
}
executorService.shutdown();
executorService.awaitTermination(365, TimeUnit.DAYS);
for (final Future<Integer> fl : results) {
try {
if (fl.get() != 0) {
LOG.error("An error occured");
return -1;
}
} catch (final Exception err) {
LOG.error(err);
return -1;
}
}
}
this.writer.flush();
this.writer.close();
this.writer = null;
return 0;
} catch (final Throwable err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(r);
CloserUtil.close(this.writer);
}
}
use of htsjdk.variant.vcf.VCFHeader in project jvarkit by lindenb.
the class SortVcfOnRef2 method sortvcf.
protected int sortvcf(BufferedReader in) throws IOException {
if (this.refdict != null) {
LOG.info("load dict from " + this.refdict);
this.dict = SAMSequenceDictionaryExtractor.extractDictionary(this.refdict);
if (this.dict == null) {
LOG.error("cannot find sam sequence dictionary from " + refdict);
}
}
final VCFUtils.CodecAndHeader cah = VCFUtils.parseHeader(in);
final VCFHeader h2 = new VCFHeader(cah.header);
if (this.dict != null) {
h2.setSequenceDictionary(this.dict);
} else {
this.dict = h2.getSequenceDictionary();
if (this.dict == null) {
LOG.error("No internal sequence dictionay found in input");
return -1;
}
}
addMetaData(h2);
if (this.dict.isEmpty()) {
LOG.warn("SEQUENCE DICTIONARY IS EMPTY/NULL");
}
CloseableIterator<ChromPosLine> iter = null;
SortingCollection<ChromPosLine> array = null;
VariantContextWriter w = null;
try {
array = SortingCollection.newInstance(ChromPosLine.class, new VariantCodec(), new VariantComparator(), this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpPaths());
array.setDestructiveIteration(true);
final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(this.dict);
String line;
while ((line = in.readLine()) != null) {
final ChromPosLine cpl = new ChromPosLine(line);
progress.watch(cpl.tid, cpl.pos);
array.add(cpl);
}
array.doneAdding();
progress.finish();
w = super.openVariantContextWriter(outputFile);
w.writeHeader(h2);
iter = array.iterator();
while (iter.hasNext()) {
w.add(cah.codec.decode(iter.next().line));
if (w.checkError())
break;
}
return RETURN_OK;
} catch (Exception e) {
LOG.error(e);
return -1;
} finally {
CloserUtil.close(w);
CloserUtil.close(iter);
if (array != null)
array.cleanup();
}
}
use of htsjdk.variant.vcf.VCFHeader in project gatk by broadinstitute.
the class GenomicsDBImportIntegrationTest method testPreserveContigOrderingInHeader.
@Test
public void testPreserveContigOrderingInHeader() throws IOException {
final String workspace = createTempDir("testPreserveContigOrderingInHeader-").getAbsolutePath() + "/workspace";
writeToGenomicsDB(Arrays.asList(GENOMICSDB_TEST_DIR + "testHeaderContigLineSorting1.g.vcf", GENOMICSDB_TEST_DIR + "testHeaderContigLineSorting2.g.vcf"), new SimpleInterval("chr20", 17959479, 17959479), workspace, 0, false, 0);
try (final GenomicsDBFeatureReader<VariantContext, PositionalBufferedStream> genomicsDBFeatureReader = new GenomicsDBFeatureReader<>(new File(workspace, GenomicsDBConstants.DEFAULT_VIDMAP_FILE_NAME).getAbsolutePath(), new File(workspace, GenomicsDBConstants.DEFAULT_CALLSETMAP_FILE_NAME).getAbsolutePath(), workspace, GenomicsDBConstants.DEFAULT_ARRAY_NAME, b38_reference_20_21, null, new BCF2Codec());
final AbstractFeatureReader<VariantContext, LineIterator> inputGVCFReader = AbstractFeatureReader.getFeatureReader(GENOMICSDB_TEST_DIR + "testHeaderContigLineSorting1.g.vcf", new VCFCodec(), true)) {
final SAMSequenceDictionary dictionaryFromGenomicsDB = ((VCFHeader) genomicsDBFeatureReader.getHeader()).getSequenceDictionary();
final SAMSequenceDictionary dictionaryFromInputGVCF = ((VCFHeader) inputGVCFReader.getHeader()).getSequenceDictionary();
Assert.assertEquals(dictionaryFromGenomicsDB, dictionaryFromInputGVCF, "Sequence dictionary from GenomicsDB does not match original sequence dictionary from input GVCF");
}
}
Aggregations