use of htsjdk.variant.variantcontext.Allele in project jvarkit by lindenb.
the class GenotypeBinding method entryToObject.
@Override
public Genotype entryToObject(TupleInput in) {
GenotypeBuilder gb = new GenotypeBuilder(in.readString());
if (in.readBoolean())
gb.DP(in.readInt());
if (in.readBoolean())
gb.AD(arrayOfIntToEntry(in));
if (in.readBoolean())
gb.GQ(in.readInt());
if (in.readBoolean())
gb.PL(arrayOfIntToEntry(in));
/* ALLELES ======================================== */
int n = in.readInt();
List<Allele> alleles = new ArrayList<Allele>(n);
for (int i = 0; i < n; ++i) {
alleles.add(this.alleleBinding.entryToObject(in));
}
gb.alleles(alleles);
/* ATTRIBUTES ===================================== */
n = in.readInt();
for (int i = 0; i < n; ++i) {
String key = in.readString();
gb.attribute(key, super.readAttribute(in));
}
return gb.make();
}
use of htsjdk.variant.variantcontext.Allele in project jvarkit by lindenb.
the class VariantContextBinding method entryToObject.
@Override
public VariantContext entryToObject(TupleInput in) {
VariantContextBuilder vcb = new VariantContextBuilder();
vcb.chr(in.readString());
vcb.start(in.readInt());
vcb.stop(in.readInt());
if (in.readBoolean()) {
vcb.id(in.readString());
}
/* ALLELES ======================== */
int n = in.readInt();
List<Allele> alleles = new ArrayList<Allele>(n);
for (int i = 0; i < n; ++i) {
alleles.add(this.alleleBinding.entryToObject(in));
}
vcb.alleles(alleles);
/* QUAL ======================== */
if (in.readBoolean()) {
vcb.log10PError(in.readDouble());
}
/* FILTERS ======================== */
int n_filters = in.readInt();
Set<String> filters = new HashSet<String>(n_filters);
for (int i = 0; i < n_filters; ++i) {
filters.add(in.readString());
}
vcb.filters(filters);
/* INFO ======================== */
int n_infokeys = in.readInt();
Map<String, Object> hash = new HashMap<String, Object>(n_infokeys);
for (int i = 0; i < n_infokeys; ++i) {
String key = in.readString();
Object value = super.readAttribute(in);
hash.put(key, value);
}
vcb.attributes(hash);
/* GENOTYPES ======================== */
int n_genotypes = in.readInt();
List<Genotype> genotypes = new ArrayList<Genotype>(n_genotypes);
for (int i = 0; i < n_genotypes; ++i) {
genotypes.add(this.genotypeBinding.entryToObject(in));
}
vcb.genotypes(genotypes);
return vcb.make();
}
use of htsjdk.variant.variantcontext.Allele in project jvarkit by lindenb.
the class VcfCalledWithAnotherMethod method doVcfToVcf.
protected int doVcfToVcf(final String inputName, final VcfIterator in, final VariantContextWriter out) {
final List<ExternalVcf> externalVcfs = new ArrayList<>();
try {
final VCFHeader header = in.getHeader();
this.dictionary = header.getSequenceDictionary();
/**
* open primitive input
*/
if (this.dictionary == null) {
LOG.error("no dictionary in input");
return -1;
}
final Set<File> samtoolsFiles = new HashSet<>();
this.externalVcfStrs.stream().filter(S -> S.endsWith(".list")).map(S -> Paths.get(S)).forEach(P -> {
try {
samtoolsFiles.addAll(Files.readAllLines(P).stream().filter(L -> !(L.startsWith("#") || L.trim().isEmpty())).map(S -> new File(S)).collect(Collectors.toSet()));
} catch (final Exception err) {
throw new RuntimeIOException(err);
}
});
samtoolsFiles.addAll(this.externalVcfStrs.stream().filter(S -> !S.endsWith(".list")).map(S -> new File(S)).collect(Collectors.toSet()));
externalVcfs.addAll(samtoolsFiles.stream().map(F -> new ExternalVcf(F)).collect(Collectors.toList()));
/**
* check for uniq keys
*/
final Set<String> uniqKeys = new HashSet<>();
for (final ExternalVcf extvcf : externalVcfs) {
int n = 0;
for (; ; ) {
final String newkey = extvcf.key + (n == 0 ? "" : "_" + n);
if (!uniqKeys.contains(newkey)) {
extvcf.key = newkey;
uniqKeys.add(newkey);
break;
}
++n;
}
}
final VCFHeader h2 = new VCFHeader(header);
for (final ExternalVcf extvcf : externalVcfs) {
h2.addMetaDataLine(new VCFHeaderLine("otherVcf:" + extvcf.key, extvcf.vcfFile.getPath()));
}
final VCFFilterHeaderLine variantNotFoundElsewhereFILTER = (filterNameVariantNotFoundElseWhere.isEmpty() ? null : new VCFFilterHeaderLine(filterNameVariantNotFoundElseWhere, "Variant Was not found in other VCFs: " + externalVcfs.stream().map(S -> S.vcfFile.getPath()).collect(Collectors.joining(", "))));
if (variantNotFoundElsewhereFILTER != null) {
h2.addMetaDataLine(variantNotFoundElsewhereFILTER);
}
final VCFInfoHeaderLine variantFoundElseWhereKeys = new VCFInfoHeaderLine(this.infoFoundKey, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Variant was found in the VCFs designed by those keys");
h2.addMetaDataLine(variantFoundElseWhereKeys);
final VCFInfoHeaderLine variantFoundElseWhereCount = new VCFInfoHeaderLine(this.infoFoundKey, 1, VCFHeaderLineType.Integer, "Number of times the Variant was found in the VCFs");
h2.addMetaDataLine(variantFoundElseWhereCount);
final VCFFormatHeaderLine genotypeCountSame = new VCFFormatHeaderLine(this.formatCountSame, 1, VCFHeaderLineType.Integer, "Number of times the Genotype was found the same in other VCFs");
h2.addMetaDataLine(genotypeCountSame);
final VCFFormatHeaderLine genotypeCountDiscordant = new VCFFormatHeaderLine(this.formatCountDiscordant, 1, VCFHeaderLineType.Integer, "Number of times the Genotype was found dicordant in other VCFs");
h2.addMetaDataLine(genotypeCountDiscordant);
super.addMetaData(h2);
final VariantContextWriter w = super.openVariantContextWriter(outputFile);
w.writeHeader(h2);
final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(this.dictionary);
while (in.hasNext()) {
final VariantContext ctx = progress.watch(in.next());
final List<GenotypeCount> genotypeCounts = ctx.getGenotypes().stream().map(G -> new GenotypeCount(G)).collect(Collectors.toList());
final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
final Set<String> variantFoundInOtherVcfs = new HashSet<>();
for (final ExternalVcf extvcf : externalVcfs) {
final List<VariantContext> otherVariants = extvcf.get(ctx);
if (otherVariants.stream().filter(CTX2 -> {
if (ctx.getAlternateAlleles().isEmpty())
return true;
for (final Allele a : ctx.getAlternateAlleles()) {
if (CTX2.hasAllele(a))
return true;
}
return false;
}).findAny().isPresent()) {
variantFoundInOtherVcfs.add(extvcf.key);
}
for (final GenotypeCount gt : genotypeCounts) {
for (final VariantContext otherVariant : otherVariants) {
final Genotype otherGt = otherVariant.getGenotype(gt.gt.getSampleName());
if (otherGt == null)
continue;
if (gt.gt.sameGenotype(otherGt) || (this.noCallSameAsHomRef && ((gt.gt.isNoCall() && otherGt.isHomRef()) || (gt.gt.isHomRef() && otherGt.isNoCall())))) {
gt.countSame++;
} else {
gt.countDiscordant++;
}
}
}
}
vcb.genotypes(genotypeCounts.stream().map(G -> {
final GenotypeBuilder gb = new GenotypeBuilder(G.gt);
gb.attribute(genotypeCountSame.getID(), G.countSame);
gb.attribute(genotypeCountDiscordant.getID(), G.countDiscordant);
return gb.make();
}).collect(Collectors.toList()));
vcb.attribute(variantFoundElseWhereCount.getID(), variantFoundInOtherVcfs.size());
if (variantFoundInOtherVcfs.isEmpty()) {
if (variantNotFoundElsewhereFILTER != null) {
vcb.filter(variantNotFoundElsewhereFILTER.getID());
}
} else {
if (variantNotFoundElsewhereFILTER != null && !ctx.isFiltered()) {
vcb.passFilters();
}
vcb.attribute(variantFoundElseWhereKeys.getID(), new ArrayList<>(variantFoundInOtherVcfs));
}
w.add(vcb.make());
}
progress.finish();
while (!externalVcfs.isEmpty()) externalVcfs.remove(0).close();
return RETURN_OK;
} catch (Exception err) {
LOG.error(err);
return -1;
} finally {
while (!externalVcfs.isEmpty()) externalVcfs.remove(0).close();
}
}
use of htsjdk.variant.variantcontext.Allele in project jvarkit by lindenb.
the class VcfCutSamples method doVcfToVcf.
@Override
protected int doVcfToVcf(String inputName, VcfIterator in, VariantContextWriter out) {
VCFHeader header = in.getHeader();
final Set<String> samples1 = new HashSet<String>(header.getSampleNamesInOrder());
for (String my : this.getUserSamples()) {
if (!samples1.contains(my)) {
String msg = "user sample " + my + " is not present in VCF Header : " + samples1;
if (this.missing_sample_is_error) {
throw new RuntimeException(msg);
} else {
LOG.warning(msg);
}
}
}
final List<String> samples2 = new ArrayList<String>();
for (final String sample : header.getSampleNamesInOrder()) {
if (this.getUserSamples().contains(sample)) {
if (!invert) {
samples2.add(sample);
}
} else {
if (invert) {
samples2.add(sample);
}
}
}
final VCFHeader header2 = new VCFHeader(header.getMetaDataInInputOrder(), samples2);
header2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "CmdLine", String.valueOf(getProgramCommandLine())));
header2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "Version", String.valueOf(getVersion())));
header2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "HtsJdkVersion", HtsjdkVersion.getVersion()));
header2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "HtsJdkHome", HtsjdkVersion.getHome()));
this.recalculator.setHeader(header2);
out.writeHeader(header2);
final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header);
while (in.hasNext()) {
final VariantContext ctx = progress.watch(in.next());
final VariantContextBuilder vb = new VariantContextBuilder(ctx);
final List<Genotype> genotypes = new ArrayList<Genotype>();
final Set<Allele> alleles = new HashSet<Allele>();
boolean only_no_call = true;
for (final String sample : samples2) {
final Genotype g = ctx.getGenotype(sample);
if (g.isNoCall() || !g.isCalled())
continue;
alleles.addAll(g.getAlleles());
genotypes.add(g);
if (g.isCalled())
only_no_call = false;
}
if (removeCtxIfNoCall && only_no_call)
continue;
alleles.add(ctx.getReference());
vb.alleles(alleles);
vb.genotypes(genotypes);
out.add(this.recalculator.apply(vb.make()));
}
progress.finish();
return 0;
}
use of htsjdk.variant.variantcontext.Allele in project jvarkit by lindenb.
the class IndexCovToVcf method doWork.
@Override
public int doWork(final List<String> args) {
if (this.deletionTreshold >= 1.0f) {
LOG.error("Bad deletion treshold >=1.0");
return -1;
}
if (this.duplicationTreshold <= 1.0f) {
LOG.error("Bad duplication treshold <=1.0");
return -1;
}
if (this.deletionTreshold >= this.duplicationTreshold) {
LOG.error("Bad tresholds del>=dup");
return -1;
}
final Pattern tab = Pattern.compile("[\t]");
BufferedReader r = null;
VariantContextWriter vcw = null;
try {
final SAMSequenceDictionary dict;
if (this.refFile == null) {
dict = null;
} else {
dict = SAMSequenceDictionaryExtractor.extractDictionary(this.refFile);
if (dict == null) {
LOG.error("Cannot find dict in " + this.refFile);
return -1;
}
}
r = super.openBufferedReader(oneFileOrNull(args));
String line = r.readLine();
if (line == null) {
LOG.error("Cannot read first line of input");
return -1;
}
String[] tokens = tab.split(line);
if (tokens.length < 4 || !tokens[0].equals("#chrom") || !tokens[1].equals("start") || !tokens[2].equals("end")) {
LOG.error("bad first line " + line);
return -1;
}
final Set<VCFHeaderLine> metaData = new HashSet<>();
VCFStandardHeaderLines.addStandardFormatLines(metaData, true, "GT");
VCFStandardHeaderLines.addStandardInfoLines(metaData, true, "END");
final VCFFormatHeaderLine foldHeader = new VCFFormatHeaderLine("F", 1, VCFHeaderLineType.Float, "Relative number of copy: 0.5 deletion 1 normal 2.0 duplication");
metaData.add(foldHeader);
final VCFFormatHeaderLine formatIsDeletion = new VCFFormatHeaderLine("DEL", 1, VCFHeaderLineType.Integer, "set to 1 if relative number of copy <= " + this.deletionTreshold);
metaData.add(formatIsDeletion);
final VCFFormatHeaderLine formatIsDuplication = new VCFFormatHeaderLine("DUP", 1, VCFHeaderLineType.Integer, "set to 1 if relative number of copy >= " + this.duplicationTreshold);
metaData.add(formatIsDuplication);
final VCFFilterHeaderLine filterAllDel = new VCFFilterHeaderLine("ALL_DEL", "number of samples greater than 1 and all are deletions");
metaData.add(filterAllDel);
final VCFFilterHeaderLine filterAllDup = new VCFFilterHeaderLine("ALL_DUP", "number of samples greater than 1 and all are duplication");
metaData.add(filterAllDup);
final VCFFilterHeaderLine filterNoSV = new VCFFilterHeaderLine("NO_SV", "There is no DUP or DEL in this variant");
metaData.add(filterNoSV);
final VCFInfoHeaderLine infoNumDup = new VCFInfoHeaderLine("NDUP", 1, VCFHeaderLineType.Integer, "Number of samples being duplicated");
metaData.add(infoNumDup);
final VCFInfoHeaderLine infoNumDel = new VCFInfoHeaderLine("NDEL", 1, VCFHeaderLineType.Integer, "Number of samples being deleted");
metaData.add(infoNumDel);
final List<String> samples = Arrays.asList(tokens).subList(3, tokens.length);
final VCFHeader vcfHeader = new VCFHeader(metaData, samples);
if (dict != null) {
vcfHeader.setSequenceDictionary(dict);
}
vcw = super.openVariantContextWriter(outputFile);
vcw.writeHeader(vcfHeader);
// final List<Allele> NO_CALL_NO_CALL = Arrays.asList(Allele.NO_CALL,Allele.NO_CALL);
final Allele DUP_ALLELE = Allele.create("<DUP>", false);
final Allele DEL_ALLELE = Allele.create("<DEL>", false);
final Allele REF_ALLELE = Allele.create("N", true);
while ((line = r.readLine()) != null) {
if (StringUtil.isBlank(line))
continue;
tokens = tab.split(line);
if (tokens.length != 3 + samples.size()) {
throw new JvarkitException.TokenErrors("expected " + (samples.size() + 3) + "columns.", tokens);
}
final Set<Allele> alleles = new HashSet<>();
alleles.add(REF_ALLELE);
final VariantContextBuilder vcb = new VariantContextBuilder();
vcb.chr(tokens[0]);
vcb.start(Integer.parseInt(tokens[1]));
final int chromEnd = Integer.parseInt(tokens[2]);
vcb.stop(chromEnd);
vcb.attribute(VCFConstants.END_KEY, chromEnd);
if (dict != null) {
final SAMSequenceRecord ssr = dict.getSequence(tokens[0]);
if (ssr == null) {
LOG.error(JvarkitException.ContigNotFoundInDictionary.getMessage(tokens[0], dict));
return -1;
}
if (chromEnd > ssr.getSequenceLength()) {
LOG.warn("WARNING sequence length in " + line + " is greater than in dictionary ");
}
}
int count_dup = 0;
int count_del = 0;
final List<Genotype> genotypes = new ArrayList<>(samples.size());
for (int i = 3; i < tokens.length; i++) {
final float f = Float.parseFloat(tokens[i]);
if (f < 0 || Float.isNaN(f) || !Float.isFinite(f)) {
LOG.error("Bad fold " + f + " in " + line);
}
final GenotypeBuilder gb;
if (f <= this.deletionTreshold) {
gb = new GenotypeBuilder(samples.get(i - 3), Collections.singletonList(DEL_ALLELE));
alleles.add(DEL_ALLELE);
gb.attribute(formatIsDeletion.getID(), 1);
count_del++;
} else if (f >= this.duplicationTreshold) {
gb = new GenotypeBuilder(samples.get(i - 3), Collections.singletonList(DUP_ALLELE));
alleles.add(DUP_ALLELE);
gb.attribute(formatIsDuplication.getID(), 1);
count_dup++;
} else {
gb = new GenotypeBuilder(samples.get(i - 3), Collections.singletonList(REF_ALLELE));
gb.attribute(formatIsDuplication.getID(), 0);
}
gb.attribute(foldHeader.getID(), f);
genotypes.add(gb.make());
}
vcb.alleles(alleles);
vcb.genotypes(genotypes);
if (count_dup == samples.size() && samples.size() != 1) {
vcb.filter(filterAllDup.getID());
}
if (count_del == samples.size() && samples.size() != 1) {
vcb.filter(filterAllDel.getID());
}
if (count_dup == 0 && count_del == 0) {
vcb.filter(filterNoSV.getID());
}
vcb.attribute(infoNumDel.getID(), count_del);
vcb.attribute(infoNumDup.getID(), count_dup);
vcw.add(vcb.make());
}
vcw.close();
vcw = null;
r.close();
r = null;
return 0;
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(r);
CloserUtil.close(vcw);
}
}
Aggregations