use of htsjdk.variant.vcf.VCFInfoHeaderLine in project jvarkit by lindenb.
the class SortVcfOnInfo method doWork.
@Override
public int doWork(final List<String> args) {
CloseableIterator<VcfLine> iter = null;
VariantContextWriter w = null;
SortingCollection<VcfLine> sorted = null;
LineIterator r = null;
try {
if (args.isEmpty()) {
LOG.info("reading from stdin");
r = IOUtils.openStreamForLineIterator(stdin());
} else if (args.size() == 1) {
String filename = args.get(0);
LOG.info("Reading " + filename);
r = IOUtils.openURIForLineIterator(filename);
} else {
LOG.error("Illegal number of arguments.");
return -1;
}
final VCFUtils.CodecAndHeader ch = VCFUtils.parseHeader(r);
VCFHeader header = ch.header;
this.codec = ch.codec;
this.infoDecl = header.getInfoHeaderLine(this.infoField);
if (this.infoDecl == null) {
final StringBuilder msg = new StringBuilder("VCF doesn't contain the INFO field :" + infoField + ". Available:");
for (VCFInfoHeaderLine vil : header.getInfoHeaderLines()) msg.append(" ").append(vil.getID());
LOG.error(msg.toString());
return -1;
}
final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header);
header.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "CmdLine", String.valueOf(getProgramCommandLine())));
header.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "Version", String.valueOf(getVersion())));
header.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "HtsJdkVersion", HtsjdkVersion.getVersion()));
header.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "HtsJdkHome", HtsjdkVersion.getHome()));
w = super.openVariantContextWriter(this.outputFile);
w.writeHeader(header);
sorted = SortingCollection.newInstance(VcfLine.class, new VariantCodec(), (V1, V2) -> V1.compareTo(V2), this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpPaths());
sorted.setDestructiveIteration(true);
while (r.hasNext()) {
final VcfLine vc = new VcfLine(r.next());
progress.watch(vc.getContext());
sorted.add(vc);
}
CloserUtil.close(r);
r = null;
sorted.doneAdding();
progress.finish();
LOG.info("now writing...");
iter = sorted.iterator();
while (iter.hasNext()) {
w.add(iter.next().getContext());
}
iter.close();
iter = null;
w.close();
w = null;
return 0;
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(r);
CloserUtil.close(iter);
try {
if (sorted != null)
sorted.cleanup();
} catch (Exception err) {
}
CloserUtil.close(w);
}
}
use of htsjdk.variant.vcf.VCFInfoHeaderLine in project jvarkit by lindenb.
the class VcfGeneOntology method filterVcfIterator.
private void filterVcfIterator(final VcfIterator in) throws IOException {
VariantContextWriter w = null;
try {
VCFHeader header = in.getHeader();
VCFHeader h2 = new VCFHeader(header);
h2.addMetaDataLine(new VCFInfoHeaderLine(TAG, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "GO terms from GO " + GO + " and GOA=" + GOA));
h2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "CmdLine", String.valueOf(getProgramCommandLine())));
h2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "Version", String.valueOf(getVersion())));
h2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "HtsJdkVersion", HtsjdkVersion.getVersion()));
h2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "HtsJdkHome", HtsjdkVersion.getHome()));
if (filterName != null) {
h2.addMetaDataLine(new VCFFilterHeaderLine(filterName, "Flag GO terms " + (inverse_filter ? " not descendant of " : "") + " the provided GO terms"));
}
w = super.openVariantContextWriter(outputFile);
w.writeHeader(h2);
final SAMSequenceDictionaryProgress progess = new SAMSequenceDictionaryProgress(header.getSequenceDictionary());
final SnpEffPredictionParser snpEffPredictionParser = new SnpEffPredictionParserFactory().header(header).get();
final VepPredictionParser vepPredictionParser = new VepPredictionParserFactory().header(header).get();
while (in.hasNext()) {
if (System.out.checkError())
break;
VariantContext ctx = progess.watch(in.next());
/* symbols for this variant */
Set<String> symbols = new HashSet<String>();
/* scan SNPEFF gene */
for (SnpEffPrediction pred : snpEffPredictionParser.getPredictions(ctx)) {
String genName = pred.getGeneName();
if (genName == null || genName.isEmpty())
continue;
symbols.add(genName);
}
/* scan VEP gene */
for (VepPrediction pred : vepPredictionParser.getPredictions(ctx)) {
String genName = pred.getGeneName();
if (!(genName == null || genName.isEmpty())) {
symbols.add(genName);
}
genName = pred.getGene();
if (!(genName == null || genName.isEmpty())) {
symbols.add(genName);
}
genName = pred.getHGNC();
if (!(genName == null || genName.isEmpty())) {
symbols.add(genName);
}
}
/* only keep known GENES from GOA */
symbols.retainAll(this.name2go.keySet());
boolean found_child_of_filter = false;
/* ATTS */
List<String> atts = new ArrayList<String>();
/* loop over symbols */
for (String symbol : symbols) {
/* go terms associated to this symbol */
Set<GoTree.Term> t2 = this.name2go.get(symbol);
if (t2 == null || t2.isEmpty())
continue;
StringBuilder sb = new StringBuilder(symbol);
sb.append("|");
boolean first = true;
for (GoTree.Term gt : t2) {
/* user gave terms to filter */
if (!found_child_of_filter && this.goTermToFilter != null) {
for (GoTree.Term userTerm : this.goTermToFilter) {
if (userTerm.hasDescendant(gt.getAcn())) {
found_child_of_filter = true;
break;
}
}
}
if (!first)
sb.append("&");
sb.append(gt.getAcn());
first = false;
}
atts.add(sb.toString());
}
/* no go term was found */
if (atts.isEmpty()) {
if (!removeIfNoGo) {
w.add(ctx);
}
continue;
}
VariantContextBuilder vcb = new VariantContextBuilder(ctx);
/* check children of user's terms */
if (this.goTermToFilter != null) {
/* keep if found children*/
if ((this.inverse_filter && found_child_of_filter) || (!this.inverse_filter && !found_child_of_filter)) {
/* don't remove, but set filter */
if (this.filterName != null) {
Set<String> filters = new HashSet<String>(ctx.getFilters());
filters.add(this.filterName);
vcb.filters(filters);
} else {
continue;
}
}
}
/* add go terms */
vcb.attribute(this.TAG, atts);
w.add(vcb.make());
}
progess.finish();
w.close();
w = null;
} finally {
CloserUtil.close(w);
w = null;
}
}
use of htsjdk.variant.vcf.VCFInfoHeaderLine in project jvarkit by lindenb.
the class VcfPeekVcf method doVcfToVcf.
/**
* public for knime
*/
@Override
public int doVcfToVcf(final String inputName, final VcfIterator vcfIn, final VariantContextWriter out) {
try {
final Set<String> unmatchedcontigs = new HashSet<>();
final VCFHeader h = vcfIn.getHeader();
final VCFHeader h2 = new VCFHeader(h);
super.addMetaData(h2);
final Map<String, VCFInfoHeaderLine> databaseTags = new HashMap<String, VCFInfoHeaderLine>();
final VCFHeader databaseHeader = this.indexedVcfFileReader.getFileHeader();
final ContigNameConverter nameConverter = (h.getSequenceDictionary() != null && !h.getSequenceDictionary().isEmpty() && databaseHeader.getSequenceDictionary() != null && !databaseHeader.getSequenceDictionary().isEmpty() ? ContigNameConverter.fromDictionaries(h.getSequenceDictionary(), databaseHeader.getSequenceDictionary()) : ContigNameConverter.getIdentity()).setOnNotFound(this.onContigNotFound);
;
for (final String key : this.peek_info_tags) {
VCFInfoHeaderLine hinfo = databaseHeader.getInfoHeaderLine(key);
if (hinfo == null) {
final String msg = "INFO name=" + key + " missing in " + this.resourceVcfFile;
if (this.missingIdIsError) {
LOG.warn(msg);
continue;
} else {
LOG.error(msg);
return -1;
}
}
switch(hinfo.getCountType()) {
case G:
throw new JvarkitException.UserError("Cannot handle VCFHeaderLineCount.G for " + hinfo.getID());
default:
databaseTags.put(hinfo.getID(), hinfo);
break;
}
hinfo = VCFUtils.renameVCFInfoHeaderLine(hinfo, this.peekTagPrefix + key);
if (h2.getInfoHeaderLine(hinfo.getID()) != null) {
throw new JvarkitException.UserError("key " + this.peekTagPrefix + key + " already defined in VCF header");
}
h2.addMetaDataLine(hinfo);
;
}
out.writeHeader(h2);
final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(h).logger(LOG);
while (vcfIn.hasNext()) {
final VariantContext ctx = progress.watch(vcfIn.next());
final String outContig = nameConverter.apply(ctx.getContig());
if (outContig == null) {
unmatchedcontigs.add(ctx.getContig());
continue;
}
final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
CloseableIterator<VariantContext> iter = this.indexedVcfFileReader.query(outContig, Math.max(0, ctx.getStart() - 1), (ctx.getEnd() + 1));
while (iter.hasNext()) {
final VariantContext ctx2 = iter.next();
if (!outContig.equals(ctx2.getContig()))
continue;
if (ctx.getStart() != ctx2.getStart())
continue;
if (!ctx.getReference().equals(ctx2.getReference()))
continue;
final boolean okAllele;
switch(altAlleleMatcher) {
case all:
{
okAllele = ctx.getAlternateAlleles().stream().filter(A -> ctx2.hasAlternateAllele(A)).count() == ctx.getAlternateAlleles().size();
break;
}
case at_least_one:
{
okAllele = ctx.getAlternateAlleles().stream().filter(A -> ctx2.hasAlternateAllele(A)).findAny().isPresent();
break;
}
case none:
okAllele = true;
break;
default:
throw new IllegalStateException(altAlleleMatcher.name());
}
if (!okAllele)
continue;
if (this.peekId && ctx2.hasID()) {
vcb.id(ctx2.getID());
}
boolean somethingWasChanged = false;
for (final String key : databaseTags.keySet()) {
if (!ctx2.hasAttribute(key))
continue;
final VCFInfoHeaderLine dbHeader = databaseTags.get(key);
switch(dbHeader.getCountType()) {
case A:
{
final List<Object> newatt = new ArrayList<>();
final List<Object> ctx2att = ctx2.getAttributeAsList(key);
for (int i = 0; i < ctx.getAlternateAlleles().size(); ++i) {
final Allele ctxalt = ctx.getAlternateAllele(i);
int index2 = ctx2.getAlternateAlleles().indexOf(ctxalt);
if (index2 == -1 || index2 >= ctx2att.size()) {
newatt.add(null);
} else {
newatt.add(ctx2att.get(index2));
}
}
if (newatt.stream().filter(Obj -> !(Obj == null || VCFConstants.EMPTY_INFO_FIELD.equals(Obj))).count() > 0) {
vcb.attribute(this.peekTagPrefix + key, newatt);
somethingWasChanged = true;
}
break;
}
case R:
{
final List<Object> newatt = new ArrayList<>();
final List<Object> ctx2att = ctx2.getAttributeAsList(key);
for (int i = 0; i < ctx.getAlleles().size(); ++i) {
final Allele ctxalt = ctx.getAlleles().get(i);
int index2 = ctx2.getAlleleIndex(ctxalt);
if (index2 == -1 || index2 >= ctx2att.size()) {
newatt.add(null);
} else {
newatt.add(ctx2att.get(index2));
}
}
if (newatt.stream().filter(Obj -> !(Obj == null || VCFConstants.EMPTY_INFO_FIELD.equals(Obj))).count() > 0) {
vcb.attribute(this.peekTagPrefix + key, newatt);
somethingWasChanged = true;
}
break;
}
default:
{
final Object o = ctx2.getAttribute(key);
vcb.attribute(this.peekTagPrefix + key, o);
somethingWasChanged = true;
break;
}
}
}
if (somethingWasChanged)
break;
}
iter.close();
iter = null;
out.add(vcb.make());
if (out.checkError())
break;
}
progress.finish();
if (!unmatchedcontigs.isEmpty()) {
LOG.debug("Unmatched contigs: " + unmatchedcontigs.stream().collect(Collectors.joining("; ")));
}
return 0;
} catch (final Exception err) {
LOG.error(err);
return -1;
}
}
use of htsjdk.variant.vcf.VCFInfoHeaderLine in project jvarkit by lindenb.
the class VcfVcf method doVcfToVcf.
@Override
protected int doVcfToVcf(String inputName, VcfIterator r, VariantContextWriter w) {
try {
CloseableIterator<VariantContext> iter = null;
LOG.info("opening file: " + this.TABIX);
IndexedVcfFileReader tabix = new IndexedVcfFileReader(this.TABIX);
VCFHeader header3 = tabix.getHeader();
VCFHeader header1 = r.getHeader();
VCFHeader h2 = new VCFHeader(header1.getMetaDataInInputOrder(), header1.getSampleNamesInOrder());
for (String infoId : this.INFO_IDS) {
VCFInfoHeaderLine vihl = header3.getInfoHeaderLine(infoId);
if (vihl == null) {
LOG.warn("Not INFO=" + infoId + " in " + TABIX);
continue;
}
if (h2.getInfoHeaderLine(infoId) != null) {
LOG.warn("Input already contains INFO=" + vihl);
}
h2.addMetaDataLine(vihl);
}
if (ALT_CONFLICT_FLAG != null) {
h2.addMetaDataLine(new VCFInfoHeaderLine(ALT_CONFLICT_FLAG, 1, VCFHeaderLineType.Flag, "conflict ALT allele with " + this.TABIX));
}
w.writeHeader(h2);
while (r.hasNext()) {
VariantContext ctx1 = r.next();
VariantContextBuilder vcb = new VariantContextBuilder(ctx1);
String BEST_ID = null;
boolean best_id_match_alt = false;
List<VariantContext> variantsList = new ArrayList<VariantContext>();
iter = tabix.iterator(ctx1.getChr(), Math.max(0, ctx1.getStart() - 1), (ctx1.getEnd() + 1));
while (iter.hasNext()) {
VariantContext ctx3 = iter.next();
if (!ctx3.getContig().equals(ctx1.getContig()))
continue;
if (ctx3.getStart() != ctx1.getStart())
continue;
if (ctx3.getEnd() != ctx1.getEnd())
continue;
if (ctx1.getReference().equals(ctx3.getReference()) && ctx1.getAlternateAlleles().equals(ctx3.getAlternateAlleles())) {
variantsList.clear();
variantsList.add(ctx3);
break;
} else {
variantsList.add(ctx3);
}
}
CloserUtil.close(iter);
iter = null;
for (VariantContext ctx3 : variantsList) {
if (this.REF_ALLELE_MATTERS && !ctx1.getReference().equals(ctx3.getReference())) {
continue;
}
if (this.ALT_ALLELES_MATTERS && !ctx1.getAlternateAlleles().equals(ctx3.getAlternateAlleles())) {
continue;
}
if (ctx3.getID() != null && this.REPLACE_ID) {
if (BEST_ID != null && best_id_match_alt) {
// nothing
} else {
BEST_ID = ctx3.getID();
best_id_match_alt = ctx1.getAlternateAlleles().equals(ctx3.getAlternateAlleles());
}
}
for (String id : this.INFO_IDS) {
Object info3 = ctx3.getAttribute(id);
if (info3 == null) {
continue;
}
Object info1 = ctx1.getAttribute(id);
if (info1 != null && !this.REPLACE_INFO_FIELD) {
continue;
}
vcb.attribute(id, info3);
}
if (ALT_CONFLICT_FLAG != null && !ctx1.getAlternateAlleles().equals(ctx3.getAlternateAlleles())) {
vcb.attribute(ALT_CONFLICT_FLAG, true);
}
}
if (BEST_ID != null) {
vcb.id(BEST_ID);
}
w.add(vcb.make());
}
tabix.close();
return 0;
} catch (Exception err) {
LOG.error(err);
return -1;
}
}
use of htsjdk.variant.vcf.VCFInfoHeaderLine in project jvarkit by lindenb.
the class VCFReplaceTag method doVcfToVcf.
@Override
protected int doVcfToVcf(final String inputName, final VcfIterator r, final VariantContextWriter w) {
final VCFHeader header = r.getHeader();
final HashSet<VCFHeaderLine> copyMeta = new HashSet<>(header.getMetaDataInInputOrder());
for (final String key : this.transformMap.keySet()) {
switch(this.replaceTypeNo) {
case // INFO
0:
{
final VCFInfoHeaderLine info = header.getInfoHeaderLine(key);
if (info != null) {
copyMeta.remove(info);
copyMeta.add(VCFUtils.renameVCFInfoHeaderLine(info, this.transformMap.get(key)));
}
break;
}
case // FORMAT
1:
{
final VCFFormatHeaderLine fmt = header.getFormatHeaderLine(key);
if (fmt != null) {
copyMeta.remove(fmt);
copyMeta.add(VCFUtils.renameVCFFormatHeaderLine(fmt, this.transformMap.get(key)));
}
break;
}
case // FILTER
2:
{
final VCFFilterHeaderLine filter = header.getFilterHeaderLine(key);
if (filter != null) {
copyMeta.remove(filter);
copyMeta.add(VCFUtils.renameVCFFilterHeaderLine(filter, this.transformMap.get(key)));
}
break;
}
default:
throw new IllegalStateException("" + this.replaceTypeNo);
}
}
final VCFHeader h2 = new VCFHeader(copyMeta, header.getSampleNamesInOrder());
addMetaData(h2);
final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(h2);
w.writeHeader(h2);
while (r.hasNext()) {
VariantContext ctx = progress.watch(r.next());
VariantContextBuilder b = new VariantContextBuilder(ctx);
switch(this.replaceTypeNo) {
case // INFO
0:
{
for (String key : this.transformMap.keySet()) {
Object o = ctx.getAttribute(key);
if (o != null) {
b.rmAttribute(key);
b.attribute(this.transformMap.get(key), o);
}
}
break;
}
case // FORMAT
1:
{
List<Genotype> newgenotypes = new ArrayList<>(ctx.getNSamples());
for (int i = 0; i < ctx.getNSamples(); ++i) {
Genotype g = ctx.getGenotype(i);
Map<String, Object> atts = g.getExtendedAttributes();
GenotypeBuilder gb = new GenotypeBuilder(g);
for (String key : this.transformMap.keySet()) {
Object o = atts.get(key);
if (o != null) {
atts.remove(key);
atts.put(this.transformMap.get(key), o);
}
}
gb.attributes(atts);
newgenotypes.add(gb.make());
}
b.genotypes(newgenotypes);
break;
}
case // FILTER
2:
{
Set<String> filters = new HashSet<>(ctx.getFilters());
for (String key : this.transformMap.keySet()) {
if (filters.contains(key)) {
filters.remove(key);
filters.add(this.transformMap.get(key));
}
}
b.filters(filters);
break;
}
default:
throw new IllegalStateException("" + this.replaceTypeNo);
}
w.add(b.make());
if (w.checkError())
break;
}
progress.finish();
LOG.info("done");
return 0;
}
Aggregations