use of htsjdk.variant.vcf.VCFInfoHeaderLine in project hmftools by hartwigmedical.
the class BreakPointInspectorApplication method main.
public static void main(final String... args) throws IOException {
final AnalysisBuilder analysisBuilder = new AnalysisBuilder();
final Options options = createOptions();
try {
final CommandLine cmd = createCommandLine(options, args);
final String refPath = cmd.getOptionValue(REF_PATH);
final String refSlicePath = cmd.getOptionValue(REF_SLICE);
final String tumorPath = cmd.getOptionValue(TUMOR_PATH);
final String tumorSlicePath = cmd.getOptionValue(TUMOR_SLICE);
final String vcfPath = cmd.getOptionValue(VCF);
if (cmd.hasOption(PROXIMITY)) {
analysisBuilder.setRange(Integer.parseInt(cmd.getOptionValue(PROXIMITY, "500")));
}
if (cmd.hasOption(CONTAMINATION)) {
analysisBuilder.setContaminationFraction(Float.parseFloat(cmd.getOptionValue(CONTAMINATION, "0")));
}
if (refPath == null || tumorPath == null || vcfPath == null) {
printHelpAndExit(options);
return;
}
final File tumorBAM = new File(tumorPath);
final SamReader tumorReader = SamReaderFactory.makeDefault().open(tumorBAM);
final File refBAM = new File(refPath);
final SamReader refReader = SamReaderFactory.makeDefault().open(refBAM);
final File vcfFile = new File(vcfPath);
final VCFFileReader vcfReader = new VCFFileReader(vcfFile, false);
final List<String> samples = vcfReader.getFileHeader().getGenotypeSamples();
if (samples.size() != 2) {
System.err.println("could not determine tumor and sample from VCF");
System.exit(1);
return;
}
TSVOutput.PrintHeaders();
final Analysis analysis = analysisBuilder.setRefReader(refReader).setTumorReader(tumorReader).createAnalysis();
final List<QueryInterval> combinedQueryIntervals = Lists.newArrayList();
final Map<String, VariantContext> variantMap = new HashMap<>();
final List<VariantContext> variants = Lists.newArrayList();
for (VariantContext variant : vcfReader) {
variantMap.put(variant.getID(), variant);
final VariantContext mateVariant = variant;
if (variant.hasAttribute("MATEID")) {
variant = variantMap.get(variant.getAttributeAsString("MATEID", ""));
if (variant == null) {
continue;
}
}
final String location = variant.getContig() + ":" + Integer.toString(variant.getStart());
final Location location1 = Location.parseLocationString(location, tumorReader.getFileHeader().getSequenceDictionary());
final Range uncertainty1 = extractCIPOS(variant);
final List<Integer> CIEND = variant.getAttributeAsIntList("CIEND", 0);
Range uncertainty2 = CIEND.size() == 2 ? new Range(CIEND.get(0), CIEND.get(1)) : null;
final boolean IMPRECISE = variant.hasAttribute("IMPRECISE");
HMFVariantType svType;
final Location location2;
switch(variant.getStructuralVariantType()) {
case INS:
svType = HMFVariantType.INS;
location2 = location1.set(variant.getAttributeAsInt("END", 0));
break;
case INV:
if (variant.hasAttribute("INV3")) {
svType = HMFVariantType.INV3;
} else if (variant.hasAttribute("INV5")) {
svType = HMFVariantType.INV5;
} else {
System.err.println(variant.getID() + " : expected either INV3 or INV5 flag");
continue;
}
location2 = location1.add(Math.abs(variant.getAttributeAsInt("SVLEN", 0)));
break;
case DEL:
svType = HMFVariantType.DEL;
location2 = location1.add(Math.abs(variant.getAttributeAsInt("SVLEN", 0)));
break;
case DUP:
svType = HMFVariantType.DUP;
location2 = location1.add(Math.abs(variant.getAttributeAsInt("SVLEN", 0)));
break;
case BND:
// process the breakend string
final String call = variant.getAlternateAllele(0).getDisplayString();
final String[] leftSplit = call.split("\\]");
final String[] rightSplit = call.split("\\[");
if (leftSplit.length >= 2) {
location2 = Location.parseLocationString(leftSplit[1], tumorReader.getFileHeader().getSequenceDictionary());
if (leftSplit[0].length() > 0) {
svType = HMFVariantType.INV3;
uncertainty2 = Range.invert(uncertainty1);
} else {
svType = HMFVariantType.DUP;
uncertainty2 = uncertainty1;
}
} else if (rightSplit.length >= 2) {
location2 = Location.parseLocationString(rightSplit[1], tumorReader.getFileHeader().getSequenceDictionary());
if (rightSplit[0].length() > 0) {
svType = HMFVariantType.DEL;
uncertainty2 = uncertainty1;
} else {
svType = HMFVariantType.INV5;
uncertainty2 = Range.invert(uncertainty1);
}
} else {
System.err.println(variant.getID() + " : could not parse breakpoint");
continue;
}
if (IMPRECISE) {
uncertainty2 = extractCIPOS(mateVariant);
}
break;
default:
System.err.println(variant.getID() + " : UNEXPECTED SVTYPE=" + variant.getStructuralVariantType());
continue;
}
final HMFVariantContext ctx = new HMFVariantContext(variant.getID(), location1, location2, svType, IMPRECISE);
ctx.Filter.addAll(variant.getFilters().stream().filter(s -> !s.startsWith("BPI")).collect(Collectors.toSet()));
ctx.Uncertainty1 = uncertainty1;
ctx.Uncertainty2 = ObjectUtils.firstNonNull(uncertainty2, fixup(uncertainty1, IMPRECISE, svType == HMFVariantType.INV3 || svType == HMFVariantType.INV5));
ctx.HomologySequence = variant.getAttributeAsString("HOMSEQ", "");
if (variant.hasAttribute("LEFT_SVINSSEQ") && variant.hasAttribute("RIGHT_SVINSSEQ")) {
ctx.InsertSequence = variant.getAttributeAsString("LEFT_SVINSSEQ", "") + "..." + variant.getAttributeAsString("RIGHT_SVINSSEQ", "");
} else {
ctx.InsertSequence = variant.getAttributeAsString("SVINSSEQ", "");
}
ctx.BND = variant.getStructuralVariantType() == StructuralVariantType.BND;
switch(ctx.Type) {
case INS:
case DEL:
ctx.OrientationBP1 = 1;
ctx.OrientationBP2 = -1;
break;
case INV3:
ctx.OrientationBP1 = 1;
ctx.OrientationBP2 = 1;
break;
case INV5:
ctx.OrientationBP1 = -1;
ctx.OrientationBP2 = -1;
break;
case DUP:
ctx.OrientationBP1 = -1;
ctx.OrientationBP2 = 1;
break;
}
final StructuralVariantResult result = analysis.processStructuralVariant(ctx);
combinedQueryIntervals.addAll(asList(result.QueryIntervals));
TSVOutput.print(variant, ctx, result);
final BiConsumer<VariantContext, Boolean> vcfUpdater = (v, swap) -> {
final Set<String> filters = v.getCommonInfo().getFiltersMaybeNull();
if (filters != null) {
filters.clear();
}
// we will map BreakpointError to a flag
if (result.Filters.contains(Filter.Filters.BreakpointError.toString())) {
v.getCommonInfo().putAttribute("BPI_AMBIGUOUS", true, true);
} else {
v.getCommonInfo().addFilters(result.Filters);
}
if (result.Filters.isEmpty()) {
final List<Double> af = asList(result.AlleleFrequency.getLeft(), result.AlleleFrequency.getRight());
v.getCommonInfo().putAttribute(AlleleFrequency.VCF_INFO_TAG, swap ? Lists.reverse(af) : af, true);
}
if (result.Breakpoints.getLeft() != null) {
v.getCommonInfo().putAttribute(swap ? "BPI_END" : "BPI_START", result.Breakpoints.getLeft().Position, true);
}
if (result.Breakpoints.getRight() != null) {
v.getCommonInfo().putAttribute(swap ? "BPI_START" : "BPI_END", result.Breakpoints.getRight().Position, true);
}
// remove CIPOS / CIEND when we have an insert sequence
if (!v.hasAttribute("IMPRECISE") && v.hasAttribute("SVINSSEQ")) {
v.getCommonInfo().removeAttribute("CIPOS");
v.getCommonInfo().removeAttribute("CIEND");
}
variants.add(v);
};
vcfUpdater.accept(variant, false);
if (mateVariant != variant) {
vcfUpdater.accept(mateVariant, true);
}
}
// TODO: update START, END with BPI values and save Manta values in new attributes
final String vcfOutputPath = cmd.getOptionValue(VCF_OUT);
if (vcfOutputPath != null) {
final VCFHeader header = vcfReader.getFileHeader();
header.addMetaDataLine(new VCFInfoHeaderLine("BPI_START", 1, VCFHeaderLineType.Integer, "BPI adjusted breakend location"));
header.addMetaDataLine(new VCFInfoHeaderLine("BPI_END", 1, VCFHeaderLineType.Integer, "BPI adjusted breakend location"));
header.addMetaDataLine(new VCFInfoHeaderLine("BPI_AMBIGUOUS", 0, VCFHeaderLineType.Flag, "BPI could not determine the breakpoints, inspect manually"));
header.addMetaDataLine(new VCFHeaderLine("bpiVersion", BreakPointInspectorApplication.class.getPackage().getImplementationVersion()));
Filter.UpdateVCFHeader(header);
AlleleFrequency.UpdateVCFHeader(header);
// setup VCF
final VariantContextWriter writer = new VariantContextWriterBuilder().setReferenceDictionary(header.getSequenceDictionary()).setOutputFile(vcfOutputPath).build();
writer.writeHeader(header);
// write variants
variants.sort(new VariantContextComparator(header.getSequenceDictionary()));
variants.forEach(writer::add);
writer.close();
}
final QueryInterval[] optimizedIntervals = QueryInterval.optimizeIntervals(combinedQueryIntervals.toArray(new QueryInterval[combinedQueryIntervals.size()]));
if (tumorSlicePath != null) {
writeToSlice(tumorSlicePath, tumorReader, optimizedIntervals);
}
if (refSlicePath != null) {
writeToSlice(refSlicePath, refReader, optimizedIntervals);
}
refReader.close();
tumorReader.close();
} catch (ParseException e) {
printHelpAndExit(options);
System.exit(1);
}
}
use of htsjdk.variant.vcf.VCFInfoHeaderLine in project ASCIIGenome by dariober.
the class GenotypeMatrix method formatJsScriptWithInfo.
/**
* Replace INFO tags in the jsScript with the actual values found in the variant context object
*/
@SuppressWarnings("unchecked")
private String formatJsScriptWithInfo(String jsScript, VariantContext ctx, VCFHeader vcfHeader) {
Iterator<VCFInfoHeaderLine> iter = vcfHeader.getInfoHeaderLines().iterator();
while (iter.hasNext()) {
// We iterate through each key in the header and see if there is a match in JS script.
VCFInfoHeaderLine headerLine = iter.next();
String key = headerLine.getID();
if (jsScript.contains('{' + key + '}') || jsScript.contains("{INFO/" + key + '}')) {
Object unkValue = ctx.getAttributes().get(key);
String fmtValue;
try {
List<Object> unknList = (List<Object>) unkValue;
StringBuilder listParam = new StringBuilder();
listParam.append("[");
for (Object unk : unknList) {
listParam.append(this.formatObjectForJS(key, unk, vcfHeader.getInfoHeaderLine(key).getType()) + ", ");
}
fmtValue = listParam.append("]").toString();
} catch (ClassCastException e) {
fmtValue = this.formatObjectForJS(key, unkValue, vcfHeader.getInfoHeaderLine(key).getType());
} catch (NullPointerException e) {
if (headerLine.getType().equals(VCFHeaderLineType.Flag)) {
// A flag type returns null if the flag is missing, which is odd. Shouldn't it return false?
fmtValue = "false";
} else {
fmtValue = "null";
}
}
jsScript = jsScript.replace("{INFO/" + key + '}', fmtValue);
jsScript = jsScript.replace('{' + key + '}', fmtValue);
}
}
return jsScript;
}
use of htsjdk.variant.vcf.VCFInfoHeaderLine in project jvarkit by lindenb.
the class VcfBiomart method doVcfToVcf.
@Override
protected int doVcfToVcf(final String inputName, final VcfIterator iter, final VariantContextWriter out) {
HttpGet httpGet = null;
final Pattern tab = Pattern.compile("[\t]");
try {
final TransformerFactory factory = TransformerFactory.newInstance();
final Transformer transformer = factory.newTransformer();
// transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
final VCFHeader header = iter.getHeader();
StringBuilder desc = new StringBuilder("Biomart query. Format: ");
desc.append(this.attributes.stream().map(S -> this.printLabels ? S + "|" + S : S).collect(Collectors.joining("|")));
header.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "CmdLine", String.valueOf(getProgramCommandLine())));
header.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "Version", String.valueOf(getVersion())));
header.addMetaDataLine(new VCFInfoHeaderLine(this.TAG, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, desc.toString()));
out.writeHeader(header);
final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header).logger(LOG);
while (iter.hasNext()) {
final VariantContext ctx = progress.watch(iter.next());
final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
vcb.rmAttribute(this.TAG);
this.filterColumnContig.set(ctx.getContig());
this.filterColumnStart.set(String.valueOf(ctx.getStart()));
this.filterColumnEnd.set(String.valueOf(ctx.getEnd()));
final StringWriter domToStr = new StringWriter();
transformer.transform(new DOMSource(this.domQuery), new StreamResult(domToStr));
final URIBuilder builder = new URIBuilder(this.serviceUrl);
builder.addParameter("query", domToStr.toString());
// System.err.println("\nwget -O - 'http://grch37.ensembl.org/biomart/martservice?query="+escapedQuery+"'\n");
// escapedQuery = URLEncoder.encode(escapedQuery,"UTF-8");
httpGet = new HttpGet(builder.build());
final CloseableHttpResponse httpResponse = httpClient.execute(httpGet);
int responseCode = httpResponse.getStatusLine().getStatusCode();
if (responseCode != 200) {
throw new RuntimeIOException("Response code was not 200. Detected response was " + responseCode);
}
InputStream response = httpResponse.getEntity().getContent();
if (this.teeResponse) {
response = new TeeInputStream(response, stderr(), false);
}
final BufferedReader br = new BufferedReader(new InputStreamReader(response));
final Set<String> infoAtts = br.lines().filter(L -> !StringUtil.isBlank(L)).filter(L -> !L.equals("[success]")).map(L -> tab.split(L)).map(T -> {
final StringBuilder sb = new StringBuilder();
for (int i = 0; i < this.attributes.size(); i++) {
if (i > 0)
sb.append("|");
if (this.printLabels)
sb.append(escapeInfo(this.attributes.get(i))).append("|");
sb.append(i < T.length ? escapeInfo(T[i]) : "");
}
return sb.toString();
}).collect(Collectors.toCollection(LinkedHashSet::new));
CloserUtil.close(br);
CloserUtil.close(response);
CloserUtil.close(httpResponse);
if (!infoAtts.isEmpty()) {
vcb.attribute(this.TAG, new ArrayList<>(infoAtts));
}
out.add(vcb.make());
}
progress.finish();
return 0;
} catch (final Exception err) {
LOG.error(err);
throw new RuntimeIOException(err);
}
}
use of htsjdk.variant.vcf.VCFInfoHeaderLine in project jvarkit by lindenb.
the class VcfEnsemblVepRest method doVcfToVcf.
@Override
protected int doVcfToVcf(final String inputName, final VcfIterator vcfIn, final VariantContextWriter out) {
try {
final VCFHeader header = vcfIn.getHeader();
final List<VariantContext> buffer = new ArrayList<>(this.batchSize + 1);
final VCFHeader h2 = new VCFHeader(header);
addMetaData(h2);
for (final String tag : this.outputTags) {
h2.addMetaDataLine(new VCFInfoHeaderLine(tag, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "__CUSTOM_DESCRIPTION__" + tag));
}
out.writeHeader(h2);
final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header).logger(LOG);
for (; ; ) {
VariantContext ctx = null;
if (vcfIn.hasNext()) {
buffer.add((ctx = progress.watch(vcfIn.next())));
}
if (ctx == null || buffer.size() >= this.batchSize) {
if (!buffer.isEmpty()) {
final Map<String, EnsVepPrediction> input2pred = vep(buffer);
for (final VariantContext ctx2 : buffer) {
final String inputStr = createInputContext(ctx2);
final EnsVepPrediction pred = input2pred.get(inputStr);
final VariantContextBuilder vcb = new VariantContextBuilder(ctx2);
for (final String tag : this.outputTags) {
vcb.rmAttribute(tag);
}
if (pred == null) {
LOG.info("No Annotation found for " + inputStr);
out.add(vcb.make());
continue;
}
for (final String tag : this.outputTags) {
final Set<String> info = pred.tag2infoLines.get(tag);
if (info == null || info.isEmpty())
continue;
vcb.attribute(tag, new ArrayList<>(info));
}
out.add(vcb.make());
}
// end of loop over variants
}
// end of if buffer is not empty
if (ctx == null)
break;
buffer.clear();
}
}
progress.finish();
return RETURN_OK;
} catch (final Exception err) {
LOG.error(err);
return -1;
}
}
use of htsjdk.variant.vcf.VCFInfoHeaderLine in project jvarkit by lindenb.
the class VcfEnsemblReg method annotate.
private void annotate(Track track, File inf, File outf) throws IOException {
boolean contained = false;
LOG.info("Processing " + track.id + " (" + track.shortLabel + ") " + track.url);
VcfIterator in = VCFUtils.createVcfIteratorFromFile(inf);
VCFHeader header = in.getHeader();
VCFInfoHeaderLine info = null;
SeekableStream sstream = SeekableStreamFactory.getInstance().getStreamFor(track.url);
BBFileReader bigFile = new BBFileReader(track.url.toString(), new SeekableStreamAdaptor(sstream));
VariantContextWriter w1 = VCFUtils.createVariantContextWriter(outf);
if (bigFile.isBigWigFile()) {
info = new VCFInfoHeaderLine(track.id, 1, VCFHeaderLineType.Float, String.valueOf(track.longLabel) + " " + track.url);
} else {
info = new VCFInfoHeaderLine(track.id, 1, VCFHeaderLineType.String, String.valueOf(track.longLabel) + " " + track.url);
}
header.addMetaDataLine(info);
w1.writeHeader(in.getHeader());
while (in.hasNext()) {
VariantContext ctx = in.next();
String chrom = ctx.getContig();
if (!chrom.startsWith("chr"))
chrom = "chr" + chrom;
if (!chrom.matches("(chrX|chrY|chr[0-9]|chr1[0-9]|chr2[12])")) {
w1.add(ctx);
} else if (bigFile.isBigWigFile()) {
BigWigIterator iter = bigFile.getBigWigIterator(chrom, ctx.getStart() - 1, chrom, ctx.getStart(), contained);
Float wigValue = null;
while (iter != null && iter.hasNext() && wigValue == null) {
WigItem item = iter.next();
wigValue = item.getWigValue();
}
if (wigValue == null) {
w1.add(ctx);
continue;
}
VariantContextBuilder vcb = new VariantContextBuilder(ctx);
vcb.attribute(track.id, wigValue);
w1.add(vcb.make());
} else {
BigBedIterator iter = bigFile.getBigBedIterator(chrom, ctx.getStart() - 1, chrom, ctx.getStart(), contained);
Set<String> bedValues = new HashSet<String>();
while (iter != null && iter.hasNext()) {
BedFeature item = iter.next();
String[] rest = item.getRestOfFields();
if (rest == null || rest.length != 6) {
System.err.println(track.id + " " + Arrays.toString(item.getRestOfFields()));
continue;
}
String color = null;
if (track.parent != null) {
if (track.parent.startsWith("Segway_17SegmentationSummaries")) {
color = segway_17SegmentationSummaries(rest[5]);
} else if (track.parent.startsWith("ProjectedSegments")) {
color = projectedSegments(rest[5]);
} else if (track.parent.startsWith("RegBuildOverview")) {
color = regBuildOverview(rest[5]);
} else if (track.parent.startsWith("Segway_17CellSegments")) {
color = segway_17CellSegments(rest[5]);
} else {
System.err.println("Unknown parent:" + track.parent);
}
}
if (color == null)
continue;
bedValues.add(rest[0] + "|" + color);
}
if (bedValues.isEmpty()) {
w1.add(ctx);
continue;
}
StringBuilder sb = new StringBuilder();
for (String s : bedValues) {
if (sb.length() != 0)
sb.append(",");
sb.append(s);
}
VariantContextBuilder vcb = new VariantContextBuilder(ctx);
vcb.attribute(track.id, sb.toString());
w1.add(vcb.make());
}
}
sstream.close();
in.close();
w1.close();
}
Aggregations