use of htsjdk.variant.vcf.VCFHeader in project jvarkit by lindenb.
the class TestUtils method createRandomPedigreeFromFile.
protected File createRandomPedigreeFromFile(final String vcfFile) throws IOException {
class TmpSample {
String fam = "F1";
String name;
String father = "0";
String mother = "0";
int sex = 0;
int status = 0;
}
final File vcfIn = new File(vcfFile);
final VCFFileReader r = new VCFFileReader(vcfIn, false);
final VCFHeader header = r.getFileHeader();
r.close();
final List<String> samples = new ArrayList<>(header.getSampleNamesInOrder());
final List<TmpSample> ped = new ArrayList<>();
if (samples.isEmpty())
return null;
while (!samples.isEmpty()) {
final TmpSample indi = new TmpSample();
indi.name = samples.remove(0);
indi.fam = ped.stream().filter(P -> P.father.equals(indi.name) || P.mother.equals(indi.name)).map(P -> P.fam).findFirst().orElse("F" + this.random.nextInt());
if (ped.stream().anyMatch(P -> P.father.equals(indi.name))) {
indi.sex = 1;
} else if (ped.stream().anyMatch(P -> P.mother.equals(indi.name))) {
indi.sex = 2;
} else {
indi.sex = this.random.nextBoolean() ? 1 : 2;
}
if (random.nextBoolean()) {
final List<String> remain = new ArrayList<>(samples);
if (!remain.isEmpty()) {
indi.father = remain.remove(0);
} else {
indi.father = "0";
}
if (!remain.isEmpty()) {
indi.mother = remain.remove(0);
} else {
indi.mother = "0";
}
} else {
indi.father = "0";
indi.mother = "0";
}
indi.status = this.random.nextInt(2);
ped.add(indi);
}
final File pedFile = createTmpFile(".ped");
final PrintWriter pw = new PrintWriter(pedFile);
for (TmpSample ts : ped) {
pw.print(ts.fam);
pw.print('\t');
pw.print(ts.name);
pw.print('\t');
pw.print(ts.father);
pw.print('\t');
pw.print(ts.mother);
pw.print('\t');
pw.print(ts.sex);
pw.print('\t');
pw.print(ts.status);
pw.println();
}
pw.flush();
pw.close();
return pedFile;
}
use of htsjdk.variant.vcf.VCFHeader in project jvarkit by lindenb.
the class VcfBiomart method doVcfToVcf.
@Override
protected int doVcfToVcf(final String inputName, final VcfIterator iter, final VariantContextWriter out) {
HttpGet httpGet = null;
final Pattern tab = Pattern.compile("[\t]");
try {
final TransformerFactory factory = TransformerFactory.newInstance();
final Transformer transformer = factory.newTransformer();
// transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
final VCFHeader header = iter.getHeader();
StringBuilder desc = new StringBuilder("Biomart query. Format: ");
desc.append(this.attributes.stream().map(S -> this.printLabels ? S + "|" + S : S).collect(Collectors.joining("|")));
header.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "CmdLine", String.valueOf(getProgramCommandLine())));
header.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "Version", String.valueOf(getVersion())));
header.addMetaDataLine(new VCFInfoHeaderLine(this.TAG, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, desc.toString()));
out.writeHeader(header);
final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header).logger(LOG);
while (iter.hasNext()) {
final VariantContext ctx = progress.watch(iter.next());
final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
vcb.rmAttribute(this.TAG);
this.filterColumnContig.set(ctx.getContig());
this.filterColumnStart.set(String.valueOf(ctx.getStart()));
this.filterColumnEnd.set(String.valueOf(ctx.getEnd()));
final StringWriter domToStr = new StringWriter();
transformer.transform(new DOMSource(this.domQuery), new StreamResult(domToStr));
final URIBuilder builder = new URIBuilder(this.serviceUrl);
builder.addParameter("query", domToStr.toString());
// System.err.println("\nwget -O - 'http://grch37.ensembl.org/biomart/martservice?query="+escapedQuery+"'\n");
// escapedQuery = URLEncoder.encode(escapedQuery,"UTF-8");
httpGet = new HttpGet(builder.build());
final CloseableHttpResponse httpResponse = httpClient.execute(httpGet);
int responseCode = httpResponse.getStatusLine().getStatusCode();
if (responseCode != 200) {
throw new RuntimeIOException("Response code was not 200. Detected response was " + responseCode);
}
InputStream response = httpResponse.getEntity().getContent();
if (this.teeResponse) {
response = new TeeInputStream(response, stderr(), false);
}
final BufferedReader br = new BufferedReader(new InputStreamReader(response));
final Set<String> infoAtts = br.lines().filter(L -> !StringUtil.isBlank(L)).filter(L -> !L.equals("[success]")).map(L -> tab.split(L)).map(T -> {
final StringBuilder sb = new StringBuilder();
for (int i = 0; i < this.attributes.size(); i++) {
if (i > 0)
sb.append("|");
if (this.printLabels)
sb.append(escapeInfo(this.attributes.get(i))).append("|");
sb.append(i < T.length ? escapeInfo(T[i]) : "");
}
return sb.toString();
}).collect(Collectors.toCollection(LinkedHashSet::new));
CloserUtil.close(br);
CloserUtil.close(response);
CloserUtil.close(httpResponse);
if (!infoAtts.isEmpty()) {
vcb.attribute(this.TAG, new ArrayList<>(infoAtts));
}
out.add(vcb.make());
}
progress.finish();
return 0;
} catch (final Exception err) {
LOG.error(err);
throw new RuntimeIOException(err);
}
}
use of htsjdk.variant.vcf.VCFHeader in project jvarkit by lindenb.
the class VcfEnsemblVepRest method doVcfToVcf.
@Override
protected int doVcfToVcf(final String inputName, final VcfIterator vcfIn, final VariantContextWriter out) {
try {
final VCFHeader header = vcfIn.getHeader();
final List<VariantContext> buffer = new ArrayList<>(this.batchSize + 1);
final VCFHeader h2 = new VCFHeader(header);
addMetaData(h2);
for (final String tag : this.outputTags) {
h2.addMetaDataLine(new VCFInfoHeaderLine(tag, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "__CUSTOM_DESCRIPTION__" + tag));
}
out.writeHeader(h2);
final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header).logger(LOG);
for (; ; ) {
VariantContext ctx = null;
if (vcfIn.hasNext()) {
buffer.add((ctx = progress.watch(vcfIn.next())));
}
if (ctx == null || buffer.size() >= this.batchSize) {
if (!buffer.isEmpty()) {
final Map<String, EnsVepPrediction> input2pred = vep(buffer);
for (final VariantContext ctx2 : buffer) {
final String inputStr = createInputContext(ctx2);
final EnsVepPrediction pred = input2pred.get(inputStr);
final VariantContextBuilder vcb = new VariantContextBuilder(ctx2);
for (final String tag : this.outputTags) {
vcb.rmAttribute(tag);
}
if (pred == null) {
LOG.info("No Annotation found for " + inputStr);
out.add(vcb.make());
continue;
}
for (final String tag : this.outputTags) {
final Set<String> info = pred.tag2infoLines.get(tag);
if (info == null || info.isEmpty())
continue;
vcb.attribute(tag, new ArrayList<>(info));
}
out.add(vcb.make());
}
// end of loop over variants
}
// end of if buffer is not empty
if (ctx == null)
break;
buffer.clear();
}
}
progress.finish();
return RETURN_OK;
} catch (final Exception err) {
LOG.error(err);
return -1;
}
}
use of htsjdk.variant.vcf.VCFHeader in project jvarkit by lindenb.
the class VcfEnsemblReg method annotate.
private void annotate(Track track, File inf, File outf) throws IOException {
boolean contained = false;
LOG.info("Processing " + track.id + " (" + track.shortLabel + ") " + track.url);
VcfIterator in = VCFUtils.createVcfIteratorFromFile(inf);
VCFHeader header = in.getHeader();
VCFInfoHeaderLine info = null;
SeekableStream sstream = SeekableStreamFactory.getInstance().getStreamFor(track.url);
BBFileReader bigFile = new BBFileReader(track.url.toString(), new SeekableStreamAdaptor(sstream));
VariantContextWriter w1 = VCFUtils.createVariantContextWriter(outf);
if (bigFile.isBigWigFile()) {
info = new VCFInfoHeaderLine(track.id, 1, VCFHeaderLineType.Float, String.valueOf(track.longLabel) + " " + track.url);
} else {
info = new VCFInfoHeaderLine(track.id, 1, VCFHeaderLineType.String, String.valueOf(track.longLabel) + " " + track.url);
}
header.addMetaDataLine(info);
w1.writeHeader(in.getHeader());
while (in.hasNext()) {
VariantContext ctx = in.next();
String chrom = ctx.getContig();
if (!chrom.startsWith("chr"))
chrom = "chr" + chrom;
if (!chrom.matches("(chrX|chrY|chr[0-9]|chr1[0-9]|chr2[12])")) {
w1.add(ctx);
} else if (bigFile.isBigWigFile()) {
BigWigIterator iter = bigFile.getBigWigIterator(chrom, ctx.getStart() - 1, chrom, ctx.getStart(), contained);
Float wigValue = null;
while (iter != null && iter.hasNext() && wigValue == null) {
WigItem item = iter.next();
wigValue = item.getWigValue();
}
if (wigValue == null) {
w1.add(ctx);
continue;
}
VariantContextBuilder vcb = new VariantContextBuilder(ctx);
vcb.attribute(track.id, wigValue);
w1.add(vcb.make());
} else {
BigBedIterator iter = bigFile.getBigBedIterator(chrom, ctx.getStart() - 1, chrom, ctx.getStart(), contained);
Set<String> bedValues = new HashSet<String>();
while (iter != null && iter.hasNext()) {
BedFeature item = iter.next();
String[] rest = item.getRestOfFields();
if (rest == null || rest.length != 6) {
System.err.println(track.id + " " + Arrays.toString(item.getRestOfFields()));
continue;
}
String color = null;
if (track.parent != null) {
if (track.parent.startsWith("Segway_17SegmentationSummaries")) {
color = segway_17SegmentationSummaries(rest[5]);
} else if (track.parent.startsWith("ProjectedSegments")) {
color = projectedSegments(rest[5]);
} else if (track.parent.startsWith("RegBuildOverview")) {
color = regBuildOverview(rest[5]);
} else if (track.parent.startsWith("Segway_17CellSegments")) {
color = segway_17CellSegments(rest[5]);
} else {
System.err.println("Unknown parent:" + track.parent);
}
}
if (color == null)
continue;
bedValues.add(rest[0] + "|" + color);
}
if (bedValues.isEmpty()) {
w1.add(ctx);
continue;
}
StringBuilder sb = new StringBuilder();
for (String s : bedValues) {
if (sb.length() != 0)
sb.append(",");
sb.append(s);
}
VariantContextBuilder vcb = new VariantContextBuilder(ctx);
vcb.attribute(track.id, sb.toString());
w1.add(vcb.make());
}
}
sstream.close();
in.close();
w1.close();
}
use of htsjdk.variant.vcf.VCFHeader in project jvarkit by lindenb.
the class VcfEpistatis01 method doWork.
@Override
public int doWork(final List<String> args) {
if (this.number_of_jobs < 1) {
LOG.error("bad number of jobs");
return -1;
}
try {
final int variantsCount;
final List<VariantContext> inMemoryVariants;
final File vcfFile = new File(oneAndOnlyOneFile(args));
final File tmpIndexFile;
if (vcfFile.equals(this.outputFile)) {
LOG.error("input == output");
return -1;
}
VCFFileReader vcfFileReader = new VCFFileReader(vcfFile, false);
final VCFHeader header = vcfFileReader.getFileHeader();
final Pedigree pedigree;
if (this.pedigreeFile != null) {
pedigree = new Pedigree.Parser().parse(this.pedigreeFile);
} else {
pedigree = new Pedigree.Parser().parse(header);
}
pedigree.verifyPersonsHaveUniqueNames();
final Map<String, Integer> sample2index = header.getSampleNameToOffset();
final int[] caseIndexes = pedigree.getAffected().stream().filter(P -> sample2index.containsKey(P.getId())).mapToInt(P -> sample2index.get(P.getId())).sorted().toArray();
final int[] ctrlIndexes = new ArrayList<>(pedigree.getUnaffected()).stream().filter(P -> sample2index.containsKey(P.getId())).mapToInt(P -> sample2index.get(P.getId())).sorted().toArray();
if (caseIndexes.length == 0 || ctrlIndexes.length == 0) {
LOG.error("empty ped or no case/ctrl");
vcfFileReader.close();
return -1;
}
if (this.load_variants_in_memory) {
LOG.info("loading variants in memory");
tmpIndexFile = null;
final CloseableIterator<VariantContext> iter2 = vcfFileReader.iterator();
inMemoryVariants = Collections.unmodifiableList(iter2.stream().filter(this.variantFilter).filter(// should fix https://github.com/samtools/htsjdk/issues/1026 ?
V -> V.getGenotypes().stream().filter(G -> G.isCalled()).count() > 0).collect(Collectors.toList()));
variantsCount = inMemoryVariants.size();
iter2.close();
} else {
tmpIndexFile = File.createTempFile("epistatsis", VcfOffsetsIndexFactory.INDEX_EXTENSION);
tmpIndexFile.deleteOnExit();
new VcfOffsetsIndexFactory().setLogger(LOG).setPredicate(variantFilter).indexVcfFile(vcfFile, tmpIndexFile);
final VcfList tmpList = VcfList.fromFile(vcfFile, tmpIndexFile);
variantsCount = tmpList.size();
tmpList.close();
inMemoryVariants = null;
}
vcfFileReader.close();
LOG.info("Number of variants: " + variantsCount);
Result bestResult = null;
int x = this.start_index_at;
while (x + 1 < variantsCount) {
final List<Runner> runners = new Vector<>(this.number_of_jobs);
while (x + 1 < variantsCount && runners.size() < this.number_of_jobs) {
LOG.info("starting " + x + "/" + variantsCount);
runners.add(new Runner(inMemoryVariants == null ? VcfList.fromFile(vcfFile, tmpIndexFile) : new Vector<>(inMemoryVariants), x, caseIndexes, ctrlIndexes));
++x;
}
final ExecutorService execSvc;
if (this.number_of_jobs == 1) {
execSvc = null;
} else {
execSvc = Executors.newFixedThreadPool(this.number_of_jobs);
;
}
if (this.number_of_jobs == 1) {
runners.get(0).call();
} else {
execSvc.invokeAll(runners);
}
if (execSvc != null) {
execSvc.shutdown();
execSvc.awaitTermination(10000L, TimeUnit.DAYS);
execSvc.shutdownNow();
}
runners.stream().mapToLong(R -> R.duration).min().ifPresent(D -> {
LOG.info("That took " + (D / 1000f) + " seconds.");
});
for (final Runner r : runners) {
final Result rez = r.result;
if (rez == null)
continue;
if (bestResult == null || bestResult.score < rez.score) {
bestResult = rez;
if (this.output_score) {
final PrintWriter pw = super.openFileOrStdoutAsPrintWriter(this.outputFile);
pw.println(bestResult.score + "\t" + bestResult.toString());
pw.flush();
pw.close();
} else {
final VariantContextWriter w = openVariantContextWriter(this.outputFile);
final VCFHeader header2 = new VCFHeader(header);
header2.addMetaDataLine(new VCFHeaderLine(VcfEpistatis01.class.getName(), bestResult.toString()));
w.writeHeader(header2);
w.add(bestResult.ctx1);
w.add(bestResult.ctx2);
w.close();
}
}
}
LOG.info("best: " + bestResult);
}
if (tmpIndexFile != null)
tmpIndexFile.delete();
return 0;
} catch (final Exception err) {
err.printStackTrace();
LOG.error(err);
return -1;
} finally {
}
}
Aggregations