Use of htsjdk.variant.vcf.AbstractVCFCodec in project jvarkit by lindenb.
Class VcfRegistryCGI, method doWork.
private void doWork(XMLStreamWriter w, final GroupFile gf) throws XMLStreamException {
    Position pos = parsePosition();
    if (pos == null)
        return;
    w.writeStartElement("div");
    w.writeStartElement("h2");
    w.writeCharacters(pos.chrom + ":" + pos.pos);
    w.writeEndElement();
    w.writeStartElement("table");
    w.writeStartElement("thead");
    w.writeStartElement("tr");
    for (String header : new String[] { "CHROM", "POS", "ID", "REF", "QUAL", "Sample", "Alleles", "DP", "GQ", "File" }) {
        w.writeStartElement("th");
        w.writeCharacters(header);
        // th
        w.writeEndElement();
    }
    // tr
    w.writeEndElement();
    // thead
    w.writeEndElement();
    w.writeStartElement("tbody");
    Set<String> samplesWithGenotypes = new HashSet<String>();
    Set<String> allSamples = new HashSet<String>();
    for (VcfFile f : getVcfFiles(gf)) {
        TabixReader tabixReader = null;
        TabixReader.Iterator iter = null;
        BlockCompressedInputStream bgzin = null;
        VCFHeader header = null;
        AbstractVCFCodec vcfCodec = VCFUtils.createDefaultVCFCodec();
        LineIterator lineIterator = null;
        // pass 0 reads the VCF header, pass 1 runs the tabix query over the requested position
        for (int i = 0; i < 2; i++) {
            try {
                if (i == 0) {
                    bgzin = new BlockCompressedInputStream(f.file);
                    lineIterator = new LineIteratorImpl(new SynchronousLineReader(bgzin));
                    header = (VCFHeader) vcfCodec.readActualHeader(lineIterator);
                    allSamples.addAll(header.getGenotypeSamples());
                } else {
                    tabixReader = new TabixReader(f.file.getPath());
                    String line;
                    int[] x = tabixReader.parseReg(pos.chrom + ":" + pos.pos + "-" + pos.pos);
                    if (x != null && x.length > 2 && x[0] != -1) {
                        iter = tabixReader.query(x[0], x[1], x[2]);
                    }
                    while (iter != null && (line = iter.next()) != null) {
                        VariantContext var = vcfCodec.decode(line);
                        for (String sample : header.getSampleNamesInOrder()) {
                            final Genotype genotype = var.getGenotype(sample);
                            if (genotype == null || !genotype.isCalled())
                                continue;
                            if (!genotype.isAvailable())
                                continue;
                            samplesWithGenotypes.add(sample);
                            w.writeStartElement("tr");
                            w.writeStartElement("td");
                            w.writeCharacters(var.getContig());
                            w.writeEndElement();
                            w.writeStartElement("td");
                            w.writeCharacters(String.valueOf(var.getStart()));
                            w.writeEndElement();
                            if (var.hasID()) {
                                w.writeStartElement("td");
                                if (var.getID().matches("rs[0-9]+")) {
                                    w.writeStartElement("a");
                                    w.writeAttribute("href", "http://www.ncbi.nlm.nih.gov/snp/" + var.getID().substring(2));
                                    w.writeCharacters(var.getID());
                                    // a
                                    w.writeEndElement();
                                } else {
                                    w.writeCharacters(var.getID());
                                }
                                // td
                                w.writeEndElement();
                            } else {
                                w.writeEmptyElement("td");
                            }
                            if (var.getReference() != null) {
                                w.writeStartElement("td");
                                w.writeCharacters(var.getReference().getBaseString());
                                w.writeEndElement();
                            } else {
                                w.writeEmptyElement("td");
                            }
                            if (var.hasLog10PError()) {
                                w.writeStartElement("td");
                                w.writeCharacters(String.valueOf((int) var.getPhredScaledQual()));
                                w.writeEndElement();
                            } else {
                                w.writeEmptyElement("td");
                            }
                            w.writeStartElement("td");
                            w.writeCharacters(sample);
                            w.writeEndElement();
                            List<Allele> alleles = genotype.getAlleles();
                            w.writeStartElement("td");
                            w.writeStartElement("span");
                            if (genotype.isHomRef()) {
                                w.writeAttribute("style", "color:green;");
                            } else if (genotype.isHomVar()) {
                                w.writeAttribute("style", "color:red;");
                            } else if (genotype.isHet()) {
                                w.writeAttribute("style", "color:blue;");
                            }
                            for (int j = 0; j < alleles.size(); ++j) {
                                if (j > 0)
                                    w.writeCharacters(genotype.isPhased() ? "|" : "/");
                                w.writeCharacters(alleles.get(j).getBaseString());
                            }
                            // span
                            w.writeEndElement();
                            w.writeEndElement();
                            if (genotype.hasDP()) {
                                w.writeStartElement("td");
                                w.writeCharacters(String.valueOf(genotype.getDP()));
                                w.writeEndElement();
                            } else {
                                w.writeEmptyElement("td");
                            }
                            if (genotype.hasGQ()) {
                                w.writeStartElement("td");
                                w.writeCharacters(String.valueOf(genotype.getGQ()));
                                w.writeEndElement();
                            } else {
                                w.writeEmptyElement("td");
                            }
                            w.writeStartElement("td");
                            w.writeCharacters(f.file.getName());
                            w.writeEndElement();
                            // tr
                            w.writeEndElement();
                            w.flush();
                        }
                    }
                }
            } catch (Exception err) {
                w.writeComment("BOUM " + err);
                header = null;
                lastException = err;
            } finally {
                CloserUtil.close(lineIterator);
                CloserUtil.close(bgzin);
                CloserUtil.close(tabixReader);
                CloserUtil.close(iter);
            }
            // if the header could not be read, skip the tabix query for this file
            if (i == 0 && header == null)
                break;
        }
        w.flush();
    }
    // tbody
    w.writeEndElement();
    // table
    w.writeEndElement();
    allSamples.removeAll(samplesWithGenotypes);
    if (!allSamples.isEmpty()) {
        w.writeStartElement("h3");
        w.writeCharacters("Samples not found");
        w.writeEndElement();
        w.writeStartElement("ol");
        for (String sample : new TreeSet<String>(allSamples)) {
            w.writeStartElement("li");
            w.writeCharacters(sample);
            w.writeEndElement();
        }
        w.writeEndElement();
    }
    writeHTMLException(w, this.lastException);
    // div
    w.writeEndElement();
}
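The decoding pattern above can be separated from the HTML generation: read the header once with readActualHeader so the codec learns the file's version and sample columns, then reuse the same codec instance to decode each line returned by the tabix query. The following is a minimal sketch of that pattern, not code from jvarkit; the helper name queryOneSite is hypothetical, and it assumes a bgzip-compressed, tabix-indexed VCF and the same htsjdk imports used by the method above.

// Minimal sketch (hypothetical helper): decode the variants overlapping
// chrom:pos from one tabix-indexed VCF, reusing the codec that parsed the header.
static List<VariantContext> queryOneSite(File vcfFile, String chrom, int pos) throws IOException {
    final AbstractVCFCodec codec = VCFUtils.createDefaultVCFCodec();
    final List<VariantContext> hits = new ArrayList<VariantContext>();
    // pass 1: let the codec read the header (decode() needs that state)
    BlockCompressedInputStream bgzin = new BlockCompressedInputStream(vcfFile);
    LineIterator li = new LineIteratorImpl(new SynchronousLineReader(bgzin));
    codec.readActualHeader(li);
    CloserUtil.close(li);
    CloserUtil.close(bgzin);
    // pass 2: tabix query, decoding each returned line with the same codec
    TabixReader tabix = new TabixReader(vcfFile.getPath());
    try {
        int[] reg = tabix.parseReg(chrom + ":" + pos + "-" + pos);
        TabixReader.Iterator iter = (reg != null && reg.length > 2 && reg[0] != -1)
            ? tabix.query(reg[0], reg[1], reg[2])
            : null;
        String line;
        while (iter != null && (line = iter.next()) != null) {
            hits.add(codec.decode(line));
        }
    } finally {
        CloserUtil.close(tabix);
    }
    return hits;
}

Note that readActualHeader must run before decode(): AbstractVCFCodec keeps the header state (format version, sample names) it needs to parse the genotype columns.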
Use of htsjdk.variant.vcf.AbstractVCFCodec in project jvarkit by lindenb.
Class VCFUtils, method findCodecFromLines.
/**
 * Find a codec from the header lines; if none is found, return the default codec.
 */
public static AbstractVCFCodec findCodecFromLines(final List<String> list) {
    for (final String line : list) {
        String formatString = line;
        if (formatString.startsWith("##")) {
            formatString = formatString.substring(2);
        }
        int eq = formatString.indexOf('=');
        if (eq == -1)
            continue;
        if (!VCFHeaderVersion.isFormatString(formatString.substring(0, eq)))
            continue;
        VCFHeaderVersion version = VCFHeaderVersion.getHeaderVersion(line);
        if (version == null)
            continue;
        switch (version) {
            case VCF3_2:
            case VCF3_3:
                return new VCF3Codec();
            case VCF4_0:
            case VCF4_1:
            case VCF4_2:
                return new VCFCodec();
        }
    }
    return createDefaultVCFCodec();
}
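A minimal illustrative call, with made-up header lines: the method scans for a valid ##fileformat line and maps its version to a codec, so a VCFv4.1 header yields a VCFCodec.

// Hypothetical usage: version VCFv4.1 selects the VCF 4.x codec.
List<String> headerLines = Arrays.asList(
    "##fileformat=VCFv4.1",
    "##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Depth\">",
    "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO");
AbstractVCFCodec codec = VCFUtils.findCodecFromLines(headerLines); // instance of VCFCodec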
Use of htsjdk.variant.vcf.AbstractVCFCodec in project jvarkit by lindenb.
Class FixVCF, method doWork.
private int doWork(String filenameIn, InputStream vcfStream, VariantContextWriter w) throws IOException {
    final AbstractVCFCodec vcfCodec = VCFUtils.createDefaultVCFCodec();
    LineIterator r = new LineIteratorImpl(new SynchronousLineReader(vcfStream));
    final VCFHeader header = (VCFHeader) vcfCodec.readActualHeader(r);
    // sample names have been changed and reordered by the picard API:
    // re-create the original column order from the name->offset map
    List<String> sampleNamesInSameOrder = new ArrayList<String>(header.getSampleNamesInOrder().size());
    for (int col = 0; col < header.getSampleNamesInOrder().size(); ++col) {
        for (String sample : header.getSampleNameToOffset().keySet()) {
            if (header.getSampleNameToOffset().get(sample) == col) {
                sampleNamesInSameOrder.add(sample);
                break;
            }
        }
    }
    if (sampleNamesInSameOrder.size() != header.getSampleNamesInOrder().size()) {
        throw new IllegalStateException();
    }
    VCFHeader h2 = new VCFHeader(header.getMetaDataInInputOrder(), sampleNamesInSameOrder);
    File tmp = IOUtil.newTempFile("tmp", ".vcf.gz", new File[] { tmpDir });
    tmp.deleteOnExit();
    PrintWriter pw = new PrintWriter(new GZIPOutputStream(new FileOutputStream(tmp)));
    while (r.hasNext()) {
        String line = r.next();
        pw.println(line);
        VariantContext ctx = null;
        try {
            ctx = vcfCodec.decode(line);
        } catch (Exception err) {
            pw.close();
            LOG.error(line);
            LOG.error(err);
            return -1;
        }
        // declare any FILTER seen in the records but missing from the header
        for (String f : ctx.getFilters()) {
            if (h2.getFilterHeaderLine(f) != null)
                continue;
            // if(f.equals(VCFConstants.PASSES_FILTERS_v4)) continue; hum...
            if (f.isEmpty() || f.equals(VCFConstants.UNFILTERED))
                continue;
            LOG.info("Fixing missing Filter:" + f);
            h2.addMetaDataLine(new VCFFilterHeaderLine(f));
        }
        // declare any INFO tag seen in the records but missing from the header
        for (String tag : ctx.getAttributes().keySet()) {
            if (h2.getInfoHeaderLine(tag) != null)
                continue;
            LOG.info("Fixing missing INFO:" + tag);
            h2.addMetaDataLine(new VCFInfoHeaderLine(tag, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "undefined. Saved by " + getClass()));
        }
    }
    pw.flush();
    pw.close();
    pw = null;
    LOG.info("re-reading VCF from tmpFile:" + tmp);
    h2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName(), "Saved VCF FILTER AND INFO from " + filenameIn));
    // save the fixed header in memory
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    VariantContextWriter w2 = VCFUtils.createVariantContextWriterToOutputStream(baos);
    w2.writeHeader(h2);
    w2.close();
    baos.close();
    // reopen the tmp file, prepending the fixed header to the saved records
    @SuppressWarnings("resource")
    VcfIterator in = new VcfIteratorImpl(new SequenceInputStream(new ByteArrayInputStream(baos.toByteArray()), new GZIPInputStream(new FileInputStream(tmp))));
    w.writeHeader(h2);
    while (in.hasNext()) {
        w.add(in.next());
    }
    in.close();
    tmp.delete();
    return 0;
}
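The final SequenceInputStream step is worth isolating: the rewritten header is serialized into memory and simply concatenated with the record body saved on disk, so the records never have to be re-serialized. Below is a hedged sketch of just that splice; the helper name spliceHeader is hypothetical, and it assumes, as doWork does, that the body file contains gzipped, header-less VCF record lines and that jvarkit's createVariantContextWriterToOutputStream emits the header as plain VCF text.

// Hypothetical helper: glue a fixed VCFHeader onto a saved, header-less,
// gzipped body of VCF records (the trick used at the end of doWork above).
static InputStream spliceHeader(VCFHeader fixedHeader, File gzBody) throws IOException {
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    VariantContextWriter w2 = VCFUtils.createVariantContextWriterToOutputStream(baos);
    // serializes the header as VCF text into the byte array
    w2.writeHeader(fixedHeader);
    w2.close();
    // header bytes first, then the decompressed record lines
    return new SequenceInputStream(
        new ByteArrayInputStream(baos.toByteArray()),
        new GZIPInputStream(new FileInputStream(gzBody)));
}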