Use of htsjdk.tribble.readers.LineIteratorImpl in project jvarkit by lindenb.
The class VCFCompare, method doWork.
@Override
public int doWork(final List<String> args) {
if (args.isEmpty()) {
LOG.error("VCFs missing.");
return -1;
}
if (args.size() != 2) {
System.err.println("Illegal number or arguments. Expected two VCFs");
return -1;
}
PrintWriter pw = null;
XMLStreamWriter w = null;
InputStream in = null;
SortingCollection<LineAndFile> variants = null;
try {
LineAndFileComparator varcmp = new LineAndFileComparator();
variants = SortingCollection.newInstance(LineAndFile.class, new LineAndFileCodec(), varcmp, this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpPaths());
variants.setDestructiveIteration(true);
for (int i = 0; i < 2; ++i) {
this.inputs[i] = new Input();
this.inputs[i].codec = VCFUtils.createDefaultVCFCodec();
this.inputs[i].filename = args.get(i);
LOG.info("Opening " + this.inputs[i].filename);
in = IOUtils.openURIForReading(this.inputs[i].filename);
final LineReader lr = new SynchronousLineReader(in);
final LineIterator li = new LineIteratorImpl(lr);
this.inputs[i].header = (VCFHeader) this.inputs[i].codec.readActualHeader(li);
this.inputs[i].vepPredictionParser = new VepPredictionParserFactory(this.inputs[i].header).get();
this.inputs[i].snpEffPredictionParser = new SnpEffPredictionParserFactory(this.inputs[i].header).get();
this.inputs[i].annPredictionParser = new AnnPredictionParserFactory(this.inputs[i].header).get();
while (li.hasNext()) {
LineAndFile laf = new LineAndFile();
laf.fileIdx = i;
laf.line = li.next();
variants.add(laf);
}
LOG.info("Done Reading " + this.inputs[i].filename);
CloserUtil.close(li);
CloserUtil.close(lr);
CloserUtil.close(in);
}
variants.doneAdding();
LOG.info("Done Adding");
Set<String> commonSamples = new TreeSet<String>(this.inputs[0].header.getSampleNamesInOrder());
commonSamples.retainAll(this.inputs[1].header.getSampleNamesInOrder());
List<Venn0> venn1List = new ArrayList<VCFCompare.Venn0>();
venn1List.add(new Venn1("ALL"));
venn1List.add(new Venn1("having ID") {
@Override
public VariantContext filter(VariantContext ctx, int fileIndex) {
return ctx == null || !ctx.hasID() ? null : ctx;
}
});
venn1List.add(new Venn1("QUAL greater 30") {
@Override
public VariantContext filter(VariantContext ctx, int fileIndex) {
return ctx == null || !ctx.hasLog10PError() || ctx.getPhredScaledQual() < 30.0 ? null : ctx;
}
});
for (VariantContext.Type t : VariantContext.Type.values()) {
venn1List.add(new VennType(t));
}
for (SequenceOntologyTree.Term term : SequenceOntologyTree.getInstance().getTerms()) {
venn1List.add(new VennPred("vep", term) {
@Override
Set<Term> terms(VariantContext ctx, int file_id) {
Set<Term> tt = new HashSet<SequenceOntologyTree.Term>();
for (VepPredictionParser.VepPrediction pred : VCFCompare.this.inputs[file_id].vepPredictionParser.getPredictions(ctx)) {
tt.addAll(pred.getSOTerms());
}
return tt;
}
});
venn1List.add(new VennPred("SnpEff", term) {
@Override
Set<Term> terms(VariantContext ctx, int file_id) {
Set<Term> tt = new HashSet<SequenceOntologyTree.Term>();
for (SnpEffPredictionParser.SnpEffPrediction pred : VCFCompare.this.inputs[file_id].snpEffPredictionParser.getPredictions(ctx)) {
tt.addAll(pred.getSOTerms());
}
return tt;
}
});
venn1List.add(new VennPred("ANN", term) {
@Override
Set<Term> terms(VariantContext ctx, int file_id) {
Set<Term> tt = new HashSet<SequenceOntologyTree.Term>();
for (AnnPredictionParser.AnnPrediction pred : VCFCompare.this.inputs[file_id].annPredictionParser.getPredictions(ctx)) {
tt.addAll(pred.getSOTerms());
}
return tt;
}
});
}
for (String s : commonSamples) {
venn1List.add(new VennGType(s));
}
/* START : digest results ====================== */
Counter<String> diff = new Counter<String>();
List<LineAndFile> row = new ArrayList<LineAndFile>();
CloseableIterator<LineAndFile> iter = variants.iterator();
for (; ; ) {
LineAndFile rec = null;
if (iter.hasNext()) {
rec = iter.next();
}
if (rec == null || (!row.isEmpty() && varcmp.compare(row.get(0), rec) != 0)) {
if (!row.isEmpty()) {
diff.incr("count.variations");
VariantContext[] contexes_init = new VariantContext[] { null, null };
for (LineAndFile var : row) {
if (contexes_init[var.fileIdx] != null) {
LOG.error("Duplicate context in " + inputs[var.fileIdx].filename + " : " + var.line);
continue;
}
contexes_init[var.fileIdx] = var.getContext();
}
for (Venn0 venn : venn1List) {
venn.visit(contexes_init);
}
row.clear();
}
if (rec == null)
break;
}
row.add(rec);
}
iter.close();
/* END : digest results ====================== */
pw = super.openFileOrStdoutAsPrintWriter(outputFile);
XMLOutputFactory xmlfactory = XMLOutputFactory.newInstance();
w = xmlfactory.createXMLStreamWriter(pw);
w.writeStartElement("html");
w.writeStartElement("body");
/* specific samples */
w.writeStartElement("div");
w.writeStartElement("dl");
for (int i = 0; i < 3; ++i) {
String title;
Set<String> samples;
switch(i) {
case 0:
case 1:
title = "Sample(s) for " + this.inputs[i].filename + ".";
samples = new TreeSet<String>(this.inputs[i].header.getSampleNamesInOrder());
samples.removeAll(commonSamples);
break;
default:
title = "Common Sample(s).";
samples = new TreeSet<String>(commonSamples);
break;
}
w.writeStartElement("dt");
w.writeCharacters(title);
w.writeEndElement();
w.writeStartElement("dd");
w.writeStartElement("ol");
for (String s : samples) {
w.writeStartElement("li");
w.writeCharacters(s);
w.writeEndElement();
}
w.writeEndElement();
w.writeEndElement();
}
// dl
w.writeEndElement();
// div
w.writeEndElement();
for (Venn0 v : venn1List) {
v.write(w);
}
// body
w.writeEndElement();
// html
w.writeEndElement();
w.writeEndDocument();
w.close();
w = null;
pw.flush();
pw.close();
pw = null;
} catch (Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(w);
CloserUtil.close(pw);
if (variants != null)
variants.cleanup();
}
return 0;
}
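All of the snippets on this page share the same idiom: wrap an InputStream in a SynchronousLineReader, expose it as a LineIterator via LineIteratorImpl, let a VCF codec consume the header, then stream the remaining lines. Below is a minimal, self-contained sketch of just that idiom; the file name input.vcf and the class name LineIteratorDemo are placeholders, not part of jvarkit.

import htsjdk.tribble.readers.LineIterator;
import htsjdk.tribble.readers.LineIteratorImpl;
import htsjdk.tribble.readers.LineReader;
import htsjdk.tribble.readers.SynchronousLineReader;
import htsjdk.variant.vcf.VCFCodec;
import htsjdk.variant.vcf.VCFHeader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;

public class LineIteratorDemo {
    public static void main(String[] args) throws IOException {
        try (InputStream in = new FileInputStream("input.vcf")) { // hypothetical input file
            final LineReader lr = new SynchronousLineReader(in);
            final LineIterator li = new LineIteratorImpl(lr);
            final VCFCodec codec = new VCFCodec();
            // readActualHeader advances the iterator past the meta-data lines
            final VCFHeader header = (VCFHeader) codec.readActualHeader(li);
            System.out.println("Samples: " + header.getSampleNamesInOrder());
            while (li.hasNext()) {
                final String line = li.next(); // one variant line, still unparsed
                // decode lazily only where needed: codec.decode(line)
            }
            lr.close();
        }
    }
}

This is why the examples above can defer parsing: each raw line is stored (in a SortingCollection, a temp file, etc.) and decoded to a VariantContext only when it is actually consumed.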
Use of htsjdk.tribble.readers.LineIteratorImpl in project jvarkit by lindenb.
The class FixVCF, method doWork.
private int doWork(String filenameIn, InputStream vcfStream, VariantContextWriter w) throws IOException {
final AbstractVCFCodec vcfCodec = VCFUtils.createDefaultVCFCodec();
LineIterator r = new LineIteratorImpl(new SynchronousLineReader(vcfStream));
final VCFHeader header = (VCFHeader) vcfCodec.readActualHeader(r);
// sample names have been changed and reordered by the Picard API!
// re-create the original order
List<String> sampleNamesInSameOrder = new ArrayList<String>(header.getSampleNamesInOrder().size());
for (int col = 0; col < header.getSampleNamesInOrder().size(); ++col) {
for (String sample : header.getSampleNameToOffset().keySet()) {
if (header.getSampleNameToOffset().get(sample) == col) {
sampleNamesInSameOrder.add(sample);
break;
}
}
}
if (sampleNamesInSameOrder.size() != header.getSampleNamesInOrder().size()) {
throw new IllegalStateException();
}
VCFHeader h2 = new VCFHeader(header.getMetaDataInInputOrder(), sampleNamesInSameOrder);
File tmp = IOUtil.newTempFile("tmp", ".vcf.gz", new File[] { tmpDir });
tmp.deleteOnExit();
PrintWriter pw = new PrintWriter(new GZIPOutputStream(new FileOutputStream(tmp)));
while (r.hasNext()) {
String line = r.next();
pw.println(line);
VariantContext ctx = null;
try {
ctx = vcfCodec.decode(line);
} catch (Exception err) {
pw.close();
LOG.error(line);
LOG.error(err);
return -1;
}
for (String f : ctx.getFilters()) {
if (h2.getFilterHeaderLine(f) != null)
continue;
// if(f.equals(VCFConstants.PASSES_FILTERS_v4)) continue; hum...
if (f.isEmpty() || f.equals(VCFConstants.UNFILTERED))
continue;
LOG.info("Fixing missing Filter:" + f);
h2.addMetaDataLine(new VCFFilterHeaderLine(f));
}
for (String tag : ctx.getAttributes().keySet()) {
if (h2.getInfoHeaderLine(tag) != null)
continue;
LOG.info("Fixing missing INFO:" + tag);
h2.addMetaDataLine(new VCFInfoHeaderLine(tag, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "undefined. Saved by " + getClass()));
}
}
pw.flush();
pw.close();
pw = null;
LOG.info("re-reading VCF frm tmpFile:" + tmp);
h2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName(), "Saved VCF FILTER AND INFO from " + filenameIn));
// save header in memory
ByteArrayOutputStream baos = new ByteArrayOutputStream();
VariantContextWriter w2 = VCFUtils.createVariantContextWriterToOutputStream(baos);
w2.writeHeader(h2);
w2.close();
baos.close();
// reopen tmp file
@SuppressWarnings("resource") VcfIterator in = new VcfIteratorImpl(new SequenceInputStream(new ByteArrayInputStream(baos.toByteArray()), new GZIPInputStream(new FileInputStream(tmp))));
w.writeHeader(h2);
while (in.hasNext()) {
w.add(in.next());
}
in.close();
tmp.delete();
return 0;
}
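The interesting trick above is the header splice: the repaired header is rendered into a ByteArrayOutputStream, then concatenated with the gunzipped body of the temporary file through a SequenceInputStream, so the downstream VcfIterator sees one well-formed VCF. Here is a hedged sketch of that splice in isolation; the header text and the body.vcf.gz path are illustrative assumptions, not jvarkit code.

import java.io.ByteArrayInputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.SequenceInputStream;
import java.nio.charset.StandardCharsets;
import java.util.zip.GZIPInputStream;

public class HeaderSpliceDemo {
    public static InputStream spliceHeader(byte[] headerBytes, String gzBodyPath) throws IOException {
        // The resulting stream yields the in-memory header first, then the
        // gunzipped body, so a parser reads them as a single continuous VCF.
        return new SequenceInputStream(
                new ByteArrayInputStream(headerBytes),
                new GZIPInputStream(new FileInputStream(gzBodyPath)));
    }

    public static void main(String[] args) throws IOException {
        byte[] header = "##fileformat=VCFv4.2\n#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n"
                .getBytes(StandardCharsets.UTF_8);
        try (InputStream vcf = spliceHeader(header, "body.vcf.gz")) { // hypothetical body file
            // hand 'vcf' to a LineIteratorImpl/VCFCodec as in the examples above
        }
    }
}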
Use of htsjdk.tribble.readers.LineIteratorImpl in project jvarkit by lindenb.
The class VCFMerge, method workUsingSortingCollection.
private int workUsingSortingCollection() {
VariantContextWriter w = null;
SortingCollection<VariantOfFile> array = null;
InputStream in = null;
CloseableIterator<VariantOfFile> iter = null;
try {
final List<String> IN = new ArrayList<String>(this.userVcfFiles);
final Set<String> genotypeSampleNames = new TreeSet<String>();
final Set<VCFHeaderLine> metaData = new HashSet<VCFHeaderLine>();
array = SortingCollection.newInstance(VariantOfFile.class, new VariantCodec(), new VariantComparator(), this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpPaths());
array.setDestructiveIteration(true);
for (int fileIndex = 0; fileIndex < IN.size(); ++fileIndex) {
final String vcfFile = IN.get(fileIndex);
LOG.info("reading from " + vcfFile + " " + (fileIndex + 1) + "/" + IN.size());
final VCFHandler handler = new VCFHandler(vcfFile);
vcfHandlers.add(handler);
in = IOUtils.openURIForReading(vcfFile);
final LineReader lr = new SynchronousLineReader(in);
final LineIterator lit = new LineIteratorImpl(lr);
handler.header = (VCFHeader) handler.vcfCodec.readActualHeader(lit);
final SAMSequenceDictionary dict1 = handler.header.getSequenceDictionary();
if (dict1 == null)
throw new RuntimeException("dictionary missing in " + vcfFile);
if (dict1.isEmpty())
throw new RuntimeException("dictionary is Empty in " + vcfFile);
genotypeSampleNames.addAll(handler.header.getSampleNamesInOrder());
metaData.addAll(handler.header.getMetaDataInInputOrder());
if (fileIndex == 0) {
this.global_dictionary = dict1;
} else if (!SequenceUtil.areSequenceDictionariesEqual(global_dictionary, dict1)) {
throw new JvarkitException.DictionariesAreNotTheSame(global_dictionary, dict1);
}
final Predicate<VariantOfFile> accept;
if (!StringUtil.isBlank(VCFMerge.this.regionStr)) {
final IntervalParser intervalParser = new IntervalParser(dict1);
intervalParser.setContigNameIsWholeContig(true);
final Interval rgn = intervalParser.parse(VCFMerge.this.regionStr);
accept = (VOL) -> {
final VariantContext ctx = VOL.parse();
return rgn.intersects(new Interval(ctx.getContig(), ctx.getStart(), ctx.getEnd()));
};
} else {
accept = (VOL) -> true;
}
while (lit.hasNext()) {
final VariantOfFile vof = new VariantOfFile();
vof.fileIndex = fileIndex;
vof.line = lit.next();
if (!accept.test(vof))
continue;
array.add(vof);
}
in.close();
in = null;
}
array.doneAdding();
LOG.info("merging..." + vcfHandlers.size() + " vcfs");
/* CREATE THE NEW VCF Header */
VCFHeader mergeHeader = null;
if (this.doNotMergeRowLines) {
metaData.add(NO_MERGE_INFO_HEADER);
}
mergeHeader = new VCFHeader(metaData, genotypeSampleNames);
// create the context writer
w = super.openVariantContextWriter(outputFile);
w.writeHeader(mergeHeader);
iter = array.iterator();
final List<VariantOfFile> row = new ArrayList<VariantOfFile>();
for (; ; ) {
VariantOfFile var = null;
if (iter.hasNext()) {
var = iter.next();
} else {
LOG.info("end of iteration");
if (!row.isEmpty()) {
for (final VariantContext merged : buildContextFromVariantOfFiles(mergeHeader, row)) {
w.add(merged);
}
}
break;
}
if (!row.isEmpty() && !row.get(0).same(var)) {
for (final VariantContext merged : buildContextFromVariantOfFiles(mergeHeader, row)) {
w.add(merged);
}
row.clear();
}
row.add(var);
}
CloserUtil.close(w);
w = null;
array.cleanup();
array = null;
CloserUtil.close(iter);
iter = null;
LOG.info("done");
return RETURN_OK;
} catch (Exception err) {
LOG.error(err);
return -1;
} finally {
this.userVcfFiles.clear();
CloserUtil.close(w);
CloserUtil.close(in);
CloserUtil.close(iter);
if (array != null)
array.cleanup();
}
}
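Both doWork above and workUsingSortingCollection drive the same "flush a buffered run of equal records" loop over a sorted iterator. A generic sketch of that loop follows, assuming the iterator is already sorted by the same comparator used for grouping; the class and method names are illustrative only.

import java.util.ArrayList;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.function.Consumer;

public class RunFlusher {
    public static <T> void forEachRun(Iterator<T> sorted, Comparator<T> cmp, Consumer<List<T>> handler) {
        final List<T> row = new ArrayList<>();
        while (sorted.hasNext()) {
            final T rec = sorted.next();
            // a new key starts: emit the buffered run before accepting 'rec'
            if (!row.isEmpty() && cmp.compare(row.get(0), rec) != 0) {
                handler.accept(row);
                row.clear();
            }
            row.add(rec);
        }
        // emit the final run (the handler must consume the list synchronously,
        // since the same instance is reused across runs)
        if (!row.isEmpty()) handler.accept(row);
    }
}

In VCFCompare the handler feeds the Venn counters; in VCFMerge it calls buildContextFromVariantOfFiles and writes the merged contexts.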
Use of htsjdk.tribble.readers.LineIteratorImpl in project jvarkit by lindenb.
The class WorldMapGenome, method scan.
private void scan(Graphics2D g, InputStream input) throws IOException {
Set<String> unknownC = new HashSet<String>();
Pattern tab = Pattern.compile("[\t]");
LineIterator in = new LineIteratorImpl(new SynchronousLineReader(input));
while (in.hasNext()) {
String line = in.next();
String[] tokens = tab.split(line, 5);
if (tokens.length < 4) {
LOG.warning("Ignoring " + line);
continue;
}
SAMSequenceRecord rec = this.context.getDictionary().getSequence(tokens[0]);
if (rec == null) {
LOG.warning("unknown chromosome " + tokens[0]);
continue;
}
String country = tokens[3].toLowerCase().replaceAll("[ ]", "");
Shape shape = this.country2shape.get(country);
if (shape == null) {
if (!unknownC.contains(country)) {
unknownC.add(country);
LOG.warning("unknown country " + country);
}
continue;
}
seen.incr(country);
int midpos = (Integer.parseInt(tokens[1]) + Integer.parseInt(tokens[2])) / 2;
// country center
Point2D.Double pt1 = new Point2D.Double(shape.getBounds2D().getCenterX(), shape.getBounds2D().getCenterY());
// circle point
Point2D pt3 = this.context.convertPositionToPoint(tokens[0], midpos, getRadiusInt());
double angle = this.context.convertPositionToRadian(rec, midpos);
double angle2 = angle - Math.PI / 10.0;
double distance13 = context.getCenter().distance(new Point2D.Double((pt1.getX() + pt3.getX()) / 2.0, (pt1.getY() + pt3.getY()) / 2.0));
// mid point (quadratic control point); the Y term presumably intended getCenterY(), not getCenterX()
Point2D pt2 = new Point2D.Double(context.getCenterX() + distance13 * Math.cos(angle2), context.getCenterY() + distance13 * Math.sin(angle2));
Composite old = g.getComposite();
Stroke olds = g.getStroke();
g.setStroke(new BasicStroke(0.8f));
g.setComposite(AlphaComposite.getInstance(AlphaComposite.SRC_OVER, 0.02f));
g.setColor(Color.DARK_GRAY);
GeneralPath p = new GeneralPath();
p.moveTo(pt1.getX(), pt1.getY());
p.quadTo(pt2.getX(), pt2.getY(), pt3.getX(), pt3.getY());
p.closePath();
g.draw(p);
g.setComposite(old);
g.setStroke(olds);
}
CloserUtil.close(in);
}
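The curve drawn for each record is a quadratic Bézier whose control point lies at the chord midpoint's distance from the center, rotated by a small angular offset (PI/10). A geometry-only sketch of that construction, under those assumptions; center, from, to, and angle are hypothetical inputs.

import java.awt.geom.GeneralPath;
import java.awt.geom.Point2D;

public class ArcDemo {
    public static GeneralPath arc(Point2D center, Point2D from, Point2D to, double angle) {
        // distance from the center to the midpoint of the chord from->to
        final double d = center.distance(new Point2D.Double(
                (from.getX() + to.getX()) / 2.0,
                (from.getY() + to.getY()) / 2.0));
        final double a = angle - Math.PI / 10.0; // small angular offset, as in scan()
        final Point2D ctrl = new Point2D.Double(
                center.getX() + d * Math.cos(a),
                center.getY() + d * Math.sin(a));
        final GeneralPath p = new GeneralPath();
        p.moveTo(from.getX(), from.getY());
        p.quadTo(ctrl.getX(), ctrl.getY(), to.getX(), to.getY());
        return p;
    }
}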