use of com.github.lindenb.jvarkit.util.vcf.VcfIterator in project jvarkit by lindenb.
the class VcfConcat method fromFiles.
private int fromFiles(final VariantContextWriter out) throws IOException {
List<VcfIterator> inputs = new ArrayList<VcfIterator>(this.inputFiles.size());
List<String> inputFiles = new ArrayList<>(this.inputFiles.size());
List<String> samples = new ArrayList<>();
SAMSequenceDictionary dict = null;
try {
Set<VCFHeaderLine> metaData = new HashSet<VCFHeaderLine>();
/* open each vcf file */
for (String vcfFile : this.inputFiles) {
LOG.info("Opening " + vcfFile);
VcfIterator r = VCFUtils.createVcfIterator(vcfFile);
/* check VCF dict */
VCFHeader header = r.getHeader();
if (dict == null && inputs.isEmpty()) {
dict = header.getSequenceDictionary();
} else if (!inputs.isEmpty() && ((dict == null && header.getSequenceDictionary() != null) || (dict != null && header.getSequenceDictionary() == null))) {
LOG.error("not.the.same.sequence.dictionaries");
return -1;
} else if (!inputs.isEmpty() && dict != null && !SequenceUtil.areSequenceDictionariesEqual(dict, header.getSequenceDictionary())) {
LOG.error("not.the.same.sequence.dictionaries");
return -1;
}
/* check samples */
if (inputs.isEmpty()) {
samples = header.getSampleNamesInOrder();
} else if (!header.getSampleNamesInOrder().equals(samples)) {
LOG.error("No same samples");
return -1;
}
metaData.addAll(header.getMetaDataInInputOrder());
inputs.add(r);
inputFiles.add(VCFUtils.escapeInfoField(vcfFile));
}
/* create comparator according to dict*/
final Comparator<VariantContext> comparator = (dict == null ? VCFUtils.createChromPosRefComparator() : VCFUtils.createTidPosRefComparator(dict));
metaData.add(new VCFHeaderLine(getClass().getSimpleName() + "CmdLine", String.valueOf(getProgramCommandLine())));
metaData.add(new VCFHeaderLine(getClass().getSimpleName() + "Version", String.valueOf(getVersion())));
metaData.add(new VCFHeaderLine(getClass().getSimpleName() + "HtsJdkVersion", HtsjdkVersion.getVersion()));
metaData.add(new VCFHeaderLine(getClass().getSimpleName() + "HtsJdkHome", HtsjdkVersion.getHome()));
metaData.add(new VCFInfoHeaderLine(VARIANTSOURCE, 1, VCFHeaderLineType.String, "Origin File of Varant"));
VCFHeader h2 = new VCFHeader(metaData, samples);
out.writeHeader(h2);
SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(dict);
for (; ; ) {
/* get 'smallest' variant */
VariantContext smallest = null;
int idx = 0;
int best_idx = -1;
while (idx < inputs.size()) {
VcfIterator in = inputs.get(idx);
if (!in.hasNext()) {
CloserUtil.close(in);
inputs.remove(idx);
inputFiles.remove(idx);
} else {
VariantContext ctx = in.peek();
if (smallest == null || comparator.compare(smallest, ctx) > 0) {
smallest = ctx;
best_idx = idx;
}
++idx;
}
}
if (smallest == null)
break;
final VariantContext ctx = progress.watch(inputs.get(best_idx).next());
final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
vcb.attribute(VARIANTSOURCE, inputFiles.get(best_idx));
out.add(vcb.make());
}
progress.finish();
return 0;
} catch (Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(inputs);
}
}
use of com.github.lindenb.jvarkit.util.vcf.VcfIterator in project jvarkit by lindenb.
the class VcfToZip method doWork.
@Override
public int doWork(List<String> args) {
if (this.outputFile != null && this.outputFile.getName().endsWith(".zip")) {
LOG.error("Filename must end with '.zip' " + outputFile);
return -1;
}
LineIterator lr = null;
VcfIterator in = null;
ZipOutputStream zout = null;
FileOutputStream fout = null;
int num_vcfs = 0;
VariantContextWriter vcw = null;
args = new ArrayList<>(IOUtils.unrollFiles(args));
try {
int optind = 0;
if (this.outputFile != null) {
fout = new FileOutputStream(this.outputFile);
zout = new ZipOutputStream(fout);
} else {
zout = new ZipOutputStream(stdout());
}
do {
if (args.isEmpty()) {
LOG.info("reading concatenated vcf from stdin");
lr = IOUtils.openStreamForLineIterator(stdin());
} else {
LOG.info("reading concatenated vcf from " + args.get(optind));
lr = IOUtils.openURIForLineIterator(args.get(optind));
}
while (lr.hasNext()) {
++num_vcfs;
in = VCFUtils.createVcfIteratorFromLineIterator(lr, true);
final VCFHeader header = in.getHeader();
String filename = null;
if (this.titleHeaderStr != null && !this.titleHeaderStr.isEmpty()) {
final VCFHeaderLine h = header.getOtherHeaderLine(this.titleHeaderStr);
if (h != null && !h.getValue().trim().isEmpty())
filename = h.getValue().trim();
}
if (filename == null || filename.trim().isEmpty()) {
// create title
filename = String.format("vcf2zip.%05d.vcf", num_vcfs);
// set name in header
if (this.titleHeaderStr != null && !this.titleHeaderStr.isEmpty()) {
header.addMetaDataLine(new VCFHeaderLine(this.titleHeaderStr.trim(), filename));
}
}
if (!filename.endsWith(".vcf")) {
filename += ".vcf";
}
if (!this.zipPrefix.isEmpty()) {
filename = this.zipPrefix + (this.zipPrefix.endsWith("/") ? "" : "/") + filename;
}
LOG.info(filename);
final ZipEntry entry = new ZipEntry(filename);
entry.setComment("Created with " + getProgramName());
zout.putNextEntry(entry);
vcw = VCFUtils.createVariantContextWriterToOutputStream(IOUtils.uncloseableOutputStream(zout));
vcw.writeHeader(header);
final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header);
while (in.hasNext()) {
vcw.add(progress.watch(in.next()));
}
vcw.close();
progress.finish();
zout.closeEntry();
in.close();
}
++optind;
} while (optind < args.size());
zout.finish();
zout.flush();
zout.close();
zout = null;
CloserUtil.close(fout);
LOG.info("done. Number of VCFs:" + num_vcfs);
return RETURN_OK;
} catch (final Exception e) {
LOG.error(e);
return -1;
} finally {
CloserUtil.close(in);
CloserUtil.close(lr);
CloserUtil.close(zout);
CloserUtil.close(fout);
}
}
use of com.github.lindenb.jvarkit.util.vcf.VcfIterator in project jvarkit by lindenb.
the class LumpyVcfToCircos method doWork.
@Override
public int doWork(List<String> args) {
VcfIterator r = null;
ArchiveFactory archiveFactory = null;
PrintWriter pw = null;
PrintWriter conf = null;
try {
r = super.openVcfIterator(oneFileOrNull(args));
final VCFHeader header = r.getHeader();
final SAMSequenceDictionary dict = header.getSequenceDictionary();
if (dict == null) {
LOG.error("No dictionary in vcf");
return -1;
}
archiveFactory = ArchiveFactory.open(outputFile);
conf = archiveFactory.openWriter(createFilename("ideogram.conf"));
conf.println("<ideogram>");
conf.println("show = yes");
conf.println("<spacing>");
conf.println("default = 1u");
conf.println("<pairwise \"/hs/ /hs/\">");
conf.println("spacing = 0.5r");
conf.println("</pairwise>");
conf.println("");
conf.println("</spacing>");
conf.println("");
conf.println("thickness = 25p");
conf.println("");
conf.println("#stroke_thickness = 1");
conf.println("#stroke_color = black");
conf.println("");
conf.println("fill = yes");
conf.println("fill_color = black");
conf.println("");
conf.println("radius = 1r");
conf.println("show_label = yes");
conf.println("label_font = default");
conf.println("label_radius = dims(ideogram,radius_outer) + 10p");
conf.println("label_size = 12p");
conf.println("label_parallel = yes");
conf.println("");
conf.println("show_bands = yes");
conf.println("fill_bands = yes");
conf.println("band_stroke_thickness = 0");
conf.println("band_stroke_color = black");
conf.println("band_transparency = 4");
conf.println("");
conf.println("</ideogram>");
conf.close();
conf = archiveFactory.openWriter(createFilename("ticks.conf"));
conf.println("");
conf.println("show_ticks = no");
conf.println("show_tick_labels = no");
conf.println("show_grid = no");
conf.println("");
conf.println("<ticks>");
conf.println("tick_label_font = light");
conf.println("radius = dims(ideogram,radius_outer) + 180p");
conf.println("label_offset = 5p");
conf.println("label_size = 16p");
conf.println("multiplier = 1e-6");
conf.println("color = black");
conf.println("thickness = 1p");
conf.println("");
conf.println("# 25 Mb ticks, all chromosomes, with labels");
conf.println("");
conf.println("<tick>");
conf.println("spacing = 25u");
conf.println("size = 12p");
conf.println("show_label = yes");
conf.println("format = %d");
conf.println("</tick>");
conf.println("");
conf.println("# 5 Mb ticks, all chromosomes, with labels");
conf.println("# labels must be separated by at least 1px, which");
conf.println("# avoids overlap on human chrs");
conf.println("<tick>");
conf.println("label_separation = 1p");
conf.println("spacing = 5u");
conf.println("size = 7p");
conf.println("show_label = yes");
conf.println("format = %d");
conf.println("</tick>");
conf.println("");
conf.println("# 5 Mb ticks with grid on rn1 and mm1, drawn only");
conf.println("# for their grid (note size=0p)");
conf.println("<tick>");
conf.println("chromosomes_display_default = no");
conf.println("chromosomes = rn1;mm1");
conf.println("spacing = 5u");
conf.println("size = 0p");
conf.println("force_display = yes");
conf.println("grid_start = 0.45r");
conf.println("grid_end = dims(ideogram,radius_outer) + 180p");
conf.println("grid_color = grey");
conf.println("grid_thickness = 1p");
conf.println("grid = yes");
conf.println("</tick>");
conf.println("");
conf.println("# 20% relative ticks on human chromosomes");
conf.println("");
conf.println("<tick>");
conf.println("chromosomes = -rn1;-mm1");
conf.println("radius = 0.95r");
conf.println("spacing_type = relative");
conf.println("rspacing = 0.20");
conf.println("size = 6p");
conf.println("show_label = yes");
conf.println("label_relative = yes");
conf.println("rmultiplier = 100");
conf.println("format = %d");
conf.println("suffix = %");
conf.println("");
conf.println("skip_last_label= yes");
conf.println("");
conf.println("grid_start = 0.885r");
conf.println("grid_end = 0.95r");
conf.println("grid_color = grey");
conf.println("grid_thickness = 1p");
conf.println("grid = yes");
conf.println("</tick>");
conf.println("");
conf.println("# relative ticks at start and end of human chromosomes");
conf.println("# with grid");
conf.println("");
conf.println("<tick>");
conf.println("spacing_type = relative");
conf.println("rspacing = 1");
conf.println("size = 6p");
conf.println("grid_start = 0.45r");
conf.println("grid_end = dims(ideogram,radius_outer) + 180p");
conf.println("grid_color = grey");
conf.println("grid_thickness = 1p");
conf.println("grid = yes");
conf.println("</tick>");
conf.println("");
conf.println("<tick>");
conf.println("chromosomes = -rn1;-mm1");
conf.println("radius = 0.95r");
conf.println("spacing_type = relative");
conf.println("rspacing = 0.10");
conf.println("size = 3p");
conf.println("show_label = no");
conf.println("");
conf.println("grid_start = 0.885r");
conf.println("grid_end = 0.95r");
conf.println("grid_color = lgrey");
conf.println("grid_thickness = 1p");
conf.println("grid = yes");
conf.println("</tick>");
conf.println("");
conf.println("# 25% relative ticks on human chromosomes");
conf.println("");
conf.println("<tick>");
conf.println("chromosomes = -rn1;-mm1");
conf.println("radius = 0.82r");
conf.println("spacing_type = relative");
conf.println("rspacing = 0.25");
conf.println("size = 6p");
conf.println("show_label = yes");
conf.println("label_relative = yes");
conf.println("rmultiplier = 100");
conf.println("format = %d");
conf.println("");
conf.println("skip_last_label= yes");
conf.println("");
conf.println("grid_start = 0.755r");
conf.println("grid_end = 0.82r");
conf.println("grid_color = grey");
conf.println("grid_thickness = 1p");
conf.println("grid = yes");
conf.println("</tick>");
conf.println("");
conf.println("</ticks>");
conf.close();
conf = archiveFactory.openWriter(createFilename("circos.conf"));
conf.println("karyotype=" + createFilename("karyotype.txt"));
conf.println("chromosomes_units=1000000");
conf.println("<<include " + createFilename("ideogram.conf") + ">>");
conf.println("<<include " + createFilename("ticks.conf") + ">>");
final Map<String, SampleWriters> sample2writers = new HashMap<>(header.getNGenotypeSamples());
for (final String sampleName : header.getSampleNamesInOrder()) {
sample2writers.put(sampleName, new SampleWriters(sampleName));
}
while (r.hasNext()) {
final VariantContext vc = r.next();
for (final SampleWriters sw : sample2writers.values()) {
sw.visit(vc);
}
}
r.close();
r = null;
LOG.info("writing conf");
final Set<String> seen_chromosomes = new HashSet<>(dict.size());
for (final SampleWriters sw : sample2writers.values()) {
seen_chromosomes.addAll(sw.bdns.stream().map(SV -> SV.contig).collect(Collectors.toSet()));
seen_chromosomes.addAll(sw.bdns.stream().map(SV -> SV.bnb_contig).collect(Collectors.toSet()));
seen_chromosomes.addAll(sw.duplications.stream().map(SV -> SV.contig).collect(Collectors.toSet()));
seen_chromosomes.addAll(sw.invertions.stream().map(SV -> SV.contig).collect(Collectors.toSet()));
seen_chromosomes.addAll(sw.deletions.stream().map(SV -> SV.contig).collect(Collectors.toSet()));
}
pw = archiveFactory.openWriter(createFilename("karyotype.txt"));
for (int i = 0; i < dict.size(); ++i) {
final SAMSequenceRecord ssr = dict.getSequence(i);
if (!seen_chromosomes.contains(ssr.getSequenceName()))
continue;
pw.print("chr - ");
pw.print(ssr.getSequenceName());
pw.print(" ");
pw.print(ssr.getSequenceName());
pw.print(" 0 ");
pw.print(ssr.getSequenceLength());
pw.print(" ");
pw.println(i % 2 == 0 ? "blue" : "grey");
}
pw.flush();
pw.close();
Function<String, String> sample2color = S -> {
int i = header.getSampleNamesInOrder().indexOf(S);
if (i != -1) {
if (i % 2 == 0)
i = ((header.getSampleNamesInOrder().size() - 1) - i);
int g = 30 + (int) ((i / (double) header.getSampleNamesInOrder().size()) * 50.0);
return "gray" + (int) g;
}
return (i % 2 == 0 ? "green" : "red");
};
final double minR = 0.1;
final double maxR = 0.95;
final double numberOfTracks = sample2writers.values().stream().mapToInt(X -> X.getNumberOfTracks()).sum();
// final double maxSu = sample2writers.values().stream().mapToInt(X->X.getMaxSupportingEvidences()).max().orElse(1);
double radius = minR;
final double dr = (maxR - minR) / numberOfTracks;
LOG.info("N tracks = " + numberOfTracks + " dr=" + dr);
conf.println("<plots>");
int maxSu = sample2writers.values().stream().flatMap(S -> S.deletions.stream()).mapToInt(SV -> SV.su).max().orElse(0);
for (final SampleWriters sw : sample2writers.values()) {
if (maxSu == 0 || sw.deletions.isEmpty())
continue;
String file = sw.getDeletionFilename();
pw = archiveFactory.openWriter(file);
for (final SVDel sv : sw.deletions) sv.print(pw);
pw.flush();
pw.close();
conf.println("<plot>");
conf.println("type = histogram");
conf.println("file = " + file);
conf.println("min = 0");
conf.println("max = " + maxSu);
conf.println("orientation = out");
conf.println("layers = 1");
conf.println("margin = 0.02u");
conf.println("color = black_a4");
conf.println("r0 = " + radius + "r");
conf.println("r1 = " + (radius + dr) + "r");
conf.println("fill_color = " + sample2color.apply(sw.sampleName));
// +sample2color.apply(sw.sampleName));
conf.println("stroke_color = grey");
conf.println("thickness = 1");
conf.println("extend_bin = no");
// conf.println("<backgrounds>\n<background>\ncolor ="+(zz%2==0?"pink":"yellow")+"\n</background>\n</backgrounds>");
// conf.println("<axes>\n<axis>\nspacing = "+(dr/10.0)+"\ncolor=lgrey\nthickness=2\n</axis>\n</axes>");
conf.println("</plot>");
radius += dr;
}
maxSu = sample2writers.values().stream().flatMap(S -> S.invertions.stream()).mapToInt(SV -> SV.su).max().orElse(0);
for (final SampleWriters sw : sample2writers.values()) {
if (maxSu == 0 || sw.invertions.isEmpty())
continue;
String file = sw.getInvertionFilename();
pw = archiveFactory.openWriter(file);
for (final SVInv sv : sw.invertions) sv.print(pw);
pw.flush();
pw.close();
conf.println("<plot>");
conf.println("type = histogram");
conf.println("file = " + file);
conf.println("min = 0");
conf.println("max = " + maxSu);
conf.println("color = black_a4");
conf.println("r0 = " + radius + "r");
conf.println("r1 = " + (radius + dr) + "r");
conf.println("fill_color = " + sample2color.apply(sw.sampleName));
conf.println("stroke_color = " + sample2color.apply(sw.sampleName));
conf.println("thickness = 1");
conf.println("extend_bin = no");
conf.println("</plot>");
radius += dr;
}
maxSu = sample2writers.values().stream().flatMap(S -> S.duplications.stream()).mapToInt(SV -> SV.su).max().orElse(0);
for (final SampleWriters sw : sample2writers.values()) {
if (maxSu == 0 || sw.duplications.isEmpty())
continue;
String file = sw.getDuplicationFilename();
pw = archiveFactory.openWriter(file);
for (final SVDup sv : sw.duplications) sv.print(pw);
pw.flush();
pw.close();
conf.println("<plot>");
conf.println("type = histogram");
conf.println("file = " + file);
conf.println("min = 0");
conf.println("max = " + maxSu);
conf.println("color = black_a4");
conf.println("r0 = " + radius + "r");
conf.println("r1 = " + (radius + dr) + "r");
conf.println("fill_color = " + sample2color.apply(sw.sampleName));
conf.println("stroke_color = " + sample2color.apply(sw.sampleName));
conf.println("thickness = 1");
conf.println("extend_bin = no");
conf.println("</plot>");
radius += dr;
}
conf.println("</plots>");
conf.println("<links>");
for (final SampleWriters sw : sample2writers.values()) {
if (sw.bdns.isEmpty())
continue;
pw = archiveFactory.openWriter(sw.getBnbFilename());
for (final SVBnB sv : sw.bdns) sv.print(pw);
pw.flush();
pw.close();
conf.println("<link>");
conf.println("file=" + sw.getBnbFilename());
conf.println("radius=" + radius + "r");
conf.println("bezier_radius = " + (radius + dr) + "r");
conf.println("fill_color = " + sample2color.apply(sw.sampleName));
conf.println("stroke_color = " + sample2color.apply(sw.sampleName));
conf.println("</link>");
radius += dr;
}
conf.println("</links>");
sample2writers.clear();
conf.println("");
conf.println("<image>");
conf.println("# Included from Circos distribution.");
conf.println("<<include etc/image.conf>>");
conf.println("</image>");
conf.println("");
conf.println("<<include etc/colors_fonts_patterns.conf>>");
conf.println("");
conf.println("# Debugging, I/O an dother system parameters");
conf.println("# Included from Circos distribution.");
conf.println("<<include etc/housekeeping.conf>>");
conf.println("");
conf.flush();
conf.close();
conf = null;
archiveFactory.close();
archiveFactory = null;
LOG.info("done");
return 0;
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(conf);
CloserUtil.close(archiveFactory);
CloserUtil.close(r);
}
}
use of com.github.lindenb.jvarkit.util.vcf.VcfIterator in project jvarkit by lindenb.
the class FixVCF method doWork.
private int doWork(String filenameIn, InputStream vcfStream, VariantContextWriter w) throws IOException {
final AbstractVCFCodec vcfCodec = VCFUtils.createDefaultVCFCodec();
LineIterator r = new LineIteratorImpl(new SynchronousLineReader(vcfStream));
final VCFHeader header = (VCFHeader) vcfCodec.readActualHeader(r);
// samples names have been changed by picard api and reordered !!!
// re-create the original order
List<String> sampleNamesInSameOrder = new ArrayList<String>(header.getSampleNamesInOrder().size());
for (int col = 0; col < header.getSampleNamesInOrder().size(); ++col) {
for (String sample : header.getSampleNameToOffset().keySet()) {
if (header.getSampleNameToOffset().get(sample) == col) {
sampleNamesInSameOrder.add(sample);
break;
}
}
}
if (sampleNamesInSameOrder.size() != header.getSampleNamesInOrder().size()) {
throw new IllegalStateException();
}
VCFHeader h2 = new VCFHeader(header.getMetaDataInInputOrder(), sampleNamesInSameOrder);
File tmp = IOUtil.newTempFile("tmp", ".vcf.gz", new File[] { tmpDir });
tmp.deleteOnExit();
PrintWriter pw = new PrintWriter(new GZIPOutputStream(new FileOutputStream(tmp)));
while (r.hasNext()) {
String line = r.next();
pw.println(line);
VariantContext ctx = null;
try {
ctx = vcfCodec.decode(line);
} catch (Exception err) {
pw.close();
LOG.error(line);
LOG.error(err);
return -1;
}
for (String f : ctx.getFilters()) {
if (h2.getFilterHeaderLine(f) != null)
continue;
// if(f.equals(VCFConstants.PASSES_FILTERS_v4)) continue; hum...
if (f.isEmpty() || f.equals(VCFConstants.UNFILTERED))
continue;
LOG.info("Fixing missing Filter:" + f);
h2.addMetaDataLine(new VCFFilterHeaderLine(f));
}
for (String tag : ctx.getAttributes().keySet()) {
if (h2.getInfoHeaderLine(tag) != null)
continue;
LOG.info("Fixing missing INFO:" + tag);
h2.addMetaDataLine(new VCFInfoHeaderLine(tag, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "undefined. Saved by " + getClass()));
}
}
pw.flush();
pw.close();
pw = null;
LOG.info("re-reading VCF frm tmpFile:" + tmp);
h2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName(), "Saved VCF FILTER AND INFO from " + filenameIn));
// save header in memory
ByteArrayOutputStream baos = new ByteArrayOutputStream();
VariantContextWriter w2 = VCFUtils.createVariantContextWriterToOutputStream(baos);
w2.writeHeader(h2);
w2.close();
baos.close();
// reopen tmp file
@SuppressWarnings("resource") VcfIterator in = new VcfIteratorImpl(new SequenceInputStream(new ByteArrayInputStream(baos.toByteArray()), new GZIPInputStream(new FileInputStream(tmp))));
w.writeHeader(h2);
while (in.hasNext()) {
w.add(in.next());
}
in.close();
tmp.delete();
return 0;
}
use of com.github.lindenb.jvarkit.util.vcf.VcfIterator in project jvarkit by lindenb.
the class FixVcfMissingGenotypes method doVcfToVcf.
@Override
protected int doVcfToVcf(final String inputName, final VcfIterator in, final VariantContextWriter out) {
final Set<String> bamFiles = IOUtils.unrollFiles(bamList);
final Map<String, List<SamReader>> sample2bam = new HashMap<>(bamFiles.size());
try {
final VCFHeader header = in.getHeader();
for (final String bamFile : bamFiles) {
LOG.info("Reading header for " + bamFile);
final SamReader reader = super.openSamReader(bamFile);
if (!reader.hasIndex()) {
LOG.error("No BAM index available for " + bamFile);
return -1;
}
final SAMFileHeader samHeader = reader.getFileHeader();
for (final SAMReadGroupRecord g : samHeader.getReadGroups()) {
if (g.getSample() == null)
continue;
final String sample = g.getSample();
if (StringUtil.isBlank(sample))
continue;
List<SamReader> readers = sample2bam.get(sample);
if (readers == null) {
readers = new ArrayList<>();
sample2bam.put(sample, readers);
}
readers.add(reader);
}
}
final VCFHeader h2 = new VCFHeader(header);
if (h2.getFormatHeaderLine(VCFConstants.DEPTH_KEY) == null) {
h2.addMetaDataLine(VCFStandardHeaderLines.getFormatLine(VCFConstants.DEPTH_KEY));
}
if (h2.getFormatHeaderLine(VCFConstants.GENOTYPE_KEY) == null) {
h2.addMetaDataLine(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_KEY));
}
if (h2.getFormatHeaderLine(VCFConstants.GENOTYPE_FILTER_KEY) == null) {
h2.addMetaDataLine(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_FILTER_KEY));
}
h2.addMetaDataLine(new VCFFormatHeaderLine(this.fixedTag, 1, VCFHeaderLineType.Integer, "Genotype was set as homozygous (min depth =" + this.minDepth + ")"));
final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header);
this.recalculator.setHeader(h2);
out.writeHeader(h2);
while (in.hasNext()) {
final VariantContext ctx = progress.watch(in.next());
boolean somethingWasChanged = false;
final List<Genotype> genotypes = new ArrayList<>(ctx.getNSamples());
for (int i = 0; i < ctx.getNSamples(); ++i) {
Genotype genotype = ctx.getGenotype(i);
final String sample = genotype.getSampleName();
if (!genotype.isCalled() || (!genotype.hasDP() && this.fixDP)) {
List<SamReader> samReaders = sample2bam.get(sample);
if (samReaders == null)
samReaders = Collections.emptyList();
final int depth = fetchDP(ctx, sample, samReaders);
if (genotype.isCalled() && !genotype.hasDP()) {
genotype = new GenotypeBuilder(genotype).DP(depth).make();
somethingWasChanged = true;
} else // genotype was not called
{
if (depth >= this.minDepth) {
final List<Allele> homozygous = new ArrayList<>(2);
homozygous.add(ctx.getReference());
homozygous.add(ctx.getReference());
final GenotypeBuilder gb = new GenotypeBuilder(genotype);
gb.alleles(homozygous);
gb.attribute(this.fixedTag, 1);
gb.DP(depth);
if (!StringUtil.isBlank(this.fixedGenotypesAreFiltered))
gb.filter(this.fixedGenotypesAreFiltered);
somethingWasChanged = true;
genotype = gb.make();
} else if (// cannot fix but we can update DP
!genotype.hasDP()) {
genotype = new GenotypeBuilder(genotype).DP(depth).make();
somethingWasChanged = true;
}
}
}
genotypes.add(genotype);
}
if (somethingWasChanged) {
final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
vcb.genotypes(genotypes);
out.add(this.recalculator.apply(vcb.make()));
} else {
out.add(ctx);
}
}
progress.finish();
return 0;
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
sample2bam.values().stream().flatMap(L -> L.stream()).forEach(R -> CloserUtil.close(R));
}
}
Aggregations