use of htsjdk.variant.vcf.VCFReader in project jvarkit by lindenb.
the class VcfInTest method makeVcfIn.
private Path makeVcfIn(final String vcfpath, String other_args) throws Exception {
Path vcfDbIn = Paths.get(vcfpath);
Path tmpIn = support.createTmpPath(".vcf");
Path outVcf = support.createTmpPath(".vcf");
final VariantContextWriter w = VCFUtils.createVariantContextWriterToPath(tmpIn);
final VCFReader r = VCFReaderFactory.makeDefault().open(vcfDbIn, true);
w.writeHeader(r.getHeader());
int n = 0;
final CloseableIterator<VariantContext> iter = r.iterator();
while (iter.hasNext()) {
VariantContext ctx = iter.next();
// ignore some variants
if (n++ % 2 == 0)
continue;
w.add(ctx);
// add it twice
w.add(ctx);
VariantContextBuilder vcb = new VariantContextBuilder(vcfpath, ctx.getContig(), ctx.getStart(), ctx.getEnd(), Arrays.asList(ctx.getReference(), ZORG_ALLELE));
vcb.genotypes(ctx.getGenotypes().stream().map(G -> new GenotypeBuilder(G.getSampleName(), Arrays.asList(ZORG_ALLELE, ZORG_ALLELE)).make()).collect(Collectors.toList()));
w.add(vcb.make());
}
w.close();
iter.close();
r.close();
final List<String> args = new ArrayList<>();
args.add("-o");
args.add(outVcf.toString());
args.add("--database");
args.add(vcfDbIn.toString());
Arrays.stream(other_args.split("[ ]")).filter(S -> !S.isEmpty()).forEach(S -> args.add(S));
args.add(vcfpath.toString());
Assert.assertEquals(new VcfIn().instanceMain(args), 0);
support.assertIsVcf(outVcf);
return outVcf;
}
use of htsjdk.variant.vcf.VCFReader in project jvarkit by lindenb.
the class KnimeVariantHelper method forEachVariants.
public Stream<VariantContext> forEachVariants(final String vcfFile) throws IOException {
final Path file = Paths.get(vcfFile);
IOUtil.assertFileIsReadable(file);
final VCFReader r = VCFReaderFactory.makeDefault().open(file, false);
final VCFHeader header = r.getHeader();
this.init(header);
final CloseableIterator<VariantContext> iter = r.iterator();
final Iterable<VariantContext> iterable = () -> iter;
return StreamSupport.stream(iterable.spliterator(), false).onClose(() -> {
CloserUtil.close(iter);
CloserUtil.close(r);
});
}
use of htsjdk.variant.vcf.VCFReader in project jvarkit by lindenb.
the class VcfEpistatis01 method doWork.
@Override
public int doWork(final List<String> args) {
if (this.number_of_jobs < 1) {
LOG.error("bad number of jobs");
return -1;
}
try {
final int variantsCount;
final List<VariantContext> inMemoryVariants;
final File vcfFile = new File(oneAndOnlyOneFile(args));
final File tmpIndexFile;
if (vcfFile.equals(this.outputFile)) {
LOG.error("input == output");
return -1;
}
VCFReader vcfFileReader = VCFReaderFactory.makeDefault().open(vcfFile.toPath(), false);
final VCFHeader header = vcfFileReader.getHeader();
final Pedigree pedigree;
if (this.pedigreeFile != null) {
pedigree = new Pedigree.Parser().parse(this.pedigreeFile);
} else {
pedigree = new Pedigree.Parser().parse(header);
}
pedigree.verifyPersonsHaveUniqueNames();
final Map<String, Integer> sample2index = header.getSampleNameToOffset();
final int[] caseIndexes = pedigree.getAffected().stream().filter(P -> sample2index.containsKey(P.getId())).mapToInt(P -> sample2index.get(P.getId())).sorted().toArray();
final int[] ctrlIndexes = new ArrayList<>(pedigree.getUnaffected()).stream().filter(P -> sample2index.containsKey(P.getId())).mapToInt(P -> sample2index.get(P.getId())).sorted().toArray();
if (caseIndexes.length == 0 || ctrlIndexes.length == 0) {
LOG.error("empty ped or no case/ctrl");
vcfFileReader.close();
return -1;
}
if (this.load_variants_in_memory) {
LOG.info("loading variants in memory");
tmpIndexFile = null;
final CloseableIterator<VariantContext> iter2 = vcfFileReader.iterator();
inMemoryVariants = Collections.unmodifiableList(iter2.stream().filter(this.variantFilter).filter(// should fix https://github.com/samtools/htsjdk/issues/1026 ?
V -> V.getGenotypes().stream().filter(G -> G.isCalled()).count() > 0).collect(Collectors.toList()));
variantsCount = inMemoryVariants.size();
iter2.close();
} else {
tmpIndexFile = File.createTempFile("epistatsis", VcfOffsetsIndexFactory.INDEX_EXTENSION);
tmpIndexFile.deleteOnExit();
new VcfOffsetsIndexFactory().setLogger(LOG).setPredicate(variantFilter).indexVcfFile(vcfFile, tmpIndexFile);
final VcfList tmpList = VcfList.fromFile(vcfFile, tmpIndexFile);
variantsCount = tmpList.size();
tmpList.close();
inMemoryVariants = null;
}
vcfFileReader.close();
LOG.info("Number of variants: " + variantsCount);
Result bestResult = null;
int x = this.start_index_at;
while (x + 1 < variantsCount) {
final List<Runner> runners = new Vector<>(this.number_of_jobs);
while (x + 1 < variantsCount && runners.size() < this.number_of_jobs) {
LOG.info("starting " + x + "/" + variantsCount);
runners.add(new Runner(inMemoryVariants == null ? VcfList.fromFile(vcfFile, tmpIndexFile) : new Vector<>(inMemoryVariants), x, caseIndexes, ctrlIndexes));
++x;
}
final ExecutorService execSvc;
if (this.number_of_jobs == 1) {
execSvc = null;
} else {
execSvc = Executors.newFixedThreadPool(this.number_of_jobs);
;
}
if (this.number_of_jobs == 1) {
runners.get(0).call();
} else {
execSvc.invokeAll(runners);
}
if (execSvc != null) {
execSvc.shutdown();
execSvc.awaitTermination(10000L, TimeUnit.DAYS);
execSvc.shutdownNow();
}
runners.stream().mapToLong(R -> R.duration).min().ifPresent(D -> {
LOG.info("That took " + (D / 1000f) + " seconds.");
});
for (final Runner r : runners) {
final Result rez = r.result;
if (rez == null)
continue;
if (bestResult == null || bestResult.score < rez.score) {
bestResult = rez;
if (this.output_score) {
final PrintWriter pw = super.openFileOrStdoutAsPrintWriter(this.outputFile);
pw.println(bestResult.score + "\t" + bestResult.toString());
pw.flush();
pw.close();
} else {
final VariantContextWriter w = openVariantContextWriter(this.outputFile);
final VCFHeader header2 = new VCFHeader(header);
header2.addMetaDataLine(new VCFHeaderLine(VcfEpistatis01.class.getName(), bestResult.toString()));
w.writeHeader(header2);
w.add(bestResult.ctx1);
w.add(bestResult.ctx2);
w.close();
}
}
}
LOG.info("best: " + bestResult);
}
if (tmpIndexFile != null)
tmpIndexFile.delete();
return 0;
} catch (final Exception err) {
err.printStackTrace();
LOG.error(err);
return -1;
} finally {
}
}
use of htsjdk.variant.vcf.VCFReader in project jvarkit by lindenb.
the class IgvReview method start.
@Override
public void start(final Stage stage) throws Exception {
stage.setTitle(getClass().getSimpleName());
Predicate<VariantContext> ctxFilter;
Map<String, String> params = super.getParameters().getNamed();
if (params.containsKey("--filter")) {
ctxFilter = JexlVariantPredicate.create(params.get("--filter"));
} else {
ctxFilter = V -> true;
}
final List<String> args = super.getParameters().getUnnamed();
final File configFile;
if (args.isEmpty()) {
final FileChooser fc = new FileChooser();
final String lastDirStr = preferences.get(LAST_USED_DIR_KEY, null);
if (lastDirStr != null && !lastDirStr.isEmpty()) {
fc.setInitialDirectory(new File(lastDirStr));
}
fc.getExtensionFilters().addAll(Collections.singletonList(new FileChooser.ExtensionFilter("Config file", "*.config", "*.cfg", "*.list")));
configFile = fc.showOpenDialog(stage);
} else if (args.size() == 1) {
configFile = new File(args.get(0));
} else {
configFile = null;
}
if (configFile == null || !configFile.exists()) {
JfxUtils.dialog().cause("Illegal number of arguments or file doesn't exists.").show(stage);
Platform.exit();
return;
}
if (configFile.isFile() && configFile.getParentFile() != null) {
this.preferences.put(LAST_USED_DIR_KEY, configFile.getParentFile().getPath());
}
final List<String> configLines = Files.readAllLines(configFile.toPath());
final Predicate<String> ignoreComment = (S) -> !S.startsWith("#");
final Predicate<String> predVcf = S -> S.endsWith(".vcf") || S.endsWith(".vcf.gz");
if (configLines.stream().filter(ignoreComment).filter(predVcf).count() != 1) {
JfxUtils.dialog().cause("Found more than one vcf file in " + configFile).show(stage);
Platform.exit();
return;
}
final File vcfFile = configLines.stream().filter(ignoreComment).filter(predVcf).map(S -> new File(S)).findFirst().get();
LOG.info("Opening vcf file and loading in memory");
VCFReader vfr = null;
CloseableIterator<VariantContext> iter = null;
final Set<String> sampleNames;
try {
this.variants.clear();
vfr = VCFReaderFactory.makeDefault().open(vcfFile.toPath(), false);
this.vcfHeader = vfr.getHeader();
sampleNames = new HashSet<>(this.vcfHeader.getSampleNamesInOrder());
if (sampleNames.isEmpty()) {
JfxUtils.dialog().cause("No Genotypes in " + vcfFile).show(stage);
Platform.exit();
return;
}
iter = vfr.iterator();
this.variants.addAll(iter.stream().filter(ctxFilter).filter(CTX -> CTX.isVariant()).collect(Collectors.toList()));
} catch (final Exception err) {
JfxUtils.dialog().cause(err).show(stage);
Platform.exit();
return;
} finally {
CloserUtil.close(iter);
CloserUtil.close(vfr);
}
if (this.variants.isEmpty()) {
JfxUtils.dialog().cause("No Variants").show(stage);
Platform.exit();
return;
}
final SAMSequenceDictionary dict = this.vcfHeader.getSequenceDictionary();
if (dict == null || dict.isEmpty()) {
JfxUtils.dialog().cause(JvarkitException.VcfDictionaryMissing.getMessage(vcfFile.getPath())).show(stage);
Platform.exit();
return;
}
final SamReaderFactory srf = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.LENIENT);
configLines.stream().filter(ignoreComment).filter(S -> S.endsWith(FileExtensions.BAM)).map(S -> Paths.get(S)).forEach(F -> {
final SamReader samIn = srf.open(F);
final SAMFileHeader header = samIn.getFileHeader();
CloserUtil.close(samIn);
String sample = null;
for (final SAMReadGroupRecord rg : header.getReadGroups()) {
String s = rg.getSample();
if (s == null)
continue;
if (sample == null) {
sample = s;
} else if (!sample.equals(s)) {
JfxUtils.dialog().cause("Two samples in " + F).show(stage);
Platform.exit();
return;
}
}
if (sample == null) {
JfxUtils.dialog().cause("No sample in " + F + ". Ignoring").show(stage);
return;
}
if (!sampleNames.contains(sample)) {
JfxUtils.dialog().cause("Not in VCF header " + sample + " / " + F + ". Ignoring").show(stage);
return;
}
this.sample2bamFile.put(sample, F);
});
if (this.sample2bamFile.isEmpty()) {
JfxUtils.dialog().cause("No valid bam file in " + configFile).show(stage);
return;
}
sampleNames.retainAll(this.sample2bamFile.keySet());
if (sampleNames.isEmpty()) {
JfxUtils.dialog().cause("No Sample associated to bam").show(stage);
return;
}
ObservableList<VariantAndGenotype> genotypes = FXCollections.observableArrayList(this.variants.stream().flatMap(CTX -> CTX.getGenotypes().stream().filter(G -> sampleNames.contains(G.getSampleName())).map(G -> new VariantAndGenotype(CTX, G))).collect(Collectors.toList()));
if (genotypes.isEmpty()) {
JfxUtils.dialog().cause("No Genotype to show").show(stage);
return;
}
Menu menu = new Menu("File");
MenuItem menuItem = new MenuItem("Save as...");
menuItem.setOnAction(AE -> {
saveVariantsAs(stage);
});
menu.getItems().add(menuItem);
menuItem = new MenuItem("Save");
menuItem.setOnAction(AE -> {
if (this.saveAsFile != null) {
saveVariants(stage, this.saveAsFile);
} else {
saveVariantsAs(stage);
}
});
menu.getItems().add(menuItem);
menu.getItems().add(new SeparatorMenuItem());
menuItem = new MenuItem("Quit");
menuItem.setOnAction(AE -> {
Platform.exit();
});
menu.getItems().add(menuItem);
MenuBar bar = new MenuBar(menu);
this.genotypeTable = new TableView<>(genotypes);
this.genotypeTable.getColumns().add(makeColumn("CHROM", G -> G.ctx.getContig()));
this.genotypeTable.getColumns().add(makeColumn("POS", G -> G.ctx.getStart()));
this.genotypeTable.getColumns().add(makeColumn("ID", G -> G.ctx.getID()));
this.genotypeTable.getColumns().add(makeColumn("REF", G -> G.ctx.getReference().getDisplayString()));
this.genotypeTable.getColumns().add(makeColumn("ALT", G -> G.ctx.getAlternateAlleles().stream().map(A -> A.getDisplayString()).collect(Collectors.joining(","))));
this.genotypeTable.getColumns().add(makeColumn("Sample", G -> G.g.getSampleName()));
this.genotypeTable.getColumns().add(makeColumn("Type", G -> G.g.getType().name()));
this.genotypeTable.getColumns().add(makeColumn("Alleles", G -> G.g.getAlleles().stream().map(A -> A.getDisplayString()).collect(Collectors.joining(","))));
TableColumn<VariantAndGenotype, String> reviewCol = new TableColumn<>("Review");
reviewCol.setCellValueFactory(C -> C.getValue().getReviewProperty());
reviewCol.setCellFactory(TextFieldTableCell.forTableColumn());
reviewCol.setOnEditCommit((E) -> {
int y = E.getTablePosition().getRow();
this.genotypeTable.getItems().get(y).setReview(E.getNewValue());
});
reviewCol.setEditable(true);
this.genotypeTable.getColumns().add(reviewCol);
this.genotypeTable.getSelectionModel().cellSelectionEnabledProperty().set(true);
this.genotypeTable.setEditable(true);
final ContextMenu cm = new ContextMenu();
MenuItem mi1 = new MenuItem("Menu 1");
cm.getItems().add(mi1);
MenuItem mi2 = new MenuItem("Menu 2");
cm.getItems().add(mi2);
this.genotypeTable.setOnMousePressed(event -> {
if (event.isPrimaryButtonDown() && (event.getClickCount() == 3 || event.isShiftDown())) {
moveIgvTo(stage, genotypeTable.getSelectionModel().getSelectedItem());
} else if (event.isSecondaryButtonDown()) {
cm.show(genotypeTable, event.getScreenX(), event.getScreenY());
}
});
final BorderPane pane2 = new BorderPane(this.genotypeTable);
pane2.setPadding(new Insets(10, 10, 10, 10));
VBox vbox1 = new VBox(bar, pane2);
final Scene scene = new Scene(vbox1, 500, 300);
stage.setScene(scene);
stage.show();
}
use of htsjdk.variant.vcf.VCFReader in project jvarkit by lindenb.
the class ScanStructuralVariants method recursive.
private int recursive(final VariantContext ctx, final List<VariantContext> candidates, final List<VCFReader> vcfFilesInput, final List<ShadowedVcfReader> shadowControls, final VariantContextWriter out) {
if (candidates.size() == vcfFilesInput.size()) {
final int max_controls = (int) (shadowControls.size() * max_maf);
int count_matching_controls = 0;
for (final ShadowedVcfReader vcfReader : shadowControls) {
final CloseableIterator<VariantContext> iter = vcfReader.query(ctx.getContig(), Math.max(1, ctx.getStart() - this.svComparator.getBndDistance()), ctx.getEnd() + this.svComparator.getBndDistance());
while (iter.hasNext()) {
final VariantContext ctx3 = iter.next();
if (this.svComparator.test(ctx3, ctx)) {
count_matching_controls++;
candidates.add(new VariantContextBuilder(ctx3).noGenotypes().filter(ATT_CONTROL).attribute(ATT_FILENAME, vcfReader.vcfPath.toString()).make());
break;
}
}
iter.close();
vcfReader.close();
if (count_matching_controls > max_controls)
return -1;
}
if (this.print_all_ctx) {
final String cluster = "CTX" + (++ID_GENERATOR);
for (int x = 0; x < candidates.size(); ++x) {
out.add(new VariantContextBuilder(candidates.get(x)).noGenotypes().attribute(ATT_CLUSTER, cluster).make());
}
return 0;
}
final VariantContextBuilder vcb = new VariantContextBuilder();
vcb.chr(ctx.getContig());
vcb.start(ctx.getStart());
vcb.stop(ctx.getEnd());
vcb.attribute(VCFConstants.END_KEY, ctx.getEnd());
vcb.attribute("SVLEN", ctx.getLengthOnReference());
final String svType = ctx.getAttributeAsString(VCFConstants.SVTYPE, ".");
vcb.attribute(VCFConstants.SVTYPE, svType);
vcb.attribute("IMPRECISE", true);
for (int side = 0; side < 2; side++) {
final Function<VariantContext, Integer> coordExtractor;
if (side == 0) {
coordExtractor = C -> C.getStart();
} else {
coordExtractor = C -> C.getEnd();
}
final List<Integer> list = Arrays.asList(candidates.stream().mapToInt(C -> coordExtractor.apply(C) - coordExtractor.apply(ctx)).min().orElse(0), candidates.stream().mapToInt(C -> coordExtractor.apply(C) - coordExtractor.apply(ctx)).max().orElse(0));
vcb.attribute(side == 0 ? "CIPOS" : "CIEND", list);
}
final Allele ref = Allele.create("N", true);
final Allele alt = Allele.create("<" + svType + ">", false);
vcb.alleles(Arrays.asList(ref, alt));
out.add(vcb.make());
return 0;
}
VariantContext ctx2 = null;
;
final CloseableIterator<VariantContext> iter = vcfFilesInput.get(candidates.size()).query(ctx.getContig(), Math.max(1, ctx.getStart() - this.svComparator.getBndDistance()), ctx.getEnd() + this.svComparator.getBndDistance());
while (iter.hasNext()) {
final VariantContext ctx3 = iter.next();
if (this.svComparator.test(ctx3, ctx)) {
ctx2 = ctx3;
break;
}
}
iter.close();
if (ctx2 == null)
return -1;
candidates.add(ctx2);
return recursive(ctx, candidates, vcfFilesInput, shadowControls, out);
}
Aggregations