use of com.github.lindenb.jvarkit.tools.vcflist.VcfOffsetsIndexFactory in project jvarkit by lindenb.
the class VcfEpistatis01 method doWork.
@Override
public int doWork(final List<String> args) {
if (this.number_of_jobs < 1) {
LOG.error("bad number of jobs");
return -1;
}
try {
final int variantsCount;
final List<VariantContext> inMemoryVariants;
final File vcfFile = new File(oneAndOnlyOneFile(args));
final File tmpIndexFile;
if (vcfFile.equals(this.outputFile)) {
LOG.error("input == output");
return -1;
}
VCFFileReader vcfFileReader = new VCFFileReader(vcfFile, false);
final VCFHeader header = vcfFileReader.getFileHeader();
final Pedigree pedigree;
if (this.pedigreeFile != null) {
pedigree = new Pedigree.Parser().parse(this.pedigreeFile);
} else {
pedigree = new Pedigree.Parser().parse(header);
}
pedigree.verifyPersonsHaveUniqueNames();
final Map<String, Integer> sample2index = header.getSampleNameToOffset();
final int[] caseIndexes = pedigree.getAffected().stream().filter(P -> sample2index.containsKey(P.getId())).mapToInt(P -> sample2index.get(P.getId())).sorted().toArray();
final int[] ctrlIndexes = new ArrayList<>(pedigree.getUnaffected()).stream().filter(P -> sample2index.containsKey(P.getId())).mapToInt(P -> sample2index.get(P.getId())).sorted().toArray();
if (caseIndexes.length == 0 || ctrlIndexes.length == 0) {
LOG.error("empty ped or no case/ctrl");
vcfFileReader.close();
return -1;
}
if (this.load_variants_in_memory) {
LOG.info("loading variants in memory");
tmpIndexFile = null;
final CloseableIterator<VariantContext> iter2 = vcfFileReader.iterator();
inMemoryVariants = Collections.unmodifiableList(iter2.stream().filter(this.variantFilter).filter(// should fix https://github.com/samtools/htsjdk/issues/1026 ?
V -> V.getGenotypes().stream().filter(G -> G.isCalled()).count() > 0).collect(Collectors.toList()));
variantsCount = inMemoryVariants.size();
iter2.close();
} else {
tmpIndexFile = File.createTempFile("epistatsis", VcfOffsetsIndexFactory.INDEX_EXTENSION);
tmpIndexFile.deleteOnExit();
new VcfOffsetsIndexFactory().setLogger(LOG).setPredicate(variantFilter).indexVcfFile(vcfFile, tmpIndexFile);
final VcfList tmpList = VcfList.fromFile(vcfFile, tmpIndexFile);
variantsCount = tmpList.size();
tmpList.close();
inMemoryVariants = null;
}
vcfFileReader.close();
LOG.info("Number of variants: " + variantsCount);
Result bestResult = null;
int x = this.start_index_at;
while (x + 1 < variantsCount) {
final List<Runner> runners = new Vector<>(this.number_of_jobs);
while (x + 1 < variantsCount && runners.size() < this.number_of_jobs) {
LOG.info("starting " + x + "/" + variantsCount);
runners.add(new Runner(inMemoryVariants == null ? VcfList.fromFile(vcfFile, tmpIndexFile) : new Vector<>(inMemoryVariants), x, caseIndexes, ctrlIndexes));
++x;
}
final ExecutorService execSvc;
if (this.number_of_jobs == 1) {
execSvc = null;
} else {
execSvc = Executors.newFixedThreadPool(this.number_of_jobs);
;
}
if (this.number_of_jobs == 1) {
runners.get(0).call();
} else {
execSvc.invokeAll(runners);
}
if (execSvc != null) {
execSvc.shutdown();
execSvc.awaitTermination(10000L, TimeUnit.DAYS);
execSvc.shutdownNow();
}
runners.stream().mapToLong(R -> R.duration).min().ifPresent(D -> {
LOG.info("That took " + (D / 1000f) + " seconds.");
});
for (final Runner r : runners) {
final Result rez = r.result;
if (rez == null)
continue;
if (bestResult == null || bestResult.score < rez.score) {
bestResult = rez;
if (this.output_score) {
final PrintWriter pw = super.openFileOrStdoutAsPrintWriter(this.outputFile);
pw.println(bestResult.score + "\t" + bestResult.toString());
pw.flush();
pw.close();
} else {
final VariantContextWriter w = openVariantContextWriter(this.outputFile);
final VCFHeader header2 = new VCFHeader(header);
header2.addMetaDataLine(new VCFHeaderLine(VcfEpistatis01.class.getName(), bestResult.toString()));
w.writeHeader(header2);
w.add(bestResult.ctx1);
w.add(bestResult.ctx2);
w.close();
}
}
}
LOG.info("best: " + bestResult);
}
if (tmpIndexFile != null)
tmpIndexFile.delete();
return 0;
} catch (final Exception err) {
err.printStackTrace();
LOG.error(err);
return -1;
} finally {
}
}
use of com.github.lindenb.jvarkit.tools.vcflist.VcfOffsetsIndexFactory in project jvarkit by lindenb.
the class TestNg01 method testVcfFileOffsets.
@Test(dataProvider = "all_vcfs")
public void testVcfFileOffsets(final String path) throws IOException {
final File vcfFile = new File(path);
VcfOffsetsIndexFactory factory = new VcfOffsetsIndexFactory();
final File indexFile = factory.indexVcfFile(vcfFile);
Assert.assertTrue(indexFile.exists());
final VcfList list = VcfList.fromFile(vcfFile);
final List<Integer> positions = streamVcf(vcfFile).map(CTX -> CTX.getStart()).collect(Collectors.toList());
Assert.assertEquals(positions.size(), list.size());
Assert.assertNotNull(list.getHeader());
for (int i = list.size() - 1; i >= 0; --i) {
Assert.assertNotNull(list.get(i));
Assert.assertEquals(list.get(i).getStart(), positions.get(i).intValue());
}
Assert.assertEquals((long) list.size(), streamVcf(vcfFile).count());
list.close();
Assert.assertTrue(indexFile.delete());
}
Aggregations