Use of com.github.lindenb.jvarkit.util.Pedigree in the project jvarkit by lindenb.
The class VcfEpistatis01, method doWork:
/**
 * Entry point: scans pairs of variants of a single VCF for epistasis,
 * scoring them with {@link Runner} workers against case/control samples
 * taken from a pedigree (external file or embedded in the VCF header).
 * The best-scoring pair is (re)written to the output each time it improves.
 *
 * @param args command-line arguments; exactly one VCF file is expected
 * @return 0 on success, -1 on error
 */
@Override
public int doWork(final List<String> args) {
    if (this.number_of_jobs < 1) {
        LOG.error("bad number of jobs");
        return -1;
    }
    try {
        final int variantsCount;
        final List<VariantContext> inMemoryVariants;
        final File vcfFile = new File(oneAndOnlyOneFile(args));
        final File tmpIndexFile;
        final VCFHeader header;
        final int[] caseIndexes;
        final int[] ctrlIndexes;
        if (vcfFile.equals(this.outputFile)) {
            LOG.error("input == output");
            return -1;
        }
        // try-with-resources: the reader is now closed on ALL exit paths,
        // including the early 'return -1' and exceptions (the original
        // leaked it whenever an exception fired before the manual close()).
        try (final VCFFileReader vcfFileReader = new VCFFileReader(vcfFile, false)) {
            header = vcfFileReader.getFileHeader();
            final Pedigree pedigree;
            if (this.pedigreeFile != null) {
                pedigree = new Pedigree.Parser().parse(this.pedigreeFile);
            } else {
                // no external pedigree given: expect one embedded in the VCF header
                pedigree = new Pedigree.Parser().parse(header);
            }
            pedigree.verifyPersonsHaveUniqueNames();
            // map affected/unaffected individuals to their genotype column offsets;
            // individuals absent from the VCF are silently skipped
            final Map<String, Integer> sample2index = header.getSampleNameToOffset();
            caseIndexes = pedigree.getAffected().stream().filter(P -> sample2index.containsKey(P.getId())).mapToInt(P -> sample2index.get(P.getId())).sorted().toArray();
            ctrlIndexes = pedigree.getUnaffected().stream().filter(P -> sample2index.containsKey(P.getId())).mapToInt(P -> sample2index.get(P.getId())).sorted().toArray();
            if (caseIndexes.length == 0 || ctrlIndexes.length == 0) {
                LOG.error("empty ped or no case/ctrl");
                return -1;
            }
            if (this.load_variants_in_memory) {
                LOG.info("loading variants in memory");
                tmpIndexFile = null;
                final CloseableIterator<VariantContext> iter2 = vcfFileReader.iterator();
                // keep only variants passing the filter with at least one called genotype
                // should fix https://github.com/samtools/htsjdk/issues/1026 ?
                inMemoryVariants = Collections.unmodifiableList(iter2.stream().filter(this.variantFilter).filter(V -> V.getGenotypes().stream().anyMatch(G -> G.isCalled())).collect(Collectors.toList()));
                variantsCount = inMemoryVariants.size();
                iter2.close();
            } else {
                // on-disk mode: build a byte-offset index; each Runner re-reads the
                // VCF through its own VcfList backed by this index
                tmpIndexFile = File.createTempFile("epistatsis", VcfOffsetsIndexFactory.INDEX_EXTENSION);
                tmpIndexFile.deleteOnExit();
                new VcfOffsetsIndexFactory().setLogger(LOG).setPredicate(variantFilter).indexVcfFile(vcfFile, tmpIndexFile);
                final VcfList tmpList = VcfList.fromFile(vcfFile, tmpIndexFile);
                variantsCount = tmpList.size();
                tmpList.close();
                inMemoryVariants = null;
            }
        }
        LOG.info("Number of variants: " + variantsCount);
        Result bestResult = null;
        int x = this.start_index_at;
        while (x + 1 < variantsCount) {
            // build a batch of at most 'number_of_jobs' runners, one start index each
            final List<Runner> runners = new Vector<>(this.number_of_jobs);
            while (x + 1 < variantsCount && runners.size() < this.number_of_jobs) {
                LOG.info("starting " + x + "/" + variantsCount);
                runners.add(new Runner(inMemoryVariants == null ? VcfList.fromFile(vcfFile, tmpIndexFile) : new Vector<>(inMemoryVariants), x, caseIndexes, ctrlIndexes));
                ++x;
            }
            if (this.number_of_jobs == 1) {
                // single-job mode: run synchronously, no executor required
                runners.get(0).call();
            } else {
                // consolidated from two separate 'number_of_jobs == 1' tests and a
                // stray empty statement; invokeAll() blocks until all tasks finish
                final ExecutorService execSvc = Executors.newFixedThreadPool(this.number_of_jobs);
                execSvc.invokeAll(runners);
                execSvc.shutdown();
                execSvc.awaitTermination(10000L, TimeUnit.DAYS);
                execSvc.shutdownNow();
            }
            runners.stream().mapToLong(R -> R.duration).min().ifPresent(D -> {
                LOG.info("That took " + (D / 1000f) + " seconds.");
            });
            // collect the batch's results; rewrite the output whenever the best improves
            for (final Runner r : runners) {
                final Result rez = r.result;
                if (rez == null)
                    continue;
                if (bestResult == null || bestResult.score < rez.score) {
                    bestResult = rez;
                    if (this.output_score) {
                        final PrintWriter pw = super.openFileOrStdoutAsPrintWriter(this.outputFile);
                        pw.println(bestResult.score + "\t" + bestResult.toString());
                        pw.flush();
                        pw.close();
                    } else {
                        // emit the two variants of the best pair as a small VCF
                        final VariantContextWriter w = openVariantContextWriter(this.outputFile);
                        final VCFHeader header2 = new VCFHeader(header);
                        header2.addMetaDataLine(new VCFHeaderLine(VcfEpistatis01.class.getName(), bestResult.toString()));
                        w.writeHeader(header2);
                        w.add(bestResult.ctx1);
                        w.add(bestResult.ctx2);
                        w.close();
                    }
                }
            }
            LOG.info("best: " + bestResult);
        }
        if (tmpIndexFile != null)
            tmpIndexFile.delete();
        return 0;
    } catch (final Exception err) {
        // single logging path (redundant err.printStackTrace() removed)
        LOG.error(err);
        return -1;
    }
}
Use of com.github.lindenb.jvarkit.util.Pedigree in the project jvarkit by lindenb.
The class VcfGeneEpistasis, method doWork:
/**
 * Entry point: evaluates the SKAT association score for every non-overlapping
 * pair of genes (and each gene alone). The gene intervals are read from a BED
 * file, which is built from the VCF on the first run if it does not exist.
 * Each time a better (lower) score is found, the result is (re)written to the
 * output file, either as a VCF or as a tab-delimited line.
 *
 * @param args command-line arguments; exactly one indexed VCF file is expected
 * @return 0 on success, -1 on error
 */
@Override
public int doWork(final List<String> args) {
    if (this.geneBed == null) {
        LOG.error("gene file bed undefined");
        return -1;
    }
    if (this.outputFile == null) {
        LOG.error("output file undefined");
        return -1;
    }
    CloseableIterator<VariantContext> iter = null;
    try {
        final File vcfFile = new File(oneAndOnlyOneFile(args));
        // 'true': requires a tabix/tribble index for the per-gene queries below
        this.vcfFileReader = new VCFFileReader(vcfFile, true);
        final VCFHeader header = this.vcfFileReader.getFileHeader();
        final Pedigree pedigree;
        if (this.pedigreeFile != null) {
            pedigree = new Pedigree.Parser().parse(this.pedigreeFile);
        } else {
            pedigree = new Pedigree.Parser().parse(header);
        }
        if (pedigree == null || pedigree.isEmpty() || !pedigree.hasAffected() || !pedigree.hasUnaffected()) {
            LOG.error("empty ped or no case/ctrl");
            return -1;
        }
        pedigree.verifyPersonsHaveUniqueNames();
        // index the case/control individuals that are genotyped in this VCF
        for (final Pedigree.Person p : pedigree.getPersons().stream().filter(P -> P.isAffected() || P.isUnaffected()).filter(P -> header.getSampleNamesInOrder().contains(P.getId())).collect(Collectors.toSet())) {
            this.id2samples.put(p.getId(), p);
        }
        this.vcfTools = new VcfTools(header);
        List<Interval> geneList;
        if (!this.geneBed.exists()) {
            // first run: scan the whole VCF and derive one interval per gene name
            final Map<String, Interval> gene2interval = new HashMap<>(50000);
            LOG.info("building gene file" + this.geneBed);
            iter = this.vcfFileReader.iterator();
            // iter = this.vcfFileReader.query("chr3",1,300_000_000);
            final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header).logger(LOG);
            while (iter.hasNext()) {
                final VariantContext ctx = progress.watch(iter.next());
                if (!accept(ctx))
                    continue;
                for (final String geneName : getGenes(ctx)) {
                    final Interval old = gene2interval.get(geneName);
                    if (old == null) {
                        gene2interval.put(geneName, new Interval(ctx.getContig(), ctx.getStart(), ctx.getEnd(), false, geneName));
                        LOG.info("adding " + geneName + ". number of genes: " + gene2interval.size());
                    } else if (!old.getContig().equals(ctx.getContig())) {
                        // same gene name seen on two chromosomes: cannot build one interval
                        LOG.error("boum :" + geneName + ": on chrom " + ctx.getContig() + " vs " + old);
                        return -1;
                    } else {
                        // extend the gene's interval to cover this variant
                        gene2interval.put(geneName, new Interval(ctx.getContig(), Math.min(ctx.getStart(), old.getStart()), Math.max(ctx.getEnd(), old.getEnd()), false, geneName));
                    }
                }
            }
            iter.close();
            iter = null;
            progress.finish();
            geneList = new ArrayList<>(gene2interval.values());
            // try-with-resources: the BED writer is closed even if println() throws
            // (the original leaked it on that path)
            try (final PrintWriter pw = new PrintWriter(this.geneBed)) {
                for (final Interval g : geneList) {
                    // BED is 0-based half-open, Interval is 1-based inclusive
                    pw.println(g.getContig() + "\t" + (g.getStart() - 1) + "\t" + (g.getEnd()) + "\t" + g.getName());
                }
                pw.flush();
            }
        } else {
            final BedLineCodec codec = new BedLineCodec();
            // try-with-resources: reader closed even if a line fails to decode
            try (final BufferedReader r = IOUtil.openFileForBufferedReading(geneBed)) {
                geneList = r.lines().map(L -> codec.decode(L)).filter(B -> B != null).map(B -> new Interval(B.getContig(), B.getStart(), B.getEnd(), true, B.get(3))).collect(Collectors.toList());
            }
        }
        if (geneList.isEmpty()) {
            LOG.error("gene List is empty");
            return -1;
        }
        final Comparator<VariantContext> ctxSorter = VCFUtils.createTidPosRefComparator(header.getSequenceDictionary());
        // loads the accepted variants of an interval that are annotated with that gene's name
        final Function<Interval, List<VariantContext>> loadVariants = (R) -> {
            final List<VariantContext> L = new ArrayList<>();
            final CloseableIterator<VariantContext> r = this.vcfFileReader.query(R.getContig(), R.getStart(), R.getEnd());
            try {
                while (r.hasNext()) {
                    final VariantContext ctx = r.next();
                    if (!accept(ctx))
                        continue;
                    if (!getGenes(ctx).contains(R.getName()))
                        continue;
                    L.add(ctx);
                }
            } finally {
                // close the query iterator even if accept()/getGenes() throws
                r.close();
            }
            return L;
        };
        final SkatExecutor executor = this.skatFactory.build();
        Double bestSkat = null;
        LOG.info("number of genes : " + geneList.size());
        final int list_end_index = (this.user_end_index < 0 ? geneList.size() : Math.min(geneList.size(), this.user_end_index));
        for (int x = this.user_begin_index; x < list_end_index; ++x) {
            final Interval intervalx = geneList.get(x);
            final List<VariantContext> variantsx = loadVariants.apply(intervalx);
            if (variantsx.isEmpty())
                continue;
            for (int y = x; y < geneList.size(); /* not list_end_index */
            ++y) {
                final Interval intervaly;
                final List<VariantContext> merge;
                if (y == x) {
                    // we're testing gene x alone
                    intervaly = intervalx;
                    merge = variantsx;
                } else {
                    intervaly = geneList.get(y);
                    // overlapping genes would share variants: skip the pair
                    if (intervaly.intersects(intervalx))
                        continue;
                    final List<VariantContext> variantsy = loadVariants.apply(intervaly);
                    if (variantsy.isEmpty())
                        continue;
                    merge = new MergedList<>(variantsx, variantsy);
                }
                LOG.info("testing : [" + x + "]" + intervalx + " [" + y + "]" + intervaly + " N:" + geneList.size() + " best: " + bestSkat);
                final Double skat = eval(executor, merge);
                if (skat == null)
                    continue;
                // lower SKAT value is better; rewrite the output each time it improves
                if (bestSkat == null || skat.compareTo(bestSkat) < 0) {
                    bestSkat = skat;
                    LOG.info("best " + bestSkat + " " + intervalx + " " + intervaly);
                    if (this.outputFile.getName().endsWith(".vcf") || this.outputFile.getName().endsWith(".vcf.gz")) {
                        final VCFHeader header2 = new VCFHeader(header);
                        header2.addMetaDataLine(new VCFHeaderLine(VcfGeneEpistasis.class.getName(), intervalx.getName() + " " + intervaly.getName() + " " + bestSkat));
                        final VariantContextWriter w = VCFUtils.createVariantContextWriter(outputFile);
                        w.writeHeader(header2);
                        merge.stream().sorted(ctxSorter).forEach(V -> w.add(V));
                        w.close();
                    } else {
                        final PrintWriter w = super.openFileOrStdoutAsPrintWriter(outputFile);
                        w.println(String.valueOf(bestSkat) + "\t" + intervalx.getName() + "\t" + intervaly.getName());
                        w.flush();
                        w.close();
                    }
                }
            }
        }
        return 0;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(iter);
        CloserUtil.close(this.vcfFileReader);
    }
}
Use of com.github.lindenb.jvarkit.util.Pedigree in the project jvarkit by lindenb.
The class VcfDoest, method run:
/**
 * Reads VCF data from {@code lr} and writes an R script to {@code pw}: first a
 * 'population' data.frame describing the pedigree samples (the pedigree must be
 * embedded in the VCF header), then, for every transcript overlapped by at
 * least one retained variant, a 'variants' data.frame plus a flat 'genotypes'
 * vector, optionally followed by a call to a user-defined R function.
 *
 * @param lr line iterator over the VCF text
 * @param pw destination for the generated R code
 * @throws IOException if no pedigree is found or a pedigree sample is missing from the VCF
 */
private void run(final LineIterator lr, final PrintWriter pw) throws IOException {
    SortingCollection<TranscriptInfo> sorting = null;
    CloseableIterator<TranscriptInfo> iter2 = null;
    try {
        while (lr.hasNext()) {
            // NOTE(review): 'in' is never closed; presumably closing it would also
            // close the shared 'lr' (the 'true' flag may control that) — TODO confirm
            // against VCFUtils.createVcfIteratorFromLineIterator.
            VcfIterator in = VCFUtils.createVcfIteratorFromLineIterator(lr, true);
            final VCFHeader header = in.getHeader();
            final Pedigree pedigree = Pedigree.newParser().parse(header);
            if (pedigree.isEmpty()) {
                throw new IOException("No pedigree found in header VCF header. use VcfInjectPedigree to add it");
            }
            // keep only individuals with a defined affected/unaffected status,
            // in sorted order so sample columns are stable across sections
            final SortedSet<Pedigree.Person> individuals = new TreeSet<>();
            for (final Pedigree.Person individual : pedigree.getPersons()) {
                if (individual.isAffected() || individual.isUnaffected()) {
                    individuals.add(individual);
                }
            }
            boolean first = true;
            // emit the R 'population' data.frame: family / name / status columns
            pw.println("# samples ( 0: unaffected 1:affected)");
            pw.print("population <- data.frame(family=c(");
            first = true;
            for (final Pedigree.Person person : individuals) {
                if (!first)
                    pw.print(",");
                pw.print("\"" + person.getFamily().getId() + "\"");
                first = false;
            }
            pw.print("),name=c(");
            first = true;
            for (final Pedigree.Person person : individuals) {
                if (!first)
                    pw.print(",");
                pw.print("\"" + person.getId() + "\"");
                first = false;
            }
            pw.print("),status=c(");
            first = true;
            for (final Pedigree.Person person : individuals) {
                if (!first)
                    pw.print(",");
                pw.print(person.isUnaffected() ? 0 : 1);
                first = false;
            }
            pw.println("))");
            // disk-backed collection so all records for a transcript can be grouped later
            sorting = SortingCollection.newInstance(TranscriptInfo.class, new TranscriptInfoCodec(), new TranscriptInfoCmp(), this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpPaths());
            sorting.setDestructiveIteration(true);
            final SAMSequenceDictionaryProgress progess = new SAMSequenceDictionaryProgress(header.getSequenceDictionary());
            /**
             * loop over variants
             */
            while (in.hasNext() && !pw.checkError()) {
                final VariantContext ctx = progess.watch(in.next());
                if (ctx.isFiltered())
                    continue;
                if (ctx.getAlternateAlleles().isEmpty())
                    continue;
                // only the most frequent ALT allele of this site is considered
                final Allele altAllele = ctx.getAltAlleleWithHighestAlleleCount();
                final MafCalculator mafCalculator = new MafCalculator(altAllele, ctx.getContig());
                // require at least one status-ed sample actually carrying altAllele
                boolean genotyped = false;
                for (final Pedigree.Person p : pedigree.getPersons()) {
                    if (!(p.isAffected() || p.isUnaffected()))
                        continue;
                    final Genotype g = ctx.getGenotype(p.getId());
                    if (g == null)
                        throw new IOException("Strange I cannot find individual " + p + " in the pedigree. Aborting.");
                    if (g.isCalled()) {
                        mafCalculator.add(g, p.isMale());
                    }
                    if (g.isHet() || g.isHomVar()) {
                        if (!g.getAlleles().contains(altAllele))
                            continue;
                        genotyped = true;
                        break;
                    }
                }
                if (!genotyped)
                    continue;
                final Interval interval = new Interval(ctx.getContig(), ctx.getStart(), ctx.getEnd());
                final List<KnownGene> genes = this.overlap(interval);
                if (genes.isEmpty())
                    continue;
                // record one TranscriptInfo per overlapping transcript
                for (final KnownGene kg : genes) {
                    final TranscriptInfo trInfo = new TranscriptInfo();
                    trInfo.contig = kg.getContig();
                    trInfo.txStart = kg.getTxStart();
                    trInfo.txEnd = kg.getTxEnd();
                    trInfo.transcriptName = kg.getName();
                    trInfo.strand = (byte) (kg.isPositiveStrand() ? '+' : '-');
                    trInfo.exonCount = kg.getExonCount();
                    trInfo.transcriptLength = kg.getTranscriptLength();
                    trInfo.ctxStart = ctx.getStart();
                    trInfo.ref = ctx.getReference();
                    trInfo.alt = altAllele;
                    trInfo.maf = mafCalculator.getMaf();
                    // encode genotypes: 0 hom-ref, 1 het, 2 hom-var, -9 unknown
                    trInfo.genotypes = new byte[individuals.size()];
                    int idx = 0;
                    for (final Pedigree.Person individual : individuals) {
                        final Genotype genotype = ctx.getGenotype(individual.getId());
                        final byte b;
                        if (genotype.isHomRef()) {
                            b = 0;
                        } else if (genotype.isHomVar() && genotype.getAlleles().contains(altAllele)) {
                            b = 2;
                        } else if (genotype.isHet() && genotype.getAlleles().contains(altAllele) && genotype.getAlleles().contains(ctx.getReference())) {
                            b = 1;
                        } else /* we treat 0/2 as hom-ref */
                        if (genotype.isHet() && !genotype.getAlleles().contains(altAllele) && genotype.getAlleles().contains(ctx.getReference())) {
                            LOG.warn("Treating " + genotype + " as hom-ref (0) alt=" + altAllele);
                            b = 0;
                        } else /* we treat 2/2 as hom-ref */
                        if (genotype.isHomVar() && !genotype.getAlleles().contains(altAllele)) {
                            LOG.warn("Treating " + genotype + " as hom-ref (0) alt=" + altAllele);
                            b = 0;
                        } else {
                            b = -9;
                        }
                        trInfo.genotypes[idx] = b;
                        ++idx;
                    }
                    KnownGene archetype = kg;
                    /* find gene archetype = longest overlapping */
                    for (final KnownGene kg2 : genes) {
                        if (kg2 == kg)
                            continue;
                        if (archetype.getStrand().equals(kg2.getStrand()) && archetype.getTranscriptLength() < kg2.getTranscriptLength()) {
                            archetype = kg2;
                        }
                    }
                    trInfo.archetypeName = archetype.getName();
                    trInfo.archetypeLength = archetype.getTranscriptLength();
                    boolean ctxWasFoundInExon = false;
                    final int ctxPos0 = ctx.getStart() - 1;
                    // 0-based offset of the variant within the spliced transcript
                    int indexInTranscript0 = 0;
                    for (final KnownGene.Exon exon : kg.getExons()) {
                        // variant in exon ?
                        if (!(exon.getStart() > (ctx.getEnd() - 1) || (ctx.getStart() - 1) >= exon.getEnd())) {
                            ctxWasFoundInExon = true;
                            indexInTranscript0 += (ctxPos0 - exon.getStart());
                            if (kg.isNegativeStrand()) {
                                // convert to a position counted from the transcript's 5' end
                                indexInTranscript0 = (kg.getTranscriptLength() - 1) - indexInTranscript0;
                            }
                            trInfo.indexInTranscript0 = indexInTranscript0;
                            trInfo.overlapName = exon.getName();
                            sorting.add(trInfo);
                            break;
                        } else {
                            // exon upstream of the variant: accumulate its length
                            indexInTranscript0 += (exon.getEnd() - exon.getStart());
                        }
                    }
                    if (ctxWasFoundInExon) {
                        continue;
                    }
                    indexInTranscript0 = 0;
                    // search closest intron/exon junction
                    for (int ex = 0; ex + 1 < kg.getExonCount(); ++ex) {
                        final KnownGene.Exon exon1 = kg.getExon(ex);
                        indexInTranscript0 += (exon1.getEnd() - exon1.getStart());
                        final KnownGene.Exon exon2 = kg.getExon(ex + 1);
                        if (exon1.getEnd() <= ctxPos0 && ctxPos0 < exon2.getStart()) {
                            // variant lies in the intron between exon1 and exon2:
                            // snap it to the nearer of the two junctions
                            final int dist_to_exon1 = ctxPos0 - exon1.getEnd();
                            final int dist_to_exon2 = exon2.getStart() - ctxPos0;
                            if (dist_to_exon2 < dist_to_exon1) {
                                indexInTranscript0++;
                            }
                            if (kg.isNegativeStrand()) {
                                indexInTranscript0 = (kg.getTranscriptLength() - 1) - indexInTranscript0;
                            }
                            trInfo.indexInTranscript0 = indexInTranscript0;
                            trInfo.overlapName = exon1.getNextIntron().getName();
                            sorting.add(trInfo);
                            break;
                        }
                    }
                }
                // end loop over genes
            }
            // end while loop over variants
            progess.finish();
            sorting.doneAdding();
            LOG.info("done adding");
            iter2 = sorting.iterator();
            // group TranscriptInfo records by (contig, transcript name)
            final EqualRangeIterator<TranscriptInfo> eqiter = new EqualRangeIterator<TranscriptInfo>(iter2, new Comparator<TranscriptInfo>() {
                @Override
                public int compare(final TranscriptInfo o1, final TranscriptInfo o2) {
                    int i = o1.contig.compareTo(o2.contig);
                    if (i != 0)
                        return i;
                    i = o1.transcriptName.compareTo(o2.transcriptName);
                    return i;
                }
            });
            while (eqiter.hasNext()) {
                final List<TranscriptInfo> list = eqiter.next();
                final TranscriptInfo front = list.get(0);
                // emit the per-transcript R variables and the 'variants' data.frame
                pw.println("# BEGIN TRANSCRIPT " + front.transcriptName + " ##########################################");
                pw.println("transcript.chrom <- \"" + front.contig + "\"");
                pw.println("transcript.txStart0 <- " + front.txStart + "");
                pw.println("transcript.txEnd0 <- " + front.txEnd + "");
                pw.println("transcript.name <- \"" + front.transcriptName + "\"");
                pw.println("transcript.strand <- \"" + ((char) front.strand) + "\"");
                pw.println("transcript.length <- " + front.transcriptLength + "");
                pw.println("transcript.exonCount <- " + front.exonCount + "");
                pw.println("archetype.name <- \"" + front.archetypeName + "\"");
                pw.println("archetype.length <- " + front.archetypeLength + "");
                pw.print("variants <- data.frame(chrom=c(");
                first = true;
                for (final TranscriptInfo v : list) {
                    if (!first)
                        pw.print(",");
                    pw.print("\"" + v.contig + "\"");
                    first = false;
                }
                pw.print("),chromStart=c(");
                first = true;
                for (final TranscriptInfo v : list) {
                    if (!first)
                        pw.print(",");
                    pw.print(v.ctxStart);
                    first = false;
                }
                pw.print("),chromEnd=c(");
                first = true;
                for (final TranscriptInfo v : list) {
                    if (!first)
                        pw.print(",");
                    pw.print(v.ctxStart + v.ref.length() - 1);
                    first = false;
                }
                pw.print("),refAllele=c(");
                first = true;
                for (final TranscriptInfo v : list) {
                    if (!first)
                        pw.print(",");
                    pw.print("\"" + v.ref.getDisplayString() + "\"");
                    first = false;
                }
                pw.print("),altAllele=c(");
                first = true;
                for (final TranscriptInfo v : list) {
                    if (!first)
                        pw.print(",");
                    pw.print("\"" + v.alt.getDisplayString() + "\"");
                    first = false;
                }
                pw.print("),positionInTranscript1=c(");
                first = true;
                for (final TranscriptInfo v : list) {
                    if (!first)
                        pw.print(",");
                    pw.print(v.indexInTranscript0 + 1);
                    first = false;
                }
                pw.print("),maf=c(");
                first = true;
                for (final TranscriptInfo v : list) {
                    if (!first)
                        pw.print(",");
                    pw.print(v.maf);
                    first = false;
                }
                pw.print("),overlapName=c(");
                first = true;
                for (final TranscriptInfo v : list) {
                    if (!first)
                        pw.print(",");
                    pw.print("\"" + v.overlapName + "\"");
                    first = false;
                }
                pw.println("))");
                // flat genotype vector, row-major: one run of samples per variant
                pw.println("# genotypes as a list. Should be a multiple of length(samples).");
                pw.println("# 0 is homref (0/0), 1 is het (0/1), 2 is homvar (1/1)");
                pw.println("# if the variant contains another ALT allele: (0/2) and (2/2) are considered 0 (homref)");
                pw.print("genotypes <- c(");
                first = true;
                for (final TranscriptInfo tr : list) {
                    for (byte g : tr.genotypes) {
                        if (!first)
                            pw.print(",");
                        first = false;
                        pw.print((int) g);
                    }
                }
                pw.println(")");
                pw.println("stopifnot(NROW(variants) * NROW(population) == length(genotypes) )");
                if (this.userDefinedFunName == null || this.userDefinedFunName.trim().isEmpty()) {
                    pw.println("## WARNING not user-defined R function was defined");
                } else {
                    pw.println("# consumme data with user-defined R function ");
                    pw.println(this.userDefinedFunName + "()");
                }
                pw.println("# END TRANSCRIPT " + front.transcriptName + " ##########################################");
            }
            // end while eqiter
            eqiter.close();
            iter2.close();
            iter2 = null;
            sorting.cleanup();
            sorting = null;
        }
    } finally {
        CloserUtil.close(iter2);
        if (sorting != null)
            sorting.cleanup();
    }
}
Use of com.github.lindenb.jvarkit.util.Pedigree in the project jvarkit by lindenb.
The class CaseControlPlot, method doWork:
/**
 * Entry point: streams a VCF once; every configured extractor receives each
 * variant together with the case/control sample sets and writes its own table
 * into the output archive (directory or zip). Optionally tees the VCF to stdout.
 *
 * @param args command-line arguments; at most one VCF file (stdin otherwise)
 * @return RETURN_OK on success, -1 on error
 */
@Override
public int doWork(final List<String> args) {
    ArchiveFactory archive = null;
    VcfIterator vcfIn = null;
    VariantContextWriter teeWriter = null;
    final List<CaseControlExtractor> extractors = new ArrayList<>();
    try {
        vcfIn = super.openVcfIterator(oneFileOrNull(args));
        final VCFHeader header = vcfIn.getHeader();
        extractors.addAll(parseConfigFile(header));
        // pedigree from the explicit file when given, otherwise from the VCF header
        final Pedigree pedigree = this.pedigreeFile != null
                ? Pedigree.newParser().parse(this.pedigreeFile)
                : Pedigree.newParser().parse(header);
        if (pedigree == null || pedigree.isEmpty()) {
            LOG.error("No pedigree defined , or it is empty");
            return -1;
        }
        // affected individuals that are also genotyped in this VCF
        final Set<Pedigree.Person> cases = pedigree.getPersons().stream()
                .filter(P -> P.isAffected() && header.getSampleNamesInOrder().indexOf(P.getId()) != -1)
                .collect(Collectors.toSet());
        if (cases.isEmpty()) {
            LOG.error("No Affected individuals in pedigree/header");
            return -1;
        }
        // unaffected individuals that are also genotyped in this VCF
        final Set<Pedigree.Person> controls = pedigree.getPersons().stream()
                .filter(P -> P.isUnaffected() && header.getSampleNamesInOrder().indexOf(P.getId()) != -1)
                .collect(Collectors.toSet());
        if (controls.isEmpty()) {
            LOG.error("No Unaffected individuals in pedigree/header");
            return -1;
        }
        if (this.teeToStdout) {
            // echo the full VCF to stdout while extracting
            teeWriter = super.openVariantContextWriter(null);
            teeWriter.writeHeader(header);
        }
        archive = ArchiveFactory.open(this.outputDirOrZip);
        // one output entry per extractor inside the archive
        for (final CaseControlExtractor extractor : extractors) {
            extractor.pw = archive.openWriter(extractor.name);
        }
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header);
        while (vcfIn.hasNext()) {
            final VariantContext ctx = progress.watch(vcfIn.next());
            if (teeWriter != null)
                teeWriter.add(ctx);
            for (final CaseControlExtractor handler : extractors) {
                handler.visit(ctx, cases, controls);
            }
        }
        for (final CaseControlExtractor handler : extractors) {
            handler.close();
        }
        progress.finish();
        // null-out after closing so the finally block does not re-close live handles
        if (teeWriter != null) {
            teeWriter.close();
            teeWriter = null;
        }
        vcfIn.close();
        vcfIn = null;
        archive.close();
        archive = null;
        return RETURN_OK;
    } catch (final Exception e) {
        LOG.error(e);
        return -1;
    } finally {
        CloserUtil.close(vcfIn);
        CloserUtil.close(archive);
        CloserUtil.close(teeWriter);
        for (final CaseControlExtractor handler : extractors) {
            handler.close();
        }
    }
}
Use of com.github.lindenb.jvarkit.util.Pedigree in the project jvarkit by lindenb.
The class VcfOptimizePedForSkat, method exec:
/**
 * Runs one generation of the optimizer: builds a candidate set of samples to
 * remove from the pedigree (randomly, from the current best solutions, or from
 * a user bootstrap list), filters the variants through MAF/FisherH burden
 * writers with the reduced pedigree, scores the result with SKAT, and records
 * the solution if it improves on the retained set.
 *
 * NOTE: the statement order around this.random is significant — any reordering
 * changes the RNG consumption sequence and thus the search trajectory.
 *
 * @param generation   generation number (0 = evaluate the unmodified pedigree)
 * @param header       VCF header used to rebuild a writer-compatible header
 * @param variants     variants to filter and score
 * @param samples      the effective pedigree individuals
 * @param skatExecutor SKAT scoring engine
 */
private void exec(final long generation, final VCFHeader header, final List<VariantContext> variants, final List<Pedigree.Person> samples, final SkatFactory.SkatExecutor skatExecutor) {
    final Solution solution = new Solution();
    solution.generation = generation;
    String origin = "random";
    if (generation != 0) {
        // number of samples to drop this generation: 1..nSamplesRemove
        int nRemove = 1 + this.random.nextInt(this.nSamplesRemove);
        if (generation == 1 && this.bootstrapSamples != null) {
            // generation 1: seed with the user-provided sample list
            origin = "bootstrap";
            for (final String sample : this.bootstrapSamples.split("[; ,]")) {
                if (StringUtil.isBlank(sample))
                    continue;
                if (!samples.stream().anyMatch(S -> S.getId().equals(sample))) {
                    throw new JvarkitException.UserError("Sample " + sample + " not found in effective pedigree.");
                }
                LOG.info("bootstraping with " + sample);
                solution.sampleSet.add(sample);
            }
        } else if (generation % 5 == 0 && !this.bestSolutions.isEmpty()) {
            // every 5th generation: mutate one of the top displayed solutions
            int sol_index = this.random.nextInt(Math.min(this.max_results_output, this.bestSolutions.size()));
            final List<String> list = new ArrayList<>(this.bestSolutions.get(sol_index).sampleSet);
            if (list.size() > 1 && this.random.nextBoolean()) {
                origin = "best-minus-random";
                list.remove(this.random.nextInt(list.size()));
            } else if (list.size() < nRemove) {
                origin = "best-plus-random";
                list.add(samples.get(this.random.nextInt(samples.size())).getId());
            }
            solution.sampleSet.addAll(list);
        } else if (generation % 7 == 0 && this.bestSolutions.size() > 2) {
            // every 7th generation: recombine the two best solutions, then trim
            final Set<String> set = new HashSet<>(this.bestSolutions.get(0).sampleSet);
            set.addAll(this.bestSolutions.get(1).sampleSet);
            final List<String> bestsamples = new ArrayList<>(set);
            Collections.shuffle(bestsamples, this.random);
            while (bestsamples.size() > nRemove) {
                bestsamples.remove(0);
            }
            solution.sampleSet.addAll(bestsamples);
            origin = "best0-plus-best1";
        } else {
            // default: random picks, occasionally biased toward the best solution
            while (nRemove > 0) {
                final String sampleId;
                if (generation % 3 == 0L && nRemove % 2 == 0 && this.bestSolutions.size() > 0 && !this.bestSolutions.get(0).sampleSet.isEmpty()) {
                    final List<String> bestsamples = new ArrayList<>(this.bestSolutions.get(0).sampleSet);
                    sampleId = bestsamples.get(this.random.nextInt(bestsamples.size()));
                    origin = "random-plus-best0";
                } else {
                    sampleId = samples.get(this.random.nextInt(samples.size())).getId();
                }
                solution.sampleSet.add(sampleId);
                nRemove--;
            }
        }
    } else {
        // generation 0: evaluate the full pedigree, nothing removed
        origin = "original";
    }
    if (generation > 0 && solution.sampleSet.isEmpty())
        return;
    // clamp the removal set back to nSamplesRemove by dropping random entries
    while (solution.sampleSet.size() > this.nSamplesRemove) {
        LOG.warn("Hum... to many for " + origin);
        final List<String> L = new ArrayList<>(solution.sampleSet);
        while (L.size() > 0 && L.size() > this.nSamplesRemove) L.remove(this.random.nextInt(L.size()));
        solution.sampleSet.clear();
        solution.sampleSet.addAll(L);
    }
    // skip sample sets already evaluated
    if (this.bestSolutions.contains(solution))
        return;
    solution.origin = origin;
    // reduced pedigree = samples minus the removal set; must keep >=1 case and >=1 control
    final List<Pedigree.Person> ped2 = new ArrayList<>(samples);
    ped2.removeIf(I -> solution.sampleSet.contains(I.getId()));
    if (ped2.isEmpty())
        return;
    if (!ped2.stream().anyMatch(P -> P.isAffected()))
        return;
    if (!ped2.stream().anyMatch(P -> P.isUnaffected()))
        return;
    // test MAF and FisherH burden filters with the reduced pedigree
    final VCFHeader header2 = new VCFHeader(header.getMetaDataInInputOrder(), header.getSampleNamesInOrder());
    final VcfBurdenFisherH.CtxWriterFactory fisherhFactory = new VcfBurdenFisherH.CtxWriterFactory();
    fisherhFactory.setCaseControlExtractor((H) -> new HashSet<>(ped2));
    fisherhFactory.setIgnoreFiltered(true);
    final VcfBurdenMAF.CtxWriterFactory mafFactory = new VcfBurdenMAF.CtxWriterFactory();
    mafFactory.setCaseControlExtractor((H) -> new HashSet<>(ped2));
    mafFactory.setIgnoreFiltered(true);
    // pipe variants through MAF then FisherH into an in-memory collector
    final VariantCollector collector = new VariantCollector();
    VariantContextWriter vcw = mafFactory.open(collector);
    vcw = fisherhFactory.open(vcw);
    vcw.writeHeader(header2);
    for (final VariantContext vc : variants) {
        vcw.add(vc);
    }
    vcw.close();
    CloserUtil.close(fisherhFactory);
    CloserUtil.close(mafFactory);
    //
    // score the filtered variants with SKAT on the reduced pedigree
    solution.result = skatExecutor.execute(collector.variants, ped2);
    if (solution.result.isError())
        return;
    // keep the solution if it beats the worst retained one (list is kept sorted)
    if (this.bestSolutions.isEmpty() || solution.compareTo(this.bestSolutions.get(this.bestSolutions.size() - 1)) < 0) {
        this.bestSolutions.add(solution);
        if (this.firstScore == null) {
            this.firstScore = solution.result.getPValue();
        }
        Collections.sort(this.bestSolutions);
        // bounded buffer of retained solutions
        final int BUFFER_RESULT = 1000;
        while (this.bestSolutions.size() > BUFFER_RESULT) {
            this.bestSolutions.remove(this.bestSolutions.size() - 1);
        }
        // report only when the new solution ranks among the displayed top results
        if (this.bestSolutions.indexOf(solution) < this.max_results_output) {
            final StringBuilder sb = new StringBuilder();
            sb.append(">>> ").append(generation).append("\n");
            this.bestSolutions.stream().limit(this.max_results_output).forEach(S -> sb.append(S).append("\n"));
            sb.append("<<< ").append(generation).append("\n");
            if (outputFile == null) {
                stdout().println(sb.toString());
            } else {
                stderr().println(sb.toString());
                IOUtils.cat(sb.toString(), this.outputFile, false);
            }
        }
    }
}
Aggregations