Search in sources :

Example 26 with EnvironmentConfig

use of com.sleepycat.je.EnvironmentConfig in project bboxdb by jnidzwetzki.

the class BDBTupleStore method open.

/**
 * Opens the Berkeley DB JE environment located at {@code dir} and the
 * database named {@code "test"} inside it, creating either one when it does
 * not exist yet.
 *
 * <p>When {@code USE_TRANSACTIONS} is set, the database is opened inside a
 * transaction. That transaction is committed on success and aborted when the
 * open fails, so a failed open does not leave an unresolved transaction
 * behind in the environment.
 *
 * @throws Exception if the environment or the database cannot be opened
 */
@Override
public void open() throws Exception {
    final EnvironmentConfig envConfig = new EnvironmentConfig();
    envConfig.setTransactional(USE_TRANSACTIONS);
    envConfig.setAllowCreate(true);
    environment = new Environment(dir, envConfig);

    final DatabaseConfig dbConfig = new DatabaseConfig();
    dbConfig.setTransactional(USE_TRANSACTIONS);
    dbConfig.setAllowCreate(true);
    // dbConfig.setSortedDuplicates(true);
    // NOTE(review): deferred-write is requested together with transactional
    // mode whenever USE_TRANSACTIONS is true - confirm that JE accepts this
    // combination for the version in use.
    dbConfig.setDeferredWrite(true);
    // dbConfig.setKeyPrefixing(true);
    // dbConfig.setNodeMaxEntries(128);

    final Transaction txn = USE_TRANSACTIONS ? environment.beginTransaction(null, null) : null;
    try {
        database = environment.openDatabase(txn, "test", dbConfig);
    } catch (final Exception openFailure) {
        // Resolve the transaction before propagating the failure; otherwise
        // it would stay open until the environment is torn down.
        if (txn != null) {
            try {
                txn.abort();
            } catch (final Exception abortFailure) {
                openFailure.addSuppressed(abortFailure);
            }
        }
        throw openFailure;
    }
    if (txn != null) {
        txn.commit();
    }
}
Also used : Transaction(com.sleepycat.je.Transaction) EnvironmentConfig(com.sleepycat.je.EnvironmentConfig) Environment(com.sleepycat.je.Environment) DatabaseConfig(com.sleepycat.je.DatabaseConfig)

Example 27 with EnvironmentConfig

use of com.sleepycat.je.EnvironmentConfig in project jvarkit by lindenb.

the class NgsFilesScanner method doWork.

/**
 * Opens the Berkeley DB JE environment in {@code bdbHome} and either scans a
 * directory tree into the database or dumps the database content as XML.
 *
 * <p>Scan mode ({@code dump == false}): {@code args} must contain exactly one
 * element, the root directory. {@code recursive(root)} is called, then every
 * stored key for which {@code isEntryShouldBeDeleted} returns true is removed.
 *
 * <p>Dump mode ({@code dump == true}): {@code args} must be empty. The XML
 * fragment stored for each kept key is copied, without its document
 * start/end events, under a single {@code ngs-files} root element written to
 * the stream returned by {@code openFileOrStdoutAsStream(outputFile)}.
 *
 * @param args the remaining command-line arguments
 * @return 0 on success, -1 on any error (the error is logged)
 */
@Override
public int doWork(List<String> args) {
    // NOTE(review): 'dump' is a local that is never set to true in this
    // method, so the dump branch below cannot run as written - confirm
    // whether it is meant to be driven by a command-line option.
    boolean dump = false;
    EnvironmentConfig envCfg = new EnvironmentConfig();
    Environment env = null;
    if (bdbHome == null) {
        LOG.error("BDB home undefined");
        return -1;
    }
    if (!bdbHome.exists() || !bdbHome.isDirectory()) {
        LOG.error("BDB doesn't exist or is not a directory");
        return -1;
    }
    File root = new File("/");
    Cursor cursor = null;
    OutputStream xmlout = null;
    try {
        if (!dump && args.size() == 1) {
            root = new File(args.get(0));
        } else if (!(dump && args.isEmpty())) {
            LOG.error("illegal.number.of.arguments");
            return -1;
        }
        // Dumping only reads, so the environment and database are opened
        // read-only in that mode and may be created only when scanning.
        envCfg.setAllowCreate(!dump);
        envCfg.setReadOnly(dump);
        envCfg.setTransactional(false);
        LOG.info("Opening env " + bdbHome);
        env = new Environment(bdbHome, envCfg);
        // TransactionConfig txnCfg=new TransactionConfig();
        // this.txn=env.beginTransaction(null, txnCfg);
        LOG.info("Opening database " + DATABASE_NAME);
        DatabaseConfig cfg = new DatabaseConfig();
        cfg.setAllowCreate(!dump);
        cfg.setReadOnly(dump);
        cfg.setTransactional(false);
        this.database = env.openDatabase(this.txn, DATABASE_NAME, cfg);
        DatabaseEntry key = new DatabaseEntry();
        DatabaseEntry data = new DatabaseEntry();
        if (dump) {
            XMLEventFactory xef = XMLEventFactory.newFactory();
            XMLOutputFactory xof = XMLOutputFactory.newFactory();
            XMLInputFactory xif = XMLInputFactory.newFactory();
            xmlout = openFileOrStdoutAsStream(outputFile);
            XMLEventWriter out = xof.createXMLEventWriter(xmlout, "UTF-8");
            out.add(xef.createStartDocument("UTF-8", "1.0"));
            out.add(xef.createStartElement("", "", "ngs-files"));
            cursor = this.database.openCursor(this.txn, null);
            while (cursor.getNext(key, data, LockMode.DEFAULT) == OperationStatus.SUCCESS) {
                File file = new File(StringBinding.entryToString(key));
                if (isEntryShouldBeDeleted(file)) {
                    LOG.info("deleting entry for " + file);
                    // cursor.delete();//no env is read only
                    continue;
                }
                StringReader sr = new StringReader(StringBinding.entryToString(data));
                XMLEventReader xr = xif.createXMLEventReader(sr);
                // Copy the stored fragment, dropping its own document
                // start/end events so it nests under the shared root element.
                while (xr.hasNext()) {
                    XMLEvent evt = xr.nextEvent();
                    if (evt.isStartDocument())
                        continue;
                    if (evt.isEndDocument())
                        break;
                    out.add(evt);
                }
                xr.close();
                sr.close();
            }
            cursor.close();
            // Already closed: keep the finally block from closing it again.
            cursor = null;
            out.add(xef.createEndElement("", "", "ngs-files"));
            out.add(xef.createEndDocument());
            out.flush();
            out.close();
            xmlout.flush();
            xmlout.close();
            xmlout = null;
        } else {
            recursive(root);
            // final cleanup
            cursor = this.database.openCursor(this.txn, null);
            while (cursor.getNext(key, data, LockMode.DEFAULT) == OperationStatus.SUCCESS) {
                File file = new File(StringBinding.entryToString(key));
                if (isEntryShouldBeDeleted(file)) {
                    LOG.info("deleting entry for " + file);
                    cursor.delete();
                }
            }
            cursor.close();
            // Already closed: keep the finally block from closing it again.
            cursor = null;
        }
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        // Release the handles in the order cursor, transaction, database,
        // environment. Every step stays best-effort so that one failure does
        // not prevent the remaining handles from being released, but the
        // failure is logged instead of being silently dropped.
        if (cursor != null) {
            try {
                cursor.close();
            } catch (Exception err) {
                LOG.error(err);
            }
        }
        if (this.txn != null) {
            try {
                this.txn.commit();
            } catch (Exception err) {
                LOG.error(err);
            }
        }
        if (this.database != null) {
            try {
                this.database.close();
            } catch (Exception err) {
                LOG.error(err);
            }
        }
        if (env != null) {
            try {
                env.close();
            } catch (Exception err) {
                LOG.error(err);
            }
        }
        CloserUtil.close(xmlout);
    }
}
Also used : XMLOutputFactory(javax.xml.stream.XMLOutputFactory) XMLEventFactory(javax.xml.stream.XMLEventFactory) EnvironmentConfig(com.sleepycat.je.EnvironmentConfig) OutputStream(java.io.OutputStream) DatabaseEntry(com.sleepycat.je.DatabaseEntry) Cursor(com.sleepycat.je.Cursor) XMLStreamException(javax.xml.stream.XMLStreamException) IOException(java.io.IOException) XMLEventWriter(javax.xml.stream.XMLEventWriter) StringReader(java.io.StringReader) XMLEvent(javax.xml.stream.events.XMLEvent) Environment(com.sleepycat.je.Environment) XMLEventReader(javax.xml.stream.XMLEventReader) File(java.io.File) XMLInputFactory(javax.xml.stream.XMLInputFactory) DatabaseConfig(com.sleepycat.je.DatabaseConfig)

Example 28 with EnvironmentConfig

use of com.sleepycat.je.EnvironmentConfig in project jvarkit by lindenb.

the class LumpySort method doWork.

/**
 * Merges the Lumpy-SV VCF files named in {@code args} into one VCF written
 * to {@code outputFile}.
 *
 * <p>Steps, in order:
 * <ol>
 * <li>Read every input header; fail if a header is not a Lumpy header, has no
 * genotyping data, or repeats a sample name already seen in another file.</li>
 * <li>Build the output header from the collected meta-data lines (minus
 * {@code fileDate}) plus the {@code NSAMPLES}, {@code CB} and {@code CE}
 * lines.</li>
 * <li>Store every retained variant in a temporary Berkeley DB JE database
 * ({@code variants1}) ordered by {@code KeySorterComparator}.</li>
 * <li>Walk the database with a cursor; unless {@code do_not_merge_ctx} is
 * set, gather the following records that {@code canMerge} with the current
 * one, delete them, and write one merged variant.</li>
 * </ol>
 *
 * @param args paths of the input VCF files, expanded with
 *             {@code IOUtil.unrollFiles}
 * @return 0 on success, -1 on any error (the error is logged)
 */
@Override
public int doWork(final List<String> args) {
    VariantContextWriter vcw = null;
    Environment environment = null;
    Database variantsDb1 = null;
    // Declared here so the finally block can close it before the database.
    Cursor cursor = null;
    final List<File> inputs = IOUtil.unrollFiles(args.stream().map(S -> new File(S)).collect(Collectors.toList()), ".vcf", ".vcf.gz");
    if (inputs.isEmpty()) {
        LOG.error("empty vcf list");
        return -1;
    }
    try {
        IOUtil.assertDirectoryIsWritable(this.bdbHomeDir);
        final Set<VCFHeaderLine> metaData = new HashSet<>();
        final Set<String> sampleNames = new TreeSet<>();
        final IntervalTreeMap<Boolean> intervalTreeMapBed;
        if (this.bedFile != null) {
            intervalTreeMapBed = new IntervalTreeMap<>();
            final BedLineCodec bedLineCodec = new BedLineCodec();
            try (BufferedReader br = IOUtils.openFileForBufferedReading(this.bedFile)) {
                br.lines().map(L -> bedLineCodec.decode(L)).filter(L -> L != null).forEach(B -> intervalTreeMapBed.put(B.toInterval(), true));
            }
        } else {
            intervalTreeMapBed = null;
        }
        /* first pass: validate the headers and collect samples + meta-data */
        for (int idx = 0; idx < inputs.size(); ++idx) {
            final File vcfFile = inputs.get(idx);
            LOG.info("Read header " + (idx + 1) + "/" + inputs.size());
            try (VCFFileReader r = new VCFFileReader(vcfFile, false)) {
                final VCFHeader header = r.getFileHeader();
                if (!LumpyConstants.isLumpyHeader(header)) {
                    LOG.error("doesn't look like a Lumpy-SV vcf header " + vcfFile);
                    return -1;
                }
                if (!header.hasGenotypingData()) {
                    LOG.error("No sample in " + vcfFile);
                    return -1;
                }
                for (final String sampleName : header.getSampleNamesInOrder()) {
                    if (sampleNames.contains(sampleName)) {
                        LOG.error("Sample found twice " + sampleName + " in " + vcfFile);
                        return -1;
                    }
                    sampleNames.add(sampleName);
                }
                metaData.addAll(header.getMetaDataInInputOrder().stream().filter(H -> !H.getKey().equals("fileDate")).collect(Collectors.toSet()));
            }
        }
        final VCFInfoHeaderLine nSampleInfoHeaderLine = new VCFInfoHeaderLine("NSAMPLES", 1, VCFHeaderLineType.Integer, "Number of affected samples.");
        metaData.add(nSampleInfoHeaderLine);
        final VCFFormatHeaderLine chromStartFormatHeaderLine = new VCFFormatHeaderLine("CB", 1, VCFHeaderLineType.Integer, "Original Variant POS");
        metaData.add(chromStartFormatHeaderLine);
        final VCFFormatHeaderLine chromEndFormatHeaderLine = new VCFFormatHeaderLine("CE", 1, VCFHeaderLineType.Integer, "Original Variant END");
        metaData.add(chromEndFormatHeaderLine);
        final VCFHeader outHeader = new VCFHeader(metaData, sampleNames);
        final VCFHeaderVersion[] versions = VCFHeaderVersion.values();
        this.vcfEncoder = new VCFEncoder(outHeader, false, true);
        this.vcfCodec.setVCFHeader(outHeader, versions[versions.length - 1]);
        /* open BDB env */
        final Transaction txn = null;
        environment = new Environment(this.bdbHomeDir, new EnvironmentConfig().setAllowCreate(true).setReadOnly(false));
        variantsDb1 = environment.openDatabase(txn, "variants1", new DatabaseConfig().setBtreeComparator(KeySorterComparator.class).setAllowCreate(true).setReadOnly(false).setTemporary(true));
        long total_variants = 0L;
        final LumpyVarBinding lumpVarBinding = new LumpyVarBinding();
        final KeySorterBinding keySorterBinding = new KeySorterBinding();
        /* second pass: load every retained variant into the database */
        for (int idx = 0; idx < inputs.size(); ++idx) {
            final long millisecstart = System.currentTimeMillis();
            final File vcfFile = inputs.get(idx);
            int nVariant = 0;
            try (VCFFileReader r = new VCFFileReader(vcfFile, false)) {
                // Samples of the merged header that this file lacks get a
                // missing genotype appended to each of its variants.
                final List<String> fileSampleNames = r.getFileHeader().getSampleNamesInOrder();
                final List<Genotype> missing = new ArrayList<>(sampleNames.size());
                for (final String sn : sampleNames) {
                    if (fileSampleNames.contains(sn))
                        continue;
                    missing.add(GenotypeBuilder.createMissing(sn, 2));
                }
                try (CloseableIterator<VariantContext> iter = r.iterator()) {
                    while (iter.hasNext()) {
                        VariantContext ctx = iter.next();
                        if (!this.keep_secondary) {
                            if (ctx.hasAttribute("SECONDARY"))
                                continue;
                        }
                        if (!this.variantFilter.test(ctx))
                            continue;
                        if (intervalTreeMapBed != null && !intervalTreeMapBed.containsOverlapping(ctx))
                            continue;
                        final List<Genotype> gtList = new ArrayList<>(ctx.getGenotypes());
                        for (int gi = 0; gi < gtList.size(); gi++) {
                            Genotype g = gtList.get(gi);
                            final GenotypeBuilder gb;
                            if (this.do_genotype && isAvailableGenotype(g)) {
                                gb = new GenotypeBuilder(g.getSampleName(), ctx.getAlternateAlleles());
                                gb.attributes(g.getExtendedAttributes());
                            } else {
                                gb = new GenotypeBuilder(g);
                            }
                            // Record the original coordinates in each genotype
                            // (declared above as "Original Variant POS"/"END").
                            gb.attribute(chromStartFormatHeaderLine.getID(), ctx.getStart());
                            gb.attribute(chromEndFormatHeaderLine.getID(), ctx.getEnd());
                            gtList.set(gi, gb.make());
                        }
                        gtList.addAll(missing);
                        ctx = new VariantContextBuilder(ctx).genotypes(gtList).rmAttribute("PRPOS").make();
                        final LumpyVar lvar = new LumpyVar(ctx, total_variants);
                        final DatabaseEntry key = new DatabaseEntry();
                        final DatabaseEntry data = new DatabaseEntry();
                        lumpVarBinding.objectToEntry(lvar, data);
                        keySorterBinding.objectToEntry(lvar.getSortKey(), key);
                        if (variantsDb1.put(txn, key, data) != OperationStatus.SUCCESS) {
                            LOG.error("insertion failed");
                            return -1;
                        }
                        nVariant++;
                        total_variants++;
                    }
                }
            }
            LOG.info("Read  " + (idx + 1) + "/" + inputs.size() + " variants of " + vcfFile + " N=" + nVariant + " Total:" + total_variants + " That took: " + Duration.ofMillis(System.currentTimeMillis() - millisecstart));
            System.gc();
        }
        if (intervalTreeMapBed != null)
            intervalTreeMapBed.clear();
        System.gc();
        LOG.info("Writing output");
        final List<Allele> ALLELES_NO_CALLS = this.do_genotype ? Collections.singletonList(Allele.NO_CALL) : Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
        cursor = variantsDb1.openCursor(txn, null);
        vcw = super.openVariantContextWriter(this.outputFile);
        vcw.writeHeader(outHeader);
        for (; ; ) {
            final DatabaseEntry key = new DatabaseEntry();
            final DatabaseEntry data = new DatabaseEntry();
            OperationStatus status = cursor.getNext(key, data, LockMode.DEFAULT);
            if (!status.equals(OperationStatus.SUCCESS))
                break;
            final LumpyVar first = lumpVarBinding.entryToObject(data);
            if (this.do_not_merge_ctx) {
                vcw.add(first.ctx);
                continue;
            }
            final KeySorter keySorter1 = keySorterBinding.entryToObject(key);
            final List<LumpyVar> buffer = new ArrayList<>();
            buffer.add(first);
            final DatabaseEntry key2 = new DatabaseEntry();
            final DatabaseEntry data2 = new DatabaseEntry();
            // Look ahead with a duplicate cursor so the main cursor keeps its
            // position while the mergeable records are collected and deleted.
            final Cursor cursor2 = cursor.dup(true);
            try {
                for (; ; ) {
                    status = cursor2.getNext(key2, data2, LockMode.DEFAULT);
                    if (!status.equals(OperationStatus.SUCCESS))
                        break;
                    final KeySorter keySorter2 = keySorterBinding.entryToObject(key2);
                    if (keySorter1.compare1(keySorter2) != 0) {
                        break;
                    }
                    final LumpyVar lv = lumpVarBinding.entryToObject(data2);
                    if (lv.ctx.getStart() > first.ctx.getEnd()) {
                        break;
                    }
                    if (first.canMerge(lv)) {
                        buffer.add(lv);
                        cursor2.delete();
                    }
                }
            } finally {
                cursor2.close();
            }
            // delete 'first'
            cursor.delete();
            // A = min, B = truncated mean, C = max over the merged variants.
            final int variantStartA = buffer.stream().mapToInt(V -> V.ctx.getStart()).min().getAsInt();
            final int variantStartB = (int) buffer.stream().mapToInt(V -> V.ctx.getStart()).average().getAsDouble();
            final int variantStartC = buffer.stream().mapToInt(V -> V.ctx.getStart()).max().getAsInt();
            final int variantEndA = buffer.stream().mapToInt(V -> V.ctx.getEnd()).min().getAsInt();
            final int variantEndB = (int) buffer.stream().mapToInt(V -> V.ctx.getEnd()).average().getAsDouble();
            final int variantEndC = buffer.stream().mapToInt(V -> V.ctx.getEnd()).max().getAsInt();
            final VariantContextBuilder vcb = new VariantContextBuilder("lumpymerge", first.ctx.getContig(), variantStartB, variantEndB, first.ctx.getAlleles());
            vcb.attribute("END", variantEndB);
            vcb.attribute("SVTYPE", first.ctx.getAttribute("SVTYPE"));
            vcb.attribute("SVLEN", (int) Percentile.median().evaluate(buffer.stream().mapToInt(V -> V.ctx.getEnd() - V.ctx.getStart())));
            vcb.attribute("CIPOS", Arrays.asList(variantStartB - variantStartA, variantStartC - variantStartB));
            vcb.attribute("CIEND", Arrays.asList(variantEndB - variantEndA, variantEndC - variantEndB));
            vcb.attribute("SU", buffer.stream().flatMap(V -> V.ctx.getGenotypes().stream()).mapToInt(G -> G.getAttributeAsInt("SU", 0)).sum());
            vcb.attribute("SR", buffer.stream().flatMap(V -> V.ctx.getGenotypes().stream()).mapToInt(G -> G.getAttributeAsInt("SR", 0)).sum());
            vcb.attribute("PE", buffer.stream().flatMap(V -> V.ctx.getGenotypes().stream()).mapToInt(G -> G.getAttributeAsInt("PE", 0)).sum());
            final Map<String, Genotype> sample2genotype = new HashMap<>(sampleNames.size());
            buffer.stream().flatMap(V -> V.ctx.getGenotypes().stream()).filter(G -> isAvailableGenotype(G)).forEach(G -> {
                sample2genotype.put(G.getSampleName(), G);
            });
            // Counted before the no-call placeholders are added below, so it
            // only reflects samples with an available genotype.
            vcb.attribute(nSampleInfoHeaderLine.getID(), sample2genotype.size());
            for (final String sn : sampleNames) {
                if (!sample2genotype.containsKey(sn)) {
                    sample2genotype.put(sn, new GenotypeBuilder(sn, ALLELES_NO_CALLS).attribute("SU", 0).attribute("SR", 0).attribute("PE", 0).make());
                }
            }
            vcb.genotypes(sample2genotype.values());
            vcw.add(vcb.make());
        }
        cursor.close();
        cursor = null;
        vcw.close();
        vcw = null;
        variantsDb1.close();
        variantsDb1 = null;
        environment.close();
        environment = null;
        return 0;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        // On a failure path the cursor may still be open; close it before the
        // database it belongs to.
        if (cursor != null) {
            try {
                cursor.close();
            } catch (final Exception closeErr) {
                LOG.error(closeErr);
            }
        }
        CloserUtil.close(vcw);
        CloserUtil.close(variantsDb1);
        CloserUtil.close(environment);
    }
}
Also used : Allele(htsjdk.variant.variantcontext.Allele) Arrays(java.util.Arrays) JexlVariantPredicate(com.github.lindenb.jvarkit.util.vcf.JexlVariantPredicate) Program(com.github.lindenb.jvarkit.util.jcommander.Program) LineIterator(htsjdk.tribble.readers.LineIterator) IOUtil(htsjdk.samtools.util.IOUtil) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VCFHeader(htsjdk.variant.vcf.VCFHeader) LockMode(com.sleepycat.je.LockMode) VCFEncoder(htsjdk.variant.vcf.VCFEncoder) VCFHeaderVersion(htsjdk.variant.vcf.VCFHeaderVersion) Duration(java.time.Duration) Map(java.util.Map) CloserUtil(htsjdk.samtools.util.CloserUtil) TupleInput(com.sleepycat.bind.tuple.TupleInput) IntervalTreeMap(htsjdk.samtools.util.IntervalTreeMap) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) Predicate(java.util.function.Predicate) Logger(com.github.lindenb.jvarkit.util.log.Logger) DatabaseEntry(com.sleepycat.je.DatabaseEntry) Set(java.util.Set) Collectors(java.util.stream.Collectors) Percentile(com.github.lindenb.jvarkit.math.stats.Percentile) List(java.util.List) DatabaseConfig(com.sleepycat.je.DatabaseConfig) StructuralVariantType(htsjdk.variant.variantcontext.StructuralVariantType) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) VariantContext(htsjdk.variant.variantcontext.VariantContext) EnvironmentConfig(com.sleepycat.je.EnvironmentConfig) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) Genotype(htsjdk.variant.variantcontext.Genotype) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) OperationStatus(com.sleepycat.je.OperationStatus) CloseableIterator(htsjdk.samtools.util.CloseableIterator) Parameter(com.beust.jcommander.Parameter) TupleOutput(com.sleepycat.bind.tuple.TupleOutput) BedLineCodec(com.github.lindenb.jvarkit.util.bio.bed.BedLineCodec) HashMap(java.util.HashMap) Function(java.util.function.Function) TreeSet(java.util.TreeSet) 
TupleBinding(com.sleepycat.bind.tuple.TupleBinding) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Interval(htsjdk.samtools.util.Interval) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Database(com.sleepycat.je.Database) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) VCFCodec(htsjdk.variant.vcf.VCFCodec) VCFHeaderLineType(htsjdk.variant.vcf.VCFHeaderLineType) Environment(com.sleepycat.je.Environment) File(java.io.File) Cursor(com.sleepycat.je.Cursor) VCFFormatHeaderLine(htsjdk.variant.vcf.VCFFormatHeaderLine) BufferedReader(java.io.BufferedReader) Comparator(java.util.Comparator) Transaction(com.sleepycat.je.Transaction) Collections(java.util.Collections) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Cursor(com.sleepycat.je.Cursor) TreeSet(java.util.TreeSet) Database(com.sleepycat.je.Database) HashSet(java.util.HashSet) VCFHeaderVersion(htsjdk.variant.vcf.VCFHeaderVersion) EnvironmentConfig(com.sleepycat.je.EnvironmentConfig) Genotype(htsjdk.variant.variantcontext.Genotype) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) BedLineCodec(com.github.lindenb.jvarkit.util.bio.bed.BedLineCodec) VCFEncoder(htsjdk.variant.vcf.VCFEncoder) Transaction(com.sleepycat.je.Transaction) Environment(com.sleepycat.je.Environment) File(java.io.File) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VariantContext(htsjdk.variant.variantcontext.VariantContext) DatabaseEntry(com.sleepycat.je.DatabaseEntry) LineIterator(htsjdk.tribble.readers.LineIterator) OperationStatus(com.sleepycat.je.OperationStatus) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) VCFFormatHeaderLine(htsjdk.variant.vcf.VCFFormatHeaderLine) DatabaseConfig(com.sleepycat.je.DatabaseConfig) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) Allele(htsjdk.variant.variantcontext.Allele) 
VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) BufferedReader(java.io.BufferedReader)

Aggregations

EnvironmentConfig (com.sleepycat.je.EnvironmentConfig)28 Environment (com.sleepycat.je.Environment)20 DatabaseConfig (com.sleepycat.je.DatabaseConfig)16 File (java.io.File)11 Database (com.sleepycat.je.Database)5 DatabaseEntry (com.sleepycat.je.DatabaseEntry)5 ReplicatedEnvironment (com.sleepycat.je.rep.ReplicatedEnvironment)5 ReplicationConfig (com.sleepycat.je.rep.ReplicationConfig)5 Transaction (com.sleepycat.je.Transaction)4 IOException (java.io.IOException)4 Cursor (com.sleepycat.je.Cursor)3 DatabaseException (com.sleepycat.je.DatabaseException)3 RuntimeIOException (htsjdk.samtools.util.RuntimeIOException)2 LineIterator (htsjdk.tribble.readers.LineIterator)2 ArrayList (java.util.ArrayList)2 Test (org.junit.Test)2 Parameter (com.beust.jcommander.Parameter)1 IOUtils (com.github.lindenb.jvarkit.io.IOUtils)1 JvarkitException (com.github.lindenb.jvarkit.lang.JvarkitException)1 Percentile (com.github.lindenb.jvarkit.math.stats.Percentile)1