Search in sources :

Example 1 with BPlusTree

use of org.apache.jena.dboe.trans.bplustree.BPlusTree in project jena by apache.

the class LoaderOps method idxBTree.

/**
 * Return the {@link BPlusTree} that backs a {@link TupleIndex}.
 * <p>
 * The argument is cast to {@link TupleIndexRecord} and its range index is cast to
 * {@link BPlusTree}; if either object has a different implementation type the cast
 * fails with a {@link ClassCastException}.
 *
 * @param idx the tuple index to unwrap
 * @return the range index of {@code idx}, viewed as a {@link BPlusTree}
 */
public static BPlusTree idxBTree(TupleIndex idx) {
    RangeIndex rangeIndex = ((TupleIndexRecord) idx).getRangeIndex();
    return (BPlusTree) rangeIndex;
}
Also used : TupleIndexRecord(org.apache.jena.tdb2.store.tupletable.TupleIndexRecord) RangeIndex(org.apache.jena.dboe.index.RangeIndex) BPlusTree(org.apache.jena.dboe.trans.bplustree.BPlusTree)

Example 2 with BPlusTree

use of org.apache.jena.dboe.trans.bplustree.BPlusTree in project jena by apache.

the class CmdTestBPlusTree method runOneTest.

/**
 * Run {@code IndexTestLib.randTest} once against a freshly made B+Tree.
 * <p>
 * The tree is made with {@code BPlusTreeFactory.makeMem}, wrapped by
 * {@code BPlusTreeFactory.addTracking}, switched to non-transactional use and
 * closed once the test has run.
 *
 * @param testCount not used by this implementation
 * @param order     B+Tree order passed to {@code makeMem}
 * @param size      test size; {@code randTest} is called with {@code 5 * size} and {@code size}
 */
@Override
protected void runOneTest(int testCount, int order, int size) {
    // Debug aid: System.err.println("runOneTest("+order+","+size+")") ;
    BPlusTree tree = BPlusTreeFactory.addTracking(BPlusTreeFactory.makeMem(order, SysDB.SizeOfInt, 0));
    tree.nonTransactional();
    IndexTestLib.randTest(tree, 5 * size, size, true);
    tree.close();
    // Transactional variant, kept for reference only (not executed):
    //   BPlusTree bpt = BPlusTreeFactory.makeMem(order, SystemLz.SizeOfInt, 0) ;
    //   Journal journal = Journal.create(Location.mem()) ;
    //   Transactional holder = TransactionalFactory.create(journal, bpt) ;
    //   holder.begin(ReadWrite.WRITE);
    //   IndexTestLib.randTest(bpt, 5*size, size, true);
    //   holder.commit() ;
    //   holder.end() ;
}
Also used : BPlusTree(org.apache.jena.dboe.trans.bplustree.BPlusTree)

Example 3 with BPlusTree

use of org.apache.jena.dboe.trans.bplustree.BPlusTree in project jena by apache.

the class TestBPlusTreeRewriterNonTxn method runOneTest.

/**
 * Pack generated test records into a B+Tree with {@link BPlusTreeRewriter} and
 * compare the resulting tree against the input data.
 * <p>
 * Side effect: sets the static flag {@code BPlusTreeRewriter.debug} to {@code debug}.
 *
 * @param order         B+Tree order used to build the {@link BPlusTreeParams}
 * @param N             amount of test data requested from {@code createData}
 * @param recordFactory factory describing the records
 * @param debug         when {@code true}, print the test data and dump the built tree
 */
public static void runOneTest(int order, int N, RecordFactory recordFactory, boolean debug) {
    BPlusTreeParams params = new BPlusTreeParams(order, recordFactory);
    BPlusTreeRewriter.debug = debug;

    // ---- Test data
    List<Record> expected = TestBPlusTreeRewriterNonTxn.createData(N, recordFactory);
    if (debug) {
        System.out.println("Test data: " + expected);
    }

    // ---- Rewrite into an in-memory file set.
    FileSet target = FileSet.mem();
    BufferChannel stateChannel = FileFactory.createBufferChannel(target, Names.extBptState);
    // NOTE(review): both block managers are created with Names.extBptTree, and it is
    // not visible here which one receives nodes and which receives records - confirm
    // against the parameter order of BPlusTreeRewriter.packIntoBPlusTree.
    BlockMgr firstBlockMgr = BlockMgrFactory.create(target, Names.extBptTree, params.getCalcBlockSize(), 10, 10);
    BlockMgr secondBlockMgr = BlockMgrFactory.create(target, Names.extBptTree, params.getCalcBlockSize(), 10, 10);
    BPlusTree packed = BPlusTreeRewriter.packIntoBPlusTree(expected.iterator(), params, recordFactory, stateChannel, firstBlockMgr, secondBlockMgr);
    if (debug) {
        BPlusTreeRewriterUtils.divider();
        packed.dump();
    }

    // ---- Checking: internal consistency, then scan / find / size against the input.
    packed.check();
    scanComparision(expected, packed);
    findComparison(expected, packed);
    sizeComparison(expected, packed);
}
Also used : BPlusTreeParams(org.apache.jena.dboe.trans.bplustree.BPlusTreeParams) FileSet(org.apache.jena.dboe.base.file.FileSet) BlockMgr(org.apache.jena.dboe.base.block.BlockMgr) BufferChannel(org.apache.jena.dboe.base.file.BufferChannel) Record(org.apache.jena.dboe.base.record.Record) BPlusTree(org.apache.jena.dboe.trans.bplustree.BPlusTree)

Example 4 with BPlusTree

use of org.apache.jena.dboe.trans.bplustree.BPlusTree in project jena by apache.

the class ProcBuildNodeTableX method exec2.

/**
 * Build the node table in three concurrent steps: parse the data files and pipe
 * the terms to an external {@code sort} process, let {@code sort} order and
 * de-duplicate them, then pack the sorted records into the node table B+Tree.
 * <p>
 * If the sort process ends with a non-zero exit code, the error is logged and the
 * JVM is terminated with {@code System.exit}.
 *
 * @param DB                database location handed to {@code DatabaseMgr.connectDatasetGraph}
 * @param loaderFiles       supplies {@code TMPDIR}, used as the temporary directory of {@code sort}
 * @param sortThreads       value for the {@code --parallel} option of {@code sort};
 *                          a value of zero or less is replaced by 2
 * @param sortNodeTableArgs not used by this implementation
 * @param datafiles         the files to parse
 * @return {@code Pair<triples, indexed nodes>}: the tick count of the parse step and the
 *         tick count of the index step; a component is -1 if its step never recorded a count
 * @throws RuntimeException if the sort process cannot be started, or if the calling
 *                          thread is interrupted while waiting for it (the interrupt
 *                          status is restored before the exception is thrown)
 */
// [BULK] Output, not return.
private static Pair<Long, Long> exec2(String DB, XLoaderFiles loaderFiles, int sortThreads, String sortNodeTableArgs, List<String> datafiles) {
    // Threads - 1 parser, 1 builder, 2 sort.
    // Steps:
    // 1 - parser to and pipe terms to sort
    // 2 - sort
    // 3 - build node table from unique sort
    // NOTE(review): 'provider' is never read in this method; kept in case the call
    // matters for initialization - confirm before removing.
    IRIProvider provider = SystemIRIx.getProvider();
    // SystemIRIx.setProvider(new IRIProviderAny());
    DatasetGraph dsg = DatabaseMgr.connectDatasetGraph(DB);
    DatasetGraphTDB dsgtdb = TDBInternal.getDatasetGraphTDB(dsg);
    NodeTable nt = dsgtdb.getTripleTable().getNodeTupleTable().getNodeTable();
    NodeTableTRDF nodeTable = (NodeTableTRDF) nt.baseNodeTable();
    OutputStream toSortOutputStream;
    InputStream fromSortInputStream;
    if (sortThreads <= 0)
        sortThreads = 2;
    // ** Step 2: The sort
    // Started first so that its stdin/stdout streams exist for steps 1 and 3.
    Process procSort;
    try {
        // LOG.info("Step : external sort");
        // Mutable list.
        List<String> sortCmd = new ArrayList<>(Arrays.asList("sort", "--temporary-directory=" + loaderFiles.TMPDIR, "--buffer-size=50%", "--parallel=" + sortThreads, "--unique", "--key=1,1"));
        if (BulkLoaderX.CompressSortNodeTableFiles)
            sortCmd.add("--compress-program=" + BulkLoaderX.gzipProgram());
        // if ( sortNodeTableArgs != null ) {}
        ProcessBuilder pb2 = new ProcessBuilder(sortCmd);
        // Run sort in the "C" locale.
        pb2.environment().put("LC_ALL", "C");
        procSort = pb2.start();
        // To process.
        // Let the writer close it.
        toSortOutputStream = procSort.getOutputStream();
        // From process to the tree builder.
        // Let the reader side close it.
        fromSortInputStream = procSort.getInputStream();
    // // Debug sort process.
    // InputStream fromSortErrortStream = proc2.getErrorStream();
    // IOUtils.copy(fromSortErrortStream, System.err);
    } catch (Exception ex) {
        throw new RuntimeException(ex);
    }
    // ** Step 1 : write intermediate file (hash, thrift bytes).
    // Both counters start at -1 and are only set when their task reaches its end.
    AtomicLong countParseTicks = new AtomicLong(-1);
    AtomicLong countIndexedNodes = new AtomicLong(-1);
    long tickPoint = BulkLoaderX.DataTick;
    int superTick = BulkLoaderX.DataSuperTick;
    Runnable task1 = () -> {
        ProgressMonitorOutput monitor = ProgressMonitorOutput.create(BulkLoaderX.LOG_Nodes, "Nodes", tickPoint, superTick);
        OutputStream output = IO.ensureBuffered(toSortOutputStream);
        // Counting.
        StreamRDF worker = new NodeHashTmpStream(output);
        ProgressStreamRDF stream = new ProgressStreamRDF(worker, monitor);
        monitor.start();
        // Remember the original label; it is replaced per file and restored afterwards.
        String label = monitor.getLabel();
        datafiles.forEach(datafile -> {
            String basename = FileOps.basename(datafile);
            monitor.setLabel(basename);
            stream.start();
            RDFParser.source(datafile).parse(stream);
            stream.finish();
        });
        monitor.finish();
        monitor.setLabel(label);
        // Closing the stream to the sort process is the writer's responsibility.
        IO.flush(output);
        IO.close(output);
        long x = monitor.getTime();
        // long x = timer.endTimer();
        long count = monitor.getTicks();
        countParseTicks.set(count);
        double xSec = x / 1000.0;
        double rate = count / xSec;
        FmtLog.info(BulkLoaderX.LOG_Nodes, "%s Parse (nodes): %s seconds : %,d triples/quads %,.0f TPS", BulkLoaderX.StageMarker, Timer.timeStr(x), count, rate);
    };
    // [BULK] XXX AsyncParser.asyncParse(files, output)
    Thread thread1 = async(task1, "AsyncParser");
    // Step3: build node table.
    Runnable task3 = () -> {
        Timer timer = new Timer();
        // Don't start timer until sort send something
        // Process stream are already buffered.
        InputStream input = IO.ensureBuffered(fromSortInputStream);
        FileSet fileSet = new FileSet(dsgtdb.getLocation(), Names.nodeTableBaseName);
        BufferChannel blkState = FileFactory.createBufferChannel(fileSet, Names.extBptState);
        long idxTickPoint = BulkLoaderX.DataTick;
        int idxSuperTick = BulkLoaderX.DataSuperTick;
        ProgressMonitorOutput monitor = ProgressMonitorOutput.create(BulkLoaderX.LOG_Terms, "Index", idxTickPoint, idxSuperTick);
        // Library of tools!
        dsg.executeWrite(() -> {
            BinaryDataFile objectFile = nodeTable.getData();
            Iterator<Record> rIter = records(BulkLoaderX.LOG_Terms, input, objectFile);
            rIter = new ProgressIterator<>(rIter, monitor);
            // Record of (hash, nodeId)
            BPlusTree bpt1 = (BPlusTree) (nodeTable.getIndex());
            BPlusTreeParams bptParams = bpt1.getParams();
            RecordFactory factory = new RecordFactory(SystemTDB.LenNodeHash, NodeId.SIZE);
            // Wait until something has been received from the sort step
            // (the result of hasNext() is deliberately ignored).
            rIter.hasNext();
            monitor.start();
            // .. then start the timer. It is closed after the transaction finishes.
            timer.startTimer();
            BPlusTree bpt2 = BPlusTreeRewriter.packIntoBPlusTree(rIter, bptParams, factory, blkState, bpt1.getNodeManager().getBlockMgr(), bpt1.getRecordsMgr().getBlockMgr());
            bpt2.sync();
            bpt1.sync();
            objectFile.sync();
            monitor.finish();
        });
        blkState.sync();
        // Closing the stream from the sort process is the reader's responsibility.
        IO.close(input);
        long x = timer.endTimer();
        long count = monitor.getTicks();
        countIndexedNodes.set(count);
        String rateStr = BulkLoaderX.rateStr(count, x);
        FmtLog.info(BulkLoaderX.LOG_Terms, "%s Index terms: %s seconds : %,d indexed RDF terms : %s PerSecond", BulkLoaderX.StageMarker, Timer.timeStr(x), count, rateStr);
    };
    Thread thread3 = async(task3, "AsyncBuild");
    try {
        int exitCode = procSort.waitFor();
        if (exitCode != 0) {
            String msg = IO.readWholeFileAsUTF8(procSort.getErrorStream());
            String logMsg = String.format("Sort RC = %d : Error: %s", exitCode, msg);
            Log.error(BulkLoaderX.LOG_Terms, logMsg);
            // ** Exit process
            System.exit(exitCode);
        } else
            BulkLoaderX.LOG_Terms.info("Sort finished");
    // I/O Stream toSortOutputStream and fromSortInputStream closed by
    // their users - step 1 and step 3.
    } catch (InterruptedException e) {
        // Restore the interrupt status so callers further up can still observe it.
        Thread.currentThread().interrupt();
        BulkLoaderX.LOG_Nodes.error("Failed to cleanly wait-for the subprocess");
        throw new RuntimeException(e);
    }
    BulkLoaderX.waitFor(thread1);
    BulkLoaderX.waitFor(thread3);
    return Pair.create(countParseTicks.get(), countIndexedNodes.get());
}
Also used : Arrays(java.util.Arrays) RiotThriftException(org.apache.jena.riot.thrift.RiotThriftException) IO(org.apache.jena.atlas.io.IO) FileFactory(org.apache.jena.dboe.base.file.FileFactory) DatasetGraph(org.apache.jena.sparql.core.DatasetGraph) NodeId(org.apache.jena.tdb2.store.NodeId) FileSet(org.apache.jena.dboe.base.file.FileSet) RecordFactory(org.apache.jena.dboe.base.record.RecordFactory) TSerializer(org.apache.thrift.TSerializer) RDF_Term(org.apache.jena.riot.thrift.wire.RDF_Term) NodeTable(org.apache.jena.tdb2.store.nodetable.NodeTable) BinaryDataFile(org.apache.jena.dboe.base.file.BinaryDataFile) ProgressMonitorOutput(org.apache.jena.system.progress.ProgressMonitorOutput) Log(org.apache.jena.atlas.logging.Log) BulkLoaderX.async(org.apache.jena.tdb2.xloader.BulkLoaderX.async) TCompactProtocol(org.apache.thrift.protocol.TCompactProtocol) BPlusTreeParams(org.apache.jena.dboe.trans.bplustree.BPlusTreeParams) Names(org.apache.jena.dboe.sys.Names) ProgressIterator(org.apache.jena.system.progress.ProgressIterator) SystemTDB(org.apache.jena.tdb2.sys.SystemTDB) Triple(org.apache.jena.graph.Triple) DatabaseMgr(org.apache.jena.tdb2.DatabaseMgr) NodeIdFactory(org.apache.jena.tdb2.store.NodeIdFactory) List(java.util.List) DatasetGraphTDB(org.apache.jena.tdb2.store.DatasetGraphTDB) ThriftConvert(org.apache.jena.riot.thrift.ThriftConvert) BPlusTreeRewriter(org.apache.jena.dboe.trans.bplustree.rewriter.BPlusTreeRewriter) Record(org.apache.jena.dboe.base.record.Record) ByteArrayOutputStream(java.io.ByteArrayOutputStream) TDBInternal(org.apache.jena.tdb2.sys.TDBInternal) ArrayList(java.util.ArrayList) ProgressStreamRDF(org.apache.jena.system.progress.ProgressStreamRDF) Hash(org.apache.jena.tdb2.store.Hash) NodeTableTRDF(org.apache.jena.tdb2.store.nodetable.NodeTableTRDF) Quad(org.apache.jena.sparql.core.Quad) NodeLib(org.apache.jena.tdb2.lib.NodeLib) OutputStream(java.io.OutputStream) BPlusTree(org.apache.jena.dboe.trans.bplustree.BPlusTree) Logger(org.slf4j.Logger) 
Iterator(java.util.Iterator) RDFParser(org.apache.jena.riot.RDFParser) TException(org.apache.thrift.TException) IOException(java.io.IOException) StreamRDF(org.apache.jena.riot.system.StreamRDF) SystemIRIx(org.apache.jena.irix.SystemIRIx) FmtLog(org.apache.jena.atlas.logging.FmtLog) org.apache.jena.atlas.lib(org.apache.jena.atlas.lib) AtomicLong(java.util.concurrent.atomic.AtomicLong) BufferChannel(org.apache.jena.dboe.base.file.BufferChannel) IteratorSlotted(org.apache.jena.atlas.iterator.IteratorSlotted) Node(org.apache.jena.graph.Node) IRIProvider(org.apache.jena.irix.IRIProvider) InputStream(java.io.InputStream) BPlusTreeParams(org.apache.jena.dboe.trans.bplustree.BPlusTreeParams) ByteArrayOutputStream(java.io.ByteArrayOutputStream) OutputStream(java.io.OutputStream) ArrayList(java.util.ArrayList) DatasetGraph(org.apache.jena.sparql.core.DatasetGraph) ProgressIterator(org.apache.jena.system.progress.ProgressIterator) ProgressStreamRDF(org.apache.jena.system.progress.ProgressStreamRDF) StreamRDF(org.apache.jena.riot.system.StreamRDF) ProgressIterator(org.apache.jena.system.progress.ProgressIterator) Iterator(java.util.Iterator) NodeTable(org.apache.jena.tdb2.store.nodetable.NodeTable) NodeTableTRDF(org.apache.jena.tdb2.store.nodetable.NodeTableTRDF) FileSet(org.apache.jena.dboe.base.file.FileSet) InputStream(java.io.InputStream) ProgressMonitorOutput(org.apache.jena.system.progress.ProgressMonitorOutput) BufferChannel(org.apache.jena.dboe.base.file.BufferChannel) BinaryDataFile(org.apache.jena.dboe.base.file.BinaryDataFile) IRIProvider(org.apache.jena.irix.IRIProvider) RiotThriftException(org.apache.jena.riot.thrift.RiotThriftException) TException(org.apache.thrift.TException) IOException(java.io.IOException) DatasetGraphTDB(org.apache.jena.tdb2.store.DatasetGraphTDB) AtomicLong(java.util.concurrent.atomic.AtomicLong) ProgressStreamRDF(org.apache.jena.system.progress.ProgressStreamRDF) RecordFactory(org.apache.jena.dboe.base.record.RecordFactory) 
BPlusTree(org.apache.jena.dboe.trans.bplustree.BPlusTree)

Example 5 with BPlusTree

use of org.apache.jena.dboe.trans.bplustree.BPlusTree in project jena by apache.

the class TDB2StorageBuilder method makeRangeIndex.

/**
 * Create a {@link BPlusTree} for {@code name} and add it to {@code components}.
 * <p>
 * The component id is obtained from {@code componentIdMgr} and the file set is
 * built from {@code location}, both using {@code name}.
 *
 * @param recordFactory factory describing the records held by the index
 * @param name          name used for both the component id lookup and the file set
 * @return the newly created B+Tree, as a {@link RangeIndex}
 */
private RangeIndex makeRangeIndex(RecordFactory recordFactory, String name) {
    BPlusTree tree = BPlusTreeFactory.createBPTree(componentIdMgr.getComponentId(name), new FileSet(location, name), recordFactory);
    components.add(tree);
    return tree;
}
Also used : BPlusTree(org.apache.jena.dboe.trans.bplustree.BPlusTree)

Aggregations

BPlusTree (org.apache.jena.dboe.trans.bplustree.BPlusTree)8 BufferChannel (org.apache.jena.dboe.base.file.BufferChannel)3 FileSet (org.apache.jena.dboe.base.file.FileSet)3 Record (org.apache.jena.dboe.base.record.Record)3 BPlusTreeParams (org.apache.jena.dboe.trans.bplustree.BPlusTreeParams)3 BlockMgr (org.apache.jena.dboe.base.block.BlockMgr)2 RecordFactory (org.apache.jena.dboe.base.record.RecordFactory)2 RangeIndex (org.apache.jena.dboe.index.RangeIndex)2 ProgressIterator (org.apache.jena.system.progress.ProgressIterator)2 TupleIndex (org.apache.jena.tdb2.store.tupletable.TupleIndex)2 ByteArrayOutputStream (java.io.ByteArrayOutputStream)1 IOException (java.io.IOException)1 InputStream (java.io.InputStream)1 OutputStream (java.io.OutputStream)1 ArrayList (java.util.ArrayList)1 Arrays (java.util.Arrays)1 Iterator (java.util.Iterator)1 List (java.util.List)1 AtomicLong (java.util.concurrent.atomic.AtomicLong)1 IO (org.apache.jena.atlas.io.IO)1