Search in sources:

Example 6 with BufferChannel

use of org.apache.jena.dboe.base.file.BufferChannel in project jena by apache.

The method state_data_02 of the class TestStateMgrData.

/**
 * After a slot is changed and the state is written again, the new value
 * must be readable from the backing channel.
 */
@Test
public void state_data_02() {
    final BufferChannel channel = FileFactory.createBufferChannelMem();
    final long[] initialValues = new long[] { 2, 3 };
    final StateMgrDataIdx stateMgr = new StateMgrDataIdx(channel, initialValues);
    stateMgr.writeState();

    // Overwrite the second slot, then write the state out a second time.
    stateMgr.set(1, 99L);
    stateMgr.writeState();

    // Read both longs back from the start of the channel and check slot 1.
    final ByteBuffer raw = ByteBuffer.allocate(2 * Long.BYTES);
    channel.read(raw, 0);
    final long secondSlot = raw.getLong(Long.BYTES);
    assertEquals(99L, secondSlot);
}
Also used : BufferChannel(org.apache.jena.dboe.base.file.BufferChannel) StateMgrDataIdx(org.apache.jena.dboe.transaction.txn.StateMgrDataIdx) ByteBuffer(java.nio.ByteBuffer) Test(org.junit.Test)

Example 7 with BufferChannel

use of org.apache.jena.dboe.base.file.BufferChannel in project jena by apache.

The method state_data_03 of the class TestStateMgrData.

/**
 * When the channel already holds data, the values read through the state
 * manager are the ones in the channel, not the array passed to the constructor.
 */
@Test
public void state_data_03() {
    final BufferChannel channel = FileFactory.createBufferChannelMem();

    // Pre-populate the channel with two longs, one write per value.
    final long[] stored = { -8888, -1234 };
    final ByteBuffer scratch = ByteBuffer.allocate(Long.BYTES);
    for (long value : stored) {
        scratch.putLong(0, value);
        scratch.rewind();
        channel.write(scratch);
    }
    channel.sync();

    final long[] initialValues = new long[] { 2, 3 };
    final StateMgrDataIdx stateMgr = new StateMgrDataIdx(channel, initialValues);
    assertEquals(-8888L, stateMgr.get(0));
    assertEquals(-1234L, stateMgr.get(1));
}
Also used : BufferChannel(org.apache.jena.dboe.base.file.BufferChannel) StateMgrDataIdx(org.apache.jena.dboe.transaction.txn.StateMgrDataIdx) ByteBuffer(java.nio.ByteBuffer) Test(org.junit.Test)

Example 8 with BufferChannel

use of org.apache.jena.dboe.base.file.BufferChannel in project jena by apache.

The method exec2 of the class ProcBuildNodeTableX.

/**
 * Builds the node table using three cooperating activities: a parser thread
 * that pipes terms into an external {@code sort} process, the sort process
 * itself (run with {@code --unique}), and a builder thread that packs the
 * sorted output into the node table B+Tree.
 * <p>
 * If the sort process ends with a non-zero exit code, the error is logged
 * and the JVM is terminated via {@code System.exit}.
 *
 * @param DB database location passed to {@code DatabaseMgr.connectDatasetGraph}
 * @param loaderFiles supplies {@code TMPDIR}, used as the sort's temporary directory
 * @param sortThreads value for the sort's {@code --parallel} option;
 *        a value {@code <= 0} is replaced by 2
 * @param sortNodeTableArgs not used by this method at present
 * @param datafiles the data files to parse, processed in list order
 * @return Pair&lt;triples, indexed nodes&gt;: the tick count of the parse
 *         monitor and the tick count of the index monitor
 * @throws RuntimeException if the sort process cannot be started, or if the
 *         wait for it is interrupted (the interrupt status is restored first)
 */
// [BULK] Output, not return.
private static Pair<Long, Long> exec2(String DB, XLoaderFiles loaderFiles, int sortThreads, String sortNodeTableArgs, List<String> datafiles) {
    // Threads - 1 parser, 1 builder, 2 sort.
    // Steps:
    // 1 - parser to and pipe terms to sort
    // 2 - sort
    // 3 - build node table from unique sort
    // NOTE(review): 'provider' is not read below; it looks like a leftover of
    // the disabled setProvider call on the next line - confirm before removing.
    IRIProvider provider = SystemIRIx.getProvider();
    // SystemIRIx.setProvider(new IRIProviderAny());
    DatasetGraph dsg = DatabaseMgr.connectDatasetGraph(DB);
    DatasetGraphTDB dsgtdb = TDBInternal.getDatasetGraphTDB(dsg);
    NodeTable nt = dsgtdb.getTripleTable().getNodeTupleTable().getNodeTable();
    NodeTableTRDF nodeTable = (NodeTableTRDF) nt.baseNodeTable();
    OutputStream toSortOutputStream;
    InputStream fromSortInputStream;
    if (sortThreads <= 0)
        sortThreads = 2;
    // ** Step 2: The sort
    Process procSort;
    try {
        // LOG.info("Step : external sort");
        // Mutable list.
        List<String> sortCmd = new ArrayList<>(Arrays.asList("sort", "--temporary-directory=" + loaderFiles.TMPDIR, "--buffer-size=50%", "--parallel=" + sortThreads, "--unique", "--key=1,1"));
        if (BulkLoaderX.CompressSortNodeTableFiles)
            sortCmd.add("--compress-program=" + BulkLoaderX.gzipProgram());
        // if ( sortNodeTableArgs != null ) {}
        ProcessBuilder pb2 = new ProcessBuilder(sortCmd);
        // Run the sort in the "C" locale.
        pb2.environment().put("LC_ALL", "C");
        procSort = pb2.start();
        // To process.
        // Let the writer close it.
        toSortOutputStream = procSort.getOutputStream();
        // From process to the tree builder.
        // Let the reader side close it.
        fromSortInputStream = procSort.getInputStream();
    // // Debug sort process.
    // InputStream fromSortErrortStream = proc2.getErrorStream();
    // IOUtils.copy(fromSortErrortStream, System.err);
    } catch (Exception ex) {
        throw new RuntimeException(ex);
    }
    // ** Step 1 : write intermediate file (hash, thrift bytes).
    // Both counters stay at -1 unless the corresponding task runs to completion.
    AtomicLong countParseTicks = new AtomicLong(-1);
    AtomicLong countIndexedNodes = new AtomicLong(-1);
    long tickPoint = BulkLoaderX.DataTick;
    int superTick = BulkLoaderX.DataSuperTick;
    Runnable task1 = () -> {
        ProgressMonitorOutput monitor = ProgressMonitorOutput.create(BulkLoaderX.LOG_Nodes, "Nodes", tickPoint, superTick);
        OutputStream output = IO.ensureBuffered(toSortOutputStream);
        // Counting.
        StreamRDF worker = new NodeHashTmpStream(output);
        ProgressStreamRDF stream = new ProgressStreamRDF(worker, monitor);
        monitor.start();
        // Remember the monitor's label; it is set per data file and restored afterwards.
        String label = monitor.getLabel();
        datafiles.forEach(datafile -> {
            String basename = FileOps.basename(datafile);
            monitor.setLabel(basename);
            stream.start();
            RDFParser.source(datafile).parse(stream);
            stream.finish();
        });
        monitor.finish();
        monitor.setLabel(label);
        // Closing the sort's input is this task's job (see "Let the writer close it").
        IO.flush(output);
        IO.close(output);
        long x = monitor.getTime();
        // long x = timer.endTimer();
        long count = monitor.getTicks();
        countParseTicks.set(count);
        double xSec = x / 1000.0;
        double rate = count / xSec;
        FmtLog.info(BulkLoaderX.LOG_Nodes, "%s Parse (nodes): %s seconds : %,d triples/quads %,.0f TPS", BulkLoaderX.StageMarker, Timer.timeStr(x), count, rate);
    };
    // [BULK] XXX AsyncParser.asyncParse(files, output)
    Thread thread1 = async(task1, "AsyncParser");
    // Step3: build node table.
    Runnable task3 = () -> {
        Timer timer = new Timer();
        // Don't start timer until sort send something
        // Process stream are already buffered.
        InputStream input = IO.ensureBuffered(fromSortInputStream);
        FileSet fileSet = new FileSet(dsgtdb.getLocation(), Names.nodeTableBaseName);
        BufferChannel blkState = FileFactory.createBufferChannel(fileSet, Names.extBptState);
        long idxTickPoint = BulkLoaderX.DataTick;
        int idxSuperTick = BulkLoaderX.DataSuperTick;
        ProgressMonitorOutput monitor = ProgressMonitorOutput.create(BulkLoaderX.LOG_Terms, "Index", idxTickPoint, idxSuperTick);
        // Library of tools!
        dsg.executeWrite(() -> {
            BinaryDataFile objectFile = nodeTable.getData();
            Iterator<Record> rIter = records(BulkLoaderX.LOG_Terms, input, objectFile);
            rIter = new ProgressIterator<>(rIter, monitor);
            // Record of (hash, nodeId)
            BPlusTree bpt1 = (BPlusTree) (nodeTable.getIndex());
            BPlusTreeParams bptParams = bpt1.getParams();
            RecordFactory factory = new RecordFactory(SystemTDB.LenNodeHash, NodeId.SIZE);
            // Wait until something has been received from the sort step
            rIter.hasNext();
            monitor.start();
            // .. then start the timer. It is closed after the transaction finishes.
            timer.startTimer();
            BPlusTree bpt2 = BPlusTreeRewriter.packIntoBPlusTree(rIter, bptParams, factory, blkState, bpt1.getNodeManager().getBlockMgr(), bpt1.getRecordsMgr().getBlockMgr());
            bpt2.sync();
            bpt1.sync();
            objectFile.sync();
            monitor.finish();
        });
        blkState.sync();
        // Closing the sort's output is this task's job (see "Let the reader side close it").
        IO.close(input);
        long x = timer.endTimer();
        long count = monitor.getTicks();
        countIndexedNodes.set(count);
        String rateStr = BulkLoaderX.rateStr(count, x);
        FmtLog.info(BulkLoaderX.LOG_Terms, "%s Index terms: %s seconds : %,d indexed RDF terms : %s PerSecond", BulkLoaderX.StageMarker, Timer.timeStr(x), count, rateStr);
    };
    Thread thread3 = async(task3, "AsyncBuild");
    try {
        int exitCode = procSort.waitFor();
        if (exitCode != 0) {
            String msg = IO.readWholeFileAsUTF8(procSort.getErrorStream());
            String logMsg = String.format("Sort RC = %d : Error: %s", exitCode, msg);
            Log.error(BulkLoaderX.LOG_Terms, logMsg);
            // ** Exit process
            System.exit(exitCode);
        } else
            BulkLoaderX.LOG_Terms.info("Sort finished");
    // I/O Stream toSortOutputStream and fromSortInputStream closed by
    // their users - step 1 and step 3.
    } catch (InterruptedException e) {
        // Restore the interrupt status so code further up the stack can still
        // observe that this thread was interrupted.
        Thread.currentThread().interrupt();
        BulkLoaderX.LOG_Nodes.error("Failed to cleanly wait-for the subprocess");
        throw new RuntimeException(e);
    }
    BulkLoaderX.waitFor(thread1);
    BulkLoaderX.waitFor(thread3);
    return Pair.create(countParseTicks.get(), countIndexedNodes.get());
}
Also used : Arrays(java.util.Arrays) RiotThriftException(org.apache.jena.riot.thrift.RiotThriftException) IO(org.apache.jena.atlas.io.IO) FileFactory(org.apache.jena.dboe.base.file.FileFactory) DatasetGraph(org.apache.jena.sparql.core.DatasetGraph) NodeId(org.apache.jena.tdb2.store.NodeId) FileSet(org.apache.jena.dboe.base.file.FileSet) RecordFactory(org.apache.jena.dboe.base.record.RecordFactory) TSerializer(org.apache.thrift.TSerializer) RDF_Term(org.apache.jena.riot.thrift.wire.RDF_Term) NodeTable(org.apache.jena.tdb2.store.nodetable.NodeTable) BinaryDataFile(org.apache.jena.dboe.base.file.BinaryDataFile) ProgressMonitorOutput(org.apache.jena.system.progress.ProgressMonitorOutput) Log(org.apache.jena.atlas.logging.Log) BulkLoaderX.async(org.apache.jena.tdb2.xloader.BulkLoaderX.async) TCompactProtocol(org.apache.thrift.protocol.TCompactProtocol) BPlusTreeParams(org.apache.jena.dboe.trans.bplustree.BPlusTreeParams) Names(org.apache.jena.dboe.sys.Names) ProgressIterator(org.apache.jena.system.progress.ProgressIterator) SystemTDB(org.apache.jena.tdb2.sys.SystemTDB) Triple(org.apache.jena.graph.Triple) DatabaseMgr(org.apache.jena.tdb2.DatabaseMgr) NodeIdFactory(org.apache.jena.tdb2.store.NodeIdFactory) List(java.util.List) DatasetGraphTDB(org.apache.jena.tdb2.store.DatasetGraphTDB) ThriftConvert(org.apache.jena.riot.thrift.ThriftConvert) BPlusTreeRewriter(org.apache.jena.dboe.trans.bplustree.rewriter.BPlusTreeRewriter) Record(org.apache.jena.dboe.base.record.Record) ByteArrayOutputStream(java.io.ByteArrayOutputStream) TDBInternal(org.apache.jena.tdb2.sys.TDBInternal) ArrayList(java.util.ArrayList) ProgressStreamRDF(org.apache.jena.system.progress.ProgressStreamRDF) Hash(org.apache.jena.tdb2.store.Hash) NodeTableTRDF(org.apache.jena.tdb2.store.nodetable.NodeTableTRDF) Quad(org.apache.jena.sparql.core.Quad) NodeLib(org.apache.jena.tdb2.lib.NodeLib) OutputStream(java.io.OutputStream) BPlusTree(org.apache.jena.dboe.trans.bplustree.BPlusTree) Logger(org.slf4j.Logger) 
Iterator(java.util.Iterator) RDFParser(org.apache.jena.riot.RDFParser) TException(org.apache.thrift.TException) IOException(java.io.IOException) StreamRDF(org.apache.jena.riot.system.StreamRDF) SystemIRIx(org.apache.jena.irix.SystemIRIx) FmtLog(org.apache.jena.atlas.logging.FmtLog) org.apache.jena.atlas.lib(org.apache.jena.atlas.lib) AtomicLong(java.util.concurrent.atomic.AtomicLong) BufferChannel(org.apache.jena.dboe.base.file.BufferChannel) IteratorSlotted(org.apache.jena.atlas.iterator.IteratorSlotted) Node(org.apache.jena.graph.Node) IRIProvider(org.apache.jena.irix.IRIProvider) InputStream(java.io.InputStream) BPlusTreeParams(org.apache.jena.dboe.trans.bplustree.BPlusTreeParams) ByteArrayOutputStream(java.io.ByteArrayOutputStream) OutputStream(java.io.OutputStream) ArrayList(java.util.ArrayList) DatasetGraph(org.apache.jena.sparql.core.DatasetGraph) ProgressIterator(org.apache.jena.system.progress.ProgressIterator) ProgressStreamRDF(org.apache.jena.system.progress.ProgressStreamRDF) StreamRDF(org.apache.jena.riot.system.StreamRDF) ProgressIterator(org.apache.jena.system.progress.ProgressIterator) Iterator(java.util.Iterator) NodeTable(org.apache.jena.tdb2.store.nodetable.NodeTable) NodeTableTRDF(org.apache.jena.tdb2.store.nodetable.NodeTableTRDF) FileSet(org.apache.jena.dboe.base.file.FileSet) InputStream(java.io.InputStream) ProgressMonitorOutput(org.apache.jena.system.progress.ProgressMonitorOutput) BufferChannel(org.apache.jena.dboe.base.file.BufferChannel) BinaryDataFile(org.apache.jena.dboe.base.file.BinaryDataFile) IRIProvider(org.apache.jena.irix.IRIProvider) RiotThriftException(org.apache.jena.riot.thrift.RiotThriftException) TException(org.apache.thrift.TException) IOException(java.io.IOException) DatasetGraphTDB(org.apache.jena.tdb2.store.DatasetGraphTDB) AtomicLong(java.util.concurrent.atomic.AtomicLong) ProgressStreamRDF(org.apache.jena.system.progress.ProgressStreamRDF) RecordFactory(org.apache.jena.dboe.base.record.RecordFactory) 
BPlusTree(org.apache.jena.dboe.trans.bplustree.BPlusTree)

Example 9 with BufferChannel

use of org.apache.jena.dboe.base.file.BufferChannel in project jena by apache.

The method state_data_01 of the class TestStateMgrData.

/**
 * A state manager created over an empty in-memory channel exposes the initial
 * values and has already written them to the channel.
 */
@Test
public void state_data_01() {
    final BufferChannel channel = FileFactory.createBufferChannelMem();
    final long[] initialValues = new long[] { 2, 3 };
    final StateMgrDataIdx stateMgr = new StateMgrDataIdx(channel, initialValues);

    // In-memory view matches the array passed in.
    assertEquals(initialValues.length, stateMgr.getData().length);
    assertEquals(2L, stateMgr.get(0));
    assertEquals(3L, stateMgr.get(1));

    // Test initial state written
    final ByteBuffer raw = ByteBuffer.allocate(2 * Long.BYTES);
    channel.read(raw, 0);
    final long firstSlot = raw.getLong(0);
    assertEquals(2L, firstSlot);
    final long secondSlot = raw.getLong(Long.BYTES);
    assertEquals(3L, secondSlot);
}
Also used : BufferChannel(org.apache.jena.dboe.base.file.BufferChannel) StateMgrDataIdx(org.apache.jena.dboe.transaction.txn.StateMgrDataIdx) ByteBuffer(java.nio.ByteBuffer) Test(org.junit.Test)

Example 10 with BufferChannel

use of org.apache.jena.dboe.base.file.BufferChannel in project jena by apache.

The method indexBuilder of the class ProcBuildIndexX.

/**
 * Packs records read from {@code input} into a new B+Tree for the index
 * named {@code indexName}, stored in the dataset's storage location.
 *
 * @param dsg the dataset; its TDB storage location receives the index files
 * @param input source of the records, consumed via {@code RecordsFromInput}
 * @param indexName the index name, which is also its column order; must have
 *        length 3 (triples) or 4 (quads)
 * @return the number of ticks counted by the progress monitor
 * @throws TDBException if {@code indexName} has a length other than 3 or 4
 */
private static long indexBuilder(DatasetGraph dsg, InputStream input, String indexName) {
    long tickPoint = BulkLoaderX.DataTick;
    int superTick = BulkLoaderX.DataSuperTick;
    // Location of storage, not the DB.
    DatasetGraphTDB dsgtdb = TDBInternal.getDatasetGraphTDB(dsg);
    Location location = dsgtdb.getLocation();
    // The name is the order.
    String primaryOrder;
    int dftKeyLength;
    int dftValueLength;
    int tupleLength = indexName.length();
    // NOTE(review): 'index' is not read after the lookup; the findIndex calls
    // are kept in case they validate that the index exists - confirm.
    TupleIndex index;
    if (tupleLength == 3) {
        primaryOrder = Names.primaryIndexTriples;
        dftKeyLength = SystemTDB.LenIndexTripleRecord;
        dftValueLength = 0;
        // Find index.
        index = findIndex(dsgtdb.getTripleTable().getNodeTupleTable().getTupleTable().getIndexes(), indexName);
    } else if (tupleLength == 4) {
        primaryOrder = Names.primaryIndexQuads;
        dftKeyLength = SystemTDB.LenIndexQuadRecord;
        dftValueLength = 0;
        index = findIndex(dsgtdb.getQuadTable().getNodeTupleTable().getTupleTable().getIndexes(), indexName);
    } else {
        throw new TDBException("Index name: " + indexName);
    }
    // Mapping between the primary index order and the order given by the index name.
    TupleMap colMap = TupleMap.create(primaryOrder, indexName);
    int readCacheSize = 10;
    int writeCacheSize = 100;
    int blockSize = SystemTDB.BlockSize;
    RecordFactory recordFactory = new RecordFactory(dftKeyLength, dftValueLength);
    int order = BPlusTreeParams.calcOrder(blockSize, recordFactory);
    BPlusTreeParams bptParams = new BPlusTreeParams(order, recordFactory);
    int blockSizeNodes = blockSize;
    int blockSizeRecords = blockSize;
    // State, tree-node and record files all share the same file set.
    FileSet destination = new FileSet(location, indexName);
    BufferChannel blkState = FileFactory.createBufferChannel(destination, Names.extBptState);
    BlockMgr blkMgrNodes = BlockMgrFactory.create(destination, Names.extBptTree, blockSizeNodes, readCacheSize, writeCacheSize);
    BlockMgr blkMgrRecords = BlockMgrFactory.create(destination, Names.extBptRecords, blockSizeRecords, readCacheSize, writeCacheSize);
    int rowBlock = 1000;
    Iterator<Record> iter = new RecordsFromInput(input, tupleLength, colMap, rowBlock);
    // ProgressMonitor.
    ProgressMonitor monitor = ProgressMonitorOutput.create(BulkLoaderX.LOG_Index, indexName, tickPoint, superTick);
    ProgressIterator<Record> iter2 = new ProgressIterator<>(iter, monitor);
    monitor.start();
    BPlusTree bpt2 = BPlusTreeRewriter.packIntoBPlusTree(iter2, bptParams, recordFactory, blkState, blkMgrNodes, blkMgrRecords);
    bpt2.close();
    monitor.finish();
    // [BULK] End stage.
    long count = monitor.getTicks();
    return count;
}
Also used : BPlusTreeParams(org.apache.jena.dboe.trans.bplustree.BPlusTreeParams) FileSet(org.apache.jena.dboe.base.file.FileSet) TDBException(org.apache.jena.tdb2.TDBException) BufferChannel(org.apache.jena.dboe.base.file.BufferChannel) DatasetGraphTDB(org.apache.jena.tdb2.store.DatasetGraphTDB) TupleMap(org.apache.jena.atlas.lib.tuple.TupleMap) ProgressIterator(org.apache.jena.system.progress.ProgressIterator) ProgressMonitor(org.apache.jena.system.progress.ProgressMonitor) RecordFactory(org.apache.jena.dboe.base.record.RecordFactory) BlockMgr(org.apache.jena.dboe.base.block.BlockMgr) Record(org.apache.jena.dboe.base.record.Record) TupleIndex(org.apache.jena.tdb2.store.tupletable.TupleIndex) BPlusTree(org.apache.jena.dboe.trans.bplustree.BPlusTree) Location(org.apache.jena.dboe.base.file.Location)

Aggregations

BufferChannel (org.apache.jena.dboe.base.file.BufferChannel)15 BlockMgr (org.apache.jena.dboe.base.block.BlockMgr)6 ComponentId (org.apache.jena.dboe.transaction.txn.ComponentId)6 ByteBuffer (java.nio.ByteBuffer)5 Journal (org.apache.jena.dboe.transaction.txn.journal.Journal)4 FileSet (org.apache.jena.dboe.base.file.FileSet)3 Record (org.apache.jena.dboe.base.record.Record)3 BPlusTree (org.apache.jena.dboe.trans.bplustree.BPlusTree)3 BPlusTreeParams (org.apache.jena.dboe.trans.bplustree.BPlusTreeParams)3 StateMgrDataIdx (org.apache.jena.dboe.transaction.txn.StateMgrDataIdx)3 Test (org.junit.Test)3 RecordFactory (org.apache.jena.dboe.base.record.RecordFactory)2 TransBlob (org.apache.jena.dboe.trans.data.TransBlob)2 Transactional (org.apache.jena.dboe.transaction.Transactional)2 TransactionCoordinator (org.apache.jena.dboe.transaction.txn.TransactionCoordinator)2 ByteArrayOutputStream (java.io.ByteArrayOutputStream)1 IOException (java.io.IOException)1 InputStream (java.io.InputStream)1 OutputStream (java.io.OutputStream)1 ArrayList (java.util.ArrayList)1