Search in sources :

Example 31 with Record

use of org.apache.jena.dboe.base.record.Record in project jena by apache.

the class TestBPlusTreeRewriterNonTxn method findComparison.

/**
 * Looks up every record of {@code originaldata} in {@code bpt2} and reports,
 * via {@code error(...)}, each record that is missing or that comes back
 * different from the one searched for.
 *
 * @param originaldata the records expected to be present in the tree
 * @param bpt2 the B+Tree to probe
 */
public static void findComparison(List<Record> originaldata, BPlusTree bpt2) {
    // 1-based position of the record being checked; used only in messages.
    long position = 0;
    for (Record expected : originaldata) {
        position++;
        Record found = bpt2.find(expected);
        if (found == null) {
            // Repeat the lookup before reporting.
            // NOTE(review): presumably a convenient breakpoint spot - confirm before removing.
            found = bpt2.find(expected);
            error("Deviation in find at record %d: %s : null", position, expected);
        }
        if (!Record.equals(expected, found))
            error("Deviation in find at record %d: %s : %s", position, expected, found);
    }
}
Also used : Record(org.apache.jena.dboe.base.record.Record)

Example 32 with Record

use of org.apache.jena.dboe.base.record.Record in project jena by apache.

the class ProcBuildNodeTableX method exec2.

/**
 * Builds the node table by piping parsed RDF terms through an external
 * {@code sort} process and packing the sorted output into a B+Tree.
 * <p>
 * Three activities run concurrently:
 * <ol>
 * <li>a parser thread that parses each data file and writes to the standard input of {@code sort};</li>
 * <li>the external {@code sort} process (unique, keyed on the first field);</li>
 * <li>a builder thread that reads the standard output of {@code sort} and
 *     packs the records into the node table index inside a write transaction.</li>
 * </ol>
 * If the sort process exits with a non-zero code, the error is logged and the
 * JVM is terminated with {@code System.exit}.
 *
 * @param DB location passed to {@code DatabaseMgr.connectDatasetGraph}
 * @param loaderFiles supplies {@code TMPDIR}, used as the sort temporary directory
 * @param sortThreads value for the {@code --parallel} argument of sort; values {@code <= 0} are replaced by 2
 * @param sortNodeTableArgs currently unused by this method
 * @param datafiles the files to parse
 * @return {@code Pair<triples, indexed nodes>}: the parser tick count and the index tick count;
 *         each counter starts at -1 and is set when its task reaches the end of its work
 * @throws RuntimeException if the sort process cannot be started, or if the
 *         calling thread is interrupted while waiting for it
 */
// [BULK] Output, not return.
private static Pair<Long, Long> exec2(String DB, XLoaderFiles loaderFiles, int sortThreads, String sortNodeTableArgs, List<String> datafiles) {
    // Threads - 1 parser, 1 builder, 2 sort.
    // Steps:
    // 1 - parser to and pipe terms to sort
    // 2 - sort
    // 3 - build node table from unique sort
    IRIProvider provider = SystemIRIx.getProvider();
    // SystemIRIx.setProvider(new IRIProviderAny());
    DatasetGraph dsg = DatabaseMgr.connectDatasetGraph(DB);
    DatasetGraphTDB dsgtdb = TDBInternal.getDatasetGraphTDB(dsg);
    NodeTable nt = dsgtdb.getTripleTable().getNodeTupleTable().getNodeTable();
    NodeTableTRDF nodeTable = (NodeTableTRDF) nt.baseNodeTable();
    OutputStream toSortOutputStream;
    InputStream fromSortInputStream;
    // Fall back to two sort threads when the caller gives no usable value.
    if (sortThreads <= 0)
        sortThreads = 2;
    // ** Step 2: The sort
    // Started first so that its stdin/stdout pipes exist before steps 1 and 3 begin.
    Process procSort;
    try {
        // LOG.info("Step : external sort");
        // Mutable list.
        List<String> sortCmd = new ArrayList<>(Arrays.asList("sort", "--temporary-directory=" + loaderFiles.TMPDIR, "--buffer-size=50%", "--parallel=" + sortThreads, "--unique", "--key=1,1"));
        if (BulkLoaderX.CompressSortNodeTableFiles)
            sortCmd.add("--compress-program=" + BulkLoaderX.gzipProgram());
        // if ( sortNodeTableArgs != null ) {}
        ProcessBuilder pb2 = new ProcessBuilder(sortCmd);
        // Run sort in the "C" locale.
        pb2.environment().put("LC_ALL", "C");
        procSort = pb2.start();
        // To process.
        // Let the writer close it.
        toSortOutputStream = procSort.getOutputStream();
        // From process to the tree builder.
        // Let the reader side close it.
        fromSortInputStream = procSort.getInputStream();
    // // Debug sort process.
    // InputStream fromSortErrortStream = proc2.getErrorStream();
    // IOUtils.copy(fromSortErrortStream, System.err);
    } catch (Exception ex) {
        throw new RuntimeException(ex);
    }
    // ** Step 1 : write intermediate file (hash, thrift bytes).
    // Both counters stay at -1 until the corresponding task records its final count.
    AtomicLong countParseTicks = new AtomicLong(-1);
    AtomicLong countIndexedNodes = new AtomicLong(-1);
    long tickPoint = BulkLoaderX.DataTick;
    int superTick = BulkLoaderX.DataSuperTick;
    Runnable task1 = () -> {
        ProgressMonitorOutput monitor = ProgressMonitorOutput.create(BulkLoaderX.LOG_Nodes, "Nodes", tickPoint, superTick);
        OutputStream output = IO.ensureBuffered(toSortOutputStream);
        // Counting.
        StreamRDF worker = new NodeHashTmpStream(output);
        ProgressStreamRDF stream = new ProgressStreamRDF(worker, monitor);
        monitor.start();
        // Remember the monitor label; it is switched to each file's basename while parsing.
        String label = monitor.getLabel();
        datafiles.forEach(datafile -> {
            String basename = FileOps.basename(datafile);
            monitor.setLabel(basename);
            stream.start();
            RDFParser.source(datafile).parse(stream);
            stream.finish();
        });
        monitor.finish();
        monitor.setLabel(label);
        // This task is the writer of the sort input, so it flushes and closes that stream.
        IO.flush(output);
        IO.close(output);
        long x = monitor.getTime();
        // long x = timer.endTimer();
        long count = monitor.getTicks();
        countParseTicks.set(count);
        double xSec = x / 1000.0;
        double rate = count / xSec;
        FmtLog.info(BulkLoaderX.LOG_Nodes, "%s Parse (nodes): %s seconds : %,d triples/quads %,.0f TPS", BulkLoaderX.StageMarker, Timer.timeStr(x), count, rate);
    };
    // [BULK] XXX AsyncParser.asyncParse(files, output)
    Thread thread1 = async(task1, "AsyncParser");
    // Step3: build node table.
    Runnable task3 = () -> {
        Timer timer = new Timer();
        // Don't start the timer until sort sends something.
        // Process streams are already buffered.
        InputStream input = IO.ensureBuffered(fromSortInputStream);
        FileSet fileSet = new FileSet(dsgtdb.getLocation(), Names.nodeTableBaseName);
        BufferChannel blkState = FileFactory.createBufferChannel(fileSet, Names.extBptState);
        long idxTickPoint = BulkLoaderX.DataTick;
        int idxSuperTick = BulkLoaderX.DataSuperTick;
        ProgressMonitorOutput monitor = ProgressMonitorOutput.create(BulkLoaderX.LOG_Terms, "Index", idxTickPoint, idxSuperTick);
        // Library of tools!
        dsg.executeWrite(() -> {
            BinaryDataFile objectFile = nodeTable.getData();
            Iterator<Record> rIter = records(BulkLoaderX.LOG_Terms, input, objectFile);
            rIter = new ProgressIterator<>(rIter, monitor);
            // Record of (hash, nodeId)
            BPlusTree bpt1 = (BPlusTree) (nodeTable.getIndex());
            BPlusTreeParams bptParams = bpt1.getParams();
            RecordFactory factory = new RecordFactory(SystemTDB.LenNodeHash, NodeId.SIZE);
            // Wait until something has been received from the sort step
            rIter.hasNext();
            monitor.start();
            // .. then start the timer. It is closed after the transaction finishes.
            timer.startTimer();
            BPlusTree bpt2 = BPlusTreeRewriter.packIntoBPlusTree(rIter, bptParams, factory, blkState, bpt1.getNodeManager().getBlockMgr(), bpt1.getRecordsMgr().getBlockMgr());
            bpt2.sync();
            bpt1.sync();
            objectFile.sync();
            monitor.finish();
        });
        blkState.sync();
        // This task is the reader of the sort output, so it closes that stream.
        IO.close(input);
        long x = timer.endTimer();
        long count = monitor.getTicks();
        countIndexedNodes.set(count);
        String rateStr = BulkLoaderX.rateStr(count, x);
        FmtLog.info(BulkLoaderX.LOG_Terms, "%s Index terms: %s seconds : %,d indexed RDF terms : %s PerSecond", BulkLoaderX.StageMarker, Timer.timeStr(x), count, rateStr);
    };
    Thread thread3 = async(task3, "AsyncBuild");
    try {
        int exitCode = procSort.waitFor();
        if (exitCode != 0) {
            String msg = IO.readWholeFileAsUTF8(procSort.getErrorStream());
            String logMsg = String.format("Sort RC = %d : Error: %s", exitCode, msg);
            Log.error(BulkLoaderX.LOG_Terms, logMsg);
            // ** Exit process
            System.exit(exitCode);
        } else
            BulkLoaderX.LOG_Terms.info("Sort finished");
    // I/O Stream toSortOutputStream and fromSortInputStream closed by
    // their users - step 1 and step 3.
    } catch (InterruptedException e) {
        BulkLoaderX.LOG_Nodes.error("Failed to cleanly wait-for the subprocess");
        // Restore the interrupt status so callers further up the stack can observe it.
        Thread.currentThread().interrupt();
        throw new RuntimeException(e);
    }
    BulkLoaderX.waitFor(thread1);
    BulkLoaderX.waitFor(thread3);
    return Pair.create(countParseTicks.get(), countIndexedNodes.get());
}
Also used : Arrays(java.util.Arrays) RiotThriftException(org.apache.jena.riot.thrift.RiotThriftException) IO(org.apache.jena.atlas.io.IO) FileFactory(org.apache.jena.dboe.base.file.FileFactory) DatasetGraph(org.apache.jena.sparql.core.DatasetGraph) NodeId(org.apache.jena.tdb2.store.NodeId) FileSet(org.apache.jena.dboe.base.file.FileSet) RecordFactory(org.apache.jena.dboe.base.record.RecordFactory) TSerializer(org.apache.thrift.TSerializer) RDF_Term(org.apache.jena.riot.thrift.wire.RDF_Term) NodeTable(org.apache.jena.tdb2.store.nodetable.NodeTable) BinaryDataFile(org.apache.jena.dboe.base.file.BinaryDataFile) ProgressMonitorOutput(org.apache.jena.system.progress.ProgressMonitorOutput) Log(org.apache.jena.atlas.logging.Log) BulkLoaderX.async(org.apache.jena.tdb2.xloader.BulkLoaderX.async) TCompactProtocol(org.apache.thrift.protocol.TCompactProtocol) BPlusTreeParams(org.apache.jena.dboe.trans.bplustree.BPlusTreeParams) Names(org.apache.jena.dboe.sys.Names) ProgressIterator(org.apache.jena.system.progress.ProgressIterator) SystemTDB(org.apache.jena.tdb2.sys.SystemTDB) Triple(org.apache.jena.graph.Triple) DatabaseMgr(org.apache.jena.tdb2.DatabaseMgr) NodeIdFactory(org.apache.jena.tdb2.store.NodeIdFactory) List(java.util.List) DatasetGraphTDB(org.apache.jena.tdb2.store.DatasetGraphTDB) ThriftConvert(org.apache.jena.riot.thrift.ThriftConvert) BPlusTreeRewriter(org.apache.jena.dboe.trans.bplustree.rewriter.BPlusTreeRewriter) Record(org.apache.jena.dboe.base.record.Record) ByteArrayOutputStream(java.io.ByteArrayOutputStream) TDBInternal(org.apache.jena.tdb2.sys.TDBInternal) ArrayList(java.util.ArrayList) ProgressStreamRDF(org.apache.jena.system.progress.ProgressStreamRDF) Hash(org.apache.jena.tdb2.store.Hash) NodeTableTRDF(org.apache.jena.tdb2.store.nodetable.NodeTableTRDF) Quad(org.apache.jena.sparql.core.Quad) NodeLib(org.apache.jena.tdb2.lib.NodeLib) OutputStream(java.io.OutputStream) BPlusTree(org.apache.jena.dboe.trans.bplustree.BPlusTree) Logger(org.slf4j.Logger) 
Iterator(java.util.Iterator) RDFParser(org.apache.jena.riot.RDFParser) TException(org.apache.thrift.TException) IOException(java.io.IOException) StreamRDF(org.apache.jena.riot.system.StreamRDF) SystemIRIx(org.apache.jena.irix.SystemIRIx) FmtLog(org.apache.jena.atlas.logging.FmtLog) org.apache.jena.atlas.lib(org.apache.jena.atlas.lib) AtomicLong(java.util.concurrent.atomic.AtomicLong) BufferChannel(org.apache.jena.dboe.base.file.BufferChannel) IteratorSlotted(org.apache.jena.atlas.iterator.IteratorSlotted) Node(org.apache.jena.graph.Node) IRIProvider(org.apache.jena.irix.IRIProvider) InputStream(java.io.InputStream) BPlusTreeParams(org.apache.jena.dboe.trans.bplustree.BPlusTreeParams) ByteArrayOutputStream(java.io.ByteArrayOutputStream) OutputStream(java.io.OutputStream) ArrayList(java.util.ArrayList) DatasetGraph(org.apache.jena.sparql.core.DatasetGraph) ProgressIterator(org.apache.jena.system.progress.ProgressIterator) ProgressStreamRDF(org.apache.jena.system.progress.ProgressStreamRDF) StreamRDF(org.apache.jena.riot.system.StreamRDF) ProgressIterator(org.apache.jena.system.progress.ProgressIterator) Iterator(java.util.Iterator) NodeTable(org.apache.jena.tdb2.store.nodetable.NodeTable) NodeTableTRDF(org.apache.jena.tdb2.store.nodetable.NodeTableTRDF) FileSet(org.apache.jena.dboe.base.file.FileSet) InputStream(java.io.InputStream) ProgressMonitorOutput(org.apache.jena.system.progress.ProgressMonitorOutput) BufferChannel(org.apache.jena.dboe.base.file.BufferChannel) BinaryDataFile(org.apache.jena.dboe.base.file.BinaryDataFile) IRIProvider(org.apache.jena.irix.IRIProvider) RiotThriftException(org.apache.jena.riot.thrift.RiotThriftException) TException(org.apache.thrift.TException) IOException(java.io.IOException) DatasetGraphTDB(org.apache.jena.tdb2.store.DatasetGraphTDB) AtomicLong(java.util.concurrent.atomic.AtomicLong) ProgressStreamRDF(org.apache.jena.system.progress.ProgressStreamRDF) RecordFactory(org.apache.jena.dboe.base.record.RecordFactory) 
BPlusTree(org.apache.jena.dboe.trans.bplustree.BPlusTree)

Example 33 with Record

use of org.apache.jena.dboe.base.record.Record in project jena by apache.

the class NodeTableNative method accessIndex.

/**
 * Looks up the {@link NodeId} for a node through the hash-to-id index,
 * optionally creating an entry when none exists.
 *
 * @param node the node to look up
 * @param create when {@code true}, a missing node is written to the table and indexed
 * @return the id found in, or newly added to, the index;
 *         {@code NodeId.NodeDoesNotExist} when the node is absent and {@code create} is {@code false}
 * @throws TDBException if the index insert reports that the record is already present
 */
protected final NodeId accessIndex(Node node, boolean create) {
    // Build a key-only probe record from the node's hash.
    Hash nodeHash = new Hash(nodeHashToId.getRecordFactory().keyLength());
    NodeLib.setHash(nodeHash, node);
    Record probe = nodeHashToId.getRecordFactory().create(nodeHash.getBytes());
    // Pair to readNodeFromTable.
    synchronized (this) {
        // Key and value, or null.
        Record existing = nodeHashToId.find(probe);
        if (existing != null)
            // Found. The value part of the record holds the NodeId.
            return NodeIdFactory.get(existing.getValue(), 0);
        if (!create)
            // Not found, and the caller did not ask for creation.
            return NodeId.NodeDoesNotExist;
        // Write the node, which allocates an id for it.
        syncNeeded = true;
        NodeId allocated = writeNodeToTable(node);
        // Store the new id in the value part of the probe record.
        NodeIdFactory.set(allocated, probe.getValue(), 0);
        // Put in index - may appear because of concurrency
        if (!nodeHashToId.insert(probe))
            throw new TDBException("NodeTableBase::nodeToId - record mysteriously appeared");
        return allocated;
    }
}
Also used : TDBException(org.apache.jena.tdb2.TDBException) NodeId(org.apache.jena.tdb2.store.NodeId) Record(org.apache.jena.dboe.base.record.Record) Hash(org.apache.jena.tdb2.store.Hash)

Example 34 with Record

use of org.apache.jena.dboe.base.record.Record in project jena by apache.

the class RecordsFromInput method next.

/**
 * Returns the record currently held in the slot and empties the slot.
 *
 * @return the next record
 * @throws NoSuchElementException if {@code hasNext()} reports no further record
 */
@Override
public Record next() {
    if (hasNext()) {
        // Hand over the slot contents and clear the slot for the next hasNext() call.
        Record result = slot;
        slot = null;
        return result;
    }
    throw new NoSuchElementException();
}
Also used : Record(org.apache.jena.dboe.base.record.Record) NoSuchElementException(java.util.NoSuchElementException)

Example 35 with Record

use of org.apache.jena.dboe.base.record.Record in project jena by apache.

the class RecordsFromInput method hasNext.

/**
 * Ensures the slot holds the next record, refilling the buffer via
 * {@code fill()} when required, and reports whether a record is available.
 *
 * @return {@code true} if a record is waiting in the slot; {@code false} once
 *         {@code fill()} has returned -1
 */
@Override
public boolean hasNext() {
    if (finished)
        return false;
    if (slot != null)
        return true;
    // Refill when the buffer has not been loaded or has been read to its end.
    if (idx == -1 || idx == buffer.length) {
        len = fill();
        if (len == -1) {
            finished = true;
            return false;
        }
        idx = 0;
    }
    // Fill one slot.
    Record rec = recordFactory.create();
    for (int col = 0; col < itemsPerRow; col++) {
        long value = Hex.getLong(buffer, idx);
        // Advance by 16, then one more for the separator or end-of-line.
        idx += 16 + 1;
        // Target column: identity when there is no column map.
        int target;
        if (colMap == null)
            target = col;
        else
            target = colMap.putSlotIdx(col);
        Bytes.setLong(value, rec.getKey(), target * SysDB.SizeOfLong);
    }
    // Buffer all processed.
    if (idx >= len)
        idx = -1;
    slot = rec;
    return true;
}
Also used : Record(org.apache.jena.dboe.base.record.Record)

Aggregations

Record (org.apache.jena.dboe.base.record.Record)71 RecordLib.intToRecord (org.apache.jena.dboe.test.RecordLib.intToRecord)6 Pair (org.apache.jena.atlas.lib.Pair)5 NoSuchElementException (java.util.NoSuchElementException)3 BufferChannel (org.apache.jena.dboe.base.file.BufferChannel)3 FileSet (org.apache.jena.dboe.base.file.FileSet)3 RecordBufferPage (org.apache.jena.dboe.base.recordbuffer.RecordBufferPage)3 BPTreeNode (org.apache.jena.dboe.trans.bplustree.BPTreeNode)3 BPlusTree (org.apache.jena.dboe.trans.bplustree.BPlusTree)3 BPlusTreeParams (org.apache.jena.dboe.trans.bplustree.BPlusTreeParams)3 NodeId (org.apache.jena.tdb2.store.NodeId)3 Test (org.junit.Test)3 ArrayList (java.util.ArrayList)2 StorageException (org.apache.jena.dboe.base.StorageException)2 BlockMgr (org.apache.jena.dboe.base.block.BlockMgr)2 RecordBuffer (org.apache.jena.dboe.base.buffer.RecordBuffer)2 RecordFactory (org.apache.jena.dboe.base.record.RecordFactory)2 Index (org.apache.jena.dboe.index.Index)2 RangeIndex (org.apache.jena.dboe.index.RangeIndex)2 TDBException (org.apache.jena.tdb2.TDBException)2