Search in sources :

Example 31 with Tuple

use of org.apache.jena.atlas.lib.tuple.Tuple in project jena by apache.

the class Indexer method stageIndex.

/**
 * Load one index from a queue of tuple blocks, inside a single write transaction.
 * <p>
 * Blocks are taken from {@code pipe} and handed to the loader for {@code idx} until an
 * empty list arrives, which is treated as the end-of-input marker. On normal completion
 * the transaction is committed; if anything fails the error is logged and the
 * transaction is aborted. {@code termination} is released on every path once the
 * transaction has been started, including when abort or coordinator shutdown throws.
 * <p>
 * If the thread was interrupted while waiting on the queue, its interrupt status is
 * restored after cleanup so that code further up the stack can observe it.
 *
 * @param pipe source of tuple blocks; an empty list ends the stage
 * @param idx  the index to populate
 */
private void stageIndex(BlockingQueue<List<Tuple<NodeId>>> pipe, TupleIndex idx) {
    TransactionCoordinator coordinator = CoLib.newCoordinator();
    CoLib.add(coordinator, idx);
    CoLib.start(coordinator);
    Transaction transaction = coordinator.begin(TxnType.WRITE);
    boolean workHasBeenDone = false;
    boolean interrupted = false;
    try {
        try {
            Destination<Tuple<NodeId>> loader = loadTuples(idx);
            for (; ; ) {
                List<Tuple<NodeId>> tuples = pipe.take();
                // An empty block is the end-of-input marker.
                if (tuples.isEmpty()) {
                    break;
                }
                loader.deliver(tuples);
            }
            workHasBeenDone = !idx.isEmpty();
            transaction.commit();
        } catch (InterruptedException ex) {
            // Record the interrupt first so it survives even if abort() throws.
            interrupted = true;
            workHasBeenDone = false;
            Log.error(this, "Interrupted", ex);
            transaction.abort();
        } catch (Exception ex) {
            workHasBeenDone = false;
            Log.error(this, "Exception during index loading", ex);
            transaction.abort();
        }
        // NOTE(review): unlike DataToTuples.action, transaction.end() is not called here - confirm whether it should be.
        CoLib.finish(coordinator);
        if (workHasBeenDone) {
            output.print("Finish - index %s", idx.getName());
        }
    } finally {
        // Always signal completion, otherwise whoever waits on 'termination' could block forever.
        termination.release();
        if (interrupted) {
            // Restore the flag only after cleanup so abort/finish are not disturbed by it.
            Thread.currentThread().interrupt();
        }
    }
}
Also used : Transaction(org.apache.jena.dboe.transaction.txn.Transaction) TransactionCoordinator(org.apache.jena.dboe.transaction.txn.TransactionCoordinator) Tuple(org.apache.jena.atlas.lib.tuple.Tuple)

Example 32 with Tuple

use of org.apache.jena.atlas.lib.tuple.Tuple in project jena by apache.

the class LoaderMain method executeData.

/**
 * Set up data ingestion and primary index building for a {@link LoaderPlan}.
 * In phase 1, there are separate threads for parsing, node table loading and
 * primary index building.
 * <p>
 * Every stage created here is appended to {@code dataProcess}; the returned stream
 * is the entry point the caller pumps data into.
 * <p>
 * Used by {@link InputStage#MULTI}.
 */
private static StreamRDFCounting executeData(LoaderPlan loaderPlan, DatasetGraphTDB dsgtdb, Map<String, TupleIndex> indexMap, List<BulkStartFinish> dataProcess, MonitorOutput output) {
    // Prefix handling stage.
    StoragePrefixesTDB prefixStorage = (StoragePrefixesTDB) dsgtdb.getStoragePrefixes();
    PrefixHandlerBulk prefixes = new PrefixHandlerBulk(prefixStorage, output);
    dataProcess.add(prefixes);

    // -- Phase 2 block. One Indexer per primary index set, each fed blocks of Tuple<NodeId>.
    Indexer indexerFor3 = new Indexer(output, PhasedOps.indexSetFromNames(loaderPlan.primaryLoad3(), indexMap));
    Indexer indexerFor4 = new Indexer(output, PhasedOps.indexSetFromNames(loaderPlan.primaryLoad4(), indexMap));
    dataProcess.add(indexerFor3);
    dataProcess.add(indexerFor4);

    // -- Phase 1.
    // The opposite arrangement to AsyncParser: a StreamRDF is handed back for the caller
    // to push data into, while the remaining processing happens on other threads.
    // (AsyncParser does the processing on the caller thread, which carries on once the
    // parser output has been consumed.)
    DataToTuples toTuples = new DataToTuples(dsgtdb, indexerFor3.index(), indexerFor4.index(), output);
    DataBatcher batcher = new DataBatcher(toTuples.data(), prefixes.handler(), output);
    dataProcess.add(toTuples);
    dataProcess.add(batcher);
    return batcher;
}
Also used : StoragePrefixesTDB(org.apache.jena.tdb2.store.StoragePrefixesTDB) TupleIndex(org.apache.jena.tdb2.store.tupletable.TupleIndex) Tuple(org.apache.jena.atlas.lib.tuple.Tuple)

Example 33 with Tuple

use of org.apache.jena.atlas.lib.tuple.Tuple in project jena by apache.

the class DataToTuples method action.

// Triples.
/**
 * Convert incoming data blocks into tuples of {@code NodeId} and dispatch them onwards,
 * all inside one write transaction on the node table.
 * <p>
 * Blocks are taken from {@code input} until {@code DataBlock.END} is seen. Triples of a
 * block are accumulated and passed to {@code dispatchTuples3}, quads to
 * {@code dispatchTuples4}. After the last block, {@code LoaderConst.END_TUPLES} is sent
 * down both routes and the transaction is committed. On any exception the error is
 * logged and the transaction is aborted; in that case the end markers are not sent.
 * <p>
 * {@code termination} is released on every path once the transaction has been started,
 * and an interrupt received while running is restored on the thread after cleanup.
 */
private void action() {
    coordinator = CoLib.newCoordinator();
    CoLib.add(coordinator, nodeTable);
    CoLib.start(coordinator);
    transaction = coordinator.begin(TxnType.WRITE);
    boolean interrupted = false;
    try {
        try {
            for (; ; ) {
                DataBlock data = input.take();
                if (data == DataBlock.END) {
                    break;
                }
                if (data.triples != null) {
                    List<Tuple<NodeId>> tuples = new ArrayList<>(data.triples.size());
                    for (Triple t : data.triples) {
                        countTriples++;
                        accTuples(t, nodeTable, tuples);
                    }
                    dispatchTuples3(tuples);
                }
                if (data.quads != null) {
                    List<Tuple<NodeId>> tuples = new ArrayList<>(data.quads.size());
                    for (Quad q : data.quads) {
                        countQuads++;
                        accTuples(q, nodeTable, tuples);
                    }
                    dispatchTuples4(tuples);
                }
            }
            dispatchTuples3(LoaderConst.END_TUPLES);
            dispatchTuples4(LoaderConst.END_TUPLES);
            transaction.commit();
        } catch (Exception ex) {
            // Record an interrupt first so it survives even if abort() throws.
            interrupted = ex instanceof InterruptedException;
            Log.error(this, "Exception during data loading", ex);
            transaction.abort();
        }
        transaction.end();
        CoLib.finish(coordinator);
    } finally {
        // Always signal completion, otherwise whoever waits on 'termination' could block forever.
        termination.release();
        if (interrupted) {
            // Restore the flag only after cleanup so abort/end/finish are not disturbed by it.
            Thread.currentThread().interrupt();
        }
    }
}
Also used : Triple(org.apache.jena.graph.Triple) Quad(org.apache.jena.sparql.core.Quad) ArrayList(java.util.ArrayList) Tuple(org.apache.jena.atlas.lib.tuple.Tuple) BulkLoaderException(org.apache.jena.tdb2.loader.BulkLoaderException)

Example 34 with Tuple

use of org.apache.jena.atlas.lib.tuple.Tuple in project jena by apache.

the class PhasedOps method replay.

/**
 * Copy every tuple of {@code srcIdx} to {@code dest}, batched into blocks of
 * {@code LoaderConst.ChunkSize} tuples.
 * <p>
 * A final, partially filled block is delivered if there is one, followed by an empty
 * list. The progress monitor is ticked once per tuple.
 *
 * @param srcIdx the index to read all tuples from
 * @param dest   receiver of the tuple blocks
 * @param output where progress is reported
 * @return (Number, Time in ms), as reported by the progress monitor
 */
static ReplayResult replay(TupleIndex srcIdx, Destination<Tuple<NodeId>> dest, MonitorOutput output) {
    ProgressMonitor monitor = ProgressMonitorFactory.progressMonitor("Index", output, LoaderMain.IndexTickPoint, LoaderMain.IndexSuperTick);
    monitor.start();
    // Allocated lazily so that an exactly full last block is not followed by an empty allocation.
    List<Tuple<NodeId>> block = null;
    Iterator<Tuple<NodeId>> iter = srcIdx.all();
    while (iter.hasNext()) {
        if (block == null) {
            block = new ArrayList<>(LoaderConst.ChunkSize);
        }
        block.add(iter.next());
        monitor.tick();
        if (block.size() == LoaderConst.ChunkSize) {
            // Hand the full block over and start a fresh one; the receiver may keep the list.
            dest.deliver(block);
            block = null;
        }
    }
    if (block != null) {
        dest.deliver(block);
    }
    // An empty list follows the last block of data.
    dest.deliver(Collections.emptyList());
    monitor.finish();
    return new ReplayResult(monitor.getTicks(), monitor.getTime());
}
Also used : ProgressMonitor(org.apache.jena.system.progress.ProgressMonitor) NodeId(org.apache.jena.tdb2.store.NodeId) Tuple(org.apache.jena.atlas.lib.tuple.Tuple)

Example 35 with Tuple

use of org.apache.jena.atlas.lib.tuple.Tuple in project jena by apache.

the class tdbstats method stats$.

/**
 * Collect statistics over the tuples of a dataset.
 * <p>
 * With {@code gn == null} every tuple of the triple table is recorded, with a
 * {@code null} first argument to the collector. Otherwise the quad table is scanned:
 * via {@code SolverLibTDB.unionGraph} when {@code gn} is the union graph, or restricted
 * to the NodeId of {@code gn} in the first slot. A warning is logged when that graph
 * node has no NodeId; the scan is still attempted.
 *
 * @param dsg the dataset to scan
 * @param gn  graph name, or {@code null} to scan the triple table
 * @return the accumulated statistics
 */
private static StatsResults stats$(DatasetGraphTDB dsg, Node gn) {
    NodeTable nodeTable = dsg.getTripleTable().getNodeTupleTable().getNodeTable();
    StatsCollectorNodeId collector = new StatsCollectorNodeId(nodeTable);

    if (gn == null) {
        Iterator<Tuple<NodeId>> triples = dsg.getTripleTable().getNodeTupleTable().findAll();
        while (triples.hasNext()) {
            Tuple<NodeId> triple = triples.next();
            collector.record(null, triple.get(0), triple.get(1), triple.get(2));
        }
        return collector.results();
    }

    // For the union graph, all quads must be scanned, but with uniqueness.
    boolean isUnion = Quad.isUnionGraph(gn);
    NodeId graphId = null;
    if (!isUnion) {
        graphId = nodeTable.getNodeIdForNode(gn);
        if (NodeId.isDoesNotExist(graphId))
            Log.warn(tdbstats.class, "No such graph: " + gn);
    }

    NodeTupleTable quadTable = dsg.getQuadTable().getNodeTupleTable();
    Iterator<Tuple<NodeId>> quads;
    if (isUnion) {
        quads = SolverLibTDB.unionGraph(quadTable);
    } else {
        quads = quadTable.find(graphId, null, null, null);
    }
    while (quads.hasNext()) {
        Tuple<NodeId> quad = quads.next();
        collector.record(quad.get(0), quad.get(1), quad.get(2), quad.get(3));
    }
    return collector.results();
}
Also used : NodeTupleTable(org.apache.jena.tdb2.store.nodetupletable.NodeTupleTable) NodeId(org.apache.jena.tdb2.store.NodeId) StatsCollectorNodeId(org.apache.jena.tdb2.solver.stats.StatsCollectorNodeId) NodeTable(org.apache.jena.tdb2.store.nodetable.NodeTable) Tuple(org.apache.jena.atlas.lib.tuple.Tuple)

Aggregations

Tuple (org.apache.jena.atlas.lib.tuple.Tuple): 75, NodeId (org.apache.jena.tdb.store.NodeId): 35, Node (org.apache.jena.graph.Node): 22, TupleIndex (org.apache.jena.tdb.store.tupletable.TupleIndex): 19, Test (org.junit.Test): 19, BaseTest (org.apache.jena.atlas.junit.BaseTest): 17, Quad (org.apache.jena.sparql.core.Quad): 13, NodeId (org.apache.jena.tdb2.store.NodeId): 10, Predicate (java.util.function.Predicate): 8, Triple (org.apache.jena.graph.Triple): 7, Iterator (java.util.Iterator): 6, Binding (org.apache.jena.sparql.engine.binding.Binding): 6, ArrayList (java.util.ArrayList): 5, Iter (org.apache.jena.atlas.iterator.Iter): 5, NodeTable (org.apache.jena.tdb.store.nodetable.NodeTable): 5, NodeTupleTable (org.apache.jena.tdb.store.nodetupletable.NodeTupleTable): 5, NodeTable (org.apache.jena.tdb2.store.nodetable.NodeTable): 5, Function (java.util.function.Function): 4, InternalErrorException (org.apache.jena.atlas.lib.InternalErrorException): 4, TupleFactory (org.apache.jena.atlas.lib.tuple.TupleFactory): 4