Search in sources :

Example 6 with ProgressMonitor

use of org.apache.jena.system.progress.ProgressMonitor in project jena by apache.

Method indexBuilder of the class ProcBuildIndexX.

/**
 * Packs the records read from {@code input} into a B+Tree index named
 * {@code indexName}, stored at the location of the given dataset, and reports
 * progress while doing so.
 *
 * <p>The length of {@code indexName} selects the tuple width: 3 uses the
 * primary triple order and triple record length, 4 uses the primary quad order
 * and quad record length. The name itself is the column order of the index.
 *
 * @param dsg       dataset whose TDB storage location receives the index files
 * @param input     stream the records are read from (via {@code RecordsFromInput})
 * @param indexName index name; must be 3 or 4 characters long
 * @return the tick count reported by the progress monitor after it has finished
 * @throws TDBException if {@code indexName} is neither 3 nor 4 characters long
 */
private static long indexBuilder(DatasetGraph dsg, InputStream input, String indexName) {
    long tickPoint = BulkLoaderX.DataTick;
    int superTick = BulkLoaderX.DataSuperTick;
    // Location of storage, not the DB.
    DatasetGraphTDB dsgtdb = TDBInternal.getDatasetGraphTDB(dsg);
    Location location = dsgtdb.getLocation();
    // The name is the order; its length is the tuple width.
    int tupleLength = indexName.length();
    String primaryOrder;
    int dftKeyLength;
    // Index records carry a key only.
    int dftValueLength = 0;
    if (tupleLength == 3) {
        primaryOrder = Names.primaryIndexTriples;
        dftKeyLength = SystemTDB.LenIndexTripleRecord;
        // Result is not used here. The call is retained because findIndex is defined
        // elsewhere and may check that the index exists - TODO confirm.
        findIndex(dsgtdb.getTripleTable().getNodeTupleTable().getTupleTable().getIndexes(), indexName);
    } else if (tupleLength == 4) {
        primaryOrder = Names.primaryIndexQuads;
        dftKeyLength = SystemTDB.LenIndexQuadRecord;
        // See the note on the triples branch: result unused, call retained.
        findIndex(dsgtdb.getQuadTable().getNodeTupleTable().getTupleTable().getIndexes(), indexName);
    } else {
        throw new TDBException("Index name: " + indexName);
    }
    // Maps columns from the primary order to the order of this index.
    TupleMap colMap = TupleMap.create(primaryOrder, indexName);
    int readCacheSize = 10;
    int writeCacheSize = 100;
    int blockSize = SystemTDB.BlockSize;
    RecordFactory recordFactory = new RecordFactory(dftKeyLength, dftValueLength);
    int order = BPlusTreeParams.calcOrder(blockSize, recordFactory);
    BPlusTreeParams bptParams = new BPlusTreeParams(order, recordFactory);
    // Tree nodes and records use the same block size.
    FileSet destination = new FileSet(location, indexName);
    BufferChannel blkState = FileFactory.createBufferChannel(destination, Names.extBptState);
    BlockMgr blkMgrNodes = BlockMgrFactory.create(destination, Names.extBptTree, blockSize, readCacheSize, writeCacheSize);
    BlockMgr blkMgrRecords = BlockMgrFactory.create(destination, Names.extBptRecords, blockSize, readCacheSize, writeCacheSize);
    int rowBlock = 1000;
    Iterator<Record> iter = new RecordsFromInput(input, tupleLength, colMap, rowBlock);
    // Wrap the record source so the monitor sees every record that is consumed.
    ProgressMonitor monitor = ProgressMonitorOutput.create(BulkLoaderX.LOG_Index, indexName, tickPoint, superTick);
    ProgressIterator<Record> iter2 = new ProgressIterator<>(iter, monitor);
    monitor.start();
    BPlusTree bpt2 = BPlusTreeRewriter.packIntoBPlusTree(iter2, bptParams, recordFactory, blkState, blkMgrNodes, blkMgrRecords);
    bpt2.close();
    monitor.finish();
    // [BULK] End stage.
    return monitor.getTicks();
}
Also used : BPlusTreeParams(org.apache.jena.dboe.trans.bplustree.BPlusTreeParams) FileSet(org.apache.jena.dboe.base.file.FileSet) TDBException(org.apache.jena.tdb2.TDBException) BufferChannel(org.apache.jena.dboe.base.file.BufferChannel) DatasetGraphTDB(org.apache.jena.tdb2.store.DatasetGraphTDB) TupleMap(org.apache.jena.atlas.lib.tuple.TupleMap) ProgressIterator(org.apache.jena.system.progress.ProgressIterator) ProgressMonitor(org.apache.jena.system.progress.ProgressMonitor) RecordFactory(org.apache.jena.dboe.base.record.RecordFactory) BlockMgr(org.apache.jena.dboe.base.block.BlockMgr) Record(org.apache.jena.dboe.base.record.Record) TupleIndex(org.apache.jena.tdb2.store.tupletable.TupleIndex) BPlusTree(org.apache.jena.dboe.trans.bplustree.BPlusTree) Location(org.apache.jena.dboe.base.file.Location)

Example 7 with ProgressMonitor

use of org.apache.jena.system.progress.ProgressMonitor in project jena by apache.

Method exec of the class ProcIngestDataX.

// Node Table.
/**
 * Ingests the given data files into the dataset at {@code location}, writing
 * the triples and quads output files and a JSON load-info file.
 *
 * @param location     location passed to {@code getDatasetGraph}
 * @param loaderFiles  names of the triples, quads and load-info files to write
 * @param datafiles    input data files; also listed in the load-info JSON
 * @param collectStats not read by this method
 */
public static void exec(String location, XLoaderFiles loaderFiles, List<String> datafiles, boolean collectStats) {
    FmtLog.info(BulkLoaderX.LOG_Data, "Ingest data");
    // Remember the current IRI provider so it can be put back at the end.
    // Swapping in a faster provider is a possible parser speed-up, but it has no
    // effect when parsing in parallel because the parser isn't the slowest step
    // when loading at scale. No alternative provider is installed here.
    IRIProvider savedProvider = SystemIRIx.getProvider();
    DatasetGraph dsg = getDatasetGraph(location);
    ProgressMonitor monitor = ProgressMonitorOutput.create(BulkLoaderX.LOG_Data, "Data", BulkLoaderX.DataTick, BulkLoaderX.DataSuperTick);
    // WriteRows does its own buffering and has direct write-to-buffer, so do not
    // buffer here. The open call adds gzip processing if required, but the disk
    // space is needed eventually anyway, so there is no space constraint that
    // would make gzip worthwhile here.
    OutputStream triplesOut = IO.openOutputFile(loaderFiles.triplesFile);
    OutputStream quadsOut = IO.openOutputFile(loaderFiles.quadsFile);
    dsg.executeWrite(() -> {
        Pair<Long, Long> counts = build(dsg, monitor, triplesOut, quadsOut, datafiles);
        String timestamp = DateTimeUtils.nowAsXSDDateTimeString();
        long tripleCount = counts.getLeft();
        long quadCount = counts.getRight();
        FmtLog.info(BulkLoaderX.LOG_Data, "Triples = %,d ; Quads = %,d", tripleCount, quadCount);
        // Record when, from what, and how much was ingested.
        JsonObject loadInfo = JSON.buildObject(builder -> {
            builder.pair("ingested", timestamp);
            builder.key("data").startArray();
            for (String fn : datafiles) {
                builder.value(fn);
            }
            builder.finishArray();
            builder.pair("triples", tripleCount);
            builder.pair("quads", quadCount);
        });
        try (OutputStream infoOut = IO.openOutputFile(loaderFiles.loadInfo)) {
            JSON.write(infoOut, loadInfo);
        } catch (IOException ex) {
            IO.exception(ex);
        }
    });
    TDBInternal.expel(dsg);
    SystemIRIx.setProvider(savedProvider);
}
Also used : ProgressMonitor(org.apache.jena.system.progress.ProgressMonitor) OutputStream(java.io.OutputStream) BitsLong(org.apache.jena.atlas.lib.BitsLong) JsonObject(org.apache.jena.atlas.json.JsonObject) IOException(java.io.IOException) IRIProvider(org.apache.jena.irix.IRIProvider) DatasetGraph(org.apache.jena.sparql.core.DatasetGraph)

Aggregations

ProgressMonitor (org.apache.jena.system.progress.ProgressMonitor)7 OutputStream (java.io.OutputStream)2 Tuple (org.apache.jena.atlas.lib.tuple.Tuple)2 IRIProvider (org.apache.jena.irix.IRIProvider)2 TupleIndex (org.apache.jena.tdb2.store.tupletable.TupleIndex)2 IOException (java.io.IOException)1 JsonObject (org.apache.jena.atlas.json.JsonObject)1 BitsLong (org.apache.jena.atlas.lib.BitsLong)1 Timer (org.apache.jena.atlas.lib.Timer)1 TupleMap (org.apache.jena.atlas.lib.tuple.TupleMap)1 BlockMgr (org.apache.jena.dboe.base.block.BlockMgr)1 BufferChannel (org.apache.jena.dboe.base.file.BufferChannel)1 FileSet (org.apache.jena.dboe.base.file.FileSet)1 Location (org.apache.jena.dboe.base.file.Location)1 Record (org.apache.jena.dboe.base.record.Record)1 RecordFactory (org.apache.jena.dboe.base.record.RecordFactory)1 BPlusTree (org.apache.jena.dboe.trans.bplustree.BPlusTree)1 BPlusTreeParams (org.apache.jena.dboe.trans.bplustree.BPlusTreeParams)1 DatasetGraph (org.apache.jena.sparql.core.DatasetGraph)1 ProgressIterator (org.apache.jena.system.progress.ProgressIterator)1