Use of org.apache.jena.system.progress.ProgressMonitor in the Apache Jena project:
the indexBuilder method of the ProcBuildIndexX class.
/**
 * Build one B+Tree index from a stream of pre-sorted records.
 * <p>
 * The index name (e.g. "SPO", "GSPO") doubles as the column order: its length is
 * the tuple width (3 = triples, 4 = quads). Records are read from {@code input},
 * remapped from the primary order to this index's order, and bulk-packed into a
 * fresh B+Tree at the dataset's storage location.
 *
 * @param dsg       the dataset; must be backed by TDB (resolved via TDBInternal)
 * @param input     stream of records to index
 *                  (NOTE(review): packIntoBPlusTree presumably requires sorted input — confirm)
 * @param indexName index name / column order; length must be 3 or 4
 * @return number of records indexed (progress-monitor tick count)
 * @throws TDBException if the index name is neither 3 nor 4 characters long
 */
private static long indexBuilder(DatasetGraph dsg, InputStream input, String indexName) {
    long tickPoint = BulkLoaderX.DataTick;
    int superTick = BulkLoaderX.DataSuperTick;
    // Location of storage, not the DB.
    DatasetGraphTDB dsgtdb = TDBInternal.getDatasetGraphTDB(dsg);
    Location location = dsgtdb.getLocation();
    // The name is the order; its length selects triples vs quads.
    int tupleLength = indexName.length();
    String primaryOrder;
    int dftKeyLength;
    int dftValueLength;
    if (tupleLength == 3) {
        primaryOrder = Names.primaryIndexTriples;
        dftKeyLength = SystemTDB.LenIndexTripleRecord;
        dftValueLength = 0;
        // Look up the index by name; the result is not needed here — the call is
        // retained for its lookup side effect (presumably fails for an unknown
        // index name; TODO confirm findIndex semantics).
        findIndex(dsgtdb.getTripleTable().getNodeTupleTable().getTupleTable().getIndexes(), indexName);
    } else if (tupleLength == 4) {
        primaryOrder = Names.primaryIndexQuads;
        dftKeyLength = SystemTDB.LenIndexQuadRecord;
        dftValueLength = 0;
        findIndex(dsgtdb.getQuadTable().getNodeTupleTable().getTupleTable().getIndexes(), indexName);
    } else {
        throw new TDBException("Index name: " + indexName);
    }
    // Mapping from the primary index order to this index's column order.
    TupleMap colMap = TupleMap.create(primaryOrder, indexName);
    int readCacheSize = 10;
    int writeCacheSize = 100;
    int blockSize = SystemTDB.BlockSize;
    RecordFactory recordFactory = new RecordFactory(dftKeyLength, dftValueLength);
    int order = BPlusTreeParams.calcOrder(blockSize, recordFactory);
    BPlusTreeParams bptParams = new BPlusTreeParams(order, recordFactory);
    // On-disk artifacts of the new B+Tree: state channel, node blocks, record blocks.
    FileSet destination = new FileSet(location, indexName);
    BufferChannel blkState = FileFactory.createBufferChannel(destination, Names.extBptState);
    BlockMgr blkMgrNodes = BlockMgrFactory.create(destination, Names.extBptTree, blockSize, readCacheSize, writeCacheSize);
    BlockMgr blkMgrRecords = BlockMgrFactory.create(destination, Names.extBptRecords, blockSize, readCacheSize, writeCacheSize);
    int rowBlock = 1000;
    Iterator<Record> iter = new RecordsFromInput(input, tupleLength, colMap, rowBlock);
    // Wrap the record iterator so progress ticks track consumption.
    ProgressMonitor monitor = ProgressMonitorOutput.create(BulkLoaderX.LOG_Index, indexName, tickPoint, superTick);
    ProgressIterator<Record> iter2 = new ProgressIterator<>(iter, monitor);
    monitor.start();
    BPlusTree bpt2 = BPlusTreeRewriter.packIntoBPlusTree(iter2, bptParams, recordFactory, blkState, blkMgrNodes, blkMgrRecords);
    bpt2.close();
    monitor.finish();
    // [BULK] End stage.
    return monitor.getTicks();
}
Use of org.apache.jena.system.progress.ProgressMonitor in the Apache Jena project:
the exec method of the ProcIngestDataX class.
// Node Table.
/**
 * Ingest data files into the dataset at {@code location}, writing intermediate
 * triple and quad files and a JSON summary ({@code loadInfo}) of what was loaded.
 */
public static void exec(String location, XLoaderFiles loaderFiles, List<String> datafiles, boolean collectStats) {
    FmtLog.info(BulkLoaderX.LOG_Data, "Ingest data");
    // Remember the current IRI provider so it can be restored at the end.
    // A faster provider could be swapped in here, but that has no effect when
    // parsing in parallel — the parser isn't the slowest step at scale.
    IRIProvider previousProvider = SystemIRIx.getProvider();
    // SystemIRIx.setProvider(new IRIProviderAny());
    // DatasetGraph dsg = DatabaseMgr.connectDatasetGraph(location);
    DatasetGraph dsg = getDatasetGraph(location);
    ProgressMonitor progress = ProgressMonitorOutput.create(BulkLoaderX.LOG_Data, "Data", BulkLoaderX.DataTick, BulkLoaderX.DataSuperTick);
    // WriteRows does its own buffering with direct write-to-buffer, so no extra
    // buffering here. gzip could be added, but the disk space is needed
    // eventually anyway, so there is no space pressure to compress.
    OutputStream triplesOut = IO.openOutputFile(loaderFiles.triplesFile);
    OutputStream quadsOut = IO.openOutputFile(loaderFiles.quadsFile);
    dsg.executeWrite(() -> {
        Pair<Long, Long> counts = build(dsg, progress, triplesOut, quadsOut, datafiles);
        String timestamp = DateTimeUtils.nowAsXSDDateTimeString();
        long numTriples = counts.getLeft();
        long numQuads = counts.getRight();
        FmtLog.info(BulkLoaderX.LOG_Data, "Triples = %,d ; Quads = %,d", numTriples, numQuads);
        // Persist a small JSON summary of this ingest stage.
        JsonObject summary = JSON.buildObject(b -> {
            b.pair("ingested", timestamp);
            b.key("data").startArray();
            datafiles.forEach(fn -> b.value(fn));
            b.finishArray();
            b.pair("triples", numTriples);
            b.pair("quads", numQuads);
        });
        try (OutputStream out = IO.openOutputFile(loaderFiles.loadInfo)) {
            JSON.write(out, summary);
        } catch (IOException ex) {
            IO.exception(ex);
        }
    });
    TDBInternal.expel(dsg);
    SystemIRIx.setProvider(previousProvider);
}
Aggregations