Use of org.apache.jena.tdb2.store.DatasetGraphTDB in project Jena by Apache.
The class AbstractTestStoreConnectionBasics, method store_05.
@Test
public void store_05() {
    StoreConnection sConn = StoreConnection.connectCreate(location);
    DatasetGraphTDB dsg = sConn.getDatasetGraphTDB();
    Txn.executeWrite(dsg, () -> {
        dsg.add(q3);
    });
    Txn.executeWrite(dsg, () -> {
        assertTrue(dsg.contains(q3));
    });
}
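A minimal variant of the same write-then-check pattern, runnable against a fresh in-memory location. The location and quad below are illustrative stand-ins for the test fixture's fields (location, q3), and the read-back uses a read transaction, which suffices for a containment check:

import org.apache.jena.dboe.base.file.Location;
import org.apache.jena.sparql.core.Quad;
import org.apache.jena.sparql.sse.SSE;
import org.apache.jena.system.Txn;
import org.apache.jena.tdb2.store.DatasetGraphTDB;
import org.apache.jena.tdb2.sys.StoreConnection;

Location location = Location.mem();                 // illustrative: throwaway in-memory store
Quad q = SSE.parseQuad("(_ :s :p :o)");             // illustrative quad in the default graph
StoreConnection sConn = StoreConnection.connectCreate(location);
DatasetGraphTDB dsg = sConn.getDatasetGraphTDB();
Txn.executeWrite(dsg, () -> dsg.add(q));
// A read transaction is enough for the check.
Txn.executeRead(dsg, () -> {
    if (!dsg.contains(q))
        throw new AssertionError("quad not found");
});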
Use of org.apache.jena.tdb2.store.DatasetGraphTDB in project Jena by Apache.
The class StoreConnection, method make.
/**
 * Return a {@code StoreConnection} for a particular location,
 * creating it if it does not exist in storage.
 */
private static synchronized StoreConnection make(Location location, StoreParams params) {
    StoreConnection sConn = cache.get(location);
    if (sConn == null) {
        ProcessFileLock lock = null;
        // This is the tdb.lock file in the storage database, not the switchable dataset's.
        if (SystemTDB.DiskLocationMultiJvmUsagePrevention && !location.isMem()) {
            lock = lockForLocation(location);
            // Take the lock. This is atomic and non-reentrant.
            lock.lockEx();
        }
        // Recovery happens when TransactionCoordinator.start is called
        // during the building of the DatasetGraphTDB.
        DatasetGraphTDB dsg = TDB2StorageBuilder.build(location, params);
        sConn = new StoreConnection(dsg, lock);
        if (!location.isMemUnique())
            cache.put(location, sConn);
    }
    return sConn;
}
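A small sketch of the caching contract implied by make, going through the public connectCreate entry point, which routes through make. The directory name "DB2" is an assumption for illustration:

import org.apache.jena.dboe.base.file.Location;
import org.apache.jena.tdb2.sys.StoreConnection;

Location loc = Location.create("DB2");              // assumed directory name, for illustration
StoreConnection c1 = StoreConnection.connectCreate(loc);
StoreConnection c2 = StoreConnection.connectCreate(loc);
// A non-memory location is cached, so the same StoreConnection comes back.
if (c1 != c2)
    throw new AssertionError("expected the cached StoreConnection");

By contrast, an unnamed Location.mem() is mem-unique and, per the code above, never cached: each connectCreate(Location.mem()) builds an independent store.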
Use of org.apache.jena.tdb2.store.DatasetGraphTDB in project Jena by Apache.
The class TDBInternal, method getNode.
/**
 * Return the node for a NodeId (if any). Returns null if the NodeId does not exist in
 * the dataset.
 */
public static Node getNode(DatasetGraphTDB dsg, NodeId nodeId) {
    if (dsg == null)
        return null;
    NodeTable nodeTable = dsg.getQuadTable().getNodeTupleTable().getNodeTable();
    Node node = nodeTable.getNodeForNodeId(nodeId);
    return node;
}
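A possible wrapper around this helper, assuming the caller supplies the dataset and a NodeId (for instance from getNodeId, shown next) and that the lookup runs inside a read transaction, since TDBInternal does no transaction handling itself. The method name nodeOf is hypothetical:

// Hypothetical helper: resolve a NodeId to its Node inside a read transaction.
static Node nodeOf(DatasetGraphTDB dsgtdb, NodeId nodeId) {
    return Txn.calculateRead(dsgtdb, () -> TDBInternal.getNode(dsgtdb, nodeId));
}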
Use of org.apache.jena.tdb2.store.DatasetGraphTDB in project Jena by Apache.
The class TDBInternal, method getNodeId.
/**
 * Return the NodeId for a node. Returns NodeId.NodeDoesNotExist when the node is not
 * found. Returns null when not a TDB-backed dataset.
 */
public static NodeId getNodeId(DatasetGraphTDB dsg, Node node) {
    if (dsg == null)
        return null;
    NodeTable nodeTable = dsg.getQuadTable().getNodeTupleTable().getNodeTable();
    NodeId nodeId = nodeTable.getNodeIdForNode(node);
    return nodeId;
}
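A hedged round-trip sketch combining the two helpers. The database path and the IRI are illustrative, and the NodeDoesNotExist comparison follows the javadoc above:

import org.apache.jena.graph.Node;
import org.apache.jena.graph.NodeFactory;
import org.apache.jena.sparql.core.DatasetGraph;
import org.apache.jena.system.Txn;
import org.apache.jena.tdb2.DatabaseMgr;
import org.apache.jena.tdb2.store.DatasetGraphTDB;
import org.apache.jena.tdb2.store.NodeId;
import org.apache.jena.tdb2.sys.TDBInternal;

DatasetGraph dsg = DatabaseMgr.connectDatasetGraph("DB2");   // assumed database path
DatasetGraphTDB dsgtdb = TDBInternal.getDatasetGraphTDB(dsg);
Node term = NodeFactory.createURI("http://example/s");       // illustrative term
Txn.executeRead(dsgtdb, () -> {
    NodeId id = TDBInternal.getNodeId(dsgtdb, term);
    if (NodeId.NodeDoesNotExist.equals(id)) {
        System.out.println("term is not in the node table");
    } else {
        Node back = TDBInternal.getNode(dsgtdb, id);
        System.out.println("round trip gives " + back);      // expected: equal to term
    }
});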
Use of org.apache.jena.tdb2.store.DatasetGraphTDB in project Jena by Apache.
The class ProcBuildNodeTableX, method exec2.
/**
 * Pair<triples, indexed nodes>
 * @param sortThreads
 */
// [BULK] Output, not return.
private static Pair<Long, Long> exec2(String DB, XLoaderFiles loaderFiles, int sortThreads, String sortNodeTableArgs, List<String> datafiles) {
    // Threads - 1 parser, 1 builder, 2 sort.
    // Steps:
    //   1 - parse and pipe terms to sort
    //   2 - sort
    //   3 - build the node table from the unique, sorted terms
    IRIProvider provider = SystemIRIx.getProvider();
    // SystemIRIx.setProvider(new IRIProviderAny());
    DatasetGraph dsg = DatabaseMgr.connectDatasetGraph(DB);
    DatasetGraphTDB dsgtdb = TDBInternal.getDatasetGraphTDB(dsg);
    NodeTable nt = dsgtdb.getTripleTable().getNodeTupleTable().getNodeTable();
    NodeTableTRDF nodeTable = (NodeTableTRDF) nt.baseNodeTable();
    OutputStream toSortOutputStream;
    InputStream fromSortInputStream;
    if (sortThreads <= 0)
        sortThreads = 2;
    // ** Step 2: the external sort.
    Process procSort;
    try {
        // LOG.info("Step : external sort");
        // Mutable list.
        List<String> sortCmd = new ArrayList<>(Arrays.asList(
                "sort",
                "--temporary-directory=" + loaderFiles.TMPDIR,
                "--buffer-size=50%",
                "--parallel=" + sortThreads,
                "--unique",
                "--key=1,1"));
        if (BulkLoaderX.CompressSortNodeTableFiles)
            sortCmd.add("--compress-program=" + BulkLoaderX.gzipProgram());
        // if ( sortNodeTableArgs != null ) {}
        ProcessBuilder pb2 = new ProcessBuilder(sortCmd);
        pb2.environment().put("LC_ALL", "C");
        procSort = pb2.start();
        // To the sort process. Let the writer close it.
        toSortOutputStream = procSort.getOutputStream();
        // From the sort process to the tree builder. Let the reader side close it.
        fromSortInputStream = procSort.getInputStream();
        // // Debug the sort process.
        // InputStream fromSortErrorStream = procSort.getErrorStream();
        // IOUtils.copy(fromSortErrorStream, System.err);
    } catch (Exception ex) {
        throw new RuntimeException(ex);
    }
    // ** Step 1: write the intermediate file (hash, thrift bytes).
    AtomicLong countParseTicks = new AtomicLong(-1);
    AtomicLong countIndexedNodes = new AtomicLong(-1);
    long tickPoint = BulkLoaderX.DataTick;
    int superTick = BulkLoaderX.DataSuperTick;
    Runnable task1 = () -> {
        ProgressMonitorOutput monitor = ProgressMonitorOutput.create(BulkLoaderX.LOG_Nodes, "Nodes", tickPoint, superTick);
        OutputStream output = IO.ensureBuffered(toSortOutputStream);
        // Counting.
        StreamRDF worker = new NodeHashTmpStream(output);
        ProgressStreamRDF stream = new ProgressStreamRDF(worker, monitor);
        monitor.start();
        String label = monitor.getLabel();
        datafiles.forEach(datafile -> {
            String basename = FileOps.basename(datafile);
            monitor.setLabel(basename);
            stream.start();
            RDFParser.source(datafile).parse(stream);
            stream.finish();
        });
        monitor.finish();
        monitor.setLabel(label);
        IO.flush(output);
        IO.close(output);
        long x = monitor.getTime();
        // long x = timer.endTimer();
        long count = monitor.getTicks();
        countParseTicks.set(count);
        double xSec = x / 1000.0;
        double rate = count / xSec;
        FmtLog.info(BulkLoaderX.LOG_Nodes, "%s Parse (nodes): %s seconds : %,d triples/quads %,.0f TPS", BulkLoaderX.StageMarker, Timer.timeStr(x), count, rate);
    };
    // [BULK] XXX AsyncParser.asyncParse(files, output)
    Thread thread1 = async(task1, "AsyncParser");
    // ** Step 3: build the node table.
    Runnable task3 = () -> {
        Timer timer = new Timer();
        // Don't start the timer until sort sends something.
        // The process streams are already buffered.
        InputStream input = IO.ensureBuffered(fromSortInputStream);
        FileSet fileSet = new FileSet(dsgtdb.getLocation(), Names.nodeTableBaseName);
        BufferChannel blkState = FileFactory.createBufferChannel(fileSet, Names.extBptState);
        long idxTickPoint = BulkLoaderX.DataTick;
        int idxSuperTick = BulkLoaderX.DataSuperTick;
        ProgressMonitorOutput monitor = ProgressMonitorOutput.create(BulkLoaderX.LOG_Terms, "Index", idxTickPoint, idxSuperTick);
        // Library of tools!
        dsg.executeWrite(() -> {
            BinaryDataFile objectFile = nodeTable.getData();
            Iterator<Record> rIter = records(BulkLoaderX.LOG_Terms, input, objectFile);
            rIter = new ProgressIterator<>(rIter, monitor);
            // Records of (hash, NodeId).
            BPlusTree bpt1 = (BPlusTree) (nodeTable.getIndex());
            BPlusTreeParams bptParams = bpt1.getParams();
            RecordFactory factory = new RecordFactory(SystemTDB.LenNodeHash, NodeId.SIZE);
            // Wait until something has been received from the sort step ...
            rIter.hasNext();
            monitor.start();
            // ... then start the timer. It is stopped after the transaction finishes.
            timer.startTimer();
            BPlusTree bpt2 = BPlusTreeRewriter.packIntoBPlusTree(rIter, bptParams, factory, blkState, bpt1.getNodeManager().getBlockMgr(), bpt1.getRecordsMgr().getBlockMgr());
            bpt2.sync();
            bpt1.sync();
            objectFile.sync();
            monitor.finish();
        });
        blkState.sync();
        IO.close(input);
        long x = timer.endTimer();
        long count = monitor.getTicks();
        countIndexedNodes.set(count);
        String rateStr = BulkLoaderX.rateStr(count, x);
        FmtLog.info(BulkLoaderX.LOG_Terms, "%s Index terms: %s seconds : %,d indexed RDF terms : %s PerSecond", BulkLoaderX.StageMarker, Timer.timeStr(x), count, rateStr);
    };
    Thread thread3 = async(task3, "AsyncBuild");
    try {
        int exitCode = procSort.waitFor();
        if (exitCode != 0) {
            String msg = IO.readWholeFileAsUTF8(procSort.getErrorStream());
            String logMsg = String.format("Sort RC = %d : Error: %s", exitCode, msg);
            Log.error(BulkLoaderX.LOG_Terms, logMsg);
            // ** Exit the process.
            System.exit(exitCode);
        } else
            BulkLoaderX.LOG_Terms.info("Sort finished");
        // Streams toSortOutputStream and fromSortInputStream are closed by
        // their users - step 1 and step 3.
    } catch (InterruptedException e) {
        BulkLoaderX.LOG_Nodes.error("Failed to cleanly wait-for the subprocess");
        throw new RuntimeException(e);
    }
    BulkLoaderX.waitFor(thread1);
    BulkLoaderX.waitFor(thread3);
    return Pair.create(countParseTicks.get(), countIndexedNodes.get());
}
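The structural core of exec2 is piping data through an external sort(1) process, with one thread feeding stdin and another consuming stdout. A stripped-down, self-contained sketch of just that pattern, using only the JDK; the sample lines and minimal options are illustrative, and the real loader adds the temporary-directory, buffer-size, parallelism, and compression flags shown above:

import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;

public class SortPipeSketch {
    public static void main(String[] args) throws IOException, InterruptedException {
        ProcessBuilder pb = new ProcessBuilder(Arrays.asList("sort", "--unique"));
        pb.environment().put("LC_ALL", "C");   // byte-wise collation, as in the loader
        Process sort = pb.start();
        // Writer thread: feed lines to sort's stdin, then close it so sort can finish.
        Thread writer = new Thread(() -> {
            try (OutputStream out = sort.getOutputStream()) {
                out.write("b\na\nb\n".getBytes(StandardCharsets.UTF_8));
            } catch (IOException e) {
                throw new UncheckedIOException(e);
            }
        });
        writer.start();
        // Reader: consume sort's deduplicated, ordered stdout. Prints "a" then "b".
        try (BufferedReader in = new BufferedReader(
                new InputStreamReader(sort.getInputStream(), StandardCharsets.UTF_8))) {
            in.lines().forEach(System.out::println);
        }
        writer.join();
        int rc = sort.waitFor();               // non-zero exit means sort failed
        if (rc != 0)
            throw new RuntimeException("sort exited with " + rc);
    }
}

As in exec2, the writer and reader run concurrently so the pipe never fills up and deadlocks, and the exit code is checked only after both ends have been drained.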