Example usage of org.apache.jena.dboe.base.record.Record in the Apache Jena project:
the findComparison method of the TestBPlusTreeRewriterNonTxn class.
/**
 * Verify that every record in {@code originaldata} is findable in the
 * rewritten B+tree {@code bpt2} and identical to the original, reporting
 * any deviation via {@link #error}.
 *
 * @param originaldata the records originally loaded into the source tree
 * @param bpt2         the B+tree produced by the rewriter
 */
public static void findComparison(List<Record> originaldata, BPlusTree bpt2) {
    long count = 0;
    for (Record r1 : originaldata) {
        count++;
        Record r3 = bpt2.find(r1);
        if (r3 == null) {
            // Record missing from the rewritten tree.
            // (Removed a leftover debug re-find that reassigned r3 and could
            // make the "null" message wrong and trigger a second, bogus error.)
            error("Deviation in find at record %d: %s : null", count, r1);
            continue;
        }
        if (!Record.equals(r1, r3))
            error("Deviation in find at record %d: %s : %s", count, r1, r3);
    }
}
Example usage of org.apache.jena.dboe.base.record.Record in the Apache Jena project:
the exec2 method of the ProcBuildNodeTableX class.
/**
 * Build the node table: parse the data files, stream (hash, thrift-encoded
 * term) lines through an external {@code sort --unique} process, and pack
 * the sorted, de-duplicated records directly into the node table's B+tree
 * index.
 * <p>
 * Runs as three concurrent parts: a parser thread feeding the sort process
 * (step 1), the external sort itself (step 2), and a builder thread
 * consuming the sorted output (step 3).
 *
 * @param DB                location of the database to build into
 * @param loaderFiles       working files/directories for the loader
 * @param sortThreads       number of parallel sort threads; defaults to 2 when {@code <= 0}
 * @param sortNodeTableArgs extra sort arguments (currently unused — TODO confirm)
 * @param datafiles         the RDF data files to parse
 * @return Pair of (triples/quads parsed, RDF terms indexed)
 */
// [BULK] Output, not return.
private static Pair<Long, Long> exec2(String DB, XLoaderFiles loaderFiles, int sortThreads, String sortNodeTableArgs, List<String> datafiles) {
    // Threads - 1 parser, 1 builder, 2 sort.
    // Steps:
    // 1 - parser to and pipe terms to sort
    // 2 - sort
    // 3 - build node table from unique sort
    // NOTE(review): 'provider' is captured but never used or restored — TODO confirm intended.
    IRIProvider provider = SystemIRIx.getProvider();
    // SystemIRIx.setProvider(new IRIProviderAny());
    DatasetGraph dsg = DatabaseMgr.connectDatasetGraph(DB);
    DatasetGraphTDB dsgtdb = TDBInternal.getDatasetGraphTDB(dsg);
    NodeTable nt = dsgtdb.getTripleTable().getNodeTupleTable().getNodeTable();
    NodeTableTRDF nodeTable = (NodeTableTRDF) nt.baseNodeTable();
    OutputStream toSortOutputStream;
    InputStream fromSortInputStream;
    if (sortThreads <= 0)
        sortThreads = 2;
    // ** Step 2: The sort
    Process procSort;
    try {
        // LOG.info("Step : external sort");
        // Mutable list.
        List<String> sortCmd = new ArrayList<>(Arrays.asList("sort", "--temporary-directory=" + loaderFiles.TMPDIR, "--buffer-size=50%", "--parallel=" + sortThreads, "--unique", "--key=1,1"));
        if (BulkLoaderX.CompressSortNodeTableFiles)
            sortCmd.add("--compress-program=" + BulkLoaderX.gzipProgram());
        // if ( sortNodeTableArgs != null ) {}
        ProcessBuilder pb2 = new ProcessBuilder(sortCmd);
        // Byte-wise collation: stable, locale-independent ordering for sort.
        pb2.environment().put("LC_ALL", "C");
        procSort = pb2.start();
        // To process.
        // Let the writer close it.
        toSortOutputStream = procSort.getOutputStream();
        // From process to the tree builder.
        // Let the reader side close it.
        fromSortInputStream = procSort.getInputStream();
        // // Debug sort process.
        // InputStream fromSortErrortStream = proc2.getErrorStream();
        // IOUtils.copy(fromSortErrortStream, System.err);
    } catch (Exception ex) {
        throw new RuntimeException(ex);
    }
    // ** Step 1 : write intermediate file (hash, thrift bytes).
    // -1 marks "not yet set"; each counter is written by its worker thread below.
    AtomicLong countParseTicks = new AtomicLong(-1);
    AtomicLong countIndexedNodes = new AtomicLong(-1);
    long tickPoint = BulkLoaderX.DataTick;
    int superTick = BulkLoaderX.DataSuperTick;
    Runnable task1 = () -> {
        ProgressMonitorOutput monitor = ProgressMonitorOutput.create(BulkLoaderX.LOG_Nodes, "Nodes", tickPoint, superTick);
        OutputStream output = IO.ensureBuffered(toSortOutputStream);
        // Counting.
        StreamRDF worker = new NodeHashTmpStream(output);
        ProgressStreamRDF stream = new ProgressStreamRDF(worker, monitor);
        monitor.start();
        String label = monitor.getLabel();
        datafiles.forEach(datafile -> {
            // Show the current file name in the progress output.
            String basename = FileOps.basename(datafile);
            monitor.setLabel(basename);
            stream.start();
            RDFParser.source(datafile).parse(stream);
            stream.finish();
        });
        monitor.finish();
        monitor.setLabel(label);
        // Closing the pipe signals end-of-input to the sort process.
        IO.flush(output);
        IO.close(output);
        long x = monitor.getTime();
        // long x = timer.endTimer();
        long count = monitor.getTicks();
        countParseTicks.set(count);
        double xSec = x / 1000.0;
        double rate = count / xSec;
        FmtLog.info(BulkLoaderX.LOG_Nodes, "%s Parse (nodes): %s seconds : %,d triples/quads %,.0f TPS", BulkLoaderX.StageMarker, Timer.timeStr(x), count, rate);
    };
    // [BULK] XXX AsyncParser.asyncParse(files, output)
    Thread thread1 = async(task1, "AsyncParser");
    // Step3: build node table.
    Runnable task3 = () -> {
        Timer timer = new Timer();
        // Don't start timer until sort send something
        // Process stream are already buffered.
        InputStream input = IO.ensureBuffered(fromSortInputStream);
        FileSet fileSet = new FileSet(dsgtdb.getLocation(), Names.nodeTableBaseName);
        BufferChannel blkState = FileFactory.createBufferChannel(fileSet, Names.extBptState);
        long idxTickPoint = BulkLoaderX.DataTick;
        int idxSuperTick = BulkLoaderX.DataSuperTick;
        ProgressMonitorOutput monitor = ProgressMonitorOutput.create(BulkLoaderX.LOG_Terms, "Index", idxTickPoint, idxSuperTick);
        // Library of tools!
        dsg.executeWrite(() -> {
            BinaryDataFile objectFile = nodeTable.getData();
            // Parse the sorted output lines back into index records.
            Iterator<Record> rIter = records(BulkLoaderX.LOG_Terms, input, objectFile);
            rIter = new ProgressIterator<>(rIter, monitor);
            // Record of (hash, nodeId)
            BPlusTree bpt1 = (BPlusTree) (nodeTable.getIndex());
            BPlusTreeParams bptParams = bpt1.getParams();
            RecordFactory factory = new RecordFactory(SystemTDB.LenNodeHash, NodeId.SIZE);
            // Wait until something has been received from the sort step
            rIter.hasNext();
            monitor.start();
            // .. then start the timer. It is closed after the transaction finishes.
            timer.startTimer();
            // Bulk-pack the already-sorted records into the B+tree.
            BPlusTree bpt2 = BPlusTreeRewriter.packIntoBPlusTree(rIter, bptParams, factory, blkState, bpt1.getNodeManager().getBlockMgr(), bpt1.getRecordsMgr().getBlockMgr());
            bpt2.sync();
            bpt1.sync();
            objectFile.sync();
            monitor.finish();
        });
        blkState.sync();
        IO.close(input);
        long x = timer.endTimer();
        long count = monitor.getTicks();
        countIndexedNodes.set(count);
        String rateStr = BulkLoaderX.rateStr(count, x);
        FmtLog.info(BulkLoaderX.LOG_Terms, "%s Index terms: %s seconds : %,d indexed RDF terms : %s PerSecond", BulkLoaderX.StageMarker, Timer.timeStr(x), count, rateStr);
    };
    Thread thread3 = async(task3, "AsyncBuild");
    try {
        // Wait for the external sort to terminate; the worker threads are
        // joined afterwards (below).
        int exitCode = procSort.waitFor();
        if (exitCode != 0) {
            String msg = IO.readWholeFileAsUTF8(procSort.getErrorStream());
            String logMsg = String.format("Sort RC = %d : Error: %s", exitCode, msg);
            Log.error(BulkLoaderX.LOG_Terms, logMsg);
            // ** Exit process
            System.exit(exitCode);
        } else
            BulkLoaderX.LOG_Terms.info("Sort finished");
        // I/O Stream toSortOutputStream and fromSortInputStream closed by
        // their users - step 1 and step 3.
    } catch (InterruptedException e) {
        BulkLoaderX.LOG_Nodes.error("Failed to cleanly wait-for the subprocess");
        throw new RuntimeException(e);
    }
    BulkLoaderX.waitFor(thread1);
    BulkLoaderX.waitFor(thread3);
    return Pair.create(countParseTicks.get(), countIndexedNodes.get());
}
Example usage of org.apache.jena.dboe.base.record.Record in the Apache Jena project:
the accessIndex method of the NodeTableNative class.
/**
 * Look up the NodeId for a node via its hash, optionally allocating and
 * indexing a new entry when absent.
 *
 * @param node   the node to look up
 * @param create whether to write and index the node if it is not present
 * @return the node's id, or {@code NodeId.NodeDoesNotExist} when absent
 *         and {@code create} is false
 */
protected final NodeId accessIndex(Node node, boolean create) {
    Hash h = new Hash(nodeHashToId.getRecordFactory().keyLength());
    NodeLib.setHash(h, node);
    byte[] keyBytes = h.getBytes();
    // Key-only record; the value part is filled in later on insert.
    Record keyRecord = nodeHashToId.getRecordFactory().create(keyBytes);
    // Pair to readNodeFromTable.
    synchronized (this) {
        // Full (key, value) record, or null when not present.
        Record found = nodeHashToId.find(keyRecord);
        if (found != null)
            // Already indexed - decode the stored NodeId.
            return NodeIdFactory.get(found.getValue(), 0);
        // Not found.
        if (!create)
            return NodeId.NodeDoesNotExist;
        // Write the node to the data table, which allocates its id.
        syncNeeded = true;
        NodeId nodeId = writeNodeToTable(node);
        // keyRecord.value := id bytes
        NodeIdFactory.set(nodeId, keyRecord.getValue(), 0);
        // Put in index - may appear because of concurrency
        if (!nodeHashToId.insert(keyRecord))
            throw new TDBException("NodeTableBase::nodeToId - record mysteriously appeared");
        return nodeId;
    }
}
Example usage of org.apache.jena.dboe.base.record.Record in the Apache Jena project:
the next method of the RecordsFromInput class.
@Override
public Record next() {
    // hasNext() stages the next record into 'slot' (or reports exhaustion).
    if (!hasNext())
        throw new NoSuchElementException();
    // Hand over the staged record and clear the slot for the next call.
    Record result = slot;
    slot = null;
    return result;
}
Example usage of org.apache.jena.dboe.base.record.Record in the Apache Jena project:
the hasNext method of the RecordsFromInput class.
/**
 * Stage the next record into {@code slot}, refilling the input buffer as
 * needed. Each row is parsed as {@code itemsPerRow} 16-hex-digit longs,
 * each followed by one separator/newline character.
 *
 * @return true if a record is available via {@code next()}
 */
@Override
public boolean hasNext() {
    if (finished)
        return false;
    // A record is already staged and not yet consumed.
    if (slot != null)
        return true;
    // Buffer unread (idx == -1) or fully consumed: refill from the input.
    if (idx == -1 || idx == buffer.length) {
        len = fill();
        if (len == -1) {
            // End of input - no more records.
            finished = true;
            return false;
        }
        idx = 0;
    }
    // Fill one slot.
    Record record = recordFactory.create();
    // System.out.print("In: ");
    for (int i = 0; i < itemsPerRow; i++) {
        // 16 hex characters encode one long.
        long x = Hex.getLong(buffer, idx);
        idx += 16;
        // Separator or end-of-line.
        idx++;
        // Optional column remapping into the record key layout.
        int j = (colMap == null) ? i : colMap.putSlotIdx(i);
        int recordOffset = j * SysDB.SizeOfLong;
        Bytes.setLong(x, record.getKey(), recordOffset);
        // System.out.printf("%016X ", x);
    }
    // Buffer all processed.
    // idx = -1 forces a refill on the next call.
    if (idx >= len)
        idx = -1;
    slot = record;
    return true;
}
Aggregations