Use of org.apache.jena.dboe.base.file.BufferChannel in project jena by apache.
The class TestStateMgrData, method state_data_02.
@Test
public void state_data_02() {
    BufferChannel x = FileFactory.createBufferChannelMem();
    long[] data = { 2, 3 };
    StateMgrDataIdx sm = new StateMgrDataIdx(x, data);
    sm.writeState();
    sm.set(1, 99L);
    sm.writeState();
    ByteBuffer bb = ByteBuffer.allocate(2 * Long.BYTES);
    x.read(bb, 0);
    assertEquals(99L, bb.getLong(Long.BYTES));
}
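For orientation, here is a minimal sketch of the raw positional read/write API that writeState builds on. It uses only the BufferChannel and FileFactory calls (from org.apache.jena.dboe.base.file) already seen in these tests:

// Sketch: write one long sequentially, read it back by byte offset.
BufferChannel ch = FileFactory.createBufferChannelMem();
ByteBuffer out = ByteBuffer.allocate(Long.BYTES);
out.putLong(0, 42L);
out.rewind();
ch.write(out);                  // sequential write at the current position
ch.sync();                      // flush, as a file-backed channel would need
ByteBuffer in = ByteBuffer.allocate(Long.BYTES);
ch.read(in, 0);                 // positional read from byte offset 0
assertEquals(42L, in.getLong(0));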
Use of org.apache.jena.dboe.base.file.BufferChannel in project jena by apache.
The class TestStateMgrData, method state_data_03.
@Test
public void state_data_03() {
    BufferChannel x = FileFactory.createBufferChannelMem();
    {
        ByteBuffer bb = ByteBuffer.allocate(Long.BYTES);
        bb.putLong(0, -8888);
        bb.rewind();
        x.write(bb);
        bb.putLong(0, -1234);
        bb.rewind();
        x.write(bb);
        x.sync();
    }
    long[] data = { 2, 3 };
    StateMgrDataIdx sm = new StateMgrDataIdx(x, data);
    assertEquals(-8888L, sm.get(0));
    assertEquals(-1234L, sm.get(1));
}
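Taken together, the two tests imply a round trip: state written through one StateMgrDataIdx is picked up by a second manager constructed over the same channel. A sketch, assuming (as state_data_03 demonstrates) that the constructor reads existing channel content in preference to the supplied initial data:

BufferChannel ch = FileFactory.createBufferChannelMem();
StateMgrDataIdx sm1 = new StateMgrDataIdx(ch, new long[] { 2, 3 });
sm1.set(0, 7L);
sm1.writeState();
// A second manager over the same channel sees the persisted values,
// not its own initial data.
StateMgrDataIdx sm2 = new StateMgrDataIdx(ch, new long[] { 0, 0 });
assertEquals(7L, sm2.get(0));
assertEquals(3L, sm2.get(1));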
Use of org.apache.jena.dboe.base.file.BufferChannel in project jena by apache.
The class ProcBuildNodeTableX, method exec2.
/**
 * Pair<triples, indexed nodes>
 * @param sortThreads
 */
// [BULK] Output, not return.
private static Pair<Long, Long> exec2(String DB, XLoaderFiles loaderFiles, int sortThreads, String sortNodeTableArgs, List<String> datafiles) {
    // Threads - 1 parser, 1 builder, 2 sort.
    // Steps:
    //   1 - parse and pipe terms to sort
    //   2 - sort
    //   3 - build node table from the unique sort output
    IRIProvider provider = SystemIRIx.getProvider();
    // SystemIRIx.setProvider(new IRIProviderAny());
    DatasetGraph dsg = DatabaseMgr.connectDatasetGraph(DB);
    DatasetGraphTDB dsgtdb = TDBInternal.getDatasetGraphTDB(dsg);
    NodeTable nt = dsgtdb.getTripleTable().getNodeTupleTable().getNodeTable();
    NodeTableTRDF nodeTable = (NodeTableTRDF) nt.baseNodeTable();
    OutputStream toSortOutputStream;
    InputStream fromSortInputStream;
    if ( sortThreads <= 0 )
        sortThreads = 2;
    // ** Step 2: the sort.
    Process procSort;
    try {
        // LOG.info("Step : external sort");
        // Mutable list.
        List<String> sortCmd = new ArrayList<>(Arrays.asList(
            "sort",
            "--temporary-directory=" + loaderFiles.TMPDIR,
            "--buffer-size=50%",
            "--parallel=" + sortThreads,
            "--unique",
            "--key=1,1"));
        if ( BulkLoaderX.CompressSortNodeTableFiles )
            sortCmd.add("--compress-program=" + BulkLoaderX.gzipProgram());
        // if ( sortNodeTableArgs != null ) {}
        ProcessBuilder pb2 = new ProcessBuilder(sortCmd);
        pb2.environment().put("LC_ALL", "C");
        procSort = pb2.start();
        // To the process. Let the writer close it.
        toSortOutputStream = procSort.getOutputStream();
        // From the process to the tree builder. Let the reader side close it.
        fromSortInputStream = procSort.getInputStream();
        // // Debug the sort process.
        // InputStream fromSortErrorStream = procSort.getErrorStream();
        // IOUtils.copy(fromSortErrorStream, System.err);
    } catch (Exception ex) {
        throw new RuntimeException(ex);
    }
    // ** Step 1: write the intermediate file (hash, thrift bytes).
    AtomicLong countParseTicks = new AtomicLong(-1);
    AtomicLong countIndexedNodes = new AtomicLong(-1);
    long tickPoint = BulkLoaderX.DataTick;
    int superTick = BulkLoaderX.DataSuperTick;
    Runnable task1 = () -> {
        ProgressMonitorOutput monitor = ProgressMonitorOutput.create(BulkLoaderX.LOG_Nodes, "Nodes", tickPoint, superTick);
        OutputStream output = IO.ensureBuffered(toSortOutputStream);
        // Counting.
        StreamRDF worker = new NodeHashTmpStream(output);
        ProgressStreamRDF stream = new ProgressStreamRDF(worker, monitor);
        monitor.start();
        String label = monitor.getLabel();
        datafiles.forEach(datafile -> {
            String basename = FileOps.basename(datafile);
            monitor.setLabel(basename);
            stream.start();
            RDFParser.source(datafile).parse(stream);
            stream.finish();
        });
        monitor.finish();
        monitor.setLabel(label);
        IO.flush(output);
        IO.close(output);
        long x = monitor.getTime();
        // long x = timer.endTimer();
        long count = monitor.getTicks();
        countParseTicks.set(count);
        double xSec = x / 1000.0;
        double rate = count / xSec;
        FmtLog.info(BulkLoaderX.LOG_Nodes, "%s Parse (nodes): %s seconds : %,d triples/quads %,.0f TPS",
                    BulkLoaderX.StageMarker, Timer.timeStr(x), count, rate);
    };
    // [BULK] XXX AsyncParser.asyncParse(files, output)
    Thread thread1 = async(task1, "AsyncParser");
    // ** Step 3: build the node table.
    Runnable task3 = () -> {
        Timer timer = new Timer();
        // Don't start the timer until the sort sends something.
        // Process streams are already buffered.
        InputStream input = IO.ensureBuffered(fromSortInputStream);
        FileSet fileSet = new FileSet(dsgtdb.getLocation(), Names.nodeTableBaseName);
        BufferChannel blkState = FileFactory.createBufferChannel(fileSet, Names.extBptState);
        long idxTickPoint = BulkLoaderX.DataTick;
        int idxSuperTick = BulkLoaderX.DataSuperTick;
        ProgressMonitorOutput monitor = ProgressMonitorOutput.create(BulkLoaderX.LOG_Terms, "Index", idxTickPoint, idxSuperTick);
        // Library of tools!
        dsg.executeWrite(() -> {
            BinaryDataFile objectFile = nodeTable.getData();
            Iterator<Record> rIter = records(BulkLoaderX.LOG_Terms, input, objectFile);
            rIter = new ProgressIterator<>(rIter, monitor);
            // Record of (hash, NodeId).
            BPlusTree bpt1 = (BPlusTree) (nodeTable.getIndex());
            BPlusTreeParams bptParams = bpt1.getParams();
            RecordFactory factory = new RecordFactory(SystemTDB.LenNodeHash, NodeId.SIZE);
            // Wait until something has been received from the sort step ...
            rIter.hasNext();
            monitor.start();
            // ... then start the timer. It is stopped after the transaction finishes.
            timer.startTimer();
            BPlusTree bpt2 = BPlusTreeRewriter.packIntoBPlusTree(rIter, bptParams, factory, blkState,
                                                                 bpt1.getNodeManager().getBlockMgr(),
                                                                 bpt1.getRecordsMgr().getBlockMgr());
            bpt2.sync();
            bpt1.sync();
            objectFile.sync();
            monitor.finish();
        });
        blkState.sync();
        IO.close(input);
        long x = timer.endTimer();
        long count = monitor.getTicks();
        countIndexedNodes.set(count);
        String rateStr = BulkLoaderX.rateStr(count, x);
        FmtLog.info(BulkLoaderX.LOG_Terms, "%s Index terms: %s seconds : %,d indexed RDF terms : %s PerSecond",
                    BulkLoaderX.StageMarker, Timer.timeStr(x), count, rateStr);
    };
    Thread thread3 = async(task3, "AsyncBuild");
    try {
        int exitCode = procSort.waitFor();
        if ( exitCode != 0 ) {
            String msg = IO.readWholeFileAsUTF8(procSort.getErrorStream());
            String logMsg = String.format("Sort RC = %d : Error: %s", exitCode, msg);
            Log.error(BulkLoaderX.LOG_Terms, logMsg);
            // ** Exit the process.
            System.exit(exitCode);
        } else
            BulkLoaderX.LOG_Terms.info("Sort finished");
        // I/O streams toSortOutputStream and fromSortInputStream are closed
        // by their users - step 1 and step 3.
    } catch (InterruptedException e) {
        BulkLoaderX.LOG_Nodes.error("Failed to cleanly wait-for the subprocess");
        throw new RuntimeException(e);
    }
    BulkLoaderX.waitFor(thread1);
    BulkLoaderX.waitFor(thread3);
    return Pair.create(countParseTicks.get(), countIndexedNodes.get());
}
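The subprocess plumbing in exec2 is a general pattern: one thread streams data into an external sort --unique over stdin while another consumes the de-duplicated output from stdout. A self-contained sketch using only the JDK (the Jena-specific record encoding is omitted; sort(1) must be on the PATH):

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.UncheckedIOException;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.util.List;

public class ExternalSortSketch {
    public static void main(String[] args) throws Exception {
        // Same flags exec2 relies on: de-duplication, first-field key.
        ProcessBuilder pb = new ProcessBuilder("sort", "--unique", "--key=1,1");
        pb.environment().put("LC_ALL", "C");   // byte-wise collation, as exec2 sets
        Process sort = pb.start();
        // Writer thread: feed lines to the sort's stdin, then close it so
        // the subprocess sees end-of-input and can finish.
        Thread writer = new Thread(() -> {
            try (Writer w = new OutputStreamWriter(sort.getOutputStream(), StandardCharsets.UTF_8)) {
                for (String line : List.of("b 2", "a 1", "b 2"))
                    w.write(line + "\n");
            } catch (IOException e) {
                throw new UncheckedIOException(e);
            }
        });
        writer.start();
        // Reader side: consume the sorted, de-duplicated output.
        try (BufferedReader r = new BufferedReader(
                new InputStreamReader(sort.getInputStream(), StandardCharsets.UTF_8))) {
            r.lines().forEach(System.out::println);   // prints "a 1" then "b 2"
        }
        writer.join();
        sort.waitFor();
    }
}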
Use of org.apache.jena.dboe.base.file.BufferChannel in project jena by apache.
The class TestStateMgrData, method state_data_01.
@Test
public void state_data_01() {
    BufferChannel x = FileFactory.createBufferChannelMem();
    long[] data = { 2, 3 };
    StateMgrDataIdx sm = new StateMgrDataIdx(x, data);
    assertEquals(data.length, sm.getData().length);
    assertEquals(2L, sm.get(0));
    assertEquals(3L, sm.get(1));
    // Test initial state written
    ByteBuffer bb = ByteBuffer.allocate(2 * Long.BYTES);
    x.read(bb, 0);
    assertEquals(2L, bb.getLong(0));
    assertEquals(3L, bb.getLong(Long.BYTES));
}
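The assertions pin down the persisted layout: slot i of the data array lives at byte offset i * Long.BYTES in the channel. A hypothetical helper (not part of Jena) built from the same BufferChannel calls these tests use:

// Hypothetical helper: read back slot i of a persisted state.
static long readSlot(BufferChannel ch, int i) {
    ByteBuffer bb = ByteBuffer.allocate(Long.BYTES);
    ch.read(bb, (long) i * Long.BYTES);   // positional read of one long
    return bb.getLong(0);
}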
Use of org.apache.jena.dboe.base.file.BufferChannel in project jena by apache.
The class ProcBuildIndexX, method indexBuilder.
private static long indexBuilder(DatasetGraph dsg, InputStream input, String indexName) {
    long tickPoint = BulkLoaderX.DataTick;
    int superTick = BulkLoaderX.DataSuperTick;
    // Location of storage, not the DB.
    DatasetGraphTDB dsgtdb = TDBInternal.getDatasetGraphTDB(dsg);
    Location location = dsgtdb.getLocation();
    int keyLength = SystemTDB.SizeOfNodeId * indexName.length();
    int valueLength = 0;
    // The name is the order.
    String primary = indexName;
    String primaryOrder;
    int dftKeyLength;
    int dftValueLength;
    int tupleLength = indexName.length();
    TupleIndex index;
    if ( tupleLength == 3 ) {
        primaryOrder = Names.primaryIndexTriples;
        dftKeyLength = SystemTDB.LenIndexTripleRecord;
        dftValueLength = 0;
        // Find the index.
        index = findIndex(dsgtdb.getTripleTable().getNodeTupleTable().getTupleTable().getIndexes(), indexName);
    } else if ( tupleLength == 4 ) {
        primaryOrder = Names.primaryIndexQuads;
        dftKeyLength = SystemTDB.LenIndexQuadRecord;
        dftValueLength = 0;
        index = findIndex(dsgtdb.getQuadTable().getNodeTupleTable().getTupleTable().getIndexes(), indexName);
    } else {
        throw new TDBException("Index name: " + indexName);
    }
    TupleMap colMap = TupleMap.create(primaryOrder, indexName);
    int readCacheSize = 10;
    int writeCacheSize = 100;
    int blockSize = SystemTDB.BlockSize;
    RecordFactory recordFactory = new RecordFactory(dftKeyLength, dftValueLength);
    int order = BPlusTreeParams.calcOrder(blockSize, recordFactory);
    BPlusTreeParams bptParams = new BPlusTreeParams(order, recordFactory);
    int blockSizeNodes = blockSize;
    int blockSizeRecords = blockSize;
    FileSet destination = new FileSet(location, indexName);
    BufferChannel blkState = FileFactory.createBufferChannel(destination, Names.extBptState);
    BlockMgr blkMgrNodes = BlockMgrFactory.create(destination, Names.extBptTree, blockSizeNodes, readCacheSize, writeCacheSize);
    BlockMgr blkMgrRecords = BlockMgrFactory.create(destination, Names.extBptRecords, blockSizeRecords, readCacheSize, writeCacheSize);
    int rowBlock = 1000;
    Iterator<Record> iter = new RecordsFromInput(input, tupleLength, colMap, rowBlock);
    // ProgressMonitor.
    ProgressMonitor monitor = ProgressMonitorOutput.create(BulkLoaderX.LOG_Index, indexName, tickPoint, superTick);
    ProgressIterator<Record> iter2 = new ProgressIterator<>(iter, monitor);
    monitor.start();
    BPlusTree bpt2 = BPlusTreeRewriter.packIntoBPlusTree(iter2, bptParams, recordFactory, blkState, blkMgrNodes, blkMgrRecords);
    bpt2.close();
    monitor.finish();
    // [BULK] End stage.
    long count = monitor.getTicks();
    return count;
}