Use of com.baidu.hugegraph.computer.core.store.file.hgkvfile.HgkvDir in project hugegraph-computer by hugegraph.
The class HgkvDirTest, method testHgkvDirBuilder.
@Test
public void testHgkvDirBuilder() throws IOException {
    // The data are (key, value) pairs and the keys must be ordered
    List<Integer> data = ImmutableList.of(2, 3, 2, 1, 5, 2, 5, 9, 6, 2);
    List<KvEntry> kvEntries = StoreTestUtil.kvEntriesFromMap(data);
    String path = StoreTestUtil.availablePathById("1");
    try (KvEntryFileWriter builder = new HgkvDirBuilderImpl(CONFIG, path)) {
        for (KvEntry entry : kvEntries) {
            builder.write(entry);
        }
        builder.finish();

        // Open the file and verify that the footer is as expected
        HgkvDir dir = HgkvDirImpl.open(path);
        Assert.assertEquals(HgkvFileImpl.MAGIC, dir.magic());
        String version = HgkvDirImpl.MAJOR_VERSION + "." +
                         HgkvDirImpl.MINOR_VERSION;
        Assert.assertEquals(version, dir.version());
        Assert.assertEquals(5, dir.numEntries());
        int maxKey = StoreTestUtil.byteArrayToInt(dir.max());
        Assert.assertEquals(6, maxKey);
        int minKey = StoreTestUtil.byteArrayToInt(dir.min());
        Assert.assertEquals(2, minKey);
    } finally {
        FileUtils.deleteQuietly(new File(path));
    }
}
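For reference, the flat integer list above is consumed in (key, value) pairs, which is why ten integers yield five entries with min key 2 and max key 6. The following stand-alone sketch (not part of the test; the pairing convention of StoreTestUtil.kvEntriesFromMap is inferred from the assertions) shows that interpretation:

import java.util.ArrayList;
import java.util.List;

public class KvPairSketch {
    public static void main(String[] args) {
        List<Integer> data = List.of(2, 3, 2, 1, 5, 2, 5, 9, 6, 2);
        List<int[]> pairs = new ArrayList<>();
        for (int i = 0; i < data.size(); i += 2) {
            pairs.add(new int[]{data.get(i), data.get(i + 1)});
        }
        // Prints 2->3, 2->1, 5->2, 5->9, 6->2: 5 entries, min key 2, max key 6
        pairs.forEach(p -> System.out.println(p[0] + "->" + p[1]));
    }
}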
Use of com.baidu.hugegraph.computer.core.store.file.hgkvfile.HgkvDir in project hugegraph-computer by hugegraph.
The class SortLargeDataTest, method sumOfEntryValue.
private static long sumOfEntryValue(Sorter sorter, List<String> files)
                                    throws Exception {
    long entrySize = 0L;
    for (String file : files) {
        HgkvDir dir = HgkvDirImpl.open(file);
        entrySize += dir.numEntries();
    }
    LOG.info("Final kvEntry size: {}", entrySize);

    try (PeekableIterator<KvEntry> iterator = sorter.iterator(files, false)) {
        long result = 0L;
        while (iterator.hasNext()) {
            KvEntry next = iterator.next();
            result += StoreTestUtil.dataFromPointer(next.value());
        }
        return result;
    }
}
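The iterator returned by sorter.iterator is closed via try-with-resources, presumably because it keeps the merged files open while streaming entries. A minimal stand-alone sketch of that consume-and-sum pattern, with illustrative names only:

import java.util.Iterator;
import java.util.List;

public class ClosingSumSketch {

    interface CloseableIterator<T> extends Iterator<T>, AutoCloseable {
        @Override
        void close();
    }

    // Hypothetical stand-in for sorter.iterator(files, false)
    static CloseableIterator<Long> openMerged(List<Long> values) {
        Iterator<Long> it = values.iterator();
        return new CloseableIterator<Long>() {
            public boolean hasNext() { return it.hasNext(); }
            public Long next() { return it.next(); }
            public void close() { /* release underlying file handles here */ }
        };
    }

    public static void main(String[] args) {
        long result = 0L;
        try (CloseableIterator<Long> it = openMerged(List.of(3L, 1L, 2L, 9L, 2L))) {
            while (it.hasNext()) {
                result += it.next();
            }
        }
        System.out.println(result); // 17
    }
}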
Use of com.baidu.hugegraph.computer.core.store.file.hgkvfile.HgkvDir in project hugegraph-computer by hugegraph.
The class SorterTest, method testSortSubKvBuffers.
@Test
public void testSortSubKvBuffers() throws Exception {
    Config config = UnitTestBase.updateWithRequiredOptions(
        ComputerOptions.INPUT_MAX_EDGES_IN_ONE_VERTEX, "2",
        ComputerOptions.TRANSPORT_RECV_FILE_MODE, "false"
    );
    int flushThreshold = config.get(ComputerOptions.INPUT_MAX_EDGES_IN_ONE_VERTEX);

    BytesInput i1 = this.sortedSubKvBuffer(config);
    BytesInput i2 = this.sortedSubKvBuffer(config);
    BytesInput i3 = this.sortedSubKvBuffer(config);
    List<RandomAccessInput> buffers = ImmutableList.of(i1, i2, i3);

    Sorter sorter = SorterTestUtil.createSorter(config);
    PointerCombiner combiner = SorterTestUtil.createPointerCombiner(
                               IntValue::new, new IntValueSumCombiner());
    OuterSortFlusher flusher = new CombineSubKvOuterSortFlusher(combiner,
                                                                flushThreshold);
    flusher.sources(buffers.size());

    String outputFile = StoreTestUtil.availablePathById("1");
    sorter.mergeBuffers(buffers, flusher, outputFile, true);

    /*
     * Assert result
     * key 1 subKv (3, 3), (5, 3)
     * key 2 subKv (5, 3), (8, 6)
     * key 2 subKv (9, 3)
     * key 3 subKv (2, 6), (3, 3)
     * key 3 subKv (4, 3)
     */
    ImmutableList<String> outputs = ImmutableList.of(outputFile);
    Iterator<KvEntry> kvIter = sorter.iterator(outputs, true);
    SorterTestUtil.assertSubKvByKv(kvIter.next(), 1, 3, 3, 5, 3);
    SorterTestUtil.assertSubKvByKv(kvIter.next(), 2, 5, 3, 8, 6);
    SorterTestUtil.assertSubKvByKv(kvIter.next(), 2, 9, 3);
    SorterTestUtil.assertSubKvByKv(kvIter.next(), 3, 2, 6, 3, 3);
    SorterTestUtil.assertSubKvByKv(kvIter.next(), 3, 4, 3);

    // Assert file properties
    HgkvDir dir = HgkvDirImpl.open(outputFile);
    Assert.assertEquals(5, dir.numEntries());
    Assert.assertEquals(8, dir.numSubEntries());
}
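Two effects interact in the expected output above: values of equal subKeys from the three buffers are combined by summation (IntValueSumCombiner), and each written entry holds at most flushThreshold subKvs, so a key with three distinct subKeys spans two entries (key 2, for example). A stand-alone sketch of that splitting step, reusing the key-2 data from the comment:

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

public class SubKvFlushSketch {
    public static void main(String[] args) {
        int flushThreshold = 2;
        // Combined subKvs for one key, e.g. key 2: subKeys 5, 8, 9
        Map<Integer, Integer> combined = new TreeMap<>(Map.of(5, 3, 8, 6, 9, 3));
        // Split into output entries of at most flushThreshold subKvs each
        List<List<Map.Entry<Integer, Integer>>> entries = new ArrayList<>();
        List<Map.Entry<Integer, Integer>> chunk = new ArrayList<>();
        for (Map.Entry<Integer, Integer> subKv : combined.entrySet()) {
            chunk.add(subKv);
            if (chunk.size() == flushThreshold) {
                entries.add(chunk);
                chunk = new ArrayList<>();
            }
        }
        if (!chunk.isEmpty()) {
            entries.add(chunk);
        }
        // Prints [[5=3, 8=6], [9=3]]: matches "key 2 subKv (5, 3), (8, 6)" and "key 2 subKv (9, 3)"
        System.out.println(entries);
    }
}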
Use of com.baidu.hugegraph.computer.core.store.file.hgkvfile.HgkvDir in project hugegraph-computer by hugegraph.
The class DisperseEvenlySelector, method selectedByHgkvFile.
@Override
public List<SelectedFiles> selectedByHgkvFile(List<String> inputs,
                                              List<String> outputs)
                                              throws IOException {
    E.checkArgument(inputs.size() >= outputs.size(),
                    "The inputs size of InputFilesSelector must be >= " +
                    "outputs size, but got %s inputs < %s outputs",
                    inputs.size(), outputs.size());

    List<HgkvDir> inputDirs = new ArrayList<>();
    for (String input : inputs) {
        inputDirs.add(HgkvDirImpl.open(input));
    }
    /*
     * Sorting by entries number in descending order first
     * makes the disperse result more uniform
     */
    inputDirs = inputDirs.stream()
                         .sorted(Comparator.comparingLong(HgkvFile::numEntries)
                                           .reversed())
                         .collect(Collectors.toList());

    // Init heap data
    List<Node> heapNodes = new ArrayList<>(outputs.size());
    int i = 0;
    for (; i < outputs.size(); i++) {
        HgkvDir inputDir = inputDirs.get(i);
        Node heapNode = new Node(inputDir.numEntries(),
                                 Lists.newArrayList(inputDir.path()),
                                 outputs.get(i));
        heapNodes.add(heapNode);
    }
    Heap<Node> heap = new Heap<>(heapNodes, Comparator.comparingLong(Node::num));

    // Distribute each remaining input to the output with the fewest entries so far
    for (; i < inputDirs.size(); i++) {
        HgkvDir inputDir = inputDirs.get(i);
        Node topNode = heap.top();
        topNode.addInput(inputDir.path());
        topNode.addNum(inputDir.numEntries());
        heap.adjust(0);
    }

    List<SelectedFiles> results = new ArrayList<>();
    for (Node node : heapNodes) {
        SelectedFiles result = new DefaultSelectedFiles(node.output(),
                                                        node.inputs());
        results.add(result);
    }
    return results;
}
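The method implements a greedy balancing scheme: inputs are sorted by entry count in descending order, one input seeds each output, and every remaining input goes to the output whose running total is currently smallest (the top of the min-heap). A self-contained sketch with illustrative sizes, using java.util.PriorityQueue in place of the project's Heap class:

import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.PriorityQueue;

public class DisperseSketch {
    public static void main(String[] args) {
        long[] entryCounts = {9, 7, 6, 5, 3, 2};   // entry counts, sorted descending
        int outputs = 2;
        // Min-heap of [running total, output index]
        PriorityQueue<long[]> heap =
                new PriorityQueue<>(Comparator.comparingLong((long[] n) -> n[0]));
        List<List<Long>> assigned = new ArrayList<>();
        int i = 0;
        // Seed one input per output
        for (; i < outputs; i++) {
            heap.add(new long[]{entryCounts[i], i});
            assigned.add(new ArrayList<>(List.of(entryCounts[i])));
        }
        // Each remaining input goes to the currently least-loaded output
        for (; i < entryCounts.length; i++) {
            long[] top = heap.poll();
            top[0] += entryCounts[i];
            assigned.get((int) top[1]).add(entryCounts[i]);
            heap.add(top);
        }
        // Prints [[9, 5, 2], [7, 6, 3]]: totals 16 and 16
        System.out.println(assigned);
    }
}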