Use of java.util.function.IntFunction in project neo4j by neo4j.
From the class ServerGroupsIT, method shouldUpdateGroupsOnStart.
@Test
public void shouldUpdateGroupsOnStart() throws Exception {
    AtomicReference<String> suffix = new AtomicReference<>("before");
    List<List<String>> expected;
    Map<String, IntFunction<String>> instanceCoreParams = new HashMap<>();
    instanceCoreParams.put(CausalClusteringSettings.server_groups.name(), (id) -> String.join(", ", makeCoreGroups(suffix.get(), id)));
    Map<String, IntFunction<String>> instanceReplicaParams = new HashMap<>();
    instanceReplicaParams.put(CausalClusteringSettings.server_groups.name(), (id) -> String.join(", ", makeReplicaGroups(suffix.get(), id)));
    int nServers = 3;
    cluster = new Cluster(testDir.directory("cluster"), nServers, nServers, new HazelcastDiscoveryServiceFactory(), emptyMap(), instanceCoreParams, emptyMap(), instanceReplicaParams, Standard.LATEST_NAME);
    // when
    cluster.start();
    // then
    expected = new ArrayList<>();
    for (CoreClusterMember core : cluster.coreMembers()) {
        expected.add(makeCoreGroups(suffix.get(), core.serverId()));
        expected.add(makeReplicaGroups(suffix.get(), core.serverId()));
    }
    for (CoreClusterMember core : cluster.coreMembers()) {
        assertEventually(core + " should have groups", () -> getServerGroups(core.database()), new GroupsMatcher(expected), 30, SECONDS);
    }
    // when
    expected.remove(makeCoreGroups(suffix.get(), 1));
    expected.remove(makeReplicaGroups(suffix.get(), 2));
    cluster.getCoreMemberById(1).shutdown();
    cluster.getReadReplicaById(2).shutdown();
    // should update groups of restarted servers
    suffix.set("after");
    cluster.addCoreMemberWithId(1).start();
    cluster.addReadReplicaWithId(2).start();
    expected.add(makeCoreGroups(suffix.get(), 1));
    expected.add(makeReplicaGroups(suffix.get(), 2));
    // then
    for (CoreClusterMember core : cluster.coreMembers()) {
        assertEventually(core + " should have groups", () -> getServerGroups(core.database()), new GroupsMatcher(expected), 30, SECONDS);
    }
}
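The IntFunction-specific part of this test is that each per-instance parameter map stores an IntFunction<String> per setting, so every cluster member derives its own server_groups value from its server id. Below is a minimal, self-contained sketch of that pattern; the setting key, group names, and class name are hypothetical and not the neo4j API.

import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.function.IntFunction;

// Minimal sketch (hypothetical setting key and group names): per-instance settings are held as
// IntFunction<String> so each cluster member can resolve its own value from its server id.
public class PerInstanceSettingsSketch {

    public static void main(String[] args) {
        Map<String, IntFunction<String>> perInstance = new HashMap<>();
        perInstance.put("server_groups", id -> String.join(",", Arrays.asList("core", "core" + id)));

        // Resolve the concrete settings for member 2 by applying every function to that id.
        int memberId = 2;
        Map<String, String> resolved = new HashMap<>();
        perInstance.forEach((key, fn) -> resolved.put(key, fn.apply(memberId)));

        System.out.println(resolved); // {server_groups=core,core2}
    }
}

In the test itself the functions additionally capture the AtomicReference suffix rather than a fixed string, which is why restarting a member after suffix.set("after") is enough to make it advertise different groups.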
Use of java.util.function.IntFunction in project neo4j by neo4j.
From the class BackupStoreCopyInteractionStressTesting, method shouldBehaveCorrectlyUnderStress.
@Test
public void shouldBehaveCorrectlyUnderStress() throws Exception {
    int numberOfCores = parseInt(fromEnv("BACKUP_STORE_COPY_INTERACTION_STRESS_NUMBER_OF_CORES", DEFAULT_NUMBER_OF_CORES));
    int numberOfEdges = parseInt(fromEnv("BACKUP_STORE_COPY_INTERACTION_STRESS_NUMBER_OF_EDGES", DEFAULT_NUMBER_OF_EDGES));
    long durationInMinutes = parseLong(fromEnv("BACKUP_STORE_COPY_INTERACTION_STRESS_DURATION", DEFAULT_DURATION_IN_MINUTES));
    String workingDirectory = fromEnv("BACKUP_STORE_COPY_INTERACTION_STRESS_WORKING_DIRECTORY", DEFAULT_WORKING_DIR);
    int baseCoreBackupPort = parseInt(fromEnv("BACKUP_STORE_COPY_INTERACTION_STRESS_BASE_CORE_BACKUP_PORT", DEFAULT_BASE_CORE_BACKUP_PORT));
    int baseEdgeBackupPort = parseInt(fromEnv("BACKUP_STORE_COPY_INTERACTION_STRESS_BASE_EDGE_BACKUP_PORT", DEFAULT_BASE_EDGE_BACKUP_PORT));
    boolean enableIndexes = parseBoolean(fromEnv("BACKUP_STORE_COPY_INTERACTION_STRESS_ENABLE_INDEXES", DEFAULT_ENABLE_INDEXES));
    String txPrune = fromEnv("BACKUP_STORE_COPY_INTERACTION_STRESS_TX_PRUNE", DEFAULT_TX_PRUNE);
    File clusterDirectory = ensureExistsAndEmpty(new File(workingDirectory, "cluster"));
    File backupDirectory = ensureExistsAndEmpty(new File(workingDirectory, "backups"));
    BiFunction<Boolean, Integer, SocketAddress> backupAddress = (isCore, id) -> new AdvertisedSocketAddress("localhost", (isCore ? baseCoreBackupPort : baseEdgeBackupPort) + id);
    Map<String, String> coreParams = enableRaftMessageLogging(configureRaftLogRotationAndPruning(configureTxLogRotationAndPruning(new HashMap<>(), txPrune)));
    Map<String, String> readReplicaParams = configureTxLogRotationAndPruning(new HashMap<>(), txPrune);
    Map<String, IntFunction<String>> instanceCoreParams = configureBackup(new HashMap<>(), id -> backupAddress.apply(true, id));
    Map<String, IntFunction<String>> instanceReadReplicaParams = configureBackup(new HashMap<>(), id -> backupAddress.apply(false, id));
    HazelcastDiscoveryServiceFactory discoveryServiceFactory = new HazelcastDiscoveryServiceFactory();
    Cluster cluster = new Cluster(clusterDirectory, numberOfCores, numberOfEdges, discoveryServiceFactory, coreParams, instanceCoreParams, readReplicaParams, instanceReadReplicaParams, Standard.LATEST_NAME);
    AtomicBoolean stopTheWorld = new AtomicBoolean();
    BooleanSupplier notExpired = untilTimeExpired(durationInMinutes, MINUTES);
    BooleanSupplier keepGoing = () -> !stopTheWorld.get() && notExpired.getAsBoolean();
    Runnable onFailure = () -> stopTheWorld.set(true);
    ExecutorService service = Executors.newFixedThreadPool(3);
    try {
        cluster.start();
        if (enableIndexes) {
            Workload.setupIndexes(cluster);
        }
        Future<Throwable> workload = service.submit(new Workload(keepGoing, onFailure, cluster));
        Future<Throwable> startStopWorker = service.submit(new StartStopLoad(fs, pageCache, keepGoing, onFailure, cluster, numberOfCores, numberOfEdges));
        Future<Throwable> backupWorker = service.submit(new BackupLoad(keepGoing, onFailure, cluster, numberOfCores, numberOfEdges, backupDirectory, backupAddress));
        long timeout = durationInMinutes + 5;
        assertNull(Exceptions.stringify(workload.get()), workload.get(timeout, MINUTES));
        assertNull(Exceptions.stringify(startStopWorker.get()), startStopWorker.get(timeout, MINUTES));
        assertNull(Exceptions.stringify(backupWorker.get()), backupWorker.get(timeout, MINUTES));
    } finally {
        cluster.shutdown();
        service.shutdown();
    }
    // let's clean up disk space when everything went well
    FileUtils.deleteRecursively(clusterDirectory);
    FileUtils.deleteRecursively(backupDirectory);
}
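Here the two IntFunction<String> values handed to configureBackup are produced by fixing the boolean argument of the backupAddress BiFunction, so cores and read replicas each get a distinct port range keyed by member id. A small sketch of that partial-application step follows; the port numbers are invented and a plain String stands in for neo4j's AdvertisedSocketAddress.

import java.util.function.BiFunction;
import java.util.function.IntFunction;

// Minimal sketch (hypothetical ports): derive one IntFunction per member role by fixing
// one argument of a BiFunction, as the stress test does for core vs. read-replica backups.
public class BackupAddressSketch {

    public static void main(String[] args) {
        int baseCorePort = 8000;
        int baseReplicaPort = 9000;

        BiFunction<Boolean, Integer, String> backupAddress =
                (isCore, id) -> "localhost:" + ((isCore ? baseCorePort : baseReplicaPort) + id);

        // Partially apply the BiFunction to obtain an IntFunction for each role.
        IntFunction<String> coreAddress = id -> backupAddress.apply(true, id);
        IntFunction<String> replicaAddress = id -> backupAddress.apply(false, id);

        System.out.println(coreAddress.apply(1));    // localhost:8001
        System.out.println(replicaAddress.apply(1)); // localhost:9001
    }
}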
Use of java.util.function.IntFunction in project lucene-solr by apache.
From the class BKDWriter, method build.
/** The array (sized numDims) of PathSlice describes the cell we have currently recursed to.
 *  This method is used when we are merging previously written segments, in the numDims > 1 case. */
private void build(int nodeID, int leafNodeOffset, PathSlice[] slices, LongBitSet ordBitSet, IndexOutput out, byte[] minPackedValue, byte[] maxPackedValue, int[] parentSplits, byte[] splitPackedValues, long[] leafBlockFPs, List<Closeable> toCloseHeroically) throws IOException {
    for (PathSlice slice : slices) {
        assert slice.count == slices[0].count;
    }
    if (numDims == 1 && slices[0].writer instanceof OfflinePointWriter && slices[0].count <= maxPointsSortInHeap) {
        // Special case for 1D, to cutover to heap once we recurse deeply enough:
        slices[0] = switchToHeap(slices[0], toCloseHeroically);
    }
    if (nodeID >= leafNodeOffset) {
        // Leaf node: write block
        // We can write the block in any order so by default we write it sorted by the dimension that has the
        // least number of unique bytes at commonPrefixLengths[dim], which makes compression more efficient
        int sortedDim = 0;
        int sortedDimCardinality = Integer.MAX_VALUE;
        for (int dim = 0; dim < numDims; dim++) {
            if (slices[dim].writer instanceof HeapPointWriter == false) {
                // Adversarial cases can cause this, e.g. very lopsided data, all equal points, such that we started
                // offline, but then kept splitting only in one dimension, and so never had to rewrite into heap writer
                slices[dim] = switchToHeap(slices[dim], toCloseHeroically);
            }
            PathSlice source = slices[dim];
            HeapPointWriter heapSource = (HeapPointWriter) source.writer;
            // Find common prefix by comparing first and last values, already sorted in this dimension:
            heapSource.readPackedValue(Math.toIntExact(source.start), scratch1);
            heapSource.readPackedValue(Math.toIntExact(source.start + source.count - 1), scratch2);
            int offset = dim * bytesPerDim;
            commonPrefixLengths[dim] = bytesPerDim;
            for (int j = 0; j < bytesPerDim; j++) {
                if (scratch1[offset + j] != scratch2[offset + j]) {
                    commonPrefixLengths[dim] = j;
                    break;
                }
            }
            int prefix = commonPrefixLengths[dim];
            if (prefix < bytesPerDim) {
                int cardinality = 1;
                byte previous = scratch1[offset + prefix];
                for (long i = 1; i < source.count; ++i) {
                    heapSource.readPackedValue(Math.toIntExact(source.start + i), scratch2);
                    byte b = scratch2[offset + prefix];
                    assert Byte.toUnsignedInt(previous) <= Byte.toUnsignedInt(b);
                    if (b != previous) {
                        cardinality++;
                        previous = b;
                    }
                }
                assert cardinality <= 256;
                if (cardinality < sortedDimCardinality) {
                    sortedDim = dim;
                    sortedDimCardinality = cardinality;
                }
            }
        }
        PathSlice source = slices[sortedDim];
        // We ensured that maxPointsSortInHeap was >= maxPointsInLeafNode, so we better be in heap at this point:
        HeapPointWriter heapSource = (HeapPointWriter) source.writer;
        // Save the block file pointer:
        leafBlockFPs[nodeID - leafNodeOffset] = out.getFilePointer();
        //System.out.println(" write leaf block @ fp=" + out.getFilePointer());
        // Write docIDs first, as their own chunk, so that at intersect time we can add all docIDs w/o
        // loading the values:
        int count = Math.toIntExact(source.count);
        assert count > 0 : "nodeID=" + nodeID + " leafNodeOffset=" + leafNodeOffset;
        writeLeafBlockDocs(out, heapSource.docIDs, Math.toIntExact(source.start), count);
        // TODO: minor opto: we don't really have to write the actual common prefixes, because BKDReader on recursing can regenerate it for us
        // from the index, much like how terms dict does so from the FST:
        // Write the common prefixes:
        writeCommonPrefixes(out, commonPrefixLengths, scratch1);
        // Write the full values:
        IntFunction<BytesRef> packedValues = new IntFunction<BytesRef>() {
            final BytesRef scratch = new BytesRef();
            {
                scratch.length = packedBytesLength;
            }
            @Override
            public BytesRef apply(int i) {
                heapSource.getPackedValueSlice(Math.toIntExact(source.start + i), scratch);
                return scratch;
            }
        };
        assert valuesInOrderAndBounds(count, sortedDim, minPackedValue, maxPackedValue, packedValues, heapSource.docIDs, Math.toIntExact(source.start));
        writeLeafBlockPackedValues(out, commonPrefixLengths, count, sortedDim, packedValues);
    } else {
        // Inner node: partition/recurse
        int splitDim;
        if (numDims > 1) {
            splitDim = split(minPackedValue, maxPackedValue, parentSplits);
        } else {
            splitDim = 0;
        }
        PathSlice source = slices[splitDim];
        assert nodeID < splitPackedValues.length : "nodeID=" + nodeID + " splitValues.length=" + splitPackedValues.length;
        // How many points will be in the left tree:
        long rightCount = source.count / 2;
        long leftCount = source.count - rightCount;
        byte[] splitValue = markRightTree(rightCount, splitDim, source, ordBitSet);
        int address = nodeID * (1 + bytesPerDim);
        splitPackedValues[address] = (byte) splitDim;
        System.arraycopy(splitValue, 0, splitPackedValues, address + 1, bytesPerDim);
        // Partition all PathSlice that are not the split dim into sorted left and right sets, so we can recurse:
        PathSlice[] leftSlices = new PathSlice[numDims];
        PathSlice[] rightSlices = new PathSlice[numDims];
        byte[] minSplitPackedValue = new byte[packedBytesLength];
        System.arraycopy(minPackedValue, 0, minSplitPackedValue, 0, packedBytesLength);
        byte[] maxSplitPackedValue = new byte[packedBytesLength];
        System.arraycopy(maxPackedValue, 0, maxSplitPackedValue, 0, packedBytesLength);
        // When we are on this dim, below, we clear the ordBitSet:
        int dimToClear;
        if (numDims - 1 == splitDim) {
            dimToClear = numDims - 2;
        } else {
            dimToClear = numDims - 1;
        }
        for (int dim = 0; dim < numDims; dim++) {
            if (dim == splitDim) {
                // No need to partition on this dim since it's a simple slice of the incoming already sorted slice, and we
                // will re-use its shared reader when visiting it as we recurse:
                leftSlices[dim] = new PathSlice(source.writer, source.start, leftCount);
                rightSlices[dim] = new PathSlice(source.writer, source.start + leftCount, rightCount);
                System.arraycopy(splitValue, 0, minSplitPackedValue, dim * bytesPerDim, bytesPerDim);
                System.arraycopy(splitValue, 0, maxSplitPackedValue, dim * bytesPerDim, bytesPerDim);
                continue;
            }
            // Not inside the try because we don't want to close this one now, so that after recursion is done,
            // we will have done a single full sweep of the file:
            PointReader reader = slices[dim].writer.getSharedReader(slices[dim].start, slices[dim].count, toCloseHeroically);
            try (PointWriter leftPointWriter = getPointWriter(leftCount, "left" + dim);
                 PointWriter rightPointWriter = getPointWriter(source.count - leftCount, "right" + dim)) {
                long nextRightCount = reader.split(source.count, ordBitSet, leftPointWriter, rightPointWriter, dim == dimToClear);
                if (rightCount != nextRightCount) {
                    throw new IllegalStateException("wrong number of points in split: expected=" + rightCount + " but actual=" + nextRightCount);
                }
                leftSlices[dim] = new PathSlice(leftPointWriter, 0, leftCount);
                rightSlices[dim] = new PathSlice(rightPointWriter, 0, rightCount);
            } catch (Throwable t) {
                throw verifyChecksum(t, slices[dim].writer);
            }
        }
        parentSplits[splitDim]++;
        // Recurse on left tree:
        build(2 * nodeID, leafNodeOffset, leftSlices, ordBitSet, out, minPackedValue, maxSplitPackedValue, parentSplits, splitPackedValues, leafBlockFPs, toCloseHeroically);
        for (int dim = 0; dim < numDims; dim++) {
            // Don't destroy the dim we split on because we just re-used what our caller above gave us for that dim:
            if (dim != splitDim) {
                leftSlices[dim].writer.destroy();
            }
        }
        // TODO: we could "tail recurse" here? have our parent discard its refs as we recurse right?
        // Recurse on right tree:
        build(2 * nodeID + 1, leafNodeOffset, rightSlices, ordBitSet, out, minSplitPackedValue, maxPackedValue, parentSplits, splitPackedValues, leafBlockFPs, toCloseHeroically);
        for (int dim = 0; dim < numDims; dim++) {
            // Don't destroy the dim we split on because we just re-used what our caller above gave us for that dim:
            if (dim != splitDim) {
                rightSlices[dim].writer.destroy();
            }
        }
        parentSplits[splitDim]--;
    }
}
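The IntFunction<BytesRef> above deliberately returns the same scratch BytesRef from every apply(i) call, pointing it at a different packed value each time, so writing a leaf block does not allocate one object per value. Below is a simplified, hypothetical sketch of that shared-scratch provider pattern, using plain arrays instead of HeapPointWriter and a local ValueRef instead of Lucene's BytesRef.

import java.util.function.IntFunction;

// Simplified sketch of the IntFunction-with-shared-scratch pattern used when writing leaf
// blocks: the function returns the same mutable holder on every call, so callers must
// consume each value before requesting the next index.
public class PackedValueProviderSketch {

    static final class ValueRef {
        byte[] bytes;
        int offset;
        int length;
    }

    public static void main(String[] args) {
        byte[][] packedValues = {
                {1, 2, 3, 4},
                {1, 2, 9, 9},
                {5, 5, 5, 5}
        };

        ValueRef scratch = new ValueRef();
        IntFunction<ValueRef> provider = i -> {
            // Point the shared scratch at value i instead of copying it.
            scratch.bytes = packedValues[i];
            scratch.offset = 0;
            scratch.length = packedValues[i].length;
            return scratch;
        };

        for (int i = 0; i < packedValues.length; i++) {
            ValueRef value = provider.apply(i);
            System.out.println(value.bytes[0] + " ... length=" + value.length);
        }
    }
}

The trade-off is the same as in the writer: the holder's contents are overwritten on every call, so each returned value must be fully consumed before the next apply.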
Use of java.util.function.IntFunction in project lucene-solr by apache.
From the class BKDWriter, method build (in-memory variant).
/* Recursively reorders the provided reader and writes the bkd-tree on the fly; this method is used
 * when we are writing a new segment directly from IndexWriter's indexing buffer (MutablePointsReader). */
private void build(int nodeID, int leafNodeOffset, MutablePointValues reader, int from, int to, IndexOutput out, byte[] minPackedValue, byte[] maxPackedValue, int[] parentSplits, byte[] splitPackedValues, long[] leafBlockFPs, int[] spareDocIds) throws IOException {
    if (nodeID >= leafNodeOffset) {
        // leaf node
        final int count = to - from;
        assert count <= maxPointsInLeafNode;
        // Compute common prefixes
        Arrays.fill(commonPrefixLengths, bytesPerDim);
        reader.getValue(from, scratchBytesRef1);
        for (int i = from + 1; i < to; ++i) {
            reader.getValue(i, scratchBytesRef2);
            for (int dim = 0; dim < numDims; dim++) {
                final int offset = dim * bytesPerDim;
                for (int j = 0; j < commonPrefixLengths[dim]; j++) {
                    if (scratchBytesRef1.bytes[scratchBytesRef1.offset + offset + j] != scratchBytesRef2.bytes[scratchBytesRef2.offset + offset + j]) {
                        commonPrefixLengths[dim] = j;
                        break;
                    }
                }
            }
        }
        // Find the dimension that has the least number of unique bytes at commonPrefixLengths[dim]
        FixedBitSet[] usedBytes = new FixedBitSet[numDims];
        for (int dim = 0; dim < numDims; ++dim) {
            if (commonPrefixLengths[dim] < bytesPerDim) {
                usedBytes[dim] = new FixedBitSet(256);
            }
        }
        for (int i = from + 1; i < to; ++i) {
            for (int dim = 0; dim < numDims; dim++) {
                if (usedBytes[dim] != null) {
                    byte b = reader.getByteAt(i, dim * bytesPerDim + commonPrefixLengths[dim]);
                    usedBytes[dim].set(Byte.toUnsignedInt(b));
                }
            }
        }
        int sortedDim = 0;
        int sortedDimCardinality = Integer.MAX_VALUE;
        for (int dim = 0; dim < numDims; ++dim) {
            if (usedBytes[dim] != null) {
                final int cardinality = usedBytes[dim].cardinality();
                if (cardinality < sortedDimCardinality) {
                    sortedDim = dim;
                    sortedDimCardinality = cardinality;
                }
            }
        }
        // sort by sortedDim
        MutablePointsReaderUtils.sortByDim(sortedDim, bytesPerDim, commonPrefixLengths, reader, from, to, scratchBytesRef1, scratchBytesRef2);
        // Save the block file pointer:
        leafBlockFPs[nodeID - leafNodeOffset] = out.getFilePointer();
        assert scratchOut.getPosition() == 0;
        // Write doc IDs
        int[] docIDs = spareDocIds;
        for (int i = from; i < to; ++i) {
            docIDs[i - from] = reader.getDocID(i);
        }
        //System.out.println("writeLeafBlock pos=" + out.getFilePointer());
        writeLeafBlockDocs(scratchOut, docIDs, 0, count);
        // Write the common prefixes:
        reader.getValue(from, scratchBytesRef1);
        System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset, scratch1, 0, packedBytesLength);
        writeCommonPrefixes(scratchOut, commonPrefixLengths, scratch1);
        // Write the full values:
        IntFunction<BytesRef> packedValues = new IntFunction<BytesRef>() {
            @Override
            public BytesRef apply(int i) {
                reader.getValue(from + i, scratchBytesRef1);
                return scratchBytesRef1;
            }
        };
        assert valuesInOrderAndBounds(count, sortedDim, minPackedValue, maxPackedValue, packedValues, docIDs, 0);
        writeLeafBlockPackedValues(scratchOut, commonPrefixLengths, count, sortedDim, packedValues);
        out.writeBytes(scratchOut.getBytes(), 0, scratchOut.getPosition());
        scratchOut.reset();
    } else {
        // inner node
        // compute the split dimension and partition around it
        final int splitDim = split(minPackedValue, maxPackedValue, parentSplits);
        final int mid = (from + to + 1) >>> 1;
        int commonPrefixLen = bytesPerDim;
        for (int i = 0; i < bytesPerDim; ++i) {
            if (minPackedValue[splitDim * bytesPerDim + i] != maxPackedValue[splitDim * bytesPerDim + i]) {
                commonPrefixLen = i;
                break;
            }
        }
        MutablePointsReaderUtils.partition(maxDoc, splitDim, bytesPerDim, commonPrefixLen, reader, from, to, mid, scratchBytesRef1, scratchBytesRef2);
        // set the split value
        final int address = nodeID * (1 + bytesPerDim);
        splitPackedValues[address] = (byte) splitDim;
        reader.getValue(mid, scratchBytesRef1);
        System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset + splitDim * bytesPerDim, splitPackedValues, address + 1, bytesPerDim);
        byte[] minSplitPackedValue = Arrays.copyOf(minPackedValue, packedBytesLength);
        byte[] maxSplitPackedValue = Arrays.copyOf(maxPackedValue, packedBytesLength);
        System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset + splitDim * bytesPerDim, minSplitPackedValue, splitDim * bytesPerDim, bytesPerDim);
        System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset + splitDim * bytesPerDim, maxSplitPackedValue, splitDim * bytesPerDim, bytesPerDim);
        // recurse
        parentSplits[splitDim]++;
        build(nodeID * 2, leafNodeOffset, reader, from, mid, out, minPackedValue, maxSplitPackedValue, parentSplits, splitPackedValues, leafBlockFPs, spareDocIds);
        build(nodeID * 2 + 1, leafNodeOffset, reader, mid, to, out, minSplitPackedValue, maxPackedValue, parentSplits, splitPackedValues, leafBlockFPs, spareDocIds);
        parentSplits[splitDim]--;
    }
}
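Before sorting the leaf, this variant picks sortedDim by recording, per dimension, which byte values occur at the first position after the common prefix and then choosing the dimension with the smallest cardinality. The following reduced sketch shows that choice using java.util.BitSet in place of Lucene's FixedBitSet; the data and dimension layout are invented for illustration.

import java.util.BitSet;

// Simplified sketch (java.util.BitSet instead of Lucene's FixedBitSet) of choosing the
// "sorted" dimension: pick the dimension whose byte past the common prefix has the fewest
// distinct values, since long runs of equal bytes compress better in the leaf block.
public class SortedDimChoiceSketch {

    public static void main(String[] args) {
        int numDims = 2;
        int bytesPerDim = 1;
        // One point per row: dim 0 varies a lot, dim 1 is nearly constant.
        byte[][] points = {
                {10, 7},
                {42, 7},
                {99, 8}
        };

        BitSet[] usedBytes = new BitSet[numDims];
        for (int dim = 0; dim < numDims; dim++) {
            usedBytes[dim] = new BitSet(256);
        }
        for (byte[] point : points) {
            for (int dim = 0; dim < numDims; dim++) {
                usedBytes[dim].set(Byte.toUnsignedInt(point[dim * bytesPerDim]));
            }
        }

        int sortedDim = 0;
        int sortedDimCardinality = Integer.MAX_VALUE;
        for (int dim = 0; dim < numDims; dim++) {
            int cardinality = usedBytes[dim].cardinality();
            if (cardinality < sortedDimCardinality) {
                sortedDim = dim;
                sortedDimCardinality = cardinality;
            }
        }
        System.out.println("sortedDim=" + sortedDim); // dim 1 has only 2 distinct bytes
    }
}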
Use of java.util.function.IntFunction in project lucene-solr by apache.
From the class SimpleTextBKDWriter, method build.
/** The array (sized numDims) of PathSlice describes the cell we have currently recursed to. */
private void build(int nodeID, int leafNodeOffset, PathSlice[] slices, LongBitSet ordBitSet, IndexOutput out, byte[] minPackedValue, byte[] maxPackedValue, byte[] splitPackedValues, long[] leafBlockFPs, List<Closeable> toCloseHeroically) throws IOException {
    for (PathSlice slice : slices) {
        assert slice.count == slices[0].count;
    }
    if (numDims == 1 && slices[0].writer instanceof OfflinePointWriter && slices[0].count <= maxPointsSortInHeap) {
        // Special case for 1D, to cutover to heap once we recurse deeply enough:
        slices[0] = switchToHeap(slices[0], toCloseHeroically);
    }
    if (nodeID >= leafNodeOffset) {
        // Leaf node: write block
        // We can write the block in any order so by default we write it sorted by the dimension that has the
        // least number of unique bytes at commonPrefixLengths[dim], which makes compression more efficient
        int sortedDim = 0;
        int sortedDimCardinality = Integer.MAX_VALUE;
        for (int dim = 0; dim < numDims; dim++) {
            if (slices[dim].writer instanceof HeapPointWriter == false) {
                // Adversarial cases can cause this, e.g. very lopsided data, all equal points, such that we started
                // offline, but then kept splitting only in one dimension, and so never had to rewrite into heap writer
                slices[dim] = switchToHeap(slices[dim], toCloseHeroically);
            }
            PathSlice source = slices[dim];
            HeapPointWriter heapSource = (HeapPointWriter) source.writer;
            // Find common prefix by comparing first and last values, already sorted in this dimension:
            heapSource.readPackedValue(Math.toIntExact(source.start), scratch1);
            heapSource.readPackedValue(Math.toIntExact(source.start + source.count - 1), scratch2);
            int offset = dim * bytesPerDim;
            commonPrefixLengths[dim] = bytesPerDim;
            for (int j = 0; j < bytesPerDim; j++) {
                if (scratch1[offset + j] != scratch2[offset + j]) {
                    commonPrefixLengths[dim] = j;
                    break;
                }
            }
            int prefix = commonPrefixLengths[dim];
            if (prefix < bytesPerDim) {
                int cardinality = 1;
                byte previous = scratch1[offset + prefix];
                for (long i = 1; i < source.count; ++i) {
                    heapSource.readPackedValue(Math.toIntExact(source.start + i), scratch2);
                    byte b = scratch2[offset + prefix];
                    assert Byte.toUnsignedInt(previous) <= Byte.toUnsignedInt(b);
                    if (b != previous) {
                        cardinality++;
                        previous = b;
                    }
                }
                assert cardinality <= 256;
                if (cardinality < sortedDimCardinality) {
                    sortedDim = dim;
                    sortedDimCardinality = cardinality;
                }
            }
        }
        PathSlice source = slices[sortedDim];
        // We ensured that maxPointsSortInHeap was >= maxPointsInLeafNode, so we better be in heap at this point:
        HeapPointWriter heapSource = (HeapPointWriter) source.writer;
        // Save the block file pointer:
        leafBlockFPs[nodeID - leafNodeOffset] = out.getFilePointer();
        //System.out.println(" write leaf block @ fp=" + out.getFilePointer());
        // Write docIDs first, as their own chunk, so that at intersect time we can add all docIDs w/o
        // loading the values:
        int count = Math.toIntExact(source.count);
        assert count > 0 : "nodeID=" + nodeID + " leafNodeOffset=" + leafNodeOffset;
        writeLeafBlockDocs(out, heapSource.docIDs, Math.toIntExact(source.start), count);
        // TODO: minor opto: we don't really have to write the actual common prefixes, because BKDReader on recursing can regenerate it for us
        // from the index, much like how terms dict does so from the FST:
        // Write the full values:
        IntFunction<BytesRef> packedValues = new IntFunction<BytesRef>() {
            final BytesRef scratch = new BytesRef();
            {
                scratch.length = packedBytesLength;
            }
            @Override
            public BytesRef apply(int i) {
                heapSource.getPackedValueSlice(Math.toIntExact(source.start + i), scratch);
                return scratch;
            }
        };
        assert valuesInOrderAndBounds(count, sortedDim, minPackedValue, maxPackedValue, packedValues, heapSource.docIDs, Math.toIntExact(source.start));
        writeLeafBlockPackedValues(out, commonPrefixLengths, count, sortedDim, packedValues);
    } else {
        // Inner node: partition/recurse
        int splitDim;
        if (numDims > 1) {
            splitDim = split(minPackedValue, maxPackedValue);
        } else {
            splitDim = 0;
        }
        PathSlice source = slices[splitDim];
        assert nodeID < splitPackedValues.length : "nodeID=" + nodeID + " splitValues.length=" + splitPackedValues.length;
        // How many points will be in the left tree:
        long rightCount = source.count / 2;
        long leftCount = source.count - rightCount;
        byte[] splitValue = markRightTree(rightCount, splitDim, source, ordBitSet);
        int address = nodeID * (1 + bytesPerDim);
        splitPackedValues[address] = (byte) splitDim;
        System.arraycopy(splitValue, 0, splitPackedValues, address + 1, bytesPerDim);
        // Partition all PathSlice that are not the split dim into sorted left and right sets, so we can recurse:
        PathSlice[] leftSlices = new PathSlice[numDims];
        PathSlice[] rightSlices = new PathSlice[numDims];
        byte[] minSplitPackedValue = new byte[packedBytesLength];
        System.arraycopy(minPackedValue, 0, minSplitPackedValue, 0, packedBytesLength);
        byte[] maxSplitPackedValue = new byte[packedBytesLength];
        System.arraycopy(maxPackedValue, 0, maxSplitPackedValue, 0, packedBytesLength);
        // When we are on this dim, below, we clear the ordBitSet:
        int dimToClear;
        if (numDims - 1 == splitDim) {
            dimToClear = numDims - 2;
        } else {
            dimToClear = numDims - 1;
        }
        for (int dim = 0; dim < numDims; dim++) {
            if (dim == splitDim) {
                // No need to partition on this dim since it's a simple slice of the incoming already sorted slice, and we
                // will re-use its shared reader when visiting it as we recurse:
                leftSlices[dim] = new PathSlice(source.writer, source.start, leftCount);
                rightSlices[dim] = new PathSlice(source.writer, source.start + leftCount, rightCount);
                System.arraycopy(splitValue, 0, minSplitPackedValue, dim * bytesPerDim, bytesPerDim);
                System.arraycopy(splitValue, 0, maxSplitPackedValue, dim * bytesPerDim, bytesPerDim);
                continue;
            }
            // Not inside the try because we don't want to close this one now, so that after recursion is done,
            // we will have done a single full sweep of the file:
            PointReader reader = slices[dim].writer.getSharedReader(slices[dim].start, slices[dim].count, toCloseHeroically);
            try (PointWriter leftPointWriter = getPointWriter(leftCount, "left" + dim);
                 PointWriter rightPointWriter = getPointWriter(source.count - leftCount, "right" + dim)) {
                long nextRightCount = reader.split(source.count, ordBitSet, leftPointWriter, rightPointWriter, dim == dimToClear);
                if (rightCount != nextRightCount) {
                    throw new IllegalStateException("wrong number of points in split: expected=" + rightCount + " but actual=" + nextRightCount);
                }
                leftSlices[dim] = new PathSlice(leftPointWriter, 0, leftCount);
                rightSlices[dim] = new PathSlice(rightPointWriter, 0, rightCount);
            } catch (Throwable t) {
                throw verifyChecksum(t, slices[dim].writer);
            }
        }
        // Recurse on left tree:
        build(2 * nodeID, leafNodeOffset, leftSlices, ordBitSet, out, minPackedValue, maxSplitPackedValue, splitPackedValues, leafBlockFPs, toCloseHeroically);
        for (int dim = 0; dim < numDims; dim++) {
            // Don't destroy the dim we split on because we just re-used what our caller above gave us for that dim:
            if (dim != splitDim) {
                leftSlices[dim].writer.destroy();
            }
        }
        // TODO: we could "tail recurse" here? have our parent discard its refs as we recurse right?
        // Recurse on right tree:
        build(2 * nodeID + 1, leafNodeOffset, rightSlices, ordBitSet, out, minSplitPackedValue, maxPackedValue, splitPackedValues, leafBlockFPs, toCloseHeroically);
        for (int dim = 0; dim < numDims; dim++) {
            // Don't destroy the dim we split on because we just re-used what our caller above gave us for that dim:
            if (dim != splitDim) {
                rightSlices[dim].writer.destroy();
            }
        }
    }
}
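All three build methods shown here rely on the same implicit, heap-style tree addressing: node 1 is the root, the children of nodeID are 2 * nodeID and 2 * nodeID + 1, and nodeID >= leafNodeOffset marks a leaf whose block file pointer is stored at index nodeID - leafNodeOffset. The toy sketch below demonstrates only that addressing; the printed ranges stand in for the point slices, and nothing here is Lucene API.

// Sketch of the implicit binary-tree addressing used by build(): recursion halves the
// range and doubles the node id until nodeID reaches the leaf region.
public class TreeAddressingSketch {

    static void build(int nodeID, int leafNodeOffset, int from, int to) {
        if (nodeID >= leafNodeOffset) {
            System.out.println("leaf " + (nodeID - leafNodeOffset) + " covers [" + from + ", " + to + ")");
        } else {
            int mid = (from + to + 1) >>> 1;                 // same midpoint rule as the in-memory build()
            build(2 * nodeID, leafNodeOffset, from, mid);     // left subtree
            build(2 * nodeID + 1, leafNodeOffset, mid, to);   // right subtree
        }
    }

    public static void main(String[] args) {
        // 4 leaves: inner nodes are ids 1..3, leaves are ids 4..7.
        build(1, 4, 0, 16);
    }
}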