
Example 1 with IntFunction

Use of java.util.function.IntFunction in project neo4j by neo4j.

From the class ServerGroupsIT, method shouldUpdateGroupsOnStart.

@Test
public void shouldUpdateGroupsOnStart() throws Exception {
    AtomicReference<String> suffix = new AtomicReference<>("before");
    List<List<String>> expected;
    Map<String, IntFunction<String>> instanceCoreParams = new HashMap<>();
    instanceCoreParams.put(CausalClusteringSettings.server_groups.name(), (id) -> String.join(", ", makeCoreGroups(suffix.get(), id)));
    Map<String, IntFunction<String>> instanceReplicaParams = new HashMap<>();
    instanceReplicaParams.put(CausalClusteringSettings.server_groups.name(), (id) -> String.join(", ", makeReplicaGroups(suffix.get(), id)));
    int nServers = 3;
    cluster = new Cluster(testDir.directory("cluster"), nServers, nServers, new HazelcastDiscoveryServiceFactory(), emptyMap(), instanceCoreParams, emptyMap(), instanceReplicaParams, Standard.LATEST_NAME);
    // when
    cluster.start();
    // then
    expected = new ArrayList<>();
    for (CoreClusterMember core : cluster.coreMembers()) {
        expected.add(makeCoreGroups(suffix.get(), core.serverId()));
        expected.add(makeReplicaGroups(suffix.get(), core.serverId()));
    }
    for (CoreClusterMember core : cluster.coreMembers()) {
        assertEventually(core + " should have groups", () -> getServerGroups(core.database()), new GroupsMatcher(expected), 30, SECONDS);
    }
    // when
    expected.remove(makeCoreGroups(suffix.get(), 1));
    expected.remove(makeReplicaGroups(suffix.get(), 2));
    cluster.getCoreMemberById(1).shutdown();
    cluster.getReadReplicaById(2).shutdown();
    // should update groups of restarted servers
    suffix.set("after");
    cluster.addCoreMemberWithId(1).start();
    cluster.addReadReplicaWithId(2).start();
    expected.add(makeCoreGroups(suffix.get(), 1));
    expected.add(makeReplicaGroups(suffix.get(), 2));
    // then
    for (CoreClusterMember core : cluster.coreMembers()) {
        assertEventually(core + " should have groups", () -> getServerGroups(core.database()), new GroupsMatcher(expected), 30, SECONDS);
    }
}
Also used : HashMap(java.util.HashMap) CoreClusterMember(org.neo4j.causalclustering.discovery.CoreClusterMember) Cluster(org.neo4j.causalclustering.discovery.Cluster) AtomicReference(java.util.concurrent.atomic.AtomicReference) HazelcastDiscoveryServiceFactory(org.neo4j.causalclustering.discovery.HazelcastDiscoveryServiceFactory) IntFunction(java.util.function.IntFunction) ArrayList(java.util.ArrayList) Arrays.asList(java.util.Arrays.asList) List(java.util.List) Test(org.junit.Test)
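
The IntFunction pattern in this test can be boiled down to a few lines: each per-instance parameter maps a member id to that member's value for the setting. A minimal sketch of the idea, with the setting name written as a literal and made-up group names standing in for the test's makeCoreGroups helper:

import java.util.HashMap;
import java.util.Map;
import java.util.function.IntFunction;

public class PerInstanceParamsSketch {
    public static void main(String[] args) {
        // One function per setting: the harness calls it with each member's id,
        // so every cluster member gets its own value for the same setting.
        Map<String, IntFunction<String>> instanceParams = new HashMap<>();
        instanceParams.put("causal_clustering.server_groups",
                id -> String.join(", ", "us", "core-" + id));

        IntFunction<String> groups = instanceParams.get("causal_clustering.server_groups");
        for (int id = 0; id < 3; id++) {
            System.out.println("member " + id + " -> " + groups.apply(id)); // e.g. "us, core-1"
        }
    }
}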

Example 2 with IntFunction

Use of java.util.function.IntFunction in project neo4j by neo4j.

From the class BackupStoreCopyInteractionStressTesting, method shouldBehaveCorrectlyUnderStress.

@Test
public void shouldBehaveCorrectlyUnderStress() throws Exception {
    int numberOfCores = parseInt(fromEnv("BACKUP_STORE_COPY_INTERACTION_STRESS_NUMBER_OF_CORES", DEFAULT_NUMBER_OF_CORES));
    int numberOfEdges = parseInt(fromEnv("BACKUP_STORE_COPY_INTERACTION_STRESS_NUMBER_OF_EDGES", DEFAULT_NUMBER_OF_EDGES));
    long durationInMinutes = parseLong(fromEnv("BACKUP_STORE_COPY_INTERACTION_STRESS_DURATION", DEFAULT_DURATION_IN_MINUTES));
    String workingDirectory = fromEnv("BACKUP_STORE_COPY_INTERACTION_STRESS_WORKING_DIRECTORY", DEFAULT_WORKING_DIR);
    int baseCoreBackupPort = parseInt(fromEnv("BACKUP_STORE_COPY_INTERACTION_STRESS_BASE_CORE_BACKUP_PORT", DEFAULT_BASE_CORE_BACKUP_PORT));
    int baseEdgeBackupPort = parseInt(fromEnv("BACKUP_STORE_COPY_INTERACTION_STRESS_BASE_EDGE_BACKUP_PORT", DEFAULT_BASE_EDGE_BACKUP_PORT));
    boolean enableIndexes = parseBoolean(fromEnv("BACKUP_STORE_COPY_INTERACTION_STRESS_ENABLE_INDEXES", DEFAULT_ENABLE_INDEXES));
    String txPrune = fromEnv("BACKUP_STORE_COPY_INTERACTION_STRESS_TX_PRUNE", DEFAULT_TX_PRUNE);
    File clusterDirectory = ensureExistsAndEmpty(new File(workingDirectory, "cluster"));
    File backupDirectory = ensureExistsAndEmpty(new File(workingDirectory, "backups"));
    BiFunction<Boolean, Integer, SocketAddress> backupAddress = (isCore, id) -> new AdvertisedSocketAddress("localhost", (isCore ? baseCoreBackupPort : baseEdgeBackupPort) + id);
    Map<String, String> coreParams = enableRaftMessageLogging(configureRaftLogRotationAndPruning(configureTxLogRotationAndPruning(new HashMap<>(), txPrune)));
    Map<String, String> readReplicaParams = configureTxLogRotationAndPruning(new HashMap<>(), txPrune);
    Map<String, IntFunction<String>> instanceCoreParams = configureBackup(new HashMap<>(), id -> backupAddress.apply(true, id));
    Map<String, IntFunction<String>> instanceReadReplicaParams = configureBackup(new HashMap<>(), id -> backupAddress.apply(false, id));
    HazelcastDiscoveryServiceFactory discoveryServiceFactory = new HazelcastDiscoveryServiceFactory();
    Cluster cluster = new Cluster(clusterDirectory, numberOfCores, numberOfEdges, discoveryServiceFactory, coreParams, instanceCoreParams, readReplicaParams, instanceReadReplicaParams, Standard.LATEST_NAME);
    AtomicBoolean stopTheWorld = new AtomicBoolean();
    BooleanSupplier notExpired = untilTimeExpired(durationInMinutes, MINUTES);
    BooleanSupplier keepGoing = () -> !stopTheWorld.get() && notExpired.getAsBoolean();
    Runnable onFailure = () -> stopTheWorld.set(true);
    ExecutorService service = Executors.newFixedThreadPool(3);
    try {
        cluster.start();
        if (enableIndexes) {
            Workload.setupIndexes(cluster);
        }
        Future<Throwable> workload = service.submit(new Workload(keepGoing, onFailure, cluster));
        Future<Throwable> startStopWorker = service.submit(new StartStopLoad(fs, pageCache, keepGoing, onFailure, cluster, numberOfCores, numberOfEdges));
        Future<Throwable> backupWorker = service.submit(new BackupLoad(keepGoing, onFailure, cluster, numberOfCores, numberOfEdges, backupDirectory, backupAddress));
        long timeout = durationInMinutes + 5;
        assertNull(Exceptions.stringify(workload.get()), workload.get(timeout, MINUTES));
        assertNull(Exceptions.stringify(startStopWorker.get()), startStopWorker.get(timeout, MINUTES));
        assertNull(Exceptions.stringify(backupWorker.get()), backupWorker.get(timeout, MINUTES));
    } finally {
        cluster.shutdown();
        service.shutdown();
    }
    // let's cleanup disk space when everything went well
    FileUtils.deleteRecursively(clusterDirectory);
    FileUtils.deleteRecursively(backupDirectory);
}
Also used : StressTestingHelper.ensureExistsAndEmpty(org.neo4j.helper.StressTestingHelper.ensureExistsAndEmpty) Suppliers.untilTimeExpired(org.neo4j.function.Suppliers.untilTimeExpired) BiFunction(java.util.function.BiFunction) Exceptions(org.neo4j.helpers.Exceptions) StressTestingHelper.fromEnv(org.neo4j.helper.StressTestingHelper.fromEnv) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) HashMap(java.util.HashMap) MINUTES(java.util.concurrent.TimeUnit.MINUTES) BooleanSupplier(java.util.function.BooleanSupplier) SocketAddress(org.neo4j.helpers.SocketAddress) Future(java.util.concurrent.Future) AdvertisedSocketAddress(org.neo4j.helpers.AdvertisedSocketAddress) ClusterConfiguration.configureRaftLogRotationAndPruning(org.neo4j.causalclustering.stresstests.ClusterConfiguration.configureRaftLogRotationAndPruning) HazelcastDiscoveryServiceFactory(org.neo4j.causalclustering.discovery.HazelcastDiscoveryServiceFactory) PageCacheRule(org.neo4j.test.rule.PageCacheRule) Map(java.util.Map) ClusterConfiguration.configureBackup(org.neo4j.causalclustering.stresstests.ClusterConfiguration.configureBackup) System.getProperty(java.lang.System.getProperty) ExecutorService(java.util.concurrent.ExecutorService) Before(org.junit.Before) IntFunction(java.util.function.IntFunction) Standard(org.neo4j.kernel.impl.store.format.standard.Standard) PageCache(org.neo4j.io.pagecache.PageCache) ClusterConfiguration.enableRaftMessageLogging(org.neo4j.causalclustering.stresstests.ClusterConfiguration.enableRaftMessageLogging) FileUtils(org.neo4j.io.fs.FileUtils) Test(org.junit.Test) Integer.parseInt(java.lang.Integer.parseInt) File(java.io.File) Executors(java.util.concurrent.Executors) Cluster(org.neo4j.causalclustering.discovery.Cluster) RuleChain(org.junit.rules.RuleChain) Rule(org.junit.Rule) DefaultFileSystemRule(org.neo4j.test.rule.fs.DefaultFileSystemRule) Boolean.parseBoolean(java.lang.Boolean.parseBoolean) Assert.assertNull(org.junit.Assert.assertNull) DatabaseConfiguration.configureTxLogRotationAndPruning(org.neo4j.helper.DatabaseConfiguration.configureTxLogRotationAndPruning) Long.parseLong(java.lang.Long.parseLong) FileSystemAbstraction(org.neo4j.io.fs.FileSystemAbstraction)
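
The IntFunction usage in this stress test is thin: it only turns a member id into that member's backup address by offsetting a base port. A stripped-down sketch of that mapping, with illustrative port values rather than the test's environment-driven defaults:

import java.util.function.IntFunction;

public class BackupAddressSketch {
    public static void main(String[] args) {
        int baseCoreBackupPort = 8000;    // illustrative, not the test's default
        int baseReplicaBackupPort = 9000; // illustrative, not the test's default

        // Member id -> advertised backup address for that member.
        IntFunction<String> coreBackup = id -> "localhost:" + (baseCoreBackupPort + id);
        IntFunction<String> replicaBackup = id -> "localhost:" + (baseReplicaBackupPort + id);

        System.out.println("core 2 backs up at " + coreBackup.apply(2));       // localhost:8002
        System.out.println("replica 1 backs up at " + replicaBackup.apply(1)); // localhost:9001
    }
}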

Example 3 with IntFunction

Use of java.util.function.IntFunction in project lucene-solr by apache.

From the class BKDWriter, method build.

/** The array (sized numDims) of PathSlice describes the cell we have currently recursed to.
 *  This method is used when we are merging previously written segments, in the numDims > 1 case. */
private void build(int nodeID, int leafNodeOffset, PathSlice[] slices, LongBitSet ordBitSet, IndexOutput out, byte[] minPackedValue, byte[] maxPackedValue, int[] parentSplits, byte[] splitPackedValues, long[] leafBlockFPs, List<Closeable> toCloseHeroically) throws IOException {
    for (PathSlice slice : slices) {
        assert slice.count == slices[0].count;
    }
    if (numDims == 1 && slices[0].writer instanceof OfflinePointWriter && slices[0].count <= maxPointsSortInHeap) {
        // Special case for 1D, to cutover to heap once we recurse deeply enough:
        slices[0] = switchToHeap(slices[0], toCloseHeroically);
    }
    if (nodeID >= leafNodeOffset) {
        // Leaf node: write block
        // We can write the block in any order so by default we write it sorted by the dimension that has the
        // least number of unique bytes at commonPrefixLengths[dim], which makes compression more efficient
        int sortedDim = 0;
        int sortedDimCardinality = Integer.MAX_VALUE;
        for (int dim = 0; dim < numDims; dim++) {
            if (slices[dim].writer instanceof HeapPointWriter == false) {
                // Adversarial cases can cause this, e.g. very lopsided data, all equal points, such that we started
                // offline, but then kept splitting only in one dimension, and so never had to rewrite into heap writer
                slices[dim] = switchToHeap(slices[dim], toCloseHeroically);
            }
            PathSlice source = slices[dim];
            HeapPointWriter heapSource = (HeapPointWriter) source.writer;
            // Find common prefix by comparing first and last values, already sorted in this dimension:
            heapSource.readPackedValue(Math.toIntExact(source.start), scratch1);
            heapSource.readPackedValue(Math.toIntExact(source.start + source.count - 1), scratch2);
            int offset = dim * bytesPerDim;
            commonPrefixLengths[dim] = bytesPerDim;
            for (int j = 0; j < bytesPerDim; j++) {
                if (scratch1[offset + j] != scratch2[offset + j]) {
                    commonPrefixLengths[dim] = j;
                    break;
                }
            }
            int prefix = commonPrefixLengths[dim];
            if (prefix < bytesPerDim) {
                int cardinality = 1;
                byte previous = scratch1[offset + prefix];
                for (long i = 1; i < source.count; ++i) {
                    heapSource.readPackedValue(Math.toIntExact(source.start + i), scratch2);
                    byte b = scratch2[offset + prefix];
                    assert Byte.toUnsignedInt(previous) <= Byte.toUnsignedInt(b);
                    if (b != previous) {
                        cardinality++;
                        previous = b;
                    }
                }
                assert cardinality <= 256;
                if (cardinality < sortedDimCardinality) {
                    sortedDim = dim;
                    sortedDimCardinality = cardinality;
                }
            }
        }
        PathSlice source = slices[sortedDim];
        // We ensured that maxPointsSortInHeap was >= maxPointsInLeafNode, so we better be in heap at this point:
        HeapPointWriter heapSource = (HeapPointWriter) source.writer;
        // Save the block file pointer:
        leafBlockFPs[nodeID - leafNodeOffset] = out.getFilePointer();
        //System.out.println("  write leaf block @ fp=" + out.getFilePointer());
        // Write docIDs first, as their own chunk, so that at intersect time we can add all docIDs w/o
        // loading the values:
        int count = Math.toIntExact(source.count);
        assert count > 0 : "nodeID=" + nodeID + " leafNodeOffset=" + leafNodeOffset;
        writeLeafBlockDocs(out, heapSource.docIDs, Math.toIntExact(source.start), count);
        // TODO: minor opto: we don't really have to write the actual common prefixes, because BKDReader on recursing can regenerate it for us
        // from the index, much like how terms dict does so from the FST:
        // Write the common prefixes:
        writeCommonPrefixes(out, commonPrefixLengths, scratch1);
        // Write the full values:
        IntFunction<BytesRef> packedValues = new IntFunction<BytesRef>() {

            final BytesRef scratch = new BytesRef();

            {
                scratch.length = packedBytesLength;
            }

            @Override
            public BytesRef apply(int i) {
                heapSource.getPackedValueSlice(Math.toIntExact(source.start + i), scratch);
                return scratch;
            }
        };
        assert valuesInOrderAndBounds(count, sortedDim, minPackedValue, maxPackedValue, packedValues, heapSource.docIDs, Math.toIntExact(source.start));
        writeLeafBlockPackedValues(out, commonPrefixLengths, count, sortedDim, packedValues);
    } else {
        // Inner node: partition/recurse
        int splitDim;
        if (numDims > 1) {
            splitDim = split(minPackedValue, maxPackedValue, parentSplits);
        } else {
            splitDim = 0;
        }
        PathSlice source = slices[splitDim];
        assert nodeID < splitPackedValues.length : "nodeID=" + nodeID + " splitValues.length=" + splitPackedValues.length;
        // How many points will be in the left tree:
        long rightCount = source.count / 2;
        long leftCount = source.count - rightCount;
        byte[] splitValue = markRightTree(rightCount, splitDim, source, ordBitSet);
        int address = nodeID * (1 + bytesPerDim);
        splitPackedValues[address] = (byte) splitDim;
        System.arraycopy(splitValue, 0, splitPackedValues, address + 1, bytesPerDim);
        // Partition all PathSlice that are not the split dim into sorted left and right sets, so we can recurse:
        PathSlice[] leftSlices = new PathSlice[numDims];
        PathSlice[] rightSlices = new PathSlice[numDims];
        byte[] minSplitPackedValue = new byte[packedBytesLength];
        System.arraycopy(minPackedValue, 0, minSplitPackedValue, 0, packedBytesLength);
        byte[] maxSplitPackedValue = new byte[packedBytesLength];
        System.arraycopy(maxPackedValue, 0, maxSplitPackedValue, 0, packedBytesLength);
        // When we are on this dim, below, we clear the ordBitSet:
        int dimToClear;
        if (numDims - 1 == splitDim) {
            dimToClear = numDims - 2;
        } else {
            dimToClear = numDims - 1;
        }
        for (int dim = 0; dim < numDims; dim++) {
            if (dim == splitDim) {
                // No need to partition on this dim since it's a simple slice of the incoming already sorted slice, and we
                // will re-use its shared reader when visiting it as we recurse:
                leftSlices[dim] = new PathSlice(source.writer, source.start, leftCount);
                rightSlices[dim] = new PathSlice(source.writer, source.start + leftCount, rightCount);
                System.arraycopy(splitValue, 0, minSplitPackedValue, dim * bytesPerDim, bytesPerDim);
                System.arraycopy(splitValue, 0, maxSplitPackedValue, dim * bytesPerDim, bytesPerDim);
                continue;
            }
            // Not inside the try because we don't want to close this one now, so that after recursion is done,
            // we will have done a single full sweep of the file:
            PointReader reader = slices[dim].writer.getSharedReader(slices[dim].start, slices[dim].count, toCloseHeroically);
            try (PointWriter leftPointWriter = getPointWriter(leftCount, "left" + dim);
                PointWriter rightPointWriter = getPointWriter(source.count - leftCount, "right" + dim)) {
                long nextRightCount = reader.split(source.count, ordBitSet, leftPointWriter, rightPointWriter, dim == dimToClear);
                if (rightCount != nextRightCount) {
                    throw new IllegalStateException("wrong number of points in split: expected=" + rightCount + " but actual=" + nextRightCount);
                }
                leftSlices[dim] = new PathSlice(leftPointWriter, 0, leftCount);
                rightSlices[dim] = new PathSlice(rightPointWriter, 0, rightCount);
            } catch (Throwable t) {
                throw verifyChecksum(t, slices[dim].writer);
            }
        }
        parentSplits[splitDim]++;
        // Recurse on left tree:
        build(2 * nodeID, leafNodeOffset, leftSlices, ordBitSet, out, minPackedValue, maxSplitPackedValue, parentSplits, splitPackedValues, leafBlockFPs, toCloseHeroically);
        for (int dim = 0; dim < numDims; dim++) {
            // Don't destroy the dim we split on because we just re-used what our caller above gave us for that dim:
            if (dim != splitDim) {
                leftSlices[dim].writer.destroy();
            }
        }
        // TODO: we could "tail recurse" here?  have our parent discard its refs as we recurse right?
        // Recurse on right tree:
        build(2 * nodeID + 1, leafNodeOffset, rightSlices, ordBitSet, out, minSplitPackedValue, maxPackedValue, parentSplits, splitPackedValues, leafBlockFPs, toCloseHeroically);
        for (int dim = 0; dim < numDims; dim++) {
            // Don't destroy the dim we split on because we just re-used what our caller above gave us for that dim:
            if (dim != splitDim) {
                rightSlices[dim].writer.destroy();
            }
        }
        parentSplits[splitDim]--;
    }
}
Also used : IntFunction(java.util.function.IntFunction) BytesRef(org.apache.lucene.util.BytesRef)
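
What the IntFunction<BytesRef> does here is worth isolating: it gives writeLeafBlockPackedValues random access to value i while reusing a single scratch object, so serializing a whole leaf block allocates nothing per value. A self-contained sketch of that reuse pattern, with a plain byte[][] standing in for the heap point writer and a StringBuilder standing in for BytesRef:

import java.nio.charset.StandardCharsets;
import java.util.function.IntFunction;

public class ScratchValueAccessSketch {
    public static void main(String[] args) {
        byte[][] packedValues = {
                "alpha".getBytes(StandardCharsets.UTF_8),
                "beta".getBytes(StandardCharsets.UTF_8),
                "gamma".getBytes(StandardCharsets.UTF_8)
        };

        // Like the writer's packedValues function: index in, shared scratch object out.
        IntFunction<StringBuilder> values = new IntFunction<StringBuilder>() {
            final StringBuilder scratch = new StringBuilder(); // reused across every call

            @Override
            public StringBuilder apply(int i) {
                scratch.setLength(0);
                scratch.append(new String(packedValues[i], StandardCharsets.UTF_8));
                return scratch; // callers must consume it before the next apply()
            }
        };

        for (int i = 0; i < packedValues.length; i++) {
            System.out.println("value " + i + " = " + values.apply(i));
        }
    }
}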

Example 4 with IntFunction

Use of java.util.function.IntFunction in project lucene-solr by apache.

From the class BKDWriter, method build.

/* Recursively reorders the provided reader and writes the bkd-tree on the fly; this method is used
   * when we are writing a new segment directly from IndexWriter's indexing buffer (MutablePointsReader). */
private void build(int nodeID, int leafNodeOffset, MutablePointValues reader, int from, int to, IndexOutput out, byte[] minPackedValue, byte[] maxPackedValue, int[] parentSplits, byte[] splitPackedValues, long[] leafBlockFPs, int[] spareDocIds) throws IOException {
    if (nodeID >= leafNodeOffset) {
        // leaf node
        final int count = to - from;
        assert count <= maxPointsInLeafNode;
        // Compute common prefixes
        Arrays.fill(commonPrefixLengths, bytesPerDim);
        reader.getValue(from, scratchBytesRef1);
        for (int i = from + 1; i < to; ++i) {
            reader.getValue(i, scratchBytesRef2);
            for (int dim = 0; dim < numDims; dim++) {
                final int offset = dim * bytesPerDim;
                for (int j = 0; j < commonPrefixLengths[dim]; j++) {
                    if (scratchBytesRef1.bytes[scratchBytesRef1.offset + offset + j] != scratchBytesRef2.bytes[scratchBytesRef2.offset + offset + j]) {
                        commonPrefixLengths[dim] = j;
                        break;
                    }
                }
            }
        }
        // Find the dimension that has the least number of unique bytes at commonPrefixLengths[dim]
        FixedBitSet[] usedBytes = new FixedBitSet[numDims];
        for (int dim = 0; dim < numDims; ++dim) {
            if (commonPrefixLengths[dim] < bytesPerDim) {
                usedBytes[dim] = new FixedBitSet(256);
            }
        }
        for (int i = from + 1; i < to; ++i) {
            for (int dim = 0; dim < numDims; dim++) {
                if (usedBytes[dim] != null) {
                    byte b = reader.getByteAt(i, dim * bytesPerDim + commonPrefixLengths[dim]);
                    usedBytes[dim].set(Byte.toUnsignedInt(b));
                }
            }
        }
        int sortedDim = 0;
        int sortedDimCardinality = Integer.MAX_VALUE;
        for (int dim = 0; dim < numDims; ++dim) {
            if (usedBytes[dim] != null) {
                final int cardinality = usedBytes[dim].cardinality();
                if (cardinality < sortedDimCardinality) {
                    sortedDim = dim;
                    sortedDimCardinality = cardinality;
                }
            }
        }
        // sort by sortedDim
        MutablePointsReaderUtils.sortByDim(sortedDim, bytesPerDim, commonPrefixLengths, reader, from, to, scratchBytesRef1, scratchBytesRef2);
        // Save the block file pointer:
        leafBlockFPs[nodeID - leafNodeOffset] = out.getFilePointer();
        assert scratchOut.getPosition() == 0;
        // Write doc IDs
        int[] docIDs = spareDocIds;
        for (int i = from; i < to; ++i) {
            docIDs[i - from] = reader.getDocID(i);
        }
        //System.out.println("writeLeafBlock pos=" + out.getFilePointer());
        writeLeafBlockDocs(scratchOut, docIDs, 0, count);
        // Write the common prefixes:
        reader.getValue(from, scratchBytesRef1);
        System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset, scratch1, 0, packedBytesLength);
        writeCommonPrefixes(scratchOut, commonPrefixLengths, scratch1);
        // Write the full values:
        IntFunction<BytesRef> packedValues = new IntFunction<BytesRef>() {

            @Override
            public BytesRef apply(int i) {
                reader.getValue(from + i, scratchBytesRef1);
                return scratchBytesRef1;
            }
        };
        assert valuesInOrderAndBounds(count, sortedDim, minPackedValue, maxPackedValue, packedValues, docIDs, 0);
        writeLeafBlockPackedValues(scratchOut, commonPrefixLengths, count, sortedDim, packedValues);
        out.writeBytes(scratchOut.getBytes(), 0, scratchOut.getPosition());
        scratchOut.reset();
    } else {
        // inner node
        // compute the split dimension and partition around it
        final int splitDim = split(minPackedValue, maxPackedValue, parentSplits);
        final int mid = (from + to + 1) >>> 1;
        int commonPrefixLen = bytesPerDim;
        for (int i = 0; i < bytesPerDim; ++i) {
            if (minPackedValue[splitDim * bytesPerDim + i] != maxPackedValue[splitDim * bytesPerDim + i]) {
                commonPrefixLen = i;
                break;
            }
        }
        MutablePointsReaderUtils.partition(maxDoc, splitDim, bytesPerDim, commonPrefixLen, reader, from, to, mid, scratchBytesRef1, scratchBytesRef2);
        // set the split value
        final int address = nodeID * (1 + bytesPerDim);
        splitPackedValues[address] = (byte) splitDim;
        reader.getValue(mid, scratchBytesRef1);
        System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset + splitDim * bytesPerDim, splitPackedValues, address + 1, bytesPerDim);
        byte[] minSplitPackedValue = Arrays.copyOf(minPackedValue, packedBytesLength);
        byte[] maxSplitPackedValue = Arrays.copyOf(maxPackedValue, packedBytesLength);
        System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset + splitDim * bytesPerDim, minSplitPackedValue, splitDim * bytesPerDim, bytesPerDim);
        System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset + splitDim * bytesPerDim, maxSplitPackedValue, splitDim * bytesPerDim, bytesPerDim);
        // recurse
        parentSplits[splitDim]++;
        build(nodeID * 2, leafNodeOffset, reader, from, mid, out, minPackedValue, maxSplitPackedValue, parentSplits, splitPackedValues, leafBlockFPs, spareDocIds);
        build(nodeID * 2 + 1, leafNodeOffset, reader, mid, to, out, minSplitPackedValue, maxPackedValue, parentSplits, splitPackedValues, leafBlockFPs, spareDocIds);
        parentSplits[splitDim]--;
    }
}
Also used : FixedBitSet(org.apache.lucene.util.FixedBitSet) IntFunction(java.util.function.IntFunction) BytesRef(org.apache.lucene.util.BytesRef)

Example 5 with IntFunction

Use of java.util.function.IntFunction in project lucene-solr by apache.

From the class SimpleTextBKDWriter, method build.

/** The array (sized numDims) of PathSlice describes the cell we have currently recursed to. */
private void build(int nodeID, int leafNodeOffset, PathSlice[] slices, LongBitSet ordBitSet, IndexOutput out, byte[] minPackedValue, byte[] maxPackedValue, byte[] splitPackedValues, long[] leafBlockFPs, List<Closeable> toCloseHeroically) throws IOException {
    for (PathSlice slice : slices) {
        assert slice.count == slices[0].count;
    }
    if (numDims == 1 && slices[0].writer instanceof OfflinePointWriter && slices[0].count <= maxPointsSortInHeap) {
        // Special case for 1D, to cutover to heap once we recurse deeply enough:
        slices[0] = switchToHeap(slices[0], toCloseHeroically);
    }
    if (nodeID >= leafNodeOffset) {
        // Leaf node: write block
        // We can write the block in any order so by default we write it sorted by the dimension that has the
        // least number of unique bytes at commonPrefixLengths[dim], which makes compression more efficient
        int sortedDim = 0;
        int sortedDimCardinality = Integer.MAX_VALUE;
        for (int dim = 0; dim < numDims; dim++) {
            if (slices[dim].writer instanceof HeapPointWriter == false) {
                // Adversarial cases can cause this, e.g. very lopsided data, all equal points, such that we started
                // offline, but then kept splitting only in one dimension, and so never had to rewrite into heap writer
                slices[dim] = switchToHeap(slices[dim], toCloseHeroically);
            }
            PathSlice source = slices[dim];
            HeapPointWriter heapSource = (HeapPointWriter) source.writer;
            // Find common prefix by comparing first and last values, already sorted in this dimension:
            heapSource.readPackedValue(Math.toIntExact(source.start), scratch1);
            heapSource.readPackedValue(Math.toIntExact(source.start + source.count - 1), scratch2);
            int offset = dim * bytesPerDim;
            commonPrefixLengths[dim] = bytesPerDim;
            for (int j = 0; j < bytesPerDim; j++) {
                if (scratch1[offset + j] != scratch2[offset + j]) {
                    commonPrefixLengths[dim] = j;
                    break;
                }
            }
            int prefix = commonPrefixLengths[dim];
            if (prefix < bytesPerDim) {
                int cardinality = 1;
                byte previous = scratch1[offset + prefix];
                for (long i = 1; i < source.count; ++i) {
                    heapSource.readPackedValue(Math.toIntExact(source.start + i), scratch2);
                    byte b = scratch2[offset + prefix];
                    assert Byte.toUnsignedInt(previous) <= Byte.toUnsignedInt(b);
                    if (b != previous) {
                        cardinality++;
                        previous = b;
                    }
                }
                assert cardinality <= 256;
                if (cardinality < sortedDimCardinality) {
                    sortedDim = dim;
                    sortedDimCardinality = cardinality;
                }
            }
        }
        PathSlice source = slices[sortedDim];
        // We ensured that maxPointsSortInHeap was >= maxPointsInLeafNode, so we better be in heap at this point:
        HeapPointWriter heapSource = (HeapPointWriter) source.writer;
        // Save the block file pointer:
        leafBlockFPs[nodeID - leafNodeOffset] = out.getFilePointer();
        //System.out.println("  write leaf block @ fp=" + out.getFilePointer());
        // Write docIDs first, as their own chunk, so that at intersect time we can add all docIDs w/o
        // loading the values:
        int count = Math.toIntExact(source.count);
        assert count > 0 : "nodeID=" + nodeID + " leafNodeOffset=" + leafNodeOffset;
        writeLeafBlockDocs(out, heapSource.docIDs, Math.toIntExact(source.start), count);
        // TODO: minor opto: we don't really have to write the actual common prefixes, because BKDReader on recursing can regenerate it for us
        // from the index, much like how terms dict does so from the FST:
        // Write the full values:
        IntFunction<BytesRef> packedValues = new IntFunction<BytesRef>() {

            final BytesRef scratch = new BytesRef();

            {
                scratch.length = packedBytesLength;
            }

            @Override
            public BytesRef apply(int i) {
                heapSource.getPackedValueSlice(Math.toIntExact(source.start + i), scratch);
                return scratch;
            }
        };
        assert valuesInOrderAndBounds(count, sortedDim, minPackedValue, maxPackedValue, packedValues, heapSource.docIDs, Math.toIntExact(source.start));
        writeLeafBlockPackedValues(out, commonPrefixLengths, count, sortedDim, packedValues);
    } else {
        // Inner node: partition/recurse
        int splitDim;
        if (numDims > 1) {
            splitDim = split(minPackedValue, maxPackedValue);
        } else {
            splitDim = 0;
        }
        PathSlice source = slices[splitDim];
        assert nodeID < splitPackedValues.length : "nodeID=" + nodeID + " splitValues.length=" + splitPackedValues.length;
        // How many points will be in the left tree:
        long rightCount = source.count / 2;
        long leftCount = source.count - rightCount;
        byte[] splitValue = markRightTree(rightCount, splitDim, source, ordBitSet);
        int address = nodeID * (1 + bytesPerDim);
        splitPackedValues[address] = (byte) splitDim;
        System.arraycopy(splitValue, 0, splitPackedValues, address + 1, bytesPerDim);
        // Partition all PathSlice that are not the split dim into sorted left and right sets, so we can recurse:
        PathSlice[] leftSlices = new PathSlice[numDims];
        PathSlice[] rightSlices = new PathSlice[numDims];
        byte[] minSplitPackedValue = new byte[packedBytesLength];
        System.arraycopy(minPackedValue, 0, minSplitPackedValue, 0, packedBytesLength);
        byte[] maxSplitPackedValue = new byte[packedBytesLength];
        System.arraycopy(maxPackedValue, 0, maxSplitPackedValue, 0, packedBytesLength);
        // When we are on this dim, below, we clear the ordBitSet:
        int dimToClear;
        if (numDims - 1 == splitDim) {
            dimToClear = numDims - 2;
        } else {
            dimToClear = numDims - 1;
        }
        for (int dim = 0; dim < numDims; dim++) {
            if (dim == splitDim) {
                // No need to partition on this dim since it's a simple slice of the incoming already sorted slice, and we
                // will re-use its shared reader when visiting it as we recurse:
                leftSlices[dim] = new PathSlice(source.writer, source.start, leftCount);
                rightSlices[dim] = new PathSlice(source.writer, source.start + leftCount, rightCount);
                System.arraycopy(splitValue, 0, minSplitPackedValue, dim * bytesPerDim, bytesPerDim);
                System.arraycopy(splitValue, 0, maxSplitPackedValue, dim * bytesPerDim, bytesPerDim);
                continue;
            }
            // Not inside the try because we don't want to close this one now, so that after recursion is done,
            // we will have done a single full sweep of the file:
            PointReader reader = slices[dim].writer.getSharedReader(slices[dim].start, slices[dim].count, toCloseHeroically);
            try (PointWriter leftPointWriter = getPointWriter(leftCount, "left" + dim);
                PointWriter rightPointWriter = getPointWriter(source.count - leftCount, "right" + dim)) {
                long nextRightCount = reader.split(source.count, ordBitSet, leftPointWriter, rightPointWriter, dim == dimToClear);
                if (rightCount != nextRightCount) {
                    throw new IllegalStateException("wrong number of points in split: expected=" + rightCount + " but actual=" + nextRightCount);
                }
                leftSlices[dim] = new PathSlice(leftPointWriter, 0, leftCount);
                rightSlices[dim] = new PathSlice(rightPointWriter, 0, rightCount);
            } catch (Throwable t) {
                throw verifyChecksum(t, slices[dim].writer);
            }
        }
        // Recurse on left tree:
        build(2 * nodeID, leafNodeOffset, leftSlices, ordBitSet, out, minPackedValue, maxSplitPackedValue, splitPackedValues, leafBlockFPs, toCloseHeroically);
        for (int dim = 0; dim < numDims; dim++) {
            // Don't destroy the dim we split on because we just re-used what our caller above gave us for that dim:
            if (dim != splitDim) {
                leftSlices[dim].writer.destroy();
            }
        }
        // TODO: we could "tail recurse" here?  have our parent discard its refs as we recurse right?
        // Recurse on right tree:
        build(2 * nodeID + 1, leafNodeOffset, rightSlices, ordBitSet, out, minSplitPackedValue, maxPackedValue, splitPackedValues, leafBlockFPs, toCloseHeroically);
        for (int dim = 0; dim < numDims; dim++) {
            // Don't destroy the dim we split on because we just re-used what our caller above gave us for that dim:
            if (dim != splitDim) {
                rightSlices[dim].writer.destroy();
            }
        }
    }
}
Also used : OfflinePointWriter(org.apache.lucene.util.bkd.OfflinePointWriter) HeapPointWriter(org.apache.lucene.util.bkd.HeapPointWriter) PointWriter(org.apache.lucene.util.bkd.PointWriter) PointReader(org.apache.lucene.util.bkd.PointReader) OfflinePointReader(org.apache.lucene.util.bkd.OfflinePointReader) IntFunction(java.util.function.IntFunction) BytesRef(org.apache.lucene.util.BytesRef)

Aggregations

IntFunction (java.util.function.IntFunction) 32
List (java.util.List) 10
Collectors (java.util.stream.Collectors) 10
Test (org.junit.Test) 9
Arrays (java.util.Arrays) 8
IOException (java.io.IOException) 7
ArrayList (java.util.ArrayList) 7
IntStream (java.util.stream.IntStream) 7
LoggerFactory (org.slf4j.LoggerFactory) 6
HashMap (java.util.HashMap) 5
Map (java.util.Map) 5
Objects (java.util.Objects) 5
Set (java.util.Set) 5
Logger (org.slf4j.Logger) 5
File (java.io.File) 4
ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap) 4
AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean) 4
Function (java.util.function.Function) 4
BytesRef (org.apache.lucene.util.BytesRef) 4
Nullable (javax.annotation.Nullable) 3
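
Outside these projects, IntFunction also commonly appears as the array generator passed to Stream.toArray, where the int argument is the length of the array to allocate. A small reference example:

import java.util.function.IntFunction;
import java.util.stream.Stream;

public class ArrayGeneratorSketch {
    public static void main(String[] args) {
        // Stream.toArray(IntFunction<A[]>) asks the function for an array of the right size.
        IntFunction<String[]> generator = String[]::new;
        String[] groups = Stream.of("core-0", "core-1", "replica-2").toArray(generator);
        System.out.println(groups.length + " elements, first = " + groups[0]);
    }
}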