Use of com.carrotsearch.hppc.IntHashSet in project elasticsearch by elastic.
From the class HyperLogLogPlusPlusTests, method testAccuracy:
public void testAccuracy() {
    final long bucket = randomInt(20);
    final int numValues = randomIntBetween(1, 100000);
    final int maxValue = randomIntBetween(1, randomBoolean() ? 1000 : 100000);
    final int p = randomIntBetween(14, MAX_PRECISION);
    IntHashSet set = new IntHashSet();
    HyperLogLogPlusPlus e = new HyperLogLogPlusPlus(p, BigArrays.NON_RECYCLING_INSTANCE, 1);
    for (int i = 0; i < numValues; ++i) {
        final int n = randomInt(maxValue);
        set.add(n);
        final long hash = BitMixer.mix64(n);
        e.collect(bucket, hash);
        if (randomInt(100) == 0) {
            //System.out.println(e.cardinality(bucket) + " <> " + set.size());
            assertThat((double) e.cardinality(bucket), closeTo(set.size(), 0.1 * set.size()));
        }
    }
    assertThat((double) e.cardinality(bucket), closeTo(set.size(), 0.1 * set.size()));
}
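The test above uses IntHashSet as an exact ground truth for distinct values and checks that the HyperLogLogPlusPlus estimate stays within 10% of set.size(). A minimal standalone sketch of that baseline pattern, with a hypothetical ApproxCounter in place of HyperLogLogPlusPlus and plain java.util.Random instead of the test framework's randomness:

import com.carrotsearch.hppc.IntHashSet;
import java.util.Random;

// Hedged sketch: compare an approximate distinct counter against an exact IntHashSet baseline.
// "ApproxCounter" is a hypothetical stand-in for a sketch such as HyperLogLogPlusPlus.
public class CardinalityBaselineSketch {

    interface ApproxCounter {
        void collect(long hash);
        long cardinality();
    }

    static double relativeError(ApproxCounter counter, int numValues, int maxValue, long seed) {
        Random random = new Random(seed);
        IntHashSet exact = new IntHashSet();            // exact set of distinct values
        for (int i = 0; i < numValues; i++) {
            int n = random.nextInt(maxValue + 1);
            exact.add(n);                               // duplicate values do not change size()
            counter.collect(mix64(n));                  // the sketch only ever sees hashes
        }
        return Math.abs(counter.cardinality() - exact.size()) / (double) exact.size();
    }

    // Simple 64-bit finalizer, standing in for BitMixer.mix64 from the original test.
    static long mix64(long v) {
        v = (v ^ (v >>> 33)) * 0xff51afd7ed558ccdL;
        v = (v ^ (v >>> 33)) * 0xc4ceb9fe1a85ec53L;
        return v ^ (v >>> 33);
    }
}

The point of the pattern is that IntHashSet ignores duplicate inserts, so its size() is always exactly the cardinality the sketch is supposed to approximate.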
Use of com.carrotsearch.hppc.IntHashSet in project elasticsearch by elastic.
From the class RelocationIT, method testRelocationWhileIndexingRandom:
@TestLogging("org.elasticsearch.action.bulk:TRACE,org.elasticsearch.action.search:TRACE")
public void testRelocationWhileIndexingRandom() throws Exception {
    int numberOfRelocations = scaledRandomIntBetween(1, rarely() ? 10 : 4);
    int numberOfReplicas = randomBoolean() ? 0 : 1;
    int numberOfNodes = numberOfReplicas == 0 ? 2 : 3;
    logger.info("testRelocationWhileIndexingRandom(numRelocations={}, numberOfReplicas={}, numberOfNodes={})", numberOfRelocations, numberOfReplicas, numberOfNodes);
    String[] nodes = new String[numberOfNodes];
    logger.info("--> starting [node1] ...");
    nodes[0] = internalCluster().startNode();
    logger.info("--> creating test index ...");
    prepareCreate("test", Settings.builder().put("index.number_of_shards", 1).put("index.number_of_replicas", numberOfReplicas)).get();
    for (int i = 2; i <= numberOfNodes; i++) {
        logger.info("--> starting [node{}] ...", i);
        nodes[i - 1] = internalCluster().startNode();
        if (i != numberOfNodes) {
            ClusterHealthResponse healthResponse = client().admin().cluster().prepareHealth()
                    .setWaitForEvents(Priority.LANGUID).setWaitForNodes(Integer.toString(i)).setWaitForGreenStatus()
                    .execute().actionGet();
            assertThat(healthResponse.isTimedOut(), equalTo(false));
        }
    }
    int numDocs = scaledRandomIntBetween(200, 2500);
    try (BackgroundIndexer indexer = new BackgroundIndexer("test", "type1", client(), numDocs)) {
        logger.info("--> waiting for {} docs to be indexed ...", numDocs);
        waitForDocs(numDocs, indexer);
        logger.info("--> {} docs indexed", numDocs);
        logger.info("--> starting relocations...");
        // if we have replicas shift those
        int nodeShiftBased = numberOfReplicas;
        for (int i = 0; i < numberOfRelocations; i++) {
            int fromNode = (i % 2);
            int toNode = fromNode == 0 ? 1 : 0;
            fromNode += nodeShiftBased;
            toNode += nodeShiftBased;
            numDocs = scaledRandomIntBetween(200, 1000);
            logger.debug("--> Allow indexer to index [{}] documents", numDocs);
            indexer.continueIndexing(numDocs);
            logger.info("--> START relocate the shard from {} to {}", nodes[fromNode], nodes[toNode]);
            client().admin().cluster().prepareReroute().add(new MoveAllocationCommand("test", 0, nodes[fromNode], nodes[toNode])).get();
            if (rarely()) {
                logger.debug("--> flushing");
                client().admin().indices().prepareFlush().get();
            }
            ClusterHealthResponse clusterHealthResponse = client().admin().cluster().prepareHealth()
                    .setWaitForEvents(Priority.LANGUID).setWaitForNoRelocatingShards(true).setTimeout(ACCEPTABLE_RELOCATION_TIME)
                    .execute().actionGet();
            assertThat(clusterHealthResponse.isTimedOut(), equalTo(false));
            indexer.pauseIndexing();
            logger.info("--> DONE relocate the shard from {} to {}", fromNode, toNode);
        }
        logger.info("--> done relocations");
        logger.info("--> waiting for indexing threads to stop ...");
        indexer.stop();
        logger.info("--> indexing threads stopped");
        logger.info("--> refreshing the index");
        client().admin().indices().prepareRefresh("test").execute().actionGet();
        logger.info("--> searching the index");
        boolean ranOnce = false;
        for (int i = 0; i < 10; i++) {
            logger.info("--> START search test round {}", i + 1);
            SearchHits hits = client().prepareSearch("test").setQuery(matchAllQuery())
                    .setSize((int) indexer.totalIndexedDocs()).storedFields()
                    .execute().actionGet().getHits();
            ranOnce = true;
            if (hits.getTotalHits() != indexer.totalIndexedDocs()) {
                int[] hitIds = new int[(int) indexer.totalIndexedDocs()];
                for (int hit = 0; hit < indexer.totalIndexedDocs(); hit++) {
                    hitIds[hit] = hit + 1;
                }
                IntHashSet set = IntHashSet.from(hitIds);
                for (SearchHit hit : hits.getHits()) {
                    int id = Integer.parseInt(hit.getId());
                    if (!set.remove(id)) {
                        logger.error("Extra id [{}]", id);
                    }
                }
                set.forEach((IntProcedure) value -> {
                    logger.error("Missing id [{}]", value);
                });
            }
            assertThat(hits.getTotalHits(), equalTo(indexer.totalIndexedDocs()));
            logger.info("--> DONE search test round {}", i + 1);
        }
        if (!ranOnce) {
            fail();
        }
    }
}
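The IntHashSet-specific part of the test above is the diff at the end: seed a set with every expected document id via IntHashSet.from, remove() each id that the search actually returned, and whatever is left in the set was never returned. A hedged, self-contained sketch of just that diffing step (plain arrays and System.err stand in for the Elasticsearch hits and logger):

import com.carrotsearch.hppc.IntHashSet;
import com.carrotsearch.hppc.procedures.IntProcedure;

// Hedged sketch of the "expected vs. returned ids" diff used in the relocation test above.
public class IdDiffSketch {

    public static void reportDiff(int[] expectedIds, int[] returnedIds) {
        IntHashSet expected = IntHashSet.from(expectedIds);   // everything we expect to see
        for (int id : returnedIds) {
            if (!expected.remove(id)) {
                // remove() returns false if the id was never expected (or was already seen)
                System.err.println("Extra id [" + id + "]");
            }
        }
        // anything still in the set was expected but never returned
        expected.forEach((IntProcedure) value -> System.err.println("Missing id [" + value + "]"));
    }

    public static void main(String[] args) {
        // reports 5 as extra and 1 and 4 as missing (order of the missing ids is not guaranteed)
        reportDiff(new int[] { 1, 2, 3, 4 }, new int[] { 2, 3, 5 });
    }
}

Using remove()'s boolean return value gives both directions of the diff in a single pass: false means the returned id was not expected, and the ids remaining in the set afterwards are the missing ones.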
Use of com.carrotsearch.hppc.IntHashSet in project elasticsearch by elastic.
From the class EquivalenceIT, method testDuelTerms:
// test long/double/string terms aggs with high number of buckets that require array growth
public void testDuelTerms() throws Exception {
    final int numDocs = scaledRandomIntBetween(1000, 2000);
    final int maxNumTerms = randomIntBetween(10, 5000);
    final IntHashSet valuesSet = new IntHashSet();
    cluster().wipeIndices("idx");
    prepareCreate("idx").addMapping("type", jsonBuilder().startObject().startObject("type").startObject("properties")
            .startObject("num").field("type", "double").endObject()
            .startObject("string_values").field("type", "keyword")
                .startObject("fields").startObject("doc_values").field("type", "keyword").field("index", false).endObject().endObject()
            .endObject()
            .startObject("long_values").field("type", "long").endObject()
            .startObject("double_values").field("type", "double").endObject()
            .endObject().endObject().endObject()).execute().actionGet();
    List<IndexRequestBuilder> indexingRequests = new ArrayList<>();
    for (int i = 0; i < numDocs; ++i) {
        final int[] values = new int[randomInt(4)];
        for (int j = 0; j < values.length; ++j) {
            values[j] = randomInt(maxNumTerms - 1) - 1000;
            valuesSet.add(values[j]);
        }
        XContentBuilder source = jsonBuilder().startObject().field("num", randomDouble()).startArray("long_values");
        for (int j = 0; j < values.length; ++j) {
            source = source.value(values[j]);
        }
        source = source.endArray().startArray("double_values");
        for (int j = 0; j < values.length; ++j) {
            source = source.value((double) values[j]);
        }
        source = source.endArray().startArray("string_values");
        for (int j = 0; j < values.length; ++j) {
            source = source.value(Integer.toString(values[j]));
        }
        source = source.endArray().endObject();
        indexingRequests.add(client().prepareIndex("idx", "type").setSource(source));
    }
    indexRandom(true, indexingRequests);
    assertNoFailures(client().admin().indices().prepareRefresh("idx").setIndicesOptions(IndicesOptions.lenientExpandOpen()).execute().get());
    TermsAggregatorFactory.ExecutionMode[] globalOrdinalModes = new TermsAggregatorFactory.ExecutionMode[] {
            TermsAggregatorFactory.ExecutionMode.GLOBAL_ORDINALS_HASH, TermsAggregatorFactory.ExecutionMode.GLOBAL_ORDINALS };
    SearchResponse resp = client().prepareSearch("idx")
            .addAggregation(terms("long").field("long_values").size(maxNumTerms)
                    .collectMode(randomFrom(SubAggCollectionMode.values()))
                    .subAggregation(min("min").field("num")))
            .addAggregation(terms("double").field("double_values").size(maxNumTerms)
                    .collectMode(randomFrom(SubAggCollectionMode.values()))
                    .subAggregation(max("max").field("num")))
            .addAggregation(terms("string_map").field("string_values")
                    .collectMode(randomFrom(SubAggCollectionMode.values()))
                    .executionHint(TermsAggregatorFactory.ExecutionMode.MAP.toString())
                    .size(maxNumTerms)
                    .subAggregation(stats("stats").field("num")))
            .addAggregation(terms("string_global_ordinals").field("string_values")
                    .collectMode(randomFrom(SubAggCollectionMode.values()))
                    .executionHint(globalOrdinalModes[randomInt(globalOrdinalModes.length - 1)].toString())
                    .size(maxNumTerms)
                    .subAggregation(extendedStats("stats").field("num")))
            .addAggregation(terms("string_global_ordinals_doc_values").field("string_values.doc_values")
                    .collectMode(randomFrom(SubAggCollectionMode.values()))
                    .executionHint(globalOrdinalModes[randomInt(globalOrdinalModes.length - 1)].toString())
                    .size(maxNumTerms)
                    .subAggregation(extendedStats("stats").field("num")))
            .execute().actionGet();
    assertAllSuccessful(resp);
    assertEquals(numDocs, resp.getHits().getTotalHits());
    final Terms longTerms = resp.getAggregations().get("long");
    final Terms doubleTerms = resp.getAggregations().get("double");
    final Terms stringMapTerms = resp.getAggregations().get("string_map");
    final Terms stringGlobalOrdinalsTerms = resp.getAggregations().get("string_global_ordinals");
    final Terms stringGlobalOrdinalsDVTerms = resp.getAggregations().get("string_global_ordinals_doc_values");
    assertEquals(valuesSet.size(), longTerms.getBuckets().size());
    assertEquals(valuesSet.size(), doubleTerms.getBuckets().size());
    assertEquals(valuesSet.size(), stringMapTerms.getBuckets().size());
    assertEquals(valuesSet.size(), stringGlobalOrdinalsTerms.getBuckets().size());
    assertEquals(valuesSet.size(), stringGlobalOrdinalsDVTerms.getBuckets().size());
    for (Terms.Bucket bucket : longTerms.getBuckets()) {
        final Terms.Bucket doubleBucket = doubleTerms.getBucketByKey(Double.toString(Long.parseLong(bucket.getKeyAsString())));
        final Terms.Bucket stringMapBucket = stringMapTerms.getBucketByKey(bucket.getKeyAsString());
        final Terms.Bucket stringGlobalOrdinalsBucket = stringGlobalOrdinalsTerms.getBucketByKey(bucket.getKeyAsString());
        final Terms.Bucket stringGlobalOrdinalsDVBucket = stringGlobalOrdinalsDVTerms.getBucketByKey(bucket.getKeyAsString());
        assertNotNull(doubleBucket);
        assertNotNull(stringMapBucket);
        assertNotNull(stringGlobalOrdinalsBucket);
        assertNotNull(stringGlobalOrdinalsDVBucket);
        assertEquals(bucket.getDocCount(), doubleBucket.getDocCount());
        assertEquals(bucket.getDocCount(), stringMapBucket.getDocCount());
        assertEquals(bucket.getDocCount(), stringGlobalOrdinalsBucket.getDocCount());
        assertEquals(bucket.getDocCount(), stringGlobalOrdinalsDVBucket.getDocCount());
    }
}
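In this aggregation test the IntHashSet is simply the client-side record of which distinct integer terms were generated while building the documents, so every terms aggregation can be asserted to produce exactly valuesSet.size() buckets. A hedged minimal sketch of that bookkeeping, leaving out the indexing and search parts:

import com.carrotsearch.hppc.IntHashSet;
import java.util.Random;

// Hedged sketch: track the distinct integer terms generated while building test documents,
// so an aggregation result can later be compared against valuesSet.size().
public class DistinctTermsTally {

    public static IntHashSet generateDocs(int numDocs, int maxNumTerms, long seed) {
        Random random = new Random(seed);
        IntHashSet valuesSet = new IntHashSet();
        for (int i = 0; i < numDocs; i++) {
            int numValuesInDoc = random.nextInt(5);             // 0..4 values per document
            for (int j = 0; j < numValuesInDoc; j++) {
                int value = random.nextInt(maxNumTerms) - 1000;  // roughly the value range used in the test above
                valuesSet.add(value);                            // the set ignores repeats across documents
                // ... here the value would be written into the document's long/double/string fields
            }
        }
        return valuesSet;                                        // size() == expected number of term buckets
    }
}

Because a set ignores repeats, it does not matter how often a value occurs across documents; only the number of distinct values has to match the bucket count.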
Use of com.carrotsearch.hppc.IntHashSet in project elasticsearch by elastic.
From the class DedicatedClusterSnapshotRestoreIT, method testRestoreIndexWithShardsMissingInLocalGateway:
public void testRestoreIndexWithShardsMissingInLocalGateway() throws Exception {
    logger.info("--> start 2 nodes");
    Settings nodeSettings = Settings.builder().put(EnableAllocationDecider.CLUSTER_ROUTING_REBALANCE_ENABLE_SETTING.getKey(), EnableAllocationDecider.Rebalance.NONE).build();
    internalCluster().startNode(nodeSettings);
    internalCluster().startNode(nodeSettings);
    cluster().wipeIndices("_all");
    logger.info("--> create repository");
    PutRepositoryResponse putRepositoryResponse = client().admin().cluster().preparePutRepository("test-repo")
            .setType("fs").setSettings(Settings.builder().put("location", randomRepoPath()))
            .execute().actionGet();
    assertThat(putRepositoryResponse.isAcknowledged(), equalTo(true));
    int numberOfShards = 6;
    logger.info("--> create an index that will have some unallocated shards");
    assertAcked(prepareCreate("test-idx", 2, Settings.builder().put("number_of_shards", numberOfShards).put("number_of_replicas", 0)));
    ensureGreen();
    logger.info("--> indexing some data into test-idx");
    for (int i = 0; i < 100; i++) {
        index("test-idx", "doc", Integer.toString(i), "foo", "bar" + i);
    }
    refresh();
    assertThat(client().prepareSearch("test-idx").setSize(0).get().getHits().getTotalHits(), equalTo(100L));
    logger.info("--> start snapshot");
    assertThat(client().admin().cluster().prepareCreateSnapshot("test-repo", "test-snap-1").setIndices("test-idx").setWaitForCompletion(true).get().getSnapshotInfo().state(), equalTo(SnapshotState.SUCCESS));
    logger.info("--> close the index");
    assertAcked(client().admin().indices().prepareClose("test-idx"));
    logger.info("--> shutdown one of the nodes that should make half of the shards unavailable");
    internalCluster().restartRandomDataNode(new InternalTestCluster.RestartCallback() {
        @Override
        public boolean clearData(String nodeName) {
            return true;
        }
    });
    assertThat(client().admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).setTimeout("1m").setWaitForNodes("2").execute().actionGet().isTimedOut(), equalTo(false));
    logger.info("--> restore index snapshot");
    assertThat(client().admin().cluster().prepareRestoreSnapshot("test-repo", "test-snap-1").setRestoreGlobalState(false).setWaitForCompletion(true).get().getRestoreInfo().successfulShards(), equalTo(6));
    ensureGreen("test-idx");
    assertThat(client().prepareSearch("test-idx").setSize(0).get().getHits().getTotalHits(), equalTo(100L));
    IntSet reusedShards = new IntHashSet();
    for (RecoveryState recoveryState : client().admin().indices().prepareRecoveries("test-idx").get().shardRecoveryStates().get("test-idx")) {
        if (recoveryState.getIndex().reusedBytes() > 0) {
            reusedShards.add(recoveryState.getShardId().getId());
        }
    }
    logger.info("--> check that at least half of the shards had some reuse: [{}]", reusedShards);
    assertThat(reusedShards.size(), greaterThanOrEqualTo(numberOfShards / 2));
}
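Here the set is declared against the hppc IntSet interface and just collects the ids of shards whose recovery reused local bytes, so the assertion can count them and the log line can print them. A hedged standalone sketch of that collection step, with a hypothetical ShardRecovery record in place of Elasticsearch's RecoveryState:

import com.carrotsearch.hppc.IntHashSet;
import com.carrotsearch.hppc.IntSet;
import java.util.List;

// Hedged sketch: collect the ids of shards whose recovery reused local bytes.
// "ShardRecovery" is a hypothetical record standing in for Elasticsearch's RecoveryState.
public class ReusedShardsSketch {

    record ShardRecovery(int shardId, long reusedBytes) {}

    public static IntSet reusedShards(List<ShardRecovery> recoveries) {
        IntSet reused = new IntHashSet();
        for (ShardRecovery recovery : recoveries) {
            if (recovery.reusedBytes() > 0) {
                reused.add(recovery.shardId());   // duplicate shard ids collapse to one entry
            }
        }
        return reused;                            // toString() gives a readable list of the collected ids
    }
}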
Use of com.carrotsearch.hppc.IntHashSet in project graphhopper by graphhopper.
From the class LandmarkStorage, method createLandmarks:
/**
 * This method calculates the landmarks and initial weightings to & from them.
 */
public void createLandmarks() {
    if (isInitialized())
        throw new IllegalStateException("Initialize the landmark storage only once!");
    // fill 'from' and 'to' weights with maximum value
    long maxBytes = (long) graph.getNodes() * LM_ROW_LENGTH;
    this.landmarkWeightDA.create(2000);
    this.landmarkWeightDA.ensureCapacity(maxBytes);
    for (long pointer = 0; pointer < maxBytes; pointer += 2) {
        landmarkWeightDA.setShort(pointer, (short) SHORT_INFINITY);
    }
    String additionalInfo = "";
    // guess the factor
    if (factor <= 0) {
        // A 'factor' is necessary to store the weight in just a short value without losing too much precision.
        // This factor is rather delicate to pick; we estimate it from the maximum distance spanned by the graph bounds.
        // For small areas we use max_bounds_dist*X, otherwise we use a big fixed value for this distance.
        // If we picked the distance too big for small areas this could lead to (slightly) suboptimal routes because
        // the rounding errors get too big. But picking it too small is dangerous for performance:
        // e.g. for Germany at least 1500km is important, otherwise queries are at least twice as slow as with e.g. just 1000km.
        BBox bounds = graph.getBounds();
        double distanceInMeter = Helper.DIST_EARTH.calcDist(bounds.maxLat, bounds.maxLon, bounds.minLat, bounds.minLon) * 7;
        if (distanceInMeter > 50_000 * 7 || /* for tests and convenience we do for now: */ !bounds.isValid())
            distanceInMeter = 30_000_000;
        double maxWeight = weighting.getMinWeight(distanceInMeter);
        setMaximumWeight(maxWeight);
        additionalInfo = ", maxWeight:" + maxWeight + ", from max distance:" + distanceInMeter / 1000f + "km";
    }
    LOGGER.info("init landmarks for subnetworks with node count greater than " + minimumNodes + " with factor:" + factor + additionalInfo);
    // special subnetwork 0
    int[] empty = new int[landmarks];
    Arrays.fill(empty, UNSET_SUBNETWORK);
    landmarkIDs.add(empty);
    byte[] subnetworks = new byte[graph.getNodes()];
    Arrays.fill(subnetworks, (byte) UNSET_SUBNETWORK);
    EdgeFilter tarjanFilter = new DefaultEdgeFilter(encoder, false, true);
    IntHashSet blockedEdges = new IntHashSet();
    // the ruleLookup splits certain areas from each other, but avoids making this a permanent change,
    // so that other algorithms can still route through these regions
    if (ruleLookup != null && ruleLookup.size() > 0) {
        StopWatch sw = new StopWatch().start();
        blockedEdges = findBorderEdgeIds(ruleLookup);
        tarjanFilter = new BlockedEdgesFilter(encoder, false, true, blockedEdges);
        LOGGER.info("Made " + blockedEdges.size() + " edges inaccessible. Calculated country cut in " + sw.stop().getSeconds() + "s, " + Helper.getMemInfo());
    }
    StopWatch sw = new StopWatch().start();
    // we cannot reuse the components calculated in PrepareRoutingSubnetworks as the edgeIds changed in between (graph.optimize was called);
    // also, calculating subnetworks from scratch causes bigger problems when working with many oneways
    TarjansSCCAlgorithm tarjanAlgo = new TarjansSCCAlgorithm(graph, tarjanFilter, true);
    List<IntArrayList> graphComponents = tarjanAlgo.findComponents();
    LOGGER.info("Calculated tarjan subnetworks in " + sw.stop().getSeconds() + "s, " + Helper.getMemInfo());
    EdgeExplorer tmpExplorer = graph.createEdgeExplorer(new RequireBothDirectionsEdgeFilter(encoder));
    int nodes = 0;
    for (IntArrayList subnetworkIds : graphComponents) {
        nodes += subnetworkIds.size();
        if (subnetworkIds.size() < minimumNodes)
            continue;
        int index = subnetworkIds.size() - 1;
        // ensure the start node is reachable from both sides and no subnetwork is associated yet
        for (; index >= 0; index--) {
            int nextStartNode = subnetworkIds.get(index);
            if (subnetworks[nextStartNode] == UNSET_SUBNETWORK && GHUtility.count(tmpExplorer.setBaseNode(nextStartNode)) > 0) {
                GHPoint p = createPoint(graph, nextStartNode);
                LOGGER.info("start node: " + nextStartNode + " (" + p + ") subnetwork size: " + subnetworkIds.size() + ", " + Helper.getMemInfo() + ((ruleLookup == null) ? "" : " area:" + ruleLookup.lookupRule(p).getId()));
                if (createLandmarksForSubnetwork(nextStartNode, subnetworks, blockedEdges))
                    break;
            }
        }
        if (index < 0)
            LOGGER.warn("next start node not found in big enough network of size " + subnetworkIds.size() + ", first element is " + subnetworkIds.get(0) + ", " + createPoint(graph, subnetworkIds.get(0)));
    }
    int subnetworkCount = landmarkIDs.size();
    // store all landmark node IDs and one int for the factor itself.
    this.landmarkWeightDA.ensureCapacity(maxBytes /* landmark weights */
            + subnetworkCount * landmarks * 4 /* 4 bytes per landmark node id, matching the setInt loop below */);
    // calculate offset to point into landmark mapping
    long bytePos = maxBytes;
    for (int[] landmarks : landmarkIDs) {
        for (int lmNodeId : landmarks) {
            landmarkWeightDA.setInt(bytePos, lmNodeId);
            bytePos += 4L;
        }
    }
    landmarkWeightDA.setHeader(0 * 4, graph.getNodes());
    landmarkWeightDA.setHeader(1 * 4, landmarks);
    landmarkWeightDA.setHeader(2 * 4, subnetworkCount);
    if (factor * DOUBLE_MLTPL > Integer.MAX_VALUE)
        throw new UnsupportedOperationException("landmark weight factor cannot be bigger than Integer.MAX_VALUE " + factor * DOUBLE_MLTPL);
    landmarkWeightDA.setHeader(3 * 4, (int) Math.round(factor * DOUBLE_MLTPL));
    // serialize fast byte[] into DataAccess
    subnetworkStorage.create(graph.getNodes());
    for (int nodeId = 0; nodeId < subnetworks.length; nodeId++) {
        subnetworkStorage.setSubnetwork(nodeId, subnetworks[nodeId]);
    }
    LOGGER.info("Finished landmark creation. Subnetwork node count sum " + nodes + " vs. nodes " + graph.getNodes());
    initialized = true;
}
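In this GraphHopper method the IntHashSet holds the edge ids found by findBorderEdgeIds(ruleLookup), i.e. edges crossing a border of the spatial rule lookup; wrapping them in a filter makes those edges temporarily inaccessible for the Tarjan component search and the landmark selection without permanently changing the graph. A hedged sketch of that blocked-edge filtering idea, using hypothetical minimal Edge/EdgeFilter types rather than GraphHopper's real API:

import com.carrotsearch.hppc.IntHashSet;

// Hedged sketch: an edge filter that rejects any edge whose id was collected into a blocked set,
// mirroring the role of BlockedEdgesFilter in the landmark preparation above.
// "Edge" and "EdgeFilter" here are hypothetical minimal types, not GraphHopper's API.
public class BlockedEdgesSketch {

    interface Edge {
        int edgeId();
    }

    interface EdgeFilter {
        boolean accept(Edge edge);
    }

    static EdgeFilter blocking(IntHashSet blockedEdgeIds, EdgeFilter baseFilter) {
        // an edge passes only if the base filter accepts it AND it is not in the blocked set
        return edge -> !blockedEdgeIds.contains(edge.edgeId()) && baseFilter.accept(edge);
    }

    public static void main(String[] args) {
        IntHashSet blocked = new IntHashSet();
        blocked.add(42);                                   // e.g. an edge id crossing a region border
        EdgeFilter filter = blocking(blocked, edge -> true);
        System.out.println(filter.accept(() -> 42));       // false: blocked
        System.out.println(filter.accept(() -> 7));        // true
    }
}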