Use of com.google.common.hash.HashFunction in project hive by apache.
From the class TestMurmur3, method testHashCodesM3_128_string:
@Test
public void testHashCodesM3_128_string() {
  String key = "test";
  int seed = 123;
  HashFunction hf = Hashing.murmur3_128(seed);
  // guava stores the hashcodes in little endian order
  ByteBuffer buf = ByteBuffer.allocate(16).order(ByteOrder.LITTLE_ENDIAN);
  buf.put(hf.hashBytes(key.getBytes()).asBytes());
  buf.flip();
  long gl1 = buf.getLong();
  long gl2 = buf.getLong(8);
  long[] hc = Murmur3.hash128(key.getBytes(), 0, key.getBytes().length, seed);
  long m1 = hc[0];
  long m2 = hc[1];
  assertEquals(gl1, m1);
  assertEquals(gl2, m2);

  key = "testkey128_testkey128";
  buf = ByteBuffer.allocate(16).order(ByteOrder.LITTLE_ENDIAN);
  buf.put(hf.hashBytes(key.getBytes()).asBytes());
  buf.flip();
  gl1 = buf.getLong();
  gl2 = buf.getLong(8);
  byte[] keyBytes = key.getBytes();
  hc = Murmur3.hash128(keyBytes, 0, keyBytes.length, seed);
  m1 = hc[0];
  m2 = hc[1];
  assertEquals(gl1, m1);
  assertEquals(gl2, m2);

  byte[] offsetKeyBytes = new byte[keyBytes.length + 35];
  Arrays.fill(offsetKeyBytes, (byte) -1);
  System.arraycopy(keyBytes, 0, offsetKeyBytes, 35, keyBytes.length);
  hc = Murmur3.hash128(offsetKeyBytes, 35, keyBytes.length, seed);
  assertEquals(gl1, hc[0]);
  assertEquals(gl2, hc[1]);
}
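The test above hinges on Guava serializing a 128-bit Murmur3 hash as little-endian bytes. A minimal standalone sketch of that round trip, using hashString with an explicit charset rather than the platform-default key.getBytes(); the class name Murmur3LittleEndianDemo is invented for illustration:

import com.google.common.hash.HashCode;
import com.google.common.hash.Hashing;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.charset.StandardCharsets;

public class Murmur3LittleEndianDemo {
  public static void main(String[] args) {
    // hashString with an explicit charset avoids depending on the
    // platform-default encoding that String.getBytes() would use.
    HashCode hc = Hashing.murmur3_128(123).hashString("test", StandardCharsets.UTF_8);
    // Guava exposes the 128-bit result as 16 little-endian bytes; wrapping
    // them in a LITTLE_ENDIAN buffer recovers the two 64-bit halves.
    ByteBuffer buf = ByteBuffer.wrap(hc.asBytes()).order(ByteOrder.LITTLE_ENDIAN);
    long lo = buf.getLong(0);
    long hi = buf.getLong(8);
    System.out.printf("lo=%016x hi=%016x%n", lo, hi);
  }
}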
Use of com.google.common.hash.HashFunction in project druid by druid-io.
From the class CachingClusteredClientTest, method testGroupByCaching:
@Test
public void testGroupByCaching() throws Exception {
  List<AggregatorFactory> aggsWithUniques = ImmutableList.<AggregatorFactory>builder()
      .addAll(AGGS)
      .add(new HyperUniquesAggregatorFactory("uniques", "uniques"))
      .build();
  final HashFunction hashFn = Hashing.murmur3_128();
  GroupByQuery.Builder builder = new GroupByQuery.Builder()
      .setDataSource(DATA_SOURCE)
      .setQuerySegmentSpec(SEG_SPEC)
      .setDimFilter(DIM_FILTER)
      .setGranularity(GRANULARITY)
      .setDimensions(Arrays.<DimensionSpec>asList(new DefaultDimensionSpec("a", "a")))
      .setAggregatorSpecs(aggsWithUniques)
      .setPostAggregatorSpecs(POST_AGGS)
      .setContext(CONTEXT);
  final HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
  collector.add(hashFn.hashString("abc123", Charsets.UTF_8).asBytes());
  collector.add(hashFn.hashString("123abc", Charsets.UTF_8).asBytes());
  testQueryCaching(
      client,
      builder.build(),
      new Interval("2011-01-01/2011-01-02"),
      makeGroupByResults(
          new DateTime("2011-01-01"), ImmutableMap.of("a", "a", "rows", 1, "imps", 1, "impers", 1, "uniques", collector)),
      new Interval("2011-01-02/2011-01-03"),
      makeGroupByResults(
          new DateTime("2011-01-02"), ImmutableMap.of("a", "b", "rows", 2, "imps", 2, "impers", 2, "uniques", collector)),
      new Interval("2011-01-05/2011-01-10"),
      makeGroupByResults(
          new DateTime("2011-01-05"), ImmutableMap.of("a", "c", "rows", 3, "imps", 3, "impers", 3, "uniques", collector),
          new DateTime("2011-01-06"), ImmutableMap.of("a", "d", "rows", 4, "imps", 4, "impers", 4, "uniques", collector),
          new DateTime("2011-01-07"), ImmutableMap.of("a", "e", "rows", 5, "imps", 5, "impers", 5, "uniques", collector),
          new DateTime("2011-01-08"), ImmutableMap.of("a", "f", "rows", 6, "imps", 6, "impers", 6, "uniques", collector),
          new DateTime("2011-01-09"), ImmutableMap.of("a", "g", "rows", 7, "imps", 7, "impers", 7, "uniques", collector)),
      new Interval("2011-01-05/2011-01-10"),
      makeGroupByResults(
          new DateTime("2011-01-05T01"), ImmutableMap.of("a", "c", "rows", 3, "imps", 3, "impers", 3, "uniques", collector),
          new DateTime("2011-01-06T01"), ImmutableMap.of("a", "d", "rows", 4, "imps", 4, "impers", 4, "uniques", collector),
          new DateTime("2011-01-07T01"), ImmutableMap.of("a", "e", "rows", 5, "imps", 5, "impers", 5, "uniques", collector),
          new DateTime("2011-01-08T01"), ImmutableMap.of("a", "f", "rows", 6, "imps", 6, "impers", 6, "uniques", collector),
          new DateTime("2011-01-09T01"), ImmutableMap.of("a", "g", "rows", 7, "imps", 7, "impers", 7, "uniques", collector)));
  QueryRunner runner = new FinalizeResultsQueryRunner(
      client,
      GroupByQueryRunnerTest.makeQueryRunnerFactory(new GroupByQueryConfig()).getToolchest());
  HashMap<String, Object> context = new HashMap<String, Object>();
  TestHelper.assertExpectedObjects(
      makeGroupByResults(
          new DateTime("2011-01-05T"), ImmutableMap.of("a", "c", "rows", 3, "imps", 3, "impers", 3, "uniques", collector),
          new DateTime("2011-01-05T01"), ImmutableMap.of("a", "c", "rows", 3, "imps", 3, "impers", 3, "uniques", collector),
          new DateTime("2011-01-06T"), ImmutableMap.of("a", "d", "rows", 4, "imps", 4, "impers", 4, "uniques", collector),
          new DateTime("2011-01-06T01"), ImmutableMap.of("a", "d", "rows", 4, "imps", 4, "impers", 4, "uniques", collector),
          new DateTime("2011-01-07T"), ImmutableMap.of("a", "e", "rows", 5, "imps", 5, "impers", 5, "uniques", collector),
          new DateTime("2011-01-07T01"), ImmutableMap.of("a", "e", "rows", 5, "imps", 5, "impers", 5, "uniques", collector),
          new DateTime("2011-01-08T"), ImmutableMap.of("a", "f", "rows", 6, "imps", 6, "impers", 6, "uniques", collector),
          new DateTime("2011-01-08T01"), ImmutableMap.of("a", "f", "rows", 6, "imps", 6, "impers", 6, "uniques", collector),
          new DateTime("2011-01-09T"), ImmutableMap.of("a", "g", "rows", 7, "imps", 7, "impers", 7, "uniques", collector),
          new DateTime("2011-01-09T01"), ImmutableMap.of("a", "g", "rows", 7, "imps", 7, "impers", 7, "uniques", collector)),
      runner.run(builder.setInterval("2011-01-05/2011-01-10").build(), context),
      "");
}
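The core pattern this test exercises is worth isolating: each distinct value is hashed with murmur3_128 and the 128-bit digest is added to a HyperLogLogCollector, which then estimates the distinct count. A minimal sketch, assuming Druid's HyperLogLogCollector (its package differs across Druid versions) and its estimateCardinality() accessor are on the classpath:

import com.google.common.base.Charsets;
import com.google.common.hash.HashFunction;
import com.google.common.hash.Hashing;
// import of HyperLogLogCollector omitted; its package varies by Druid version

public class HllSketchDemo {
  public static void main(String[] args) {
    HashFunction hashFn = Hashing.murmur3_128();
    HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
    // Duplicates hash identically, so they collapse inside the sketch.
    for (String value : new String[] { "abc123", "123abc", "abc123" }) {
      collector.add(hashFn.hashString(value, Charsets.UTF_8).asBytes());
    }
    // Expect an estimate close to 2 for the two distinct values.
    System.out.println(collector.estimateCardinality());
  }
}

A well-mixed hash such as murmur3_128 matters here: HyperLogLog's accuracy guarantees assume the input bits are uniformly distributed.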
Use of com.google.common.hash.HashFunction in project druid by druid-io.
From the class HyperLogLogCollectorTest, method showErrorRate:
// Provides a nice printout of error rates as a function of cardinality
@Ignore
@Test
public void showErrorRate() throws Exception {
  HashFunction fn = Hashing.murmur3_128();
  Random random = new Random();
  double error = 0.0d;
  int count = 0;
  final int[] valsToCheck = {
      10, 20, 50, 100, 1000, 2000, 5000, 10000, 20000, 50000,
      100000, 1000000, 2000000, 10000000, Integer.MAX_VALUE
  };
  for (int numThings : valsToCheck) {
    long startTime = System.currentTimeMillis();
    HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
    for (int i = 0; i < numThings; ++i) {
      if (i != 0 && i % 100000000 == 0) {
        ++count;
        error = computeError(error, count, i, startTime, collector);
      }
      collector.add(fn.hashLong(random.nextLong()).asBytes());
    }
    ++count;
    error = computeError(error, count, numThings, startTime, collector);
  }
}
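The computeError helper this test calls is not part of the excerpt. A hypothetical sketch of a matching helper, treating error as a running mean of relative error; the signature mirrors the call sites above, but the body is invented and the real Druid helper may differ:

// Hypothetical: not the actual Druid implementation.
private double computeError(double error, int count, int numThings, long startTime, HyperLogLogCollector collector) {
  final double estimate = collector.estimateCardinality();
  final double relativeError = Math.abs(estimate - numThings) / numThings;
  System.out.printf("n=%,d estimate=%,.0f error=%.4f%% elapsed=%,dms%n",
      numThings, estimate, 100 * relativeError, System.currentTimeMillis() - startTime);
  // Fold the latest observation into the running mean carried by the caller.
  return (error * (count - 1) + relativeError) / count;
}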
Use of com.google.common.hash.HashFunction in project lucene-solr by apache.
From the class StatsComponentTest, method testCardinality:
/** @see #testHllOptions */
public void testCardinality() throws Exception {
  SolrCore core = h.getCore();
  // ensure we have the same hasher a_l would use
  HashFunction hasher = HllOptions.parseHllOptions(
      params("cardinality", "true"), core.getLatestSchema().getField("a_l")).getHasher();
  String[] baseParams = new String[] { "q", "*:*", "stats", "true", "indent", "true", "rows", "0" };
  assertQ("empty cardinalities",
      req(params("stats.field", "{!key=a cardinality=true}a_l",
          "stats.field", "{!key=pa cardinality=true}prehashed_a_l",
          "stats.field", "{!key=b cardinality=true}b_l",
          "stats.field", "{!key=c cardinality=true}c_l"), baseParams),
      cardinalityXpath("a", 0), cardinalityXpath("pa", 0),
      cardinalityXpath("b", 0), cardinalityXpath("c", 0));
  int id = 0;
  // add trivial docs to test basic cardinality
  for (int i = 0; i < 100; i++) {
    // add the same values multiple times (diff docs)
    for (int j = 0; j < 5; j++) {
      ++id;
      assertU(adoc("id", "" + id,
          "a_l", "" + i,
          "prehashed_a_l", "" + hasher.hashLong((long) i).asLong(),
          "b_l", "" + (i % 7),
          "c_l", "" + id));
    }
  }
  assertU(commit());
  assertQ("various cardinalities",
      req(params("stats.field", "{!key=a cardinality=true}a_l",
          "stats.field", "{!key=pa hllPreHashed=true cardinality=true}prehashed_a_l",
          "stats.field", "{!key=b cardinality=true}b_l",
          "stats.field", "{!key=c cardinality=true}c_l"), baseParams),
      cardinalityXpath("a", 100), cardinalityXpath("pa", 100),
      cardinalityXpath("b", 7), cardinalityXpath("c", 500));
  // various ways of explicitly saying "don't bother to compute cardinality"
  for (SolrParams p : new SolrParams[] {
      params("stats.field", "{!key=a min=true cardinality=false}a_l"),
      params("stats.field", "{!key=a min=true cardinality=$doit}a_l", "doit", "false"),
      params("stats.field", "{!key=a min=true cardinality=$doit}a_l"), // missing doit param
      // other tuning options shouldn't change things
      params("stats.field", "{!key=a min=true hllPreHashed=true cardinality=false}a_l"),
      params("stats.field", "{!key=a min=true hllRegwidth=4 cardinality=$doit}a_l", "doit", "false"),
      params("stats.field", "{!key=a min=true hllLog2m=18 cardinality=$doit}a_l") // missing doit param
  }) {
    assertQ("min w/cardinality explicitly disabled", req(p, baseParams),
        "count(//lst[@name='stats_fields']/lst[@name='a']/double[@name='min'])=1",
        "count(//lst[@name='stats_fields']/lst[@name='a']/long[@name='cardinality'])=0");
  }
}
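The prehashed_a_l field works because the client applies exactly the hash Solr would otherwise apply itself. A sketch of that contract, assuming the schema's hasher resolves to Guava's murmur3_128 as HllOptions.parseHllOptions does for the long field in the test; the class name PreHashDemo and the value 42 are illustrative:

import com.google.common.hash.HashFunction;
import com.google.common.hash.Hashing;

public class PreHashDemo {
  public static void main(String[] args) {
    // Must match the hasher HllOptions.parseHllOptions(...) returns for a_l.
    HashFunction hasher = Hashing.murmur3_128();
    long raw = 42L;
    // Index this value into prehashed_a_l, then query it with
    // stats.field={!key=pa hllPreHashed=true cardinality=true}prehashed_a_l
    long prehashed = hasher.hashLong(raw).asLong();
    System.out.println(prehashed);
  }
}

Pre-hashing shifts the hashing cost from query time to index time, which is why the test asserts the pa key yields the same cardinality as the unhashed a key.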
Use of com.google.common.hash.HashFunction in project phoenix by apache.
From the class SkipScanFilter, method hashCode:
@Override
public int hashCode() {
  HashFunction hf = Hashing.goodFastHash(32);
  Hasher h = hf.newHasher();
  h.putInt(slots.size());
  for (int i = 0; i < slots.size(); i++) {
    h.putInt(slots.get(i).size());
    // iterate the ranges within slot i, not the outer slot list
    for (int j = 0; j < slots.get(i).size(); j++) {
      h.putBytes(slots.get(i).get(j).getLowerRange());
      h.putBytes(slots.get(i).get(j).getUpperRange());
    }
  }
  return h.hash().asInt();
}
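One caveat worth noting here: Guava documents Hashing.goodFastHash(32) as an unspecified algorithm that may change between releases, and it is seeded per JVM start, so its output is only stable within a single process. That is acceptable for an in-memory hashCode() like this one, but a fixed algorithm is needed if the hash is ever persisted or compared across processes. A small sketch of the distinction, with illustrative inputs:

import com.google.common.hash.Hashing;

public class HashStabilityDemo {
  public static void main(String[] args) {
    byte[] range = { 1, 2, 3 };
    // Stable only within this JVM run: fine for hashCode().
    int transientHash = Hashing.goodFastHash(32).newHasher()
        .putInt(range.length)
        .putBytes(range)
        .hash().asInt();
    // Stable across runs and machines: use for anything persisted.
    int stableHash = Hashing.murmur3_128().newHasher()
        .putInt(range.length)
        .putBytes(range)
        .hash().asInt();
    System.out.println(transientHash + " " + stableHash);
  }
}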