Example 6 with HashFunction

Use of com.google.common.hash.HashFunction in project hive by apache.

From class TestMurmur3, method testHashCodesM3_128_string.

@Test
public void testHashCodesM3_128_string() {
    String key = "test";
    int seed = 123;
    HashFunction hf = Hashing.murmur3_128(seed);
    // Guava stores the hash bytes in little-endian order
    ByteBuffer buf = ByteBuffer.allocate(16).order(ByteOrder.LITTLE_ENDIAN);
    buf.put(hf.hashBytes(key.getBytes()).asBytes());
    buf.flip();
    long gl1 = buf.getLong();
    long gl2 = buf.getLong(8);
    long[] hc = Murmur3.hash128(key.getBytes(), 0, key.getBytes().length, seed);
    long m1 = hc[0];
    long m2 = hc[1];
    assertEquals(gl1, m1);
    assertEquals(gl2, m2);
    key = "testkey128_testkey128";
    buf = ByteBuffer.allocate(16).order(ByteOrder.LITTLE_ENDIAN);
    buf.put(hf.hashBytes(key.getBytes()).asBytes());
    buf.flip();
    gl1 = buf.getLong();
    gl2 = buf.getLong(8);
    byte[] keyBytes = key.getBytes();
    hc = Murmur3.hash128(keyBytes, 0, keyBytes.length, seed);
    m1 = hc[0];
    m2 = hc[1];
    assertEquals(gl1, m1);
    assertEquals(gl2, m2);
    byte[] offsetKeyBytes = new byte[keyBytes.length + 35];
    Arrays.fill(offsetKeyBytes, (byte) -1);
    System.arraycopy(keyBytes, 0, offsetKeyBytes, 35, keyBytes.length);
    hc = Murmur3.hash128(offsetKeyBytes, 35, keyBytes.length, seed);
    assertEquals(gl1, hc[0]);
    assertEquals(gl2, hc[1]);
}
Also used : HashFunction(com.google.common.hash.HashFunction) ByteBuffer(java.nio.ByteBuffer) Test(org.junit.Test)
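
The little-endian detail in Example 6 is worth isolating. Below is a minimal, self-contained sketch (class and variable names are ours, not Hive's) showing that Guava's HashCode.asLong(), documented to return the first eight bytes in little-endian order, agrees with a LITTLE_ENDIAN ByteBuffer read of asBytes():

import com.google.common.hash.HashCode;
import com.google.common.hash.Hashing;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.charset.StandardCharsets;

public class Murmur3EndiannessDemo {
    public static void main(String[] args) {
        HashCode hc = Hashing.murmur3_128(123).hashBytes("test".getBytes(StandardCharsets.UTF_8));
        // The 128-bit digest is exposed as 16 bytes; read both 64-bit words little-endian.
        ByteBuffer buf = ByteBuffer.wrap(hc.asBytes()).order(ByteOrder.LITTLE_ENDIAN);
        long lo = buf.getLong(0);
        long hi = buf.getLong(8);
        // asLong() returns the first 8 bytes in little-endian order, so it equals lo.
        System.out.println(lo == hc.asLong()); // true
        System.out.println(Long.toHexString(lo) + " " + Long.toHexString(hi));
    }
}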

Example 7 with HashFunction

Use of com.google.common.hash.HashFunction in project druid by druid-io.

From class CachingClusteredClientTest, method testGroupByCaching.

@Test
public void testGroupByCaching() throws Exception {
    List<AggregatorFactory> aggsWithUniques = ImmutableList.<AggregatorFactory>builder()
        .addAll(AGGS)
        .add(new HyperUniquesAggregatorFactory("uniques", "uniques"))
        .build();
    final HashFunction hashFn = Hashing.murmur3_128();
    GroupByQuery.Builder builder = new GroupByQuery.Builder()
        .setDataSource(DATA_SOURCE)
        .setQuerySegmentSpec(SEG_SPEC)
        .setDimFilter(DIM_FILTER)
        .setGranularity(GRANULARITY)
        .setDimensions(Arrays.<DimensionSpec>asList(new DefaultDimensionSpec("a", "a")))
        .setAggregatorSpecs(aggsWithUniques)
        .setPostAggregatorSpecs(POST_AGGS)
        .setContext(CONTEXT);
    final HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
    collector.add(hashFn.hashString("abc123", Charsets.UTF_8).asBytes());
    collector.add(hashFn.hashString("123abc", Charsets.UTF_8).asBytes());
    testQueryCaching(
        client,
        builder.build(),
        new Interval("2011-01-01/2011-01-02"),
        makeGroupByResults(
            new DateTime("2011-01-01"), ImmutableMap.of("a", "a", "rows", 1, "imps", 1, "impers", 1, "uniques", collector)),
        new Interval("2011-01-02/2011-01-03"),
        makeGroupByResults(
            new DateTime("2011-01-02"), ImmutableMap.of("a", "b", "rows", 2, "imps", 2, "impers", 2, "uniques", collector)),
        new Interval("2011-01-05/2011-01-10"),
        makeGroupByResults(
            new DateTime("2011-01-05"), ImmutableMap.of("a", "c", "rows", 3, "imps", 3, "impers", 3, "uniques", collector),
            new DateTime("2011-01-06"), ImmutableMap.of("a", "d", "rows", 4, "imps", 4, "impers", 4, "uniques", collector),
            new DateTime("2011-01-07"), ImmutableMap.of("a", "e", "rows", 5, "imps", 5, "impers", 5, "uniques", collector),
            new DateTime("2011-01-08"), ImmutableMap.of("a", "f", "rows", 6, "imps", 6, "impers", 6, "uniques", collector),
            new DateTime("2011-01-09"), ImmutableMap.of("a", "g", "rows", 7, "imps", 7, "impers", 7, "uniques", collector)),
        new Interval("2011-01-05/2011-01-10"),
        makeGroupByResults(
            new DateTime("2011-01-05T01"), ImmutableMap.of("a", "c", "rows", 3, "imps", 3, "impers", 3, "uniques", collector),
            new DateTime("2011-01-06T01"), ImmutableMap.of("a", "d", "rows", 4, "imps", 4, "impers", 4, "uniques", collector),
            new DateTime("2011-01-07T01"), ImmutableMap.of("a", "e", "rows", 5, "imps", 5, "impers", 5, "uniques", collector),
            new DateTime("2011-01-08T01"), ImmutableMap.of("a", "f", "rows", 6, "imps", 6, "impers", 6, "uniques", collector),
            new DateTime("2011-01-09T01"), ImmutableMap.of("a", "g", "rows", 7, "imps", 7, "impers", 7, "uniques", collector)));
    QueryRunner runner = new FinalizeResultsQueryRunner(client, GroupByQueryRunnerTest.makeQueryRunnerFactory(new GroupByQueryConfig()).getToolchest());
    HashMap<String, Object> context = new HashMap<String, Object>();
    TestHelper.assertExpectedObjects(
        makeGroupByResults(
            new DateTime("2011-01-05T"), ImmutableMap.of("a", "c", "rows", 3, "imps", 3, "impers", 3, "uniques", collector),
            new DateTime("2011-01-05T01"), ImmutableMap.of("a", "c", "rows", 3, "imps", 3, "impers", 3, "uniques", collector),
            new DateTime("2011-01-06T"), ImmutableMap.of("a", "d", "rows", 4, "imps", 4, "impers", 4, "uniques", collector),
            new DateTime("2011-01-06T01"), ImmutableMap.of("a", "d", "rows", 4, "imps", 4, "impers", 4, "uniques", collector),
            new DateTime("2011-01-07T"), ImmutableMap.of("a", "e", "rows", 5, "imps", 5, "impers", 5, "uniques", collector),
            new DateTime("2011-01-07T01"), ImmutableMap.of("a", "e", "rows", 5, "imps", 5, "impers", 5, "uniques", collector),
            new DateTime("2011-01-08T"), ImmutableMap.of("a", "f", "rows", 6, "imps", 6, "impers", 6, "uniques", collector),
            new DateTime("2011-01-08T01"), ImmutableMap.of("a", "f", "rows", 6, "imps", 6, "impers", 6, "uniques", collector),
            new DateTime("2011-01-09T"), ImmutableMap.of("a", "g", "rows", 7, "imps", 7, "impers", 7, "uniques", collector),
            new DateTime("2011-01-09T01"), ImmutableMap.of("a", "g", "rows", 7, "imps", 7, "impers", 7, "uniques", collector)),
        runner.run(builder.setInterval("2011-01-05/2011-01-10").build(), context),
        "");
}
Also used : DefaultDimensionSpec(io.druid.query.dimension.DefaultDimensionSpec) DimensionSpec(io.druid.query.dimension.DimensionSpec) GroupByQueryConfig(io.druid.query.groupby.GroupByQueryConfig) HashMap(java.util.HashMap) HyperLogLogCollector(io.druid.hll.HyperLogLogCollector) TopNQueryBuilder(io.druid.query.topn.TopNQueryBuilder) LongSumAggregatorFactory(io.druid.query.aggregation.LongSumAggregatorFactory) CountAggregatorFactory(io.druid.query.aggregation.CountAggregatorFactory) HyperUniquesAggregatorFactory(io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory) DefaultDimensionSpec(io.druid.query.dimension.DefaultDimensionSpec) DateTime(org.joda.time.DateTime) FinalizeResultsQueryRunner(io.druid.query.FinalizeResultsQueryRunner) QueryRunner(io.druid.query.QueryRunner) GroupByQuery(io.druid.query.groupby.GroupByQuery) HashFunction(com.google.common.hash.HashFunction) FinalizeResultsQueryRunner(io.druid.query.FinalizeResultsQueryRunner) HyperUniquesAggregatorFactory(io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) Interval(org.joda.time.Interval) Test(org.junit.Test) GroupByQueryRunnerTest(io.druid.query.groupby.GroupByQueryRunnerTest)
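
Example 7 feeds murmur3-hashed strings into a HyperLogLogCollector. The reason for hashing first is that HLL needs uniformly distributed bits: over a stream of n distinct uniform hashes, the maximum "rank" (position of the first set bit) concentrates around log2(n). A standalone sketch of that core intuition, using only Guava (this is not Druid's actual implementation; the class name is ours):

import com.google.common.hash.HashFunction;
import com.google.common.hash.Hashing;
import java.nio.charset.StandardCharsets;

public class HllIntuitionDemo {
    public static void main(String[] args) {
        HashFunction fn = Hashing.murmur3_128();
        int maxRank = 0;
        for (int i = 0; i < 100_000; i++) {
            long h = fn.hashString("item-" + i, StandardCharsets.UTF_8).asLong();
            // rank = 1 + number of leading zero bits; its maximum over the stream
            // approximates log2 of the distinct count
            maxRank = Math.max(maxRank, Long.numberOfLeadingZeros(h) + 1);
        }
        // log2(100000) is about 16.6, so expect maxRank in that neighborhood
        System.out.println("max rank over 100000 distinct items: " + maxRank);
    }
}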

Example 8 with HashFunction

Use of com.google.common.hash.HashFunction in project druid by druid-io.

From class HyperLogLogCollectorTest, method showErrorRate.

// Provides a nice printout of error rates as a function of cardinality
@Ignore
@Test
public void showErrorRate() throws Exception {
    HashFunction fn = Hashing.murmur3_128();
    Random random = new Random();
    double error = 0.0d;
    int count = 0;
    final int[] valsToCheck = { 10, 20, 50, 100, 1000, 2000, 5000, 10000, 20000, 50000, 100000, 1000000, 2000000, 10000000, Integer.MAX_VALUE };
    for (int numThings : valsToCheck) {
        long startTime = System.currentTimeMillis();
        HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
        for (int i = 0; i < numThings; ++i) {
            if (i != 0 && i % 100000000 == 0) {
                ++count;
                error = computeError(error, count, i, startTime, collector);
            }
            collector.add(fn.hashLong(random.nextLong()).asBytes());
        }
        ++count;
        error = computeError(error, count, numThings, startTime, collector);
    }
}
Also used : HashFunction(com.google.common.hash.HashFunction) Random(java.util.Random) Ignore(org.junit.Ignore) Test(org.junit.Test)
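
Example 8 hashes longs with fn.hashLong(...). Per Guava's documentation, hashLong(v) is shorthand for newHasher().putLong(v).hash(), which feeds the value's bytes in little-endian order. A quick sketch of ours (not part of the Druid test) confirming the equivalence:

import com.google.common.hash.HashFunction;
import com.google.common.hash.Hashing;
import java.util.Random;

public class HashLongEquivalenceDemo {
    public static void main(String[] args) {
        HashFunction fn = Hashing.murmur3_128();
        long v = new Random().nextLong();
        // hashLong(v) is documented shorthand for newHasher().putLong(v).hash()
        boolean same = fn.hashLong(v).equals(fn.newHasher().putLong(v).hash());
        System.out.println(same); // true
    }
}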

Example 9 with HashFunction

Use of com.google.common.hash.HashFunction in project lucene-solr by apache.

From class StatsComponentTest, method testCardinality.

/** @see #testHllOptions */
public void testCardinality() throws Exception {
    SolrCore core = h.getCore();
    // ensure we have the same hasher a_l would use
    HashFunction hasher = HllOptions.parseHllOptions(params("cardinality", "true"), core.getLatestSchema().getField("a_l")).getHasher();
    String[] baseParams = new String[] { "q", "*:*", "stats", "true", "indent", "true", "rows", "0" };
    assertQ("empty cardinalities", req(params("stats.field", "{!key=a cardinality=true}a_l", "stats.field", "{!key=pa cardinality=true}prehashed_a_l", "stats.field", "{!key=b cardinality=true}b_l", "stats.field", "{!key=c cardinality=true}c_l"), baseParams), cardinalityXpath("a", 0), cardinalityXpath("pa", 0), cardinalityXpath("b", 0), cardinalityXpath("c", 0));
    int id = 0;
    // add trivial docs to test basic cardinality
    for (int i = 0; i < 100; i++) {
        // add the same values multiple times (diff docs)
        for (int j = 0; j < 5; j++) {
            ++id;
            assertU(adoc("id", "" + id, "a_l", "" + i, "prehashed_a_l", "" + hasher.hashLong((long) i).asLong(), "b_l", "" + (i % 7), "c_l", "" + id));
        }
    }
    assertU(commit());
    assertQ("various cardinalities", req(params("stats.field", "{!key=a cardinality=true}a_l", "stats.field", "{!key=pa hllPreHashed=true cardinality=true}prehashed_a_l", "stats.field", "{!key=b cardinality=true}b_l", "stats.field", "{!key=c cardinality=true}c_l"), baseParams), cardinalityXpath("a", 100), cardinalityXpath("pa", 100), cardinalityXpath("b", 7), cardinalityXpath("c", 500));
    // various ways of explicitly saying "don't bother to compute cardinality"
    for (SolrParams p : new SolrParams[] {
            params("stats.field", "{!key=a min=true cardinality=false}a_l"),
            params("stats.field", "{!key=a min=true cardinality=$doit}a_l", "doit", "false"),
            // missing doit param
            params("stats.field", "{!key=a min=true cardinality=$doit}a_l"),
            // other tuning options shouldn't change things
            params("stats.field", "{!key=a min=true hllPreHashed=true cardinality=false}a_l"),
            params("stats.field", "{!key=a min=true hllRegwidth=4 cardinality=$doit}a_l", "doit", "false"),
            // missing doit param
            params("stats.field", "{!key=a min=true hllLog2m=18 cardinality=$doit}a_l") }) {
        assertQ("min w/cardinality explicitly disabled", req(p, baseParams),
            "count(//lst[@name='stats_fields']/lst[@name='a']/double[@name='min'])=1",
            "count(//lst[@name='stats_fields']/lst[@name='a']/long[@name='cardinality'])=0");
    }
}
Also used : HashFunction(com.google.common.hash.HashFunction) SolrCore(org.apache.solr.core.SolrCore) SolrParams(org.apache.solr.common.params.SolrParams) MapSolrParams(org.apache.solr.common.params.MapSolrParams)
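
The prehashed_a_l field in Example 9 exercises a contract: the client applies exactly the hash the server would, then queries with hllPreHashed=true so the server skips hashing. A sketch of the client-side computation, under the assumption that the field's HLL hasher resolves to Guava's murmur3_128 (which is precisely why the test pulls the hasher from HllOptions instead of hard-coding one):

import com.google.common.hash.HashFunction;
import com.google.common.hash.Hashing;

public class PrehashDemo {
    public static void main(String[] args) {
        // Assumption: the field's HLL hasher is murmur3_128; in real code, obtain
        // it from the schema (as the test does) rather than hard-coding it.
        HashFunction hasher = Hashing.murmur3_128();
        long raw = 42L;
        long prehashed = hasher.hashLong(raw).asLong();
        // index `prehashed` into prehashed_a_l, then query with hllPreHashed=true
        System.out.println(raw + " -> " + prehashed);
    }
}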

Example 10 with HashFunction

Use of com.google.common.hash.HashFunction in project phoenix by apache.

From class SkipScanFilter, method hashCode.

@Override
public int hashCode() {
    HashFunction hf = Hashing.goodFastHash(32);
    Hasher h = hf.newHasher();
    h.putInt(slots.size());
    for (int i = 0; i < slots.size(); i++) {
        h.putInt(slots.get(i).size());
        for (int j = 0; j < slots.get(i).size(); j++) {
            h.putBytes(slots.get(i).get(j).getLowerRange());
            h.putBytes(slots.get(i).get(j).getUpperRange());
        }
    }
    return h.hash().asInt();
}
Also used : Hasher(com.google.common.hash.Hasher) HashFunction(com.google.common.hash.HashFunction)
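
Example 10's pattern, streaming structured fields through a Hasher instead of hand-rolling 31 * result + field arithmetic, generalizes to any class with variable-length state. One caveat from Guava's documentation: goodFastHash is re-seeded per JVM run, so its output is stable within a process (sufficient for hashCode) but must never be persisted. A sketch on a hypothetical class of ours:

import com.google.common.hash.HashFunction;
import com.google.common.hash.Hasher;
import com.google.common.hash.Hashing;
import java.util.Arrays;
import java.util.List;

// Hypothetical class illustrating the Example 10 pattern; equals() omitted for brevity.
public final class ByteRangeKey {
    private final List<byte[]> parts;

    public ByteRangeKey(List<byte[]> parts) {
        this.parts = parts;
    }

    @Override
    public int hashCode() {
        // goodFastHash is re-seeded per JVM run (per Guava docs): fine for in-memory
        // HashMap keys, never valid for anything written to disk or the network.
        HashFunction hf = Hashing.goodFastHash(32);
        Hasher h = hf.newHasher();
        h.putInt(parts.size());
        for (byte[] p : parts) {
            // include the length so {"ab"},{"c"} and {"a"},{"bc"} hash differently
            h.putInt(p.length);
            h.putBytes(p);
        }
        return h.hash().asInt();
    }

    public static void main(String[] args) {
        ByteRangeKey k = new ByteRangeKey(Arrays.asList(new byte[] { 1, 2 }, new byte[] { 3 }));
        System.out.println(k.hashCode());
    }
}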

Aggregations

HashFunction (com.google.common.hash.HashFunction): 23
Test (org.junit.Test): 12
Random (java.util.Random): 7
ByteBuffer (java.nio.ByteBuffer): 5
HashCode (com.google.common.hash.HashCode): 3
Hasher (com.google.common.hash.Hasher): 3
ArrayList (java.util.ArrayList): 3
HashMap (java.util.HashMap): 3
BaseEncoding (com.google.common.io.BaseEncoding): 2
IOException (java.io.IOException): 2
ArrayDeque (java.util.ArrayDeque): 2
HashSet (java.util.HashSet): 2
SolrCore (org.apache.solr.core.SolrCore): 2
Ignore (org.junit.Ignore): 2
NonNull (com.android.annotations.NonNull): 1
NSArray (com.dd.plist.NSArray): 1
NSData (com.dd.plist.NSData): 1
NSDate (com.dd.plist.NSDate): 1
NSDictionary (com.dd.plist.NSDictionary): 1
NSObject (com.dd.plist.NSObject): 1