Search in sources :

Example 31 with SummarizerConfiguration

use of org.apache.accumulo.core.client.summary.SummarizerConfiguration in project accumulo by apache.

the class SummaryIT method testLocalityGroups.

/**
 * Checks summary generation for a table whose column families are assigned to locality groups.
 *
 * <p>The table is created with two summarizers ({@code FamilySummarizer} and
 * {@code BasicSummarizer}), two locality groups are configured, and eleven entries are written.
 * Each returned summary must carry the expected statistics and report exactly one file with no
 * inaccuracies. Finally, a full scan must return exactly the entries recorded in {@code expected}.
 *
 * @throws Exception if any Accumulo operation fails
 */
@Test
public void testLocalityGroups() throws Exception {
    final String table = getUniqueNames(1)[0];
    Connector c = getConnector();

    // Enable both summarizers at table creation time.
    NewTableConfiguration ntc = new NewTableConfiguration();
    SummarizerConfiguration sc1 = SummarizerConfiguration.builder(FamilySummarizer.class).build();
    SummarizerConfiguration sc2 = SummarizerConfiguration.builder(BasicSummarizer.class).build();
    ntc.enableSummarization(sc1, sc2);
    c.tableOperations().create(table, ntc);

    Map<String, Set<Text>> lgroups = new HashMap<>();
    lgroups.put("lg1", ImmutableSet.of(new Text("chocolate"), new Text("coffee")));
    // NOTE(review): the family literal " broccoli " carries surrounding spaces, so it does not equal
    // the "broccoli" family written below - confirm whether this is intentional.
    lgroups.put("lg2", ImmutableSet.of(new Text(" broccoli "), new Text("cabbage")));
    c.tableOperations().setLocalityGroups(table, lgroups);

    // presumably write(...) also records each entry in 'expected' - verify against the helper
    Map<Key, Value> expected = new HashMap<>();
    try (BatchWriter bw = c.createBatchWriter(table, new BatchWriterConfig())) {
        write(bw, expected, "order:001", "chocolate", "dark", 3L, "99kg");
        write(bw, expected, "order:001", "chocolate", "light", 4L, "94kg");
        write(bw, expected, "order:001", "coffee", "dark", 5L, "33kg");
        write(bw, expected, "order:001", "broccoli", "crowns", 6L, "2kg");
        write(bw, expected, "order:001", "cheddar", "canadian", 7L, "40kg");
        write(bw, expected, "order:653", "chocolate", "dark", 3L, "3kg");
        write(bw, expected, "order:653", "chocolate", "light", 4L, "4kg");
        write(bw, expected, "order:653", "coffee", "dark", 5L, "2kg");
        write(bw, expected, "order:653", "broccoli", "crowns", 6L, "105kg");
        write(bw, expected, "order:653", "cabbage", "heads", 7L, "199kg");
        write(bw, expected, "order:653", "cheddar", "canadian", 8L, "43kg");
    }

    List<Summary> summaries = c.tableOperations().summaries(table).flush(true).retrieve();
    // One summary per configured summarizer is expected.
    Assert.assertEquals(2, summaries.stream().map(Summary::getSummarizerConfiguration).distinct().count());

    for (Summary summary : summaries) {
        if (summary.getSummarizerConfiguration().equals(sc1)) {
            // Per-family counts match the eleven writes above.
            Map<String, Long> expectedStats = nm(
                "c:chocolate", 4L, "c:coffee", 2L, "c:broccoli", 2L, "c:cheddar", 2L, "c:cabbage", 1L,
                TOO_LONG_STAT, 0L, TOO_MANY_STAT, 0L, SEEN_STAT, 11L, EMITTED_STAT, 11L,
                DELETES_IGNORED_STAT, 0L);
            Assert.assertEquals(expectedStats, summary.getStatistics());
            Assert.assertEquals(0, summary.getFileStatistics().getInaccurate());
            Assert.assertEquals(1, summary.getFileStatistics().getTotal());
        } else if (summary.getSummarizerConfiguration().equals(sc2)) {
            // Timestamps written above range from 3 to 8.
            Map<String, Long> expectedStats = nm(
                DELETES_STAT, 0L, TOTAL_STAT, 11L, MIN_TIMESTAMP_STAT, 3L, MAX_TIMESTAMP_STAT, 8L);
            Assert.assertEquals(expectedStats, summary.getStatistics());
            Assert.assertEquals(0, summary.getFileStatistics().getInaccurate());
            Assert.assertEquals(1, summary.getFileStatistics().getTotal());
        } else {
            Assert.fail("unexpected summary config " + summary.getSummarizerConfiguration());
        }
    }

    // Close the scanner once the scan completes instead of leaking it.
    Map<Key, Value> actual = new HashMap<>();
    try (Scanner scanner = c.createScanner(table, Authorizations.EMPTY)) {
        scanner.forEach(e -> actual.put(e.getKey(), e.getValue()));
    }
    Assert.assertEquals(expected, actual);
}
Also used : Connector(org.apache.accumulo.core.client.Connector) SortedSet(java.util.SortedSet) ImmutableSet(com.google.common.collect.ImmutableSet) Set(java.util.Set) TreeSet(java.util.TreeSet) HashMap(java.util.HashMap) Text(org.apache.hadoop.io.Text) FamilySummarizer(org.apache.accumulo.core.client.summary.summarizers.FamilySummarizer) NewTableConfiguration(org.apache.accumulo.core.client.admin.NewTableConfiguration) Value(org.apache.accumulo.core.data.Value) BatchWriterConfig(org.apache.accumulo.core.client.BatchWriterConfig) CounterSummary(org.apache.accumulo.core.client.summary.CounterSummary) Summary(org.apache.accumulo.core.client.summary.Summary) BatchWriter(org.apache.accumulo.core.client.BatchWriter) SummarizerConfiguration(org.apache.accumulo.core.client.summary.SummarizerConfiguration) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap) Key(org.apache.accumulo.core.data.Key) Test(org.junit.Test)

Example 32 with SummarizerConfiguration

use of org.apache.accumulo.core.client.summary.SummarizerConfiguration in project accumulo by apache.

the class SummaryIT method testBuggySummarizer.

/**
 * Checks that retrieving summaries from a table configured with {@code BuggySummarizer} fails with
 * an {@code AccumuloServerException}.
 *
 * @throws Exception if table setup or writing fails unexpectedly
 */
@Test
public void testBuggySummarizer() throws Exception {
    final String tableName = getUniqueNames(1)[0];
    Connector conn = getConnector();

    SummarizerConfiguration buggyConfig = SummarizerConfiguration.builder(BuggySummarizer.class).build();
    NewTableConfiguration tableConfig = new NewTableConfiguration();
    tableConfig.enableSummarization(buggyConfig);
    conn.tableOperations().create(tableName, tableConfig);

    // add a single split so that summary stats merge is forced
    TreeSet<Text> splitPoints = new TreeSet<>();
    splitPoints.add(new Text("g"));
    conn.tableOperations().addSplits(tableName, splitPoints);

    try (BatchWriter writer = conn.createBatchWriter(tableName, new BatchWriterConfig())) {
        write(writer, "bar1", "f1", "q1", "v1");
        write(writer, "bar2", "f1", "q1", "v2");
        write(writer, "foo1", "f1", "q1", "v3");
    }
    conn.tableOperations().flush(tableName, null, null, true);

    try {
        conn.tableOperations().summaries(tableName).retrieve();
        Assert.fail("Expected server side failure and did not see it");
    } catch (AccumuloServerException expected) {
        // This is the outcome under test: the retrieval must fail on the server side.
    }
}
Also used : AccumuloServerException(org.apache.accumulo.core.client.impl.AccumuloServerException) Connector(org.apache.accumulo.core.client.Connector) NewTableConfiguration(org.apache.accumulo.core.client.admin.NewTableConfiguration) BatchWriterConfig(org.apache.accumulo.core.client.BatchWriterConfig) Text(org.apache.hadoop.io.Text) BatchWriter(org.apache.accumulo.core.client.BatchWriter) SummarizerConfiguration(org.apache.accumulo.core.client.summary.SummarizerConfiguration) Test(org.junit.Test)

Example 33 with SummarizerConfiguration

use of org.apache.accumulo.core.client.summary.SummarizerConfiguration in project accumulo by apache.

the class SummaryIT method selectionTest.

/**
 * Exercises the ways a caller can select which summaries to retrieve: by exact configuration, by
 * several configurations, with no filter, by a regex over the configuration, and by a regex
 * combined with an exact configuration. Also checks that an invalid regex is rejected with a
 * {@code PatternSyntaxException}.
 *
 * @throws Exception if any Accumulo operation fails
 */
@Test
public void selectionTest() throws Exception {
    final String tableName = getUniqueNames(1)[0];
    Connector conn = getConnector();

    SummarizerConfiguration basicConfig = SummarizerConfiguration.builder(BasicSummarizer.class).build();
    SummarizerConfiguration keySize512 = SummarizerConfiguration.builder(KeySizeSummarizer.class).addOption("maxLen", "512").build();
    NewTableConfiguration tableConfig = new NewTableConfiguration();
    tableConfig.enableSummarization(basicConfig, keySize512);
    conn.tableOperations().create(tableName, tableConfig);

    writeData(tableName, conn).close();
    conn.tableOperations().flush(tableName, null, null, true);

    LongSummaryStatistics timestamps = getTimestampStats(tableName, conn);
    final long minTs = timestamps.getMin();
    final long maxTs = timestamps.getMax();
    final String basicRegex = ".*BasicSummarizer \\{\\}.*";

    // select each configured summarizer individually
    Collection<Summary> onlyKeySize = conn.tableOperations().summaries(tableName).withConfiguration(keySize512).retrieve();
    Assert.assertEquals(1, onlyKeySize.size());
    checkSummary(onlyKeySize, keySize512, "len=14", 100_000L);

    Collection<Summary> onlyBasic = conn.tableOperations().summaries(tableName).withConfiguration(basicConfig).retrieve();
    Assert.assertEquals(1, onlyBasic.size());
    checkSummary(onlyBasic, basicConfig, TOTAL_STAT, 100_000L, MIN_TIMESTAMP_STAT, minTs, MAX_TIMESTAMP_STAT, maxTs, DELETES_STAT, 0L);

    // retrieve a non-existent summary
    SummarizerConfiguration keySize256 = SummarizerConfiguration.builder(KeySizeSummarizer.class.getName()).addOption("maxLen", "256").build();
    Collection<Summary> missing = conn.tableOperations().summaries(tableName).withConfiguration(keySize256).retrieve();
    Assert.assertEquals(0, missing.size());

    // select both configurations explicitly
    Collection<Summary> bothExplicit = conn.tableOperations().summaries(tableName).withConfiguration(basicConfig, keySize512).retrieve();
    Assert.assertEquals(2, bothExplicit.size());
    checkSummary(bothExplicit, basicConfig, TOTAL_STAT, 100_000L, MIN_TIMESTAMP_STAT, minTs, MAX_TIMESTAMP_STAT, maxTs, DELETES_STAT, 0L);
    checkSummary(bothExplicit, keySize512, "len=14", 100_000L);

    // no selection criteria at all
    Collection<Summary> unfiltered = conn.tableOperations().summaries(tableName).retrieve();
    Assert.assertEquals(2, unfiltered.size());
    checkSummary(unfiltered, basicConfig, TOTAL_STAT, 100_000L, MIN_TIMESTAMP_STAT, minTs, MAX_TIMESTAMP_STAT, maxTs, DELETES_STAT, 0L);
    checkSummary(unfiltered, keySize512, "len=14", 100_000L);

    // selection by regular expression
    Collection<Summary> basicByRegex = conn.tableOperations().summaries(tableName).withMatchingConfiguration(basicRegex).retrieve();
    Assert.assertEquals(1, basicByRegex.size());
    checkSummary(basicByRegex, basicConfig, TOTAL_STAT, 100_000L, MIN_TIMESTAMP_STAT, minTs, MAX_TIMESTAMP_STAT, maxTs, DELETES_STAT, 0L);

    Collection<Summary> keySizeByRegex = conn.tableOperations().summaries(tableName).withMatchingConfiguration(".*KeySizeSummarizer \\{maxLen=512\\}.*").retrieve();
    Assert.assertEquals(1, keySizeByRegex.size());
    checkSummary(keySizeByRegex, keySize512, "len=14", 100_000L);

    Collection<Summary> noRegexMatch = conn.tableOperations().summaries(tableName).withMatchingConfiguration(".*KeySizeSummarizer \\{maxLen=256\\}.*").retrieve();
    Assert.assertEquals(0, noRegexMatch.size());

    // a regex combined with an exact configuration
    Collection<Summary> regexPlusExact = conn.tableOperations().summaries(tableName).withMatchingConfiguration(basicRegex).withConfiguration(keySize512).retrieve();
    Assert.assertEquals(2, regexPlusExact.size());
    checkSummary(regexPlusExact, basicConfig, TOTAL_STAT, 100_000L, MIN_TIMESTAMP_STAT, minTs, MAX_TIMESTAMP_STAT, maxTs, DELETES_STAT, 0L);
    checkSummary(regexPlusExact, keySize512, "len=14", 100_000L);

    // Ensure a bad regex fails fast.
    try {
        conn.tableOperations().summaries(tableName).withMatchingConfiguration(".*KeySizeSummarizer {maxLen=256}.*").retrieve();
        Assert.fail("Bad regex should have caused exception");
    } catch (PatternSyntaxException expected) {
        // The unescaped braces make the pattern invalid; rejecting it is the outcome under test.
    }
}
Also used : LongSummaryStatistics(java.util.LongSummaryStatistics) Connector(org.apache.accumulo.core.client.Connector) NewTableConfiguration(org.apache.accumulo.core.client.admin.NewTableConfiguration) CounterSummary(org.apache.accumulo.core.client.summary.CounterSummary) Summary(org.apache.accumulo.core.client.summary.Summary) BatchWriter(org.apache.accumulo.core.client.BatchWriter) SummarizerConfiguration(org.apache.accumulo.core.client.summary.SummarizerConfiguration) PatternSyntaxException(java.util.regex.PatternSyntaxException) Test(org.junit.Test)

Example 34 with SummarizerConfiguration

use of org.apache.accumulo.core.client.summary.SummarizerConfiguration in project accumulo by apache.

the class SummaryIT method testPermissions.

/**
 * Verifies that retrieving summaries requires the {@code GET_SUMMARIES} table permission.
 *
 * <p>A freshly created user must be rejected with {@code PERMISSION_DENIED}. After the permission
 * is granted, the same user must be able to retrieve the summary within a bounded number of
 * attempts; the test fails if no attempt succeeds.
 *
 * @throws Exception if any Accumulo operation fails unexpectedly
 */
@Test
public void testPermissions() throws Exception {
    final String table = getUniqueNames(1)[0];
    Connector c = getConnector();
    NewTableConfiguration ntc = new NewTableConfiguration();
    SummarizerConfiguration sc1 = SummarizerConfiguration.builder(FooCounter.class).build();
    ntc.enableSummarization(sc1);
    c.tableOperations().create(table, ntc);
    try (BatchWriter bw = c.createBatchWriter(table, new BatchWriterConfig())) {
        write(bw, "bar1", "f1", "q1", "v1");
        write(bw, "bar2", "f1", "q1", "v2");
        write(bw, "foo1", "f1", "q1", "v3");
    }
    c.tableOperations().flush(table, null, null, true);

    // Connect as a second user that has not been granted any table permission.
    PasswordToken passTok = new PasswordToken("letmesee");
    c.securityOperations().createLocalUser("user1", passTok);
    String instanceName = c.getInstance().getInstanceName();
    String zookeepers = c.getInstance().getZooKeepers();
    Connector c2 = new ZooKeeperInstance(instanceName, zookeepers).getConnector("user1", passTok);
    try {
        c2.tableOperations().summaries(table).retrieve();
        Assert.fail("Expected operation to fail because user does not have permssion to get summaries");
    } catch (AccumuloSecurityException ase) {
        Assert.assertEquals(SecurityErrorCode.PERMISSION_DENIED, ase.getSecurityErrorCode());
    }

    c.securityOperations().grantTablePermission("user1", table, TablePermission.GET_SUMMARIES);

    // Retry while the request is still rejected (presumably the grant takes a moment to become
    // visible - TODO confirm). Track success so the test cannot pass without verifying anything.
    final int maxAttempts = 10;
    boolean verified = false;
    for (int attempt = 0; attempt < maxAttempts && !verified; attempt++) {
        try {
            Summary summary = c2.tableOperations().summaries(table).retrieve().get(0);
            Assert.assertEquals(2, summary.getStatistics().size());
            Assert.assertEquals(2L, (long) summary.getStatistics().getOrDefault("bars", 0L));
            Assert.assertEquals(1L, (long) summary.getStatistics().getOrDefault("foos", 0L));
            verified = true;
        } catch (AccumuloSecurityException ase) {
            UtilWaitThread.sleep(500);
        }
    }
    Assert.assertTrue("Summaries were still not retrievable after " + maxAttempts + " attempts following the permission grant", verified);
}
Also used : Connector(org.apache.accumulo.core.client.Connector) PasswordToken(org.apache.accumulo.core.client.security.tokens.PasswordToken) NewTableConfiguration(org.apache.accumulo.core.client.admin.NewTableConfiguration) BatchWriterConfig(org.apache.accumulo.core.client.BatchWriterConfig) CounterSummary(org.apache.accumulo.core.client.summary.CounterSummary) Summary(org.apache.accumulo.core.client.summary.Summary) AccumuloSecurityException(org.apache.accumulo.core.client.AccumuloSecurityException) BatchWriter(org.apache.accumulo.core.client.BatchWriter) SummarizerConfiguration(org.apache.accumulo.core.client.summary.SummarizerConfiguration) ZooKeeperInstance(org.apache.accumulo.core.client.ZooKeeperInstance) Test(org.junit.Test)

Example 35 with SummarizerConfiguration

use of org.apache.accumulo.core.client.summary.SummarizerConfiguration in project accumulo by apache.

the class SummaryIT method compactionTest.

/**
 * Runs two compactions configured with the {@code FooCS} strategy and a {@code FooFilter}
 * iterator, and checks which rows survive each one.
 *
 * <p>With one "foo" row and two "bar" rows, the first compaction must leave the data untouched.
 * After three more "foo" rows are written, the second compaction must leave only the "bar" rows.
 *
 * @throws Exception if any Accumulo operation fails
 */
@Test
public void compactionTest() throws Exception {
    final String tableName = getUniqueNames(1)[0];
    Connector conn = getConnector();

    SummarizerConfiguration fooCounterConfig = SummarizerConfiguration.builder(FooCounter.class.getName()).build();
    NewTableConfiguration tableConfig = new NewTableConfiguration();
    tableConfig.enableSummarization(fooCounterConfig);
    conn.tableOperations().create(tableName, tableConfig);

    try (BatchWriter writer = conn.createBatchWriter(tableName, new BatchWriterConfig())) {
        write(writer, "bar1", "f1", "q1", "v1");
        write(writer, "bar2", "f1", "q1", "v2");
        write(writer, "foo1", "f1", "q1", "v3");
    }

    // Create a compaction config that will filter out foos if there are too many. Uses summary
    // data to know if there are too many foos.
    List<IteratorSetting> fooFilters = Collections.singletonList(new IteratorSetting(100, FooFilter.class));
    CompactionStrategyConfig strategy = new CompactionStrategyConfig(FooCS.class.getName());
    CompactionConfig compaction = new CompactionConfig()
        .setFlush(true)
        .setCompactionStrategy(strategy)
        .setIterators(fooFilters)
        .setWait(true);

    // this compaction should make no changes because there are fewer foos than bars
    conn.tableOperations().compact(tableName, compaction);
    try (Scanner rows = conn.createScanner(tableName, Authorizations.EMPTY)) {
        Stream<Entry<Key, Value>> entries = StreamSupport.stream(rows.spliterator(), false);
        Stream<String> rowKinds = entries
            // convert to row
            .map(entry -> entry.getKey().getRowData().toString())
            // strip numbers off row
            .map(row -> row.replaceAll("[0-9]+", ""));
        // count different row types
        Map<String, Long> tally = rowKinds.collect(groupingBy(identity(), counting()));
        Assert.assertEquals(1L, (long) tally.getOrDefault("foo", 0L));
        Assert.assertEquals(2L, (long) tally.getOrDefault("bar", 0L));
        Assert.assertEquals(2, tally.size());
    }

    try (BatchWriter writer = conn.createBatchWriter(tableName, new BatchWriterConfig())) {
        write(writer, "foo2", "f1", "q1", "v4");
        write(writer, "foo3", "f1", "q1", "v5");
        write(writer, "foo4", "f1", "q1", "v6");
    }

    // this compaction should remove all foos because there are more foos than bars
    conn.tableOperations().compact(tableName, compaction);
    try (Scanner rows = conn.createScanner(tableName, Authorizations.EMPTY)) {
        Stream<Entry<Key, Value>> entries = StreamSupport.stream(rows.spliterator(), false);
        Stream<String> rowKinds = entries
            // convert to row
            .map(entry -> entry.getKey().getRowData().toString())
            // strip numbers off row
            .map(row -> row.replaceAll("[0-9]+", ""));
        // count different row types
        Map<String, Long> tally = rowKinds.collect(groupingBy(identity(), counting()));
        Assert.assertEquals(0L, (long) tally.getOrDefault("foo", 0L));
        Assert.assertEquals(2L, (long) tally.getOrDefault("bar", 0L));
        Assert.assertEquals(1, tally.size());
    }
}
Also used : Arrays(java.util.Arrays) TableOfflineException(org.apache.accumulo.core.client.TableOfflineException) SortedSet(java.util.SortedSet) Collectors.counting(java.util.stream.Collectors.counting) DELETES_IGNORED_STAT(org.apache.accumulo.core.client.summary.CountingSummarizer.DELETES_IGNORED_STAT) TOO_MANY_STAT(org.apache.accumulo.core.client.summary.CountingSummarizer.TOO_MANY_STAT) AccumuloServerException(org.apache.accumulo.core.client.impl.AccumuloServerException) Text(org.apache.hadoop.io.Text) Random(java.util.Random) Mutation(org.apache.accumulo.core.data.Mutation) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) MIN_TIMESTAMP_STAT(org.apache.accumulo.test.functional.BasicSummarizer.MIN_TIMESTAMP_STAT) BatchWriterConfig(org.apache.accumulo.core.client.BatchWriterConfig) Summarizer(org.apache.accumulo.core.client.summary.Summarizer) TOTAL_STAT(org.apache.accumulo.test.functional.BasicSummarizer.TOTAL_STAT) Map(java.util.Map) Value(org.apache.accumulo.core.data.Value) PatternSyntaxException(java.util.regex.PatternSyntaxException) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) TOO_LONG_STAT(org.apache.accumulo.core.client.summary.CountingSummarizer.TOO_LONG_STAT) Collection(java.util.Collection) MutationsRejectedException(org.apache.accumulo.core.client.MutationsRejectedException) Set(java.util.Set) SecurityErrorCode(org.apache.accumulo.core.client.security.SecurityErrorCode) CompactionStrategyConfig(org.apache.accumulo.core.client.admin.CompactionStrategyConfig) List(java.util.List) PasswordToken(org.apache.accumulo.core.client.security.tokens.PasswordToken) Filter(org.apache.accumulo.core.iterators.Filter) Stream(java.util.stream.Stream) FamilySummarizer(org.apache.accumulo.core.client.summary.summarizers.FamilySummarizer) DELETES_STAT(org.apache.accumulo.test.functional.BasicSummarizer.DELETES_STAT) Function.identity(java.util.function.Function.identity) 
Entry(java.util.Map.Entry) Scanner(org.apache.accumulo.core.client.Scanner) ZooKeeperInstance(org.apache.accumulo.core.client.ZooKeeperInstance) SEEN_STAT(org.apache.accumulo.core.client.summary.CountingSummarizer.SEEN_STAT) CompactionPlan(org.apache.accumulo.tserver.compaction.CompactionPlan) Iterables(com.google.common.collect.Iterables) VisibilitySummarizer(org.apache.accumulo.core.client.summary.summarizers.VisibilitySummarizer) SummarizerConfiguration(org.apache.accumulo.core.client.summary.SummarizerConfiguration) UtilWaitThread(org.apache.accumulo.fate.util.UtilWaitThread) Collectors.groupingBy(java.util.stream.Collectors.groupingBy) FileStatistics(org.apache.accumulo.core.client.summary.Summary.FileStatistics) HashMap(java.util.HashMap) CompactionConfig(org.apache.accumulo.core.client.admin.CompactionConfig) TreeSet(java.util.TreeSet) Connector(org.apache.accumulo.core.client.Connector) Builder(com.google.common.collect.ImmutableMap.Builder) NewTableConfiguration(org.apache.accumulo.core.client.admin.NewTableConfiguration) AccumuloClusterHarness(org.apache.accumulo.harness.AccumuloClusterHarness) TablePermission(org.apache.accumulo.core.security.TablePermission) Lists(com.google.common.collect.Lists) AccumuloSecurityException(org.apache.accumulo.core.client.AccumuloSecurityException) Key(org.apache.accumulo.core.data.Key) EMITTED_STAT(org.apache.accumulo.core.client.summary.CountingSummarizer.EMITTED_STAT) MAX_TIMESTAMP_STAT(org.apache.accumulo.test.functional.BasicSummarizer.MAX_TIMESTAMP_STAT) StreamSupport(java.util.stream.StreamSupport) CounterSummary(org.apache.accumulo.core.client.summary.CounterSummary) LongSummaryStatistics(java.util.LongSummaryStatistics) Summary(org.apache.accumulo.core.client.summary.Summary) MajorCompactionRequest(org.apache.accumulo.tserver.compaction.MajorCompactionRequest) IOException(java.io.IOException) Test(org.junit.Test) Authorizations(org.apache.accumulo.core.security.Authorizations) 
AccumuloException(org.apache.accumulo.core.client.AccumuloException) Range(org.apache.accumulo.core.data.Range) IteratorSetting(org.apache.accumulo.core.client.IteratorSetting) BatchWriter(org.apache.accumulo.core.client.BatchWriter) CompactionStrategy(org.apache.accumulo.tserver.compaction.CompactionStrategy) Assert(org.junit.Assert) Collections(java.util.Collections) Connector(org.apache.accumulo.core.client.Connector) Scanner(org.apache.accumulo.core.client.Scanner) CompactionStrategyConfig(org.apache.accumulo.core.client.admin.CompactionStrategyConfig) Entry(java.util.Map.Entry) IteratorSetting(org.apache.accumulo.core.client.IteratorSetting) NewTableConfiguration(org.apache.accumulo.core.client.admin.NewTableConfiguration) CompactionConfig(org.apache.accumulo.core.client.admin.CompactionConfig) BatchWriterConfig(org.apache.accumulo.core.client.BatchWriterConfig) BatchWriter(org.apache.accumulo.core.client.BatchWriter) SummarizerConfiguration(org.apache.accumulo.core.client.summary.SummarizerConfiguration) Test(org.junit.Test)

Aggregations

SummarizerConfiguration (org.apache.accumulo.core.client.summary.SummarizerConfiguration)41 Test (org.junit.Test)33 HashMap (java.util.HashMap)28 Key (org.apache.accumulo.core.data.Key)22 Value (org.apache.accumulo.core.data.Value)22 Collector (org.apache.accumulo.core.client.summary.Summarizer.Collector)19 EntryLengthSummarizer (org.apache.accumulo.core.client.summary.summarizers.EntryLengthSummarizer)16 Summary (org.apache.accumulo.core.client.summary.Summary)13 NewTableConfiguration (org.apache.accumulo.core.client.admin.NewTableConfiguration)10 CounterSummary (org.apache.accumulo.core.client.summary.CounterSummary)10 Connector (org.apache.accumulo.core.client.Connector)9 BatchWriter (org.apache.accumulo.core.client.BatchWriter)8 Text (org.apache.hadoop.io.Text)8 FamilySummarizer (org.apache.accumulo.core.client.summary.summarizers.FamilySummarizer)7 ArrayList (java.util.ArrayList)6 BatchWriterConfig (org.apache.accumulo.core.client.BatchWriterConfig)6 IOException (java.io.IOException)5 Collection (java.util.Collection)5 Map (java.util.Map)5 Entry (java.util.Map.Entry)5