Search in sources :

Example 1 with FileStatistics

Use of org.apache.accumulo.core.client.summary.Summary.FileStatistics in the Apache Accumulo project.

From class SummaryCollectionTest, method testDeleted:

/**
 * Merges several {@link SummaryCollection}s (one of them twice) into an empty collection and
 * checks the {@link FileStatistics} of the single resulting summary. The same checks are run
 * against the merged collection and against a copy rebuilt from its {@code toThrift()} form.
 */
@Test
public void testDeleted() {
    SummarizerConfiguration familyConf = SummarizerConfiguration.builder(FamilySummarizer.class).build();

    // First input: a file summary with a single counter, constructed with the flag set to false.
    HashMap<String, Long> firstCounters = new HashMap<>();
    firstCounters.put("c:foo", 9L);
    FileSummary firstFile = new FileSummary(familyConf, firstCounters, false);
    SummaryCollection first = new SummaryCollection(Collections.singleton(firstFile));

    // Second input: a file summary with two counters, constructed with the flag set to true.
    HashMap<String, Long> secondCounters = new HashMap<>();
    secondCounters.put("c:foo", 5L);
    secondCounters.put("c:bar", 3L);
    FileSummary secondFile = new FileSummary(familyConf, secondCounters, true);
    SummaryCollection second = new SummaryCollection(Collections.singleton(secondFile));

    // Two collections that carry no file summaries; the latter uses the two-argument constructor.
    SummaryCollection empty = new SummaryCollection(Collections.emptyList());
    SummaryCollection emptyFlagged = new SummaryCollection(Collections.emptyList(), true);

    SummarizerFactory summarizerFactory = new SummarizerFactory();
    SummaryCollection merged = new SummaryCollection();
    // The flagged empty collection is intentionally merged twice.
    List<SummaryCollection> inputs = Arrays.asList(first, second, empty, emptyFlagged, emptyFlagged);
    inputs.forEach(input -> merged.merge(input, summarizerFactory));

    SummaryCollection rebuilt = new SummaryCollection(merged.toThrift());
    for (SummaryCollection candidate : Arrays.asList(merged, rebuilt)) {
        List<Summary> found = candidate.getSummaries();
        Assert.assertEquals(1, found.size());
        FileStatistics fileStats = found.get(0).getFileStatistics();
        Assert.assertEquals(5, fileStats.getTotal());
        Assert.assertEquals(1, fileStats.getExtra());
        Assert.assertEquals(0, fileStats.getLarge());
        Assert.assertEquals(1, fileStats.getMissing());
        Assert.assertEquals(2, fileStats.getDeleted());
        Assert.assertEquals(4, fileStats.getInaccurate());
    }
}
Also used: FileStatistics(org.apache.accumulo.core.client.summary.Summary.FileStatistics) HashMap(java.util.HashMap) FamilySummarizer(org.apache.accumulo.core.client.summary.summarizers.FamilySummarizer) FileSummary(org.apache.accumulo.core.summary.SummaryCollection.FileSummary) Summary(org.apache.accumulo.core.client.summary.Summary) SummarizerConfiguration(org.apache.accumulo.core.client.summary.SummarizerConfiguration) Test(org.junit.Test)

Example 2 with FileStatistics

Use of org.apache.accumulo.core.client.summary.Summary.FileStatistics in the Apache Accumulo project.

From class SummaryIT, method testManyFiles:

/**
 * Creates a table with a {@link FamilySummarizer} enabled, adds nine split points, then runs
 * twenty rounds of 10000 random writes. After each round the summaries are retrieved with a
 * flush and compared against locally tracked per-family counts; the file statistics must
 * report no inaccurate files and a total of at least ten files.
 */
@Test
public void testManyFiles() throws Exception {
    final String tableName = getUniqueNames(1)[0];
    Connector conn = getConnector();
    NewTableConfiguration tableConfig = new NewTableConfiguration();
    tableConfig.enableSummarization(SummarizerConfiguration.builder(FamilySummarizer.class).build());
    conn.tableOperations().create(tableName, tableConfig);

    // Fixed seed keeps the generated rows and families reproducible between runs.
    Random random = new Random(42);
    int nextQualifier = 0;

    // Split points 100000, 200000, ..., 900000, zero padded to six digits.
    SortedSet<Text> splitPoints = new TreeSet<>();
    for (int n = 1; n <= 9; n++) {
        splitPoints.add(new Text(String.format("%06d", n * 100_000)));
    }
    conn.tableOperations().addSplits(tableName, splitPoints);

    Map<String, Long> expectedCounts = new HashMap<>();
    for (int round = 0; round < 20; round++) {
        // this loop should cause a varying number of files and compactions
        try (BatchWriter writer = conn.createBatchWriter(tableName, new BatchWriterConfig())) {
            for (int remaining = 10000; remaining > 0; remaining--) {
                // Row is drawn before family so the random sequence is consumed in a fixed order.
                String row = String.format("%06d", random.nextInt(1_000_000));
                String family = String.format("%03d", random.nextInt(100));
                String qualifier = String.format("%06d", nextQualifier);
                nextQualifier++;
                write(writer, row, family, qualifier, "val");
                expectedCounts.merge(family, 1L, Long::sum);
            }
        }

        List<Summary> retrieved = conn.tableOperations().summaries(tableName).flush(true).retrieve();
        Assert.assertEquals(1, retrieved.size());
        Summary summary = retrieved.get(0);
        CounterSummary counters = new CounterSummary(summary);
        Assert.assertEquals(expectedCounts, counters.getCounters());

        FileStatistics stats = summary.getFileStatistics();
        Assert.assertEquals(0, stats.getInaccurate());
        Assert.assertTrue("Saw " + stats.getTotal() + " files expected >=10", stats.getTotal() >= 10);
    }
}
Also used: Connector(org.apache.accumulo.core.client.Connector) FileStatistics(org.apache.accumulo.core.client.summary.Summary.FileStatistics) HashMap(java.util.HashMap) Text(org.apache.hadoop.io.Text) Random(java.util.Random) NewTableConfiguration(org.apache.accumulo.core.client.admin.NewTableConfiguration) TreeSet(java.util.TreeSet) CounterSummary(org.apache.accumulo.core.client.summary.CounterSummary) BatchWriterConfig(org.apache.accumulo.core.client.BatchWriterConfig) Summary(org.apache.accumulo.core.client.summary.Summary) BatchWriter(org.apache.accumulo.core.client.BatchWriter) Test(org.junit.Test)

Aggregations

HashMap (java.util.HashMap)2 Summary (org.apache.accumulo.core.client.summary.Summary)2 FileStatistics (org.apache.accumulo.core.client.summary.Summary.FileStatistics)2 Test (org.junit.Test)2 Random (java.util.Random)1 TreeSet (java.util.TreeSet)1 BatchWriter (org.apache.accumulo.core.client.BatchWriter)1 BatchWriterConfig (org.apache.accumulo.core.client.BatchWriterConfig)1 Connector (org.apache.accumulo.core.client.Connector)1 NewTableConfiguration (org.apache.accumulo.core.client.admin.NewTableConfiguration)1 CounterSummary (org.apache.accumulo.core.client.summary.CounterSummary)1 SummarizerConfiguration (org.apache.accumulo.core.client.summary.SummarizerConfiguration)1 FamilySummarizer (org.apache.accumulo.core.client.summary.summarizers.FamilySummarizer)1 FileSummary (org.apache.accumulo.core.summary.SummaryCollection.FileSummary)1 Text (org.apache.hadoop.io.Text)1