Search in sources :

Example 1 with Sampler

use of org.apache.accumulo.core.client.sample.Sampler in project accumulo by apache.

the class RFileTest method testSampleLG.

/**
 * Writes an RFile with one named locality group ("meta-lg") plus the default locality group while sampling is configured, then verifies
 * that the sample read back from the file contains exactly the entries whose rows the sampler accepted. The scenario is repeated for two
 * sample buffer sizes and three sampler moduli.
 */
@Test
public void testSampleLG() throws IOException {
    final int rowCount = 5000;
    final int[] bufferSizes = { 1 << 10, 1 << 20 };
    final int[] moduli = { 19, 103, 1019 };
    for (int bufferSize : bufferSizes) {
        // force sample buffer to flush for smaller data
        RFile.setSampleBufferSize(bufferSize);
        for (int modulus : moduli) {
            List<Entry<Key, Value>> expectedMetaSample = new ArrayList<>();
            List<Entry<Key, Value>> expectedDataSample = new ArrayList<>();

            // Table configuration that turns on row sampling with the modulus under test.
            ConfigurationCopy sampleConf = new ConfigurationCopy(conf == null ? DefaultConfiguration.getInstance() : conf);
            sampleConf.set(Property.TABLE_SAMPLER, RowSampler.class.getName());
            sampleConf.set(Property.TABLE_SAMPLER_OPTS + "hasher", "murmur3_32");
            sampleConf.set(Property.TABLE_SAMPLER_OPTS + "modulus", Integer.toString(modulus));
            Sampler rowSampler = SamplerFactory.newSampler(SamplerConfigurationImpl.newSamplerConfig(sampleConf), sampleConf);

            TestRFile rfile = new TestRFile(sampleConf);
            rfile.openWriter(false, 1000);

            // First locality group: three entries per row spread over families metaA and metaB.
            rfile.writer.startNewLocalityGroup("meta-lg", newColFamByteSequence("metaA", "metaB"));
            for (int rowNum = 0; rowNum < rowCount; rowNum++) {
                String rowId = String.format("r%06d", rowNum);
                Key metaKeyA = new Key(rowId, "metaA", "q9", 7);
                Key metaKeyB1 = new Key(rowId, "metaB", "q8", 7);
                Key metaKeyB2 = new Key(rowId, "metaB", "qA", 7);
                Value metaValA = new Value(Integer.toString(rowNum).getBytes());
                Value metaValB1 = new Value(Integer.toString(rowNum * 93).getBytes());
                Value metaValB2 = new Value(Integer.toString(rowNum * 113).getBytes());
                // Only the first key of the row is offered to the sampler; when it is accepted all three
                // entries of the row are recorded as expected sample data.
                if (rowSampler.accept(metaKeyA)) {
                    expectedMetaSample.add(new AbstractMap.SimpleImmutableEntry<>(metaKeyA, metaValA));
                    expectedMetaSample.add(new AbstractMap.SimpleImmutableEntry<>(metaKeyB1, metaValB1));
                    expectedMetaSample.add(new AbstractMap.SimpleImmutableEntry<>(metaKeyB2, metaValB2));
                }
                rfile.writer.append(metaKeyA, metaValA);
                rfile.writer.append(metaKeyB1, metaValB1);
                rfile.writer.append(metaKeyB2, metaValB2);
            }

            // Default locality group: one dataA entry per row.
            rfile.writer.startDefaultLocalityGroup();
            for (int rowNum = 0; rowNum < rowCount; rowNum++) {
                String rowId = String.format("r%06d", rowNum);
                Key dataKey = new Key(rowId, "dataA", "q9", 7);
                Value dataVal = new Value(Integer.toString(rowNum).getBytes());
                if (rowSampler.accept(dataKey)) {
                    expectedDataSample.add(new AbstractMap.SimpleImmutableEntry<>(dataKey, dataVal));
                }
                rfile.writer.append(dataKey, dataVal);
            }
            rfile.closeWriter();

            // Guard against a vacuous test: both groups must have contributed sample data.
            Assert.assertTrue(!expectedMetaSample.isEmpty());
            Assert.assertTrue(!expectedDataSample.isEmpty());

            rfile.openReader(false);
            FileSKVIterator sampleIter = rfile.reader.getSample(SamplerConfigurationImpl.newSamplerConfig(sampleConf));

            // Per-group checks against various column family sets.
            checkSample(sampleIter, expectedMetaSample, newColFamByteSequence("metaA", "metaB"), true);
            checkSample(sampleIter, expectedMetaSample, newColFamByteSequence("metaA"), true);
            checkSample(sampleIter, expectedMetaSample, newColFamByteSequence("metaB"), true);
            checkSample(sampleIter, expectedMetaSample, newColFamByteSequence("dataA"), false);
            checkSample(sampleIter, expectedDataSample, newColFamByteSequence("metaA", "metaB"), false);
            checkSample(sampleIter, expectedDataSample, newColFamByteSequence("dataA"), true);

            // Checks spanning both groups need the expected entries merged and ordered by key.
            List<Entry<Key, Value>> expectedCombined = new ArrayList<>(expectedMetaSample);
            expectedCombined.addAll(expectedDataSample);
            Comparator<Entry<Key, Value>> byKey = new Comparator<Entry<Key, Value>>() {

                @Override
                public int compare(Entry<Key, Value> left, Entry<Key, Value> right) {
                    return left.getKey().compareTo(right.getKey());
                }
            };
            Collections.sort(expectedCombined, byKey);
            checkSample(sampleIter, expectedCombined, newColFamByteSequence("dataA", "metaA"), true);
            checkSample(sampleIter, expectedCombined, EMPTY_COL_FAMS, false);
            rfile.closeReader();
        }
    }
}
Also used : FileSKVIterator(org.apache.accumulo.core.file.FileSKVIterator) ConfigurationCopy(org.apache.accumulo.core.conf.ConfigurationCopy) ArrayList(java.util.ArrayList) RowSampler(org.apache.accumulo.core.client.sample.RowSampler) AbstractMap(java.util.AbstractMap) Entry(java.util.Map.Entry) Sampler(org.apache.accumulo.core.client.sample.Sampler) RowSampler(org.apache.accumulo.core.client.sample.RowSampler) Value(org.apache.accumulo.core.data.Value) Key(org.apache.accumulo.core.data.Key) PartialKey(org.apache.accumulo.core.data.PartialKey) CryptoTest(org.apache.accumulo.core.security.crypto.CryptoTest) Test(org.junit.Test)

Example 2 with Sampler

use of org.apache.accumulo.core.client.sample.Sampler in project accumulo by apache.

the class InMemoryMapTest method runInterruptSampleTest.

/**
 * Populates an in-memory map with sampling enabled, reads the sample through an iterator, then raises the iterator's interrupt flag and
 * verifies that a further read throws {@link IterationInterruptedException}.
 *
 * @param deepCopy
 *          when true, reads go through a deep copy of the sample iterator rather than the iterator itself
 * @param delete
 *          when true, {@code delete(0)} is called on the map before any read
 * @param dcAfterDelete
 *          only relevant when {@code delete} is true: false performs the delete before the deep-copy step, true performs it after
 */
private void runInterruptSampleTest(boolean deepCopy, boolean delete, boolean dcAfterDelete) throws Exception {
    SamplerConfigurationImpl samplerConf = new SamplerConfigurationImpl(RowSampler.class.getName(), ImmutableMap.of("hasher", "murmur3_32", "modulus", "2"));
    Sampler rowSampler = SamplerFactory.newSampler(samplerConf, DefaultConfiguration.getInstance());

    // Copy the sampler settings into the table configuration backing the map.
    ConfigurationCopy tableConf = newConfig(tempFolder.newFolder().getAbsolutePath());
    for (Entry<String, String> prop : samplerConf.toTablePropertiesMap().entrySet()) {
        tableConf.set(prop.getKey(), prop.getValue());
    }

    InMemoryMap memMap = new InMemoryMap(tableConf);
    TreeMap<Key, Value> sampleExpected = new TreeMap<>();
    TreeMap<Key, Value> allExpected = new TreeMap<>();
    for (int rowNum = 0; rowNum < 1000; rowNum++) {
        String rowId = String.format("r%06d", rowNum);
        mutate(memMap, rowId, "cf1:cq1", 5, "v" + (2 * rowNum), rowSampler, sampleExpected, allExpected);
        mutate(memMap, rowId, "cf2:cq2", 5, "v" + ((2 * rowNum) + 1), rowSampler, sampleExpected, allExpected);
    }
    assertTrue(sampleExpected.size() > 0);

    MemoryIterator sampleIter = memMap.skvIterator(samplerConf);
    AtomicBoolean interrupted = new AtomicBoolean(false);
    sampleIter.setInterruptFlag(interrupted);
    SortedKeyValueIterator<Key, Value> reader = sampleIter;

    // The delete, when requested, happens on exactly one side of the deep-copy step.
    boolean deleteBeforeCopy = delete && !dcAfterDelete;
    boolean deleteAfterCopy = delete && dcAfterDelete;
    if (deleteBeforeCopy) {
        memMap.delete(0);
    }
    if (deepCopy) {
        reader = reader.deepCopy(new SampleIE(samplerConf));
    }
    if (deleteAfterCopy) {
        memMap.delete(0);
    }

    // With the flag clear the full sample is readable.
    assertEquals(sampleExpected, readAll(reader));

    // Once the flag is set the next read must be interrupted.
    interrupted.set(true);
    try {
        readAll(reader);
        Assert.fail();
    } catch (IterationInterruptedException expected) {
        // expected: reading with the interrupt flag set
    }
    sampleIter.close();
}
Also used : ConfigurationCopy(org.apache.accumulo.core.conf.ConfigurationCopy) SamplerConfigurationImpl(org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl) TreeMap(java.util.TreeMap) RowSampler(org.apache.accumulo.core.client.sample.RowSampler) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) Sampler(org.apache.accumulo.core.client.sample.Sampler) RowSampler(org.apache.accumulo.core.client.sample.RowSampler) Value(org.apache.accumulo.core.data.Value) MemoryIterator(org.apache.accumulo.tserver.InMemoryMap.MemoryIterator) IterationInterruptedException(org.apache.accumulo.core.iterators.IterationInterruptedException) Key(org.apache.accumulo.core.data.Key)

Example 3 with Sampler

use of org.apache.accumulo.core.client.sample.Sampler in project accumulo by apache.

the class InMemoryMapTest method testDeferredSamplerCreation.

/**
 * Creates an in-memory map under one sampler configuration (modulus 9), switches the backing table configuration to a second one
 * (modulus 7) before any data is written, and then checks that sample reads succeed for the second configuration while seeking a sample
 * iterator for the first configuration raises {@link SampleNotPresentException}.
 */
@Test
public void testDeferredSamplerCreation() throws Exception {
    SamplerConfigurationImpl initialSamplerConf = new SamplerConfigurationImpl(RowSampler.class.getName(), ImmutableMap.of("hasher", "murmur3_32", "modulus", "9"));
    ConfigurationCopy tableConf = newConfig(tempFolder.newFolder().getAbsolutePath());
    for (Entry<String, String> prop : initialSamplerConf.toTablePropertiesMap().entrySet()) {
        tableConf.set(prop.getKey(), prop.getValue());
    }
    InMemoryMap memMap = new InMemoryMap(tableConf);

    // change sampler config after creating in mem map.
    SamplerConfigurationImpl updatedSamplerConf = new SamplerConfigurationImpl(RowSampler.class.getName(), ImmutableMap.of("hasher", "murmur3_32", "modulus", "7"));
    for (Entry<String, String> prop : updatedSamplerConf.toTablePropertiesMap().entrySet()) {
        tableConf.set(prop.getKey(), prop.getValue());
    }

    TreeMap<Key, Value> sampleExpected = new TreeMap<>();
    TreeMap<Key, Value> allExpected = new TreeMap<>();
    Sampler rowSampler = SamplerFactory.newSampler(updatedSamplerConf, tableConf);
    for (int rowNum = 0; rowNum < 100; rowNum++) {
        mutate(memMap, "r" + rowNum, "cf:cq", 5, "v" + rowNum, rowSampler, sampleExpected, allExpected);
    }

    // Sample iterator for the updated configuration sees the expected sample.
    MemoryIterator memIter = memMap.skvIterator(updatedSamplerConf);
    memIter.seek(new Range(), LocalityGroupUtil.EMPTY_CF_SET, false);
    Assert.assertEquals(sampleExpected, readAll(memIter));

    // So does a deep copy taken with a sampling environment for the same configuration.
    SortedKeyValueIterator<Key, Value> sampleCopy = memIter.deepCopy(new SampleIE(updatedSamplerConf));
    sampleCopy.seek(new Range(), LocalityGroupUtil.EMPTY_CF_SET, false);
    Assert.assertEquals(sampleExpected, readAll(sampleCopy));

    // A null sampler configuration yields every entry.
    memIter = memMap.skvIterator(null);
    memIter.seek(new Range(), LocalityGroupUtil.EMPTY_CF_SET, false);
    Assert.assertEquals(allExpected, readAll(memIter));

    // Seeking with the configuration the map was created under must fail; the expectation is
    // registered immediately before the seek so that only the seek can satisfy it.
    memIter = memMap.skvIterator(initialSamplerConf);
    thrown.expect(SampleNotPresentException.class);
    memIter.seek(new Range(), LocalityGroupUtil.EMPTY_CF_SET, false);
}
Also used : ConfigurationCopy(org.apache.accumulo.core.conf.ConfigurationCopy) SamplerConfigurationImpl(org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl) TreeMap(java.util.TreeMap) Range(org.apache.accumulo.core.data.Range) RowSampler(org.apache.accumulo.core.client.sample.RowSampler) Sampler(org.apache.accumulo.core.client.sample.Sampler) RowSampler(org.apache.accumulo.core.client.sample.RowSampler) Value(org.apache.accumulo.core.data.Value) MemoryIterator(org.apache.accumulo.tserver.InMemoryMap.MemoryIterator) Key(org.apache.accumulo.core.data.Key) Test(org.junit.Test)

Example 4 with Sampler

use of org.apache.accumulo.core.client.sample.Sampler in project accumulo by apache.

the class SamplerFactory method newSampler.

/**
 * Loads, instantiates and initializes the {@link Sampler} implementation named by a sampler configuration.
 *
 * <p>
 * Exactly one class-loading strategy is used:
 * <ul>
 * <li>{@code useAccumuloStart == false}: the class loader that loaded {@code SamplerFactory};</li>
 * <li>otherwise, if the {@code Property.TABLE_CLASSPATH} value is non-empty: the context manager of {@code AccumuloVFSClassLoader} with
 * that context;</li>
 * <li>otherwise: {@code AccumuloVFSClassLoader.loadClass}.</li>
 * </ul>
 *
 * @param config
 *          supplies the sampler class name and the configuration handed to {@link Sampler#init}
 * @param acuconf
 *          configuration from which {@code Property.TABLE_CLASSPATH} is read
 * @param useAccumuloStart
 *          whether to load the class through {@code AccumuloVFSClassLoader} (true) or this class's own class loader (false)
 * @return a new sampler instance on which {@code init} has already been called
 * @throws IOException
 *           declared by the signature; presumably propagated from the VFS class-loader calls
 * @throws RuntimeException
 *           wrapping a {@link ClassNotFoundException}, {@link InstantiationException} or {@link IllegalAccessException} raised while
 *           loading or instantiating the sampler class
 */
public static Sampler newSampler(SamplerConfigurationImpl config, AccumuloConfiguration acuconf, boolean useAccumuloStart) throws IOException {
    String context = acuconf.get(Property.TABLE_CLASSPATH);
    Class<? extends Sampler> clazz;
    try {
        if (!useAccumuloStart) {
            clazz = SamplerFactory.class.getClassLoader().loadClass(config.getClassName()).asSubclass(Sampler.class);
        } else if (context != null && !context.equals("")) {
            // The branches must be mutually exclusive: without this "else" the class loaded above
            // was always overwritten by one of the VFS branches, making useAccumuloStart a no-op.
            clazz = AccumuloVFSClassLoader.getContextManager().loadClass(context, config.getClassName(), Sampler.class);
        } else {
            clazz = AccumuloVFSClassLoader.loadClass(config.getClassName(), Sampler.class);
        }
        Sampler sampler = clazz.newInstance();
        sampler.init(config.toSamplerConfiguration());
        return sampler;
    } catch (ClassNotFoundException | InstantiationException | IllegalAccessException e) {
        throw new RuntimeException(e);
    }
}
Also used : Sampler(org.apache.accumulo.core.client.sample.Sampler)

Example 5 with Sampler

use of org.apache.accumulo.core.client.sample.Sampler in project accumulo by apache.

the class InMemoryMapTest method testSample.

/**
 * Exercises sample and non-sample iterators over an in-memory map, before and after {@code delete(0)}, for two table configurations: one
 * without locality groups and one that defines locality group "lg1" containing column family "cf2".
 *
 * <p>
 * Three source iterators are used: {@code iter0} (sample iterator created before any data is written), {@code iter1} (sample iterator
 * created after the writes) and {@code iter2} (created with a null sampler configuration after the writes). Deep copies are taken with
 * both a no-argument {@code SampleIE} and one built from the sampler configuration.
 */
@Test
public void testSample() throws Exception {
    SamplerConfigurationImpl sampleConfig = new SamplerConfigurationImpl(RowSampler.class.getName(), ImmutableMap.of("hasher", "murmur3_32", "modulus", "7"));
    Sampler sampler = SamplerFactory.newSampler(sampleConfig, DefaultConfiguration.getInstance());
    // config1: sampler properties only.
    ConfigurationCopy config1 = newConfig(tempFolder.newFolder().getAbsolutePath());
    for (Entry<String, String> entry : sampleConfig.toTablePropertiesMap().entrySet()) {
        config1.set(entry.getKey(), entry.getValue());
    }
    // config2: same sampler properties plus locality group "lg1" holding column family "cf2".
    ConfigurationCopy config2 = newConfig(tempFolder.newFolder().getAbsolutePath());
    config2.set(Property.TABLE_LOCALITY_GROUP_PREFIX + "lg1", LocalityGroupUtil.encodeColumnFamilies(toTextSet("cf2")));
    config2.set(Property.TABLE_LOCALITY_GROUPS.getKey(), "lg1");
    for (Entry<String, String> entry : sampleConfig.toTablePropertiesMap().entrySet()) {
        config2.set(entry.getKey(), entry.getValue());
    }
    for (ConfigurationCopy config : Arrays.asList(config1, config2)) {
        InMemoryMap imm = new InMemoryMap(config);
        TreeMap<Key, Value> expectedSample = new TreeMap<>();
        TreeMap<Key, Value> expectedAll = new TreeMap<>();
        // Never populated: the expected content for iterators created before any write.
        TreeMap<Key, Value> expectedNone = new TreeMap<>();
        // iter0 is obtained while the map is still empty.
        MemoryIterator iter0 = imm.skvIterator(sampleConfig);
        for (int r = 0; r < 100; r++) {
            String row = String.format("r%06d", r);
            mutate(imm, row, "cf1:cq1", 5, "v" + (2 * r), sampler, expectedSample, expectedAll);
            mutate(imm, row, "cf2:cq2", 5, "v" + ((2 * r) + 1), sampler, expectedSample, expectedAll);
        }
        // Guard against a vacuous test: the sampler must have accepted something.
        assertTrue(expectedSample.size() > 0);
        // iter1: sample iterator created after the writes; iter2: iterator with no sampler configuration.
        MemoryIterator iter1 = imm.skvIterator(sampleConfig);
        MemoryIterator iter2 = imm.skvIterator(null);
        // Deep copies: "dc1" uses a no-argument SampleIE, "dc2" uses a SampleIE built from sampleConfig.
        SortedKeyValueIterator<Key, Value> iter0dc1 = iter0.deepCopy(new SampleIE());
        SortedKeyValueIterator<Key, Value> iter0dc2 = iter0.deepCopy(new SampleIE(sampleConfig));
        SortedKeyValueIterator<Key, Value> iter1dc1 = iter1.deepCopy(new SampleIE());
        SortedKeyValueIterator<Key, Value> iter1dc2 = iter1.deepCopy(new SampleIE(sampleConfig));
        SortedKeyValueIterator<Key, Value> iter2dc1 = iter2.deepCopy(new SampleIE());
        SortedKeyValueIterator<Key, Value> iter2dc2 = iter2.deepCopy(new SampleIE(sampleConfig));
        // iter0 and its copies are expected to stay empty even though data was written afterwards.
        assertEquals(expectedNone, readAll(iter0));
        assertEquals(expectedNone, readAll(iter0dc1));
        assertEquals(expectedNone, readAll(iter0dc2));
        assertEquals(expectedSample, readAll(iter1));
        assertEquals(expectedAll, readAll(iter2));
        // For iter1/iter2 the content of a deep copy is expected to follow the SampleIE it was
        // created with, not the kind of iterator it was copied from.
        assertEquals(expectedAll, readAll(iter1dc1));
        assertEquals(expectedAll, readAll(iter2dc1));
        assertEquals(expectedSample, readAll(iter1dc2));
        assertEquals(expectedSample, readAll(iter2dc2));
        // Same expectations must hold for the already-open iterators after delete(0).
        imm.delete(0);
        assertEquals(expectedNone, readAll(iter0));
        assertEquals(expectedNone, readAll(iter0dc1));
        assertEquals(expectedNone, readAll(iter0dc2));
        assertEquals(expectedSample, readAll(iter1));
        assertEquals(expectedAll, readAll(iter2));
        assertEquals(expectedAll, readAll(iter1dc1));
        assertEquals(expectedAll, readAll(iter2dc1));
        assertEquals(expectedSample, readAll(iter1dc2));
        assertEquals(expectedSample, readAll(iter2dc2));
        // Deep copies taken after the delete ("dc3" no-argument SampleIE, "dc4" with sampleConfig).
        SortedKeyValueIterator<Key, Value> iter0dc3 = iter0.deepCopy(new SampleIE());
        SortedKeyValueIterator<Key, Value> iter0dc4 = iter0.deepCopy(new SampleIE(sampleConfig));
        SortedKeyValueIterator<Key, Value> iter1dc3 = iter1.deepCopy(new SampleIE());
        SortedKeyValueIterator<Key, Value> iter1dc4 = iter1.deepCopy(new SampleIE(sampleConfig));
        SortedKeyValueIterator<Key, Value> iter2dc3 = iter2.deepCopy(new SampleIE());
        SortedKeyValueIterator<Key, Value> iter2dc4 = iter2.deepCopy(new SampleIE(sampleConfig));
        assertEquals(expectedNone, readAll(iter0dc3));
        assertEquals(expectedNone, readAll(iter0dc4));
        assertEquals(expectedAll, readAll(iter1dc3));
        assertEquals(expectedAll, readAll(iter2dc3));
        assertEquals(expectedSample, readAll(iter1dc4));
        assertEquals(expectedSample, readAll(iter2dc4));
        // NOTE(review): iter0 is never closed here, unlike iter1 and iter2 — confirm whether that is intentional.
        iter1.close();
        iter2.close();
    }
}
Also used : ConfigurationCopy(org.apache.accumulo.core.conf.ConfigurationCopy) SamplerConfigurationImpl(org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl) TreeMap(java.util.TreeMap) RowSampler(org.apache.accumulo.core.client.sample.RowSampler) Sampler(org.apache.accumulo.core.client.sample.Sampler) RowSampler(org.apache.accumulo.core.client.sample.RowSampler) Value(org.apache.accumulo.core.data.Value) MemoryIterator(org.apache.accumulo.tserver.InMemoryMap.MemoryIterator) Key(org.apache.accumulo.core.data.Key) Test(org.junit.Test)

Aggregations

Sampler (org.apache.accumulo.core.client.sample.Sampler)7 RowSampler (org.apache.accumulo.core.client.sample.RowSampler)5 ConfigurationCopy (org.apache.accumulo.core.conf.ConfigurationCopy)5 Key (org.apache.accumulo.core.data.Key)5 Value (org.apache.accumulo.core.data.Value)5 SamplerConfigurationImpl (org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl)4 Test (org.junit.Test)4 TreeMap (java.util.TreeMap)3 MemoryIterator (org.apache.accumulo.tserver.InMemoryMap.MemoryIterator)3 ArrayList (java.util.ArrayList)2 Entry (java.util.Map.Entry)2 PartialKey (org.apache.accumulo.core.data.PartialKey)2 FileSKVIterator (org.apache.accumulo.core.file.FileSKVIterator)2 CryptoTest (org.apache.accumulo.core.security.crypto.CryptoTest)2 HashCode (com.google.common.hash.HashCode)1 Hasher (com.google.common.hash.Hasher)1 AbstractMap (java.util.AbstractMap)1 AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean)1 AccumuloConfiguration (org.apache.accumulo.core.conf.AccumuloConfiguration)1 Range (org.apache.accumulo.core.data.Range)1