Search in sources:

Example 1 with SamplerConfigurationImpl

use of org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl in project accumulo by apache.

The class AccumuloFileOutputFormatIT, method handleWriteTests.

/**
 * Runs {@code MRTester} against either the populated or the empty table and verifies the
 * {@code part-m-*} files found in the output directory afterwards.
 *
 * <p>With content, exactly one part file is expected and a sample reader for
 * {@code SAMPLER_CONFIG} must be obtainable from it; without content, no part files are expected.
 *
 * @param content {@code true} to use {@code TEST_TABLE}, {@code false} to use {@code EMPTY_TABLE}
 * @throws Exception if any step of the run or the verification fails
 */
private void handleWriteTests(boolean content) throws Exception {
    File outputDir = folder.newFile(testName.getMethodName());
    // The path is removed before the job runs and must exist again afterwards
    // (presumably the job creates it as its output directory — confirm against MRTester).
    assertTrue(outputDir.delete());
    String table = content ? TEST_TABLE : EMPTY_TABLE;
    MRTester.main(new String[] { table, outputDir.getAbsolutePath() });
    assertTrue(outputDir.exists());

    // Only the "part-m-" files are of interest.
    FileFilter partFileFilter = new FileFilter() {

        @Override
        public boolean accept(File candidate) {
            return candidate.getName().startsWith("part-m-");
        }
    };
    File[] partFiles = outputDir.listFiles(partFileFilter);
    assertNotNull(partFiles);

    if (!content) {
        assertEquals(0, partFiles.length);
        return;
    }

    assertEquals(1, partFiles.length);
    File partFile = partFiles[0];
    assertTrue(partFile.exists());
    Configuration hadoopConf = CachedConfiguration.getInstance();
    DefaultConfiguration tableConf = DefaultConfiguration.getInstance();
    FileSKVIterator sampleReader = RFileOperations.getInstance()
            .newReaderBuilder()
            .forFile(partFile.toString(), FileSystem.get(hadoopConf), hadoopConf)
            .withTableConfiguration(tableConf)
            .build()
            .getSample(new SamplerConfigurationImpl(SAMPLER_CONFIG));
    assertNotNull(sampleReader);
}
Also used : FileSKVIterator(org.apache.accumulo.core.file.FileSKVIterator) Configuration(org.apache.hadoop.conf.Configuration) SamplerConfiguration(org.apache.accumulo.core.client.sample.SamplerConfiguration) DefaultConfiguration(org.apache.accumulo.core.conf.DefaultConfiguration) CachedConfiguration(org.apache.accumulo.core.util.CachedConfiguration) SamplerConfigurationImpl(org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl) DefaultConfiguration(org.apache.accumulo.core.conf.DefaultConfiguration) FileFilter(java.io.FileFilter) File(java.io.File)

Example 2 with SamplerConfigurationImpl

use of org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl in project accumulo by apache.

The class TabletIteratorEnvironment, method cloneWithSamplingEnabled.

/**
 * Creates a new {@code TabletIteratorEnvironment} that carries the sampler configuration
 * derived from this environment's {@code config}, alongside the same scope, file manager,
 * files, authorizations and top-level iterators.
 *
 * @return the new environment with the sampler configuration set
 * @throws UnsupportedOperationException if this environment's scope is not {@code scan}
 * @throws SampleNotPresentException if no sampler configuration can be derived from {@code config}
 */
@Override
public IteratorEnvironment cloneWithSamplingEnabled() {
    boolean isScanScope = scope.equals(IteratorScope.scan);
    if (!isScanScope) {
        throw new UnsupportedOperationException();
    }

    SamplerConfigurationImpl samplerConfig = SamplerConfigurationImpl.newSamplerConfig(config);
    if (samplerConfig == null) {
        throw new SampleNotPresentException();
    }

    return new TabletIteratorEnvironment(scope, config, trm, files, authorizations, samplerConfig, topLevelIterators);
}
Also used : SampleNotPresentException(org.apache.accumulo.core.client.SampleNotPresentException) SamplerConfigurationImpl(org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl)

Example 3 with SamplerConfigurationImpl

use of org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl in project accumulo by apache.

The class ScanDataSource, method createIterator.

/**
 * Builds the iterator stack this data source reads from.
 *
 * <p>While holding the tablet lock, it validates the scan state, acquires a scan file manager on
 * first use, records the expected deletion count, obtains the in-memory iterators and reserves
 * the tablet's data files. Outside the lock it opens the reserved files, sets the interrupt flag
 * on every source iterator, merges them in a {@code MultiIterator}, and wraps the result with the
 * stats iterator and the system scan iterators. When {@code loadIters} is set, the configured
 * table iterators (merged with any scan-time iterators) are loaded on top.
 *
 * @return the top of the iterator stack for this scan
 * @throws IOException declared for failures while opening files or loading iterators
 * @throws IllegalStateException if memory iterators or files from a previous call were not released
 * @throws TabletClosedException if the tablet is closed
 * @throws IterationInterruptedException if the interrupt flag is already set
 */
private SortedKeyValueIterator<Key, Value> createIterator() throws IOException {
    Map<FileRef, DataFileValue> files;
    SamplerConfigurationImpl samplerConfig = options.getSamplerConfigurationImpl();
    synchronized (tablet) {
        if (memIters != null) {
            throw new IllegalStateException("Tried to create new scan iterator w/o releasing memory");
        }
        if (tablet.isClosed()) {
            throw new TabletClosedException();
        }
        if (interruptFlag.get()) {
            throw new IterationInterruptedException(tablet.getExtent().toString() + " " + interruptFlag.hashCode());
        }
        // only acquire the file manager when we know the tablet is open
        if (fileManager == null) {
            fileManager = tablet.getTabletResources().newScanFileManager();
            tablet.addActiveScans(this);
        }
        if (fileManager.getNumOpenFiles() != 0) {
            throw new IllegalStateException("Tried to create new scan iterator w/o releasing files");
        }
        // set this before trying to get iterators in case
        // getIterators() throws an exception
        expectedDeletionCount = tablet.getDataSourceDeletions();
        memIters = tablet.getTabletMemory().getIterators(samplerConfig);
        Pair<Long, Map<FileRef, DataFileValue>> reservation = tablet.getDatafileManager().reserveFilesForScan();
        fileReservationId = reservation.getFirst();
        files = reservation.getSecond();
    }
    Collection<InterruptibleIterator> mapfiles = fileManager.openFiles(files, options.isIsolated(), samplerConfig);
    // every source (file and in-memory) shares the same interrupt flag
    for (SortedKeyValueIterator<Key, Value> skvi : Iterables.concat(mapfiles, memIters)) {
        ((InterruptibleIterator) skvi).setInterruptFlag(interruptFlag);
    }
    List<SortedKeyValueIterator<Key, Value>> iters = new ArrayList<>(mapfiles.size() + memIters.size());
    iters.addAll(mapfiles);
    iters.addAll(memIters);
    MultiIterator multiIter = new MultiIterator(iters, tablet.getExtent());
    TabletIteratorEnvironment iterEnv = new TabletIteratorEnvironment(IteratorScope.scan, tablet.getTableConfiguration(), fileManager, files, options.getAuthorizations(), samplerConfig);
    statsIterator = new StatsIterator(multiIter, TabletServer.seekCount, tablet.getScannedCounter());
    SortedKeyValueIterator<Key, Value> visFilter = IteratorUtil.setupSystemScanIterators(statsIterator, options.getColumnSet(), options.getAuthorizations(), options.getDefaultLabels());
    if (!loadIters) {
        return visFilter;
    } else {
        List<IterInfo> iterInfos;
        Map<String, Map<String, String>> iterOpts;
        ParsedIteratorConfig pic = tablet.getTableConfiguration().getParsedIteratorConfig(IteratorScope.scan);
        if (options.getSsiList().size() == 0 && options.getSsio().size() == 0) {
            // No scan time iterator options were set, so can just use the pre-parsed table iterator options.
            iterInfos = pic.getIterInfo();
            iterOpts = pic.getOpts();
        } else {
            // Scan time iterator options were set, so need to merge those with pre-parsed table iterator options.
            iterOpts = new HashMap<>(pic.getOpts().size() + options.getSsio().size());
            iterInfos = new ArrayList<>(pic.getIterInfo().size() + options.getSsiList().size());
            IteratorUtil.mergeIteratorConfig(iterInfos, iterOpts, pic.getIterInfo(), pic.getOpts(), options.getSsiList(), options.getSsio());
        }
        // A context set on the scan takes precedence over the one from the table's iterator config.
        String context;
        if (options.getClassLoaderContext() != null) {
            log.trace("Loading iterators for scan with scan context: {}", options.getClassLoaderContext());
            context = options.getClassLoaderContext();
        } else {
            context = pic.getContext();
            if (context != null) {
                // Log the table context actually in use; the scan context is null in this branch.
                log.trace("Loading iterators for scan with table context: {}", context);
            } else {
                log.trace("Loading iterators for scan");
            }
        }
        return iterEnv.getTopLevelIterator(IteratorUtil.loadIterators(visFilter, iterInfos, iterOpts, iterEnv, true, context));
    }
}
Also used : SamplerConfigurationImpl(org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl) ArrayList(java.util.ArrayList) InterruptibleIterator(org.apache.accumulo.core.iterators.system.InterruptibleIterator) IterInfo(org.apache.accumulo.core.data.thrift.IterInfo) FileRef(org.apache.accumulo.server.fs.FileRef) TabletIteratorEnvironment(org.apache.accumulo.tserver.TabletIteratorEnvironment) IterationInterruptedException(org.apache.accumulo.core.iterators.IterationInterruptedException) DataFileValue(org.apache.accumulo.core.metadata.schema.DataFileValue) MultiIterator(org.apache.accumulo.core.iterators.system.MultiIterator) SortedKeyValueIterator(org.apache.accumulo.core.iterators.SortedKeyValueIterator) StatsIterator(org.apache.accumulo.core.iterators.system.StatsIterator) DataFileValue(org.apache.accumulo.core.metadata.schema.DataFileValue) Value(org.apache.accumulo.core.data.Value) ParsedIteratorConfig(org.apache.accumulo.server.conf.TableConfiguration.ParsedIteratorConfig) HashMap(java.util.HashMap) Map(java.util.Map) Key(org.apache.accumulo.core.data.Key)

Example 4 with SamplerConfigurationImpl

use of org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl in project accumulo by apache.

The class InMemoryMapTest, method testNoSampleConfig.

/**
 * Requests a sample iterator with a {@code RowSampler} configuration from an in-memory map
 * built by {@code newInMemoryMap(false, ...)} and seeks it over the full range. The test
 * passes only if a {@code SampleNotPresentException} is thrown.
 *
 * @throws Exception the expected {@code SampleNotPresentException}, or any unexpected failure
 */
@Test(expected = SampleNotPresentException.class)
public void testNoSampleConfig() throws Exception {
    InMemoryMap map = newInMemoryMap(false, tempFolder.newFolder().getAbsolutePath());
    mutate(map, "r", "cf:cq", 5, "b");

    String samplerClass = RowSampler.class.getName();
    ImmutableMap<String, String> samplerOptions = ImmutableMap.of("hasher", "murmur3_32", "modulus", "9");
    SamplerConfigurationImpl requestedConfig = new SamplerConfigurationImpl(samplerClass, samplerOptions);

    MemoryIterator sampleIter = map.skvIterator(requestedConfig);
    sampleIter.seek(new Range(), LocalityGroupUtil.EMPTY_CF_SET, false);
}
Also used : RowSampler(org.apache.accumulo.core.client.sample.RowSampler) SamplerConfigurationImpl(org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl) MemoryIterator(org.apache.accumulo.tserver.InMemoryMap.MemoryIterator) Range(org.apache.accumulo.core.data.Range) Test(org.junit.Test)

Example 5 with SamplerConfigurationImpl

use of org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl in project accumulo by apache.

The class InMemoryMapTest, method runInterruptSampleTest.

/**
 * Fills an in-memory map configured for row sampling, reads the sample back through a sample
 * iterator and checks it against the expected sample, then sets the interrupt flag and
 * verifies that a further read throws {@code IterationInterruptedException}.
 *
 * @param deepCopy whether to read through a deep copy of the sample iterator
 * @param delete whether to call {@code imm.delete(0)} before reading
 * @param dcAfterDelete when deleting, {@code true} deletes after the deep-copy step and
 *        {@code false} deletes before it
 * @throws Exception if setup or reading fails unexpectedly
 */
private void runInterruptSampleTest(boolean deepCopy, boolean delete, boolean dcAfterDelete) throws Exception {
    SamplerConfigurationImpl samplerConfig = new SamplerConfigurationImpl(RowSampler.class.getName(), ImmutableMap.of("hasher", "murmur3_32", "modulus", "2"));
    Sampler sampler = SamplerFactory.newSampler(samplerConfig, DefaultConfiguration.getInstance());

    // Copy the sampler settings into the configuration used to build the map.
    ConfigurationCopy mapConfig = newConfig(tempFolder.newFolder().getAbsolutePath());
    for (Entry<String, String> prop : samplerConfig.toTablePropertiesMap().entrySet()) {
        mapConfig.set(prop.getKey(), prop.getValue());
    }
    InMemoryMap imm = new InMemoryMap(mapConfig);

    TreeMap<Key, Value> expectedSample = new TreeMap<>();
    TreeMap<Key, Value> expectedAll = new TreeMap<>();
    for (int rowNum = 0; rowNum < 1000; rowNum++) {
        String row = String.format("r%06d", rowNum);
        int firstValue = 2 * rowNum;
        mutate(imm, row, "cf1:cq1", 5, "v" + firstValue, sampler, expectedSample, expectedAll);
        mutate(imm, row, "cf2:cq2", 5, "v" + (firstValue + 1), sampler, expectedSample, expectedAll);
    }
    assertTrue(expectedSample.size() > 0);

    MemoryIterator memIter = imm.skvIterator(samplerConfig);
    AtomicBoolean interruptFlag = new AtomicBoolean(false);
    memIter.setInterruptFlag(interruptFlag);
    SortedKeyValueIterator<Key, Value> reader = memIter;

    // The delete happens either before or after the optional deep copy.
    boolean deleteBeforeCopy = delete && !dcAfterDelete;
    boolean deleteAfterCopy = delete && dcAfterDelete;
    if (deleteBeforeCopy) {
        imm.delete(0);
    }
    if (deepCopy) {
        reader = reader.deepCopy(new SampleIE(samplerConfig));
    }
    if (deleteAfterCopy) {
        imm.delete(0);
    }

    assertEquals(expectedSample, readAll(reader));

    // Once the flag is set, reading must be interrupted.
    interruptFlag.set(true);
    boolean interrupted = false;
    try {
        readAll(reader);
    } catch (IterationInterruptedException expected) {
        interrupted = true;
    }
    if (!interrupted) {
        Assert.fail();
    }
    memIter.close();
}
Also used : ConfigurationCopy(org.apache.accumulo.core.conf.ConfigurationCopy) SamplerConfigurationImpl(org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl) TreeMap(java.util.TreeMap) RowSampler(org.apache.accumulo.core.client.sample.RowSampler) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) Sampler(org.apache.accumulo.core.client.sample.Sampler) RowSampler(org.apache.accumulo.core.client.sample.RowSampler) Value(org.apache.accumulo.core.data.Value) MemoryIterator(org.apache.accumulo.tserver.InMemoryMap.MemoryIterator) IterationInterruptedException(org.apache.accumulo.core.iterators.IterationInterruptedException) Key(org.apache.accumulo.core.data.Key)

Aggregations

SamplerConfigurationImpl (org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl)24 RowSampler (org.apache.accumulo.core.client.sample.RowSampler)8 ConfigurationCopy (org.apache.accumulo.core.conf.ConfigurationCopy)7 Test (org.junit.Test)7 Key (org.apache.accumulo.core.data.Key)6 Value (org.apache.accumulo.core.data.Value)6 MemoryIterator (org.apache.accumulo.tserver.InMemoryMap.MemoryIterator)6 SamplerConfiguration (org.apache.accumulo.core.client.sample.SamplerConfiguration)5 AccumuloConfiguration (org.apache.accumulo.core.conf.AccumuloConfiguration)5 Sampler (org.apache.accumulo.core.client.sample.Sampler)4 Range (org.apache.accumulo.core.data.Range)4 Configuration (org.apache.hadoop.conf.Configuration)4 IOException (java.io.IOException)3 ArrayList (java.util.ArrayList)3 TreeMap (java.util.TreeMap)3 FileSKVIterator (org.apache.accumulo.core.file.FileSKVIterator)3 MultiIterator (org.apache.accumulo.core.iterators.system.MultiIterator)3 CachedConfiguration (org.apache.accumulo.core.util.CachedConfiguration)3 File (java.io.File)2 FileFilter (java.io.FileFilter)2