Search in sources :

Example 21 with SamplerConfigurationImpl

use of org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl in project accumulo by apache.

the class TabletIteratorEnvironment method getSamplerConfiguration.

/**
 * Returns the sampler configuration for this environment, deriving it from {@code config} on
 * first use.
 *
 * @return the cached sampler configuration, or {@code null} when
 *         {@code SamplerConfigurationImpl.newSamplerConfig(config)} yields {@code null}
 */
@Override
public SamplerConfiguration getSamplerConfiguration() {
    // Once a value has been derived it is reused as-is, so it stays stable even if config changes.
    if (samplerConfig != null) {
        return samplerConfig;
    }
    SamplerConfigurationImpl impl = SamplerConfigurationImpl.newSamplerConfig(config);
    if (impl == null) {
        // Nothing is cached in this case, so the next call will consult config again.
        return null;
    }
    samplerConfig = impl.toSamplerConfiguration();
    return samplerConfig;
}
Also used : SamplerConfigurationImpl(org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl)

Example 22 with SamplerConfigurationImpl

use of org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl in project accumulo by apache.

the class AccumuloFileOutputFormatIT method handleWriteTests.

/**
 * Runs {@code MRTester} against either the populated or the empty test table and verifies the
 * {@code part-m-*} files found under the output location.
 *
 * <p>When {@code content} is true exactly one part file is expected and it must expose a sample
 * for {@code SAMPLER_CONFIG}; otherwise no part files are expected.
 *
 * @param content {@code true} to use {@code TEST_TABLE}, {@code false} to use {@code EMPTY_TABLE}
 * @throws Exception if the tester, the file system access, or closing the reader fails
 */
private void handleWriteTests(boolean content) throws Exception {
    File f = folder.newFile(testName.getMethodName());
    // Only the unique path is needed; the placeholder file itself is removed before the run.
    if (f.delete()) {
        log.debug("Deleted {}", f);
    }
    MRTester.main(new String[] { content ? TEST_TABLE : EMPTY_TABLE, f.getAbsolutePath() });
    assertTrue(f.exists());
    File[] files = f.listFiles(new FileFilter() {

        @Override
        public boolean accept(File file) {
            return file.getName().startsWith("part-m-");
        }
    });
    // listFiles returns null when f is not a directory or an I/O error occurs.
    assertNotNull(files);
    if (content) {
        assertEquals(1, files.length);
        assertTrue(files[0].exists());
        Configuration conf = CachedConfiguration.getInstance();
        DefaultConfiguration acuconf = DefaultConfiguration.getInstance();
        FileSKVIterator reader = RFileOperations.getInstance().newReaderBuilder().forFile(files[0].toString(), FileSystem.get(conf), conf).withTableConfiguration(acuconf).build();
        try {
            FileSKVIterator sample = reader.getSample(new SamplerConfigurationImpl(SAMPLER_CONFIG));
            assertNotNull(sample);
        } finally {
            // Release the file handle even when the assertion fails.
            reader.close();
        }
    } else {
        assertEquals(0, files.length);
    }
}
Also used : FileSKVIterator(org.apache.accumulo.core.file.FileSKVIterator) Configuration(org.apache.hadoop.conf.Configuration) SamplerConfiguration(org.apache.accumulo.core.client.sample.SamplerConfiguration) DefaultConfiguration(org.apache.accumulo.core.conf.DefaultConfiguration) CachedConfiguration(org.apache.accumulo.core.util.CachedConfiguration) SamplerConfigurationImpl(org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl) FileFilter(java.io.FileFilter) File(java.io.File)

Example 23 with SamplerConfigurationImpl

use of org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl in project accumulo by apache.

the class RFileOperations method openWriter.

/**
 * Builds an {@code RFile.Writer} from the supplied options.
 *
 * <p>Block sizes, sampler settings, replication and file block size are read from the table
 * configuration carried by {@code options}. When {@code options} supplies no output stream, one
 * is created on the options' file system for the options' filename (without overwrite).
 *
 * @param options the writer options (table configuration, Hadoop configuration, compression,
 *        optional output stream, filename, file system, rate limiter)
 * @return a writer wrapping a rate-limited, block-cached output stream
 * @throws IOException declared by the signature; presumably propagated from file creation or
 *         writer construction
 * @throws IllegalArgumentException if either configured compressed block size is not strictly
 *         between 0 and {@code Integer.MAX_VALUE}
 */
@Override
protected FileSKVWriter openWriter(OpenWriterOperation options) throws IOException {
    AccumuloConfiguration acuconf = options.getTableConfiguration();

    // Both sizes are later narrowed to int, hence the upper bound checks.
    long dataBlockBytes = acuconf.getAsBytes(Property.TABLE_FILE_COMPRESSED_BLOCK_SIZE);
    Preconditions.checkArgument((dataBlockBytes < Integer.MAX_VALUE && dataBlockBytes > 0), "table.file.compress.blocksize must be greater than 0 and less than " + Integer.MAX_VALUE);
    long indexBlockBytes = acuconf.getAsBytes(Property.TABLE_FILE_COMPRESSED_BLOCK_SIZE_INDEX);
    Preconditions.checkArgument((indexBlockBytes < Integer.MAX_VALUE && indexBlockBytes > 0), "table.file.compress.blocksize.index must be greater than 0 and less than " + Integer.MAX_VALUE);

    // A sampler is only instantiated when the table configuration yields a sampler config.
    SamplerConfigurationImpl samplerConfig = SamplerConfigurationImpl.newSamplerConfig(acuconf);
    Sampler sampler = samplerConfig == null ? null : SamplerFactory.newSampler(samplerConfig, acuconf, options.isAccumuloStartEnabled());

    // An explicit compression on the options wins over the table property.
    String compression = options.getCompression();
    if (compression == null) {
        compression = options.getTableConfiguration().get(Property.TABLE_FILE_COMPRESSION_TYPE);
    }

    FSDataOutputStream out = options.getOutputStream();
    Configuration conf = options.getConfiguration();
    if (out == null) {
        // Table-level replication overrides the Hadoop setting only when positive and different.
        int hadoopReplication = conf.getInt("dfs.replication", -1);
        int tableReplication = acuconf.getCount(Property.TABLE_FILE_REPLICATION);
        int replication = (tableReplication > 0 && tableReplication != hadoopReplication) ? tableReplication : hadoopReplication;

        // Likewise, a positive table-level block size overrides the Hadoop block size.
        long hadoopBlockSize = conf.getLong("dfs.block.size", 1 << 26);
        long tableBlockSize = acuconf.getAsBytes(Property.TABLE_FILE_BLOCK_SIZE);
        long fileBlockSize = tableBlockSize > 0 ? tableBlockSize : hadoopBlockSize;

        int bufferSize = conf.getInt("io.file.buffer.size", 4096);
        String file = options.getFilename();
        FileSystem fs = options.getFileSystem();
        out = fs.create(new Path(file), false, bufferSize, (short) replication, fileBlockSize);
    }

    RateLimitedOutputStream limitedOut = new RateLimitedOutputStream(out, options.getRateLimiter());
    CachableBlockFile.Writer blockWriter = new CachableBlockFile.Writer(limitedOut, compression, conf, acuconf);
    return new RFile.Writer(blockWriter, (int) dataBlockBytes, (int) indexBlockBytes, samplerConfig, sampler);
}
Also used : Path(org.apache.hadoop.fs.Path) AccumuloConfiguration(org.apache.accumulo.core.conf.AccumuloConfiguration) Configuration(org.apache.hadoop.conf.Configuration) SamplerConfigurationImpl(org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl) RateLimitedOutputStream(org.apache.accumulo.core.file.streams.RateLimitedOutputStream) Sampler(org.apache.accumulo.core.client.sample.Sampler) FileSystem(org.apache.hadoop.fs.FileSystem) CachableBlockFile(org.apache.accumulo.core.file.blockfile.impl.CachableBlockFile) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) FileSKVWriter(org.apache.accumulo.core.file.FileSKVWriter)

Example 24 with SamplerConfigurationImpl

use of org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl in project accumulo by apache.

the class NewTableConfiguration method enableSampling.

/**
 * Enables building a sample data set on the new table using the given sampler configuration.
 *
 * <p>The configuration is converted to its table-property form, which is checked against the
 * properties already held by this object via {@code checkDisjoint} before being stored.
 *
 * @param samplerConfiguration the sampler configuration to apply; must not be {@code null}
 * @return this object, to allow call chaining
 * @throws NullPointerException if {@code samplerConfiguration} is {@code null}
 * @since 1.8.0
 */
public NewTableConfiguration enableSampling(SamplerConfiguration samplerConfiguration) {
    requireNonNull(samplerConfiguration);
    SamplerConfigurationImpl impl = new SamplerConfigurationImpl(samplerConfiguration);
    Map<String, String> samplerTableProps = impl.toTablePropertiesMap();
    // Validate before assigning so a failed check leaves samplerProps untouched.
    checkDisjoint(properties, samplerTableProps, "sampler");
    this.samplerProps = samplerTableProps;
    return this;
}
Also used : SamplerConfigurationImpl(org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl)

Aggregations

SamplerConfigurationImpl (org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl)24 RowSampler (org.apache.accumulo.core.client.sample.RowSampler)8 ConfigurationCopy (org.apache.accumulo.core.conf.ConfigurationCopy)7 Test (org.junit.Test)7 Key (org.apache.accumulo.core.data.Key)6 Value (org.apache.accumulo.core.data.Value)6 MemoryIterator (org.apache.accumulo.tserver.InMemoryMap.MemoryIterator)6 SamplerConfiguration (org.apache.accumulo.core.client.sample.SamplerConfiguration)5 AccumuloConfiguration (org.apache.accumulo.core.conf.AccumuloConfiguration)5 Sampler (org.apache.accumulo.core.client.sample.Sampler)4 Range (org.apache.accumulo.core.data.Range)4 Configuration (org.apache.hadoop.conf.Configuration)4 IOException (java.io.IOException)3 ArrayList (java.util.ArrayList)3 TreeMap (java.util.TreeMap)3 FileSKVIterator (org.apache.accumulo.core.file.FileSKVIterator)3 MultiIterator (org.apache.accumulo.core.iterators.system.MultiIterator)3 CachedConfiguration (org.apache.accumulo.core.util.CachedConfiguration)3 File (java.io.File)2 FileFilter (java.io.FileFilter)2