
Example 16 with SamplerConfigurationImpl

Use of org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl in the Apache Accumulo project.

From the class RangeInputSplit, the readFields method:

@Override
public void readFields(DataInput in) throws IOException {
    range.readFields(in);
    tableName = in.readUTF();
    tableId = in.readUTF();
    int numLocs = in.readInt();
    locations = new String[numLocs];
    for (int i = 0; i < numLocs; ++i) locations[i] = in.readUTF();
    if (in.readBoolean()) {
        isolatedScan = in.readBoolean();
    }
    if (in.readBoolean()) {
        offline = in.readBoolean();
    }
    if (in.readBoolean()) {
        localIterators = in.readBoolean();
    }
    if (in.readBoolean()) {
        mockInstance = in.readBoolean();
    }
    if (in.readBoolean()) {
        int numColumns = in.readInt();
        List<String> columns = new ArrayList<>(numColumns);
        for (int i = 0; i < numColumns; i++) {
            columns.add(in.readUTF());
        }
        fetchedColumns = InputConfigurator.deserializeFetchedColumns(columns);
    }
    if (in.readBoolean()) {
        String strAuths = in.readUTF();
        auths = new Authorizations(strAuths.getBytes(UTF_8));
    }
    if (in.readBoolean()) {
        principal = in.readUTF();
    }
    if (in.readBoolean()) {
        int ordinal = in.readInt();
        this.tokenSource = TokenSource.values()[ordinal];
        switch(this.tokenSource) {
            case INLINE:
                String tokenClass = in.readUTF();
                byte[] tokenBytes = Base64.getDecoder().decode(in.readUTF());
                this.token = AuthenticationTokenSerializer.deserialize(tokenClass, tokenBytes);
                break;
            case FILE:
                this.tokenFile = in.readUTF();
                break;
            default:
                throw new IOException("Cannot parse unknown TokenSource ordinal");
        }
    }
    if (in.readBoolean()) {
        instanceName = in.readUTF();
    }
    if (in.readBoolean()) {
        zooKeepers = in.readUTF();
    }
    if (in.readBoolean()) {
        int numIterators = in.readInt();
        iterators = new ArrayList<>(numIterators);
        for (int i = 0; i < numIterators; i++) {
            iterators.add(new IteratorSetting(in));
        }
    }
    if (in.readBoolean()) {
        level = Level.toLevel(in.readInt());
    }
    if (in.readBoolean()) {
        samplerConfig = new SamplerConfigurationImpl(in).toSamplerConfiguration();
    }
}
Also used: Authorizations (org.apache.accumulo.core.security.Authorizations), IteratorSetting (org.apache.accumulo.core.client.IteratorSetting), SamplerConfigurationImpl (org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl), ArrayList (java.util.ArrayList), IOException (java.io.IOException)
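
The deserialization above mirrors the write method in the next example: every optional field is preceded by a presence boolean, and the sampler configuration is rebuilt by handing the stream to SamplerConfigurationImpl's DataInput constructor and converting it with toSamplerConfiguration(). A minimal, self-contained sketch of that presence-flag pattern (illustrative code, not Accumulo source; the class and helper names are made up):

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

public class OptionalFieldIO {

    // Write a possibly-null String: presence flag first, value only if present.
    static void writeOptional(DataOutputStream out, String value) throws IOException {
        out.writeBoolean(value != null);
        if (value != null) {
            out.writeUTF(value);
        }
    }

    // Read the matching field back: consume the flag, then the value if the flag was true.
    static String readOptional(DataInputStream in) throws IOException {
        return in.readBoolean() ? in.readUTF() : null;
    }

    public static void main(String[] args) throws IOException {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        try (DataOutputStream out = new DataOutputStream(bytes)) {
            writeOptional(out, "instance-name");
            writeOptional(out, null);
        }
        try (DataInputStream in = new DataInputStream(new ByteArrayInputStream(bytes.toByteArray()))) {
            System.out.println(readOptional(in)); // instance-name
            System.out.println(readOptional(in)); // null
        }
    }
}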

Example 17 with SamplerConfigurationImpl

Use of org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl in the Apache Accumulo project.

From the class RangeInputSplit, the write method:

@Override
public void write(DataOutput out) throws IOException {
    range.write(out);
    out.writeUTF(tableName);
    out.writeUTF(tableId);
    out.writeInt(locations.length);
    for (String location : locations) out.writeUTF(location);
    out.writeBoolean(null != isolatedScan);
    if (null != isolatedScan) {
        out.writeBoolean(isolatedScan);
    }
    out.writeBoolean(null != offline);
    if (null != offline) {
        out.writeBoolean(offline);
    }
    out.writeBoolean(null != localIterators);
    if (null != localIterators) {
        out.writeBoolean(localIterators);
    }
    out.writeBoolean(null != mockInstance);
    if (null != mockInstance) {
        out.writeBoolean(mockInstance);
    }
    out.writeBoolean(null != fetchedColumns);
    if (null != fetchedColumns) {
        String[] cols = InputConfigurator.serializeColumns(fetchedColumns);
        out.writeInt(cols.length);
        for (String col : cols) {
            out.writeUTF(col);
        }
    }
    out.writeBoolean(null != auths);
    if (null != auths) {
        out.writeUTF(auths.serialize());
    }
    out.writeBoolean(null != principal);
    if (null != principal) {
        out.writeUTF(principal);
    }
    out.writeBoolean(null != tokenSource);
    if (null != tokenSource) {
        out.writeInt(tokenSource.ordinal());
        if (null != token && null != tokenFile) {
            throw new IOException("Cannot use both inline AuthenticationToken and file-based AuthenticationToken");
        } else if (null != token) {
            out.writeUTF(token.getClass().getName());
            out.writeUTF(Base64.getEncoder().encodeToString(AuthenticationTokenSerializer.serialize(token)));
        } else {
            out.writeUTF(tokenFile);
        }
    }
    out.writeBoolean(null != instanceName);
    if (null != instanceName) {
        out.writeUTF(instanceName);
    }
    out.writeBoolean(null != zooKeepers);
    if (null != zooKeepers) {
        out.writeUTF(zooKeepers);
    }
    out.writeBoolean(null != iterators);
    if (null != iterators) {
        out.writeInt(iterators.size());
        for (IteratorSetting iterator : iterators) {
            iterator.write(out);
        }
    }
    out.writeBoolean(null != level);
    if (null != level) {
        out.writeInt(level.toInt());
    }
    out.writeBoolean(null != samplerConfig);
    if (null != samplerConfig) {
        new SamplerConfigurationImpl(samplerConfig).write(out);
    }
}
Also used: IteratorSetting (org.apache.accumulo.core.client.IteratorSetting), SamplerConfigurationImpl (org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl), IOException (java.io.IOException)
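
Together with readFields in Example 16, this shows the full round trip for the sampler configuration: wrap the public SamplerConfiguration in SamplerConfigurationImpl, write it to the DataOutput, then rebuild it through the DataInput constructor. A hedged round-trip sketch that uses only the constructors and methods visible in these two snippets (the standalone class name and the sampler options are illustrative):

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

import org.apache.accumulo.core.client.sample.RowSampler;
import org.apache.accumulo.core.client.sample.SamplerConfiguration;
import org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl;

public class SamplerConfigRoundTrip {
    public static void main(String[] args) throws IOException {
        SamplerConfiguration samplerConfig = new SamplerConfiguration(RowSampler.class.getName());
        samplerConfig.addOption("hasher", "murmur3_32");
        samplerConfig.addOption("modulus", "1009");

        // Serialize, as RangeInputSplit.write does for its samplerConfig field.
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        try (DataOutputStream out = new DataOutputStream(bytes)) {
            new SamplerConfigurationImpl(samplerConfig).write(out);
        }

        // Deserialize, as RangeInputSplit.readFields does.
        try (DataInputStream in = new DataInputStream(new ByteArrayInputStream(bytes.toByteArray()))) {
            SamplerConfiguration copy = new SamplerConfigurationImpl(in).toSamplerConfiguration();
            // The wrapped forms should compare equal (expected output: true).
            System.out.println(new SamplerConfigurationImpl(copy).equals(new SamplerConfigurationImpl(samplerConfig)));
        }
    }
}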

Example 18 with SamplerConfigurationImpl

Use of org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl in the Apache Accumulo project.

From the class RFileScanner, the iterator method:

@Override
public Iterator<Entry<Key, Value>> iterator() {
    try {
        RFileSource[] sources = opts.in.getSources();
        List<SortedKeyValueIterator<Key, Value>> readers = new ArrayList<>(sources.length);
        for (int i = 0; i < sources.length; i++) {
            // TODO may have been a bug with multiple files and caching in older version...
            FSDataInputStream inputStream = (FSDataInputStream) sources[i].getInputStream();
            readers.add(new RFile.Reader(new CachableBlockFile.Reader("source-" + i, inputStream, sources[i].getLength(), opts.in.getConf(), dataCache, indexCache, DefaultConfiguration.getInstance())));
        }
        if (getSamplerConfiguration() != null) {
            for (int i = 0; i < readers.size(); i++) {
                readers.set(i, ((Reader) readers.get(i)).getSample(new SamplerConfigurationImpl(getSamplerConfiguration())));
            }
        }
        SortedKeyValueIterator<Key, Value> iterator;
        if (opts.bounds != null) {
            iterator = new MultiIterator(readers, opts.bounds);
        } else {
            iterator = new MultiIterator(readers, false);
        }
        Set<ByteSequence> families = Collections.emptySet();
        if (opts.useSystemIterators) {
            SortedSet<Column> cols = this.getFetchedColumns();
            families = LocalityGroupUtil.families(cols);
            iterator = IteratorUtil.setupSystemScanIterators(iterator, cols, getAuthorizations(), EMPTY_BYTES);
        }
        try {
            if (opts.tableConfig != null && opts.tableConfig.size() > 0) {
                ConfigurationCopy conf = new ConfigurationCopy(opts.tableConfig);
                iterator = IteratorUtil.loadIterators(IteratorScope.scan, iterator, null, conf, serverSideIteratorList, serverSideIteratorOptions, new IterEnv());
            } else {
                iterator = IteratorUtil.loadIterators(iterator, serverSideIteratorList, serverSideIteratorOptions, new IterEnv(), false, null);
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        iterator.seek(getRange() == null ? EMPTY_RANGE : getRange(), families, !families.isEmpty());
        return new IteratorAdapter(iterator);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
Also used: MultiIterator (org.apache.accumulo.core.iterators.system.MultiIterator), ConfigurationCopy (org.apache.accumulo.core.conf.ConfigurationCopy), IteratorAdapter (org.apache.accumulo.core.iterators.IteratorAdapter), SamplerConfigurationImpl (org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl), ArrayList (java.util.ArrayList), SortedKeyValueIterator (org.apache.accumulo.core.iterators.SortedKeyValueIterator), Reader (org.apache.accumulo.core.file.rfile.RFile.Reader), RFile (org.apache.accumulo.core.file.rfile.RFile), IOException (java.io.IOException), Column (org.apache.accumulo.core.data.Column), Value (org.apache.accumulo.core.data.Value), FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream), Key (org.apache.accumulo.core.data.Key), ByteSequence (org.apache.accumulo.core.data.ByteSequence)
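
The scanner above swaps each RFile.Reader for its sample view via getSample(new SamplerConfigurationImpl(...)). For comparison, here is a hedged sketch of how client code typically asks for that sample view through the public RFile scanner builder; it assumes RFile.newScanner().from(...).withFileSystem(...).build() and ScannerBase.setSamplerConfiguration(...) are available, the file path and sampler options are made up, and the file must have been written with a matching sampler for the sample scan to succeed:

import java.util.Map.Entry;

import org.apache.accumulo.core.client.Scanner;
import org.apache.accumulo.core.client.rfile.RFile;
import org.apache.accumulo.core.client.sample.RowSampler;
import org.apache.accumulo.core.client.sample.SamplerConfiguration;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Value;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public class SampledRFileScan {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.getLocal(new Configuration());

        SamplerConfiguration samplerConfig = new SamplerConfiguration(RowSampler.class.getName());
        samplerConfig.addOption("hasher", "murmur3_32");
        samplerConfig.addOption("modulus", "1009");

        Scanner scanner = RFile.newScanner().from("/tmp/example.rf").withFileSystem(fs).build();
        // Request the sample view; RFileScanner.iterator() then applies
        // reader.getSample(new SamplerConfigurationImpl(...)) as shown above.
        scanner.setSamplerConfiguration(samplerConfig);
        for (Entry<Key, Value> entry : scanner) {
            System.out.println(entry.getKey() + " -> " + entry.getValue());
        }
        scanner.close();
    }
}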

Example 19 with SamplerConfigurationImpl

Use of org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl in the Apache Accumulo project.

From the class RFileWriterBuilder, the withSampler method:

@Override
public WriterOptions withSampler(SamplerConfiguration samplerConf) {
    Objects.requireNonNull(samplerConf);
    Map<String, String> tmp = new SamplerConfigurationImpl(samplerConf).toTablePropertiesMap();
    checkDisjoint(tableConfig, tmp, "sampler");
    this.samplerProps = tmp;
    return this;
}
Also used: SamplerConfigurationImpl (org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl)
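
A hedged usage sketch for this builder method: configuring a row sampler when writing an RFile through the public API. The file path, sampler options, and generated entries are illustrative; withSampler(...) is the method above, which turns the configuration into table properties via SamplerConfigurationImpl.toTablePropertiesMap():

import java.nio.charset.StandardCharsets;

import org.apache.accumulo.core.client.rfile.RFile;
import org.apache.accumulo.core.client.rfile.RFileWriter;
import org.apache.accumulo.core.client.sample.RowSampler;
import org.apache.accumulo.core.client.sample.SamplerConfiguration;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Value;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public class SampledRFileWrite {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.getLocal(new Configuration());

        SamplerConfiguration samplerConfig = new SamplerConfiguration(RowSampler.class.getName());
        samplerConfig.addOption("hasher", "murmur3_32");
        samplerConfig.addOption("modulus", "1009");

        try (RFileWriter writer = RFile.newWriter().to("/tmp/example.rf").withFileSystem(fs)
                .withSampler(samplerConfig).build()) {
            writer.startDefaultLocalityGroup();
            // Keys must be appended in sorted order.
            for (int r = 0; r < 1000; r++) {
                writer.append(new Key(String.format("row%04d", r), "fam", "qual"),
                        new Value(("value" + r).getBytes(StandardCharsets.UTF_8)));
            }
        }
    }
}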

Example 20 with SamplerConfigurationImpl

Use of org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl in the Apache Accumulo project.

From the class AccumuloFileOutputFormatTest, the validateConfiguration method:

@Test
public void validateConfiguration() throws IOException, InterruptedException {
    int a = 7;
    long b = 300l;
    long c = 50l;
    long d = 10l;
    String e = "snappy";
    SamplerConfiguration samplerConfig = new SamplerConfiguration(RowSampler.class.getName());
    samplerConfig.addOption("hasher", "murmur3_32");
    samplerConfig.addOption("modulus", "109");
    SummarizerConfiguration sc1 = SummarizerConfiguration.builder(VisibilitySummarizer.class).addOption(CountingSummarizer.MAX_COUNTERS_OPT, 2048).build();
    SummarizerConfiguration sc2 = SummarizerConfiguration.builder(FamilySummarizer.class).addOption(CountingSummarizer.MAX_COUNTERS_OPT, 256).build();
    Job job1 = Job.getInstance();
    AccumuloFileOutputFormat.setReplication(job1, a);
    AccumuloFileOutputFormat.setFileBlockSize(job1, b);
    AccumuloFileOutputFormat.setDataBlockSize(job1, c);
    AccumuloFileOutputFormat.setIndexBlockSize(job1, d);
    AccumuloFileOutputFormat.setCompressionType(job1, e);
    AccumuloFileOutputFormat.setSampler(job1, samplerConfig);
    AccumuloFileOutputFormat.setSummarizers(job1, sc1, sc2);
    AccumuloConfiguration acuconf = FileOutputConfigurator.getAccumuloConfiguration(AccumuloFileOutputFormat.class, job1.getConfiguration());
    assertEquals(7, acuconf.getCount(Property.TABLE_FILE_REPLICATION));
    assertEquals(300l, acuconf.getAsBytes(Property.TABLE_FILE_BLOCK_SIZE));
    assertEquals(50l, acuconf.getAsBytes(Property.TABLE_FILE_COMPRESSED_BLOCK_SIZE));
    assertEquals(10l, acuconf.getAsBytes(Property.TABLE_FILE_COMPRESSED_BLOCK_SIZE_INDEX));
    assertEquals("snappy", acuconf.get(Property.TABLE_FILE_COMPRESSION_TYPE));
    assertEquals(new SamplerConfigurationImpl(samplerConfig), SamplerConfigurationImpl.newSamplerConfig(acuconf));
    Collection<SummarizerConfiguration> summarizerConfigs = SummarizerConfiguration.fromTableProperties(acuconf);
    assertEquals(2, summarizerConfigs.size());
    assertTrue(summarizerConfigs.contains(sc1));
    assertTrue(summarizerConfigs.contains(sc2));
    a = 17;
    b = 1300l;
    c = 150l;
    d = 110l;
    e = "lzo";
    samplerConfig = new SamplerConfiguration(RowSampler.class.getName());
    samplerConfig.addOption("hasher", "md5");
    samplerConfig.addOption("modulus", "100003");
    Job job2 = Job.getInstance();
    AccumuloFileOutputFormat.setReplication(job2, a);
    AccumuloFileOutputFormat.setFileBlockSize(job2, b);
    AccumuloFileOutputFormat.setDataBlockSize(job2, c);
    AccumuloFileOutputFormat.setIndexBlockSize(job2, d);
    AccumuloFileOutputFormat.setCompressionType(job2, e);
    AccumuloFileOutputFormat.setSampler(job2, samplerConfig);
    acuconf = FileOutputConfigurator.getAccumuloConfiguration(AccumuloFileOutputFormat.class, job2.getConfiguration());
    assertEquals(17, acuconf.getCount(Property.TABLE_FILE_REPLICATION));
    assertEquals(1300l, acuconf.getAsBytes(Property.TABLE_FILE_BLOCK_SIZE));
    assertEquals(150l, acuconf.getAsBytes(Property.TABLE_FILE_COMPRESSED_BLOCK_SIZE));
    assertEquals(110l, acuconf.getAsBytes(Property.TABLE_FILE_COMPRESSED_BLOCK_SIZE_INDEX));
    assertEquals("lzo", acuconf.get(Property.TABLE_FILE_COMPRESSION_TYPE));
    assertEquals(new SamplerConfigurationImpl(samplerConfig), SamplerConfigurationImpl.newSamplerConfig(acuconf));
    summarizerConfigs = SummarizerConfiguration.fromTableProperties(acuconf);
    assertEquals(0, summarizerConfigs.size());
}
Also used: RowSampler (org.apache.accumulo.core.client.sample.RowSampler), SamplerConfigurationImpl (org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl), SamplerConfiguration (org.apache.accumulo.core.client.sample.SamplerConfiguration), Job (org.apache.hadoop.mapreduce.Job), SummarizerConfiguration (org.apache.accumulo.core.client.summary.SummarizerConfiguration), AccumuloConfiguration (org.apache.accumulo.core.conf.AccumuloConfiguration), Test (org.junit.Test)
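
The newSamplerConfig assertions can be reproduced without a MapReduce job. A hedged sketch of the same round trip, assuming ConfigurationCopy can be built from a plain property map and using only methods that appear in the examples above (expected output: true):

import java.util.Map;

import org.apache.accumulo.core.client.sample.RowSampler;
import org.apache.accumulo.core.client.sample.SamplerConfiguration;
import org.apache.accumulo.core.conf.ConfigurationCopy;
import org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl;

public class SamplerPropertiesRoundTrip {
    public static void main(String[] args) {
        SamplerConfiguration samplerConfig = new SamplerConfiguration(RowSampler.class.getName());
        samplerConfig.addOption("hasher", "murmur3_32");
        samplerConfig.addOption("modulus", "109");

        // The same conversion RFileWriterBuilder.withSampler performs.
        Map<String, String> props = new SamplerConfigurationImpl(samplerConfig).toTablePropertiesMap();

        // The same recovery path the test exercises against the job's AccumuloConfiguration.
        SamplerConfigurationImpl recovered = SamplerConfigurationImpl.newSamplerConfig(new ConfigurationCopy(props));

        System.out.println(recovered.equals(new SamplerConfigurationImpl(samplerConfig)));
    }
}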

Aggregations

SamplerConfigurationImpl (org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl): 24 usages
RowSampler (org.apache.accumulo.core.client.sample.RowSampler): 8 usages
ConfigurationCopy (org.apache.accumulo.core.conf.ConfigurationCopy): 7 usages
Test (org.junit.Test): 7 usages
Key (org.apache.accumulo.core.data.Key): 6 usages
Value (org.apache.accumulo.core.data.Value): 6 usages
MemoryIterator (org.apache.accumulo.tserver.InMemoryMap.MemoryIterator): 6 usages
SamplerConfiguration (org.apache.accumulo.core.client.sample.SamplerConfiguration): 5 usages
AccumuloConfiguration (org.apache.accumulo.core.conf.AccumuloConfiguration): 5 usages
Sampler (org.apache.accumulo.core.client.sample.Sampler): 4 usages
Range (org.apache.accumulo.core.data.Range): 4 usages
Configuration (org.apache.hadoop.conf.Configuration): 4 usages
IOException (java.io.IOException): 3 usages
ArrayList (java.util.ArrayList): 3 usages
TreeMap (java.util.TreeMap): 3 usages
FileSKVIterator (org.apache.accumulo.core.file.FileSKVIterator): 3 usages
MultiIterator (org.apache.accumulo.core.iterators.system.MultiIterator): 3 usages
CachedConfiguration (org.apache.accumulo.core.util.CachedConfiguration): 3 usages
File (java.io.File): 2 usages
FileFilter (java.io.FileFilter): 2 usages