Example usage of org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl in the Apache Accumulo project:
the RangeInputSplit class, readFields method.
/**
 * Restores this split's state from the given stream. The field order and the
 * per-field boolean "presence" flags must exactly mirror what
 * {@code write(DataOutput)} emits, so do not reorder these reads.
 *
 * @param in stream positioned at data previously produced by {@code write}
 * @throws IOException if the underlying stream fails or contains an
 *         unrecognized {@code TokenSource} ordinal
 */
@Override
public void readFields(DataInput in) throws IOException {
range.readFields(in);
tableName = in.readUTF();
tableId = in.readUTF();
// Locations are written unconditionally: a count followed by that many hosts.
int numLocs = in.readInt();
locations = new String[numLocs];
for (int i = 0; i < numLocs; ++i) locations[i] = in.readUTF();
// Every remaining field is optional: a leading boolean tells us whether the
// writer serialized a value for it.
if (in.readBoolean()) {
isolatedScan = in.readBoolean();
}
if (in.readBoolean()) {
offline = in.readBoolean();
}
if (in.readBoolean()) {
localIterators = in.readBoolean();
}
if (in.readBoolean()) {
mockInstance = in.readBoolean();
}
if (in.readBoolean()) {
// Fetched columns travel as serialized strings; the configurator decodes
// them back into column family/qualifier pairs.
int numColumns = in.readInt();
List<String> columns = new ArrayList<>(numColumns);
for (int i = 0; i < numColumns; i++) {
columns.add(in.readUTF());
}
fetchedColumns = InputConfigurator.deserializeFetchedColumns(columns);
}
if (in.readBoolean()) {
String strAuths = in.readUTF();
auths = new Authorizations(strAuths.getBytes(UTF_8));
}
if (in.readBoolean()) {
principal = in.readUTF();
}
if (in.readBoolean()) {
// The ordinal selects how credentials were serialized: inline token bytes
// or a path to a token file. NOTE(review): decoding by ordinal assumes the
// TokenSource enum order is identical on the writer and reader side.
int ordinal = in.readInt();
this.tokenSource = TokenSource.values()[ordinal];
switch(this.tokenSource) {
case INLINE:
// Token bytes are Base64-encoded inside the UTF string.
String tokenClass = in.readUTF();
byte[] tokenBytes = Base64.getDecoder().decode(in.readUTF());
this.token = AuthenticationTokenSerializer.deserialize(tokenClass, tokenBytes);
break;
case FILE:
this.tokenFile = in.readUTF();
break;
default:
throw new IOException("Cannot parse unknown TokenSource ordinal");
}
}
if (in.readBoolean()) {
instanceName = in.readUTF();
}
if (in.readBoolean()) {
zooKeepers = in.readUTF();
}
if (in.readBoolean()) {
// Iterator settings know how to deserialize themselves from the stream.
int numIterators = in.readInt();
iterators = new ArrayList<>(numIterators);
for (int i = 0; i < numIterators; i++) {
iterators.add(new IteratorSetting(in));
}
}
if (in.readBoolean()) {
level = Level.toLevel(in.readInt());
}
if (in.readBoolean()) {
// Sampler config is stored in its wire form and converted to the public type.
samplerConfig = new SamplerConfigurationImpl(in).toSamplerConfiguration();
}
}
Example usage of org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl in the Apache Accumulo project:
the RangeInputSplit class, write method.
/**
 * Serializes this split. Each optional field is written as a boolean presence
 * flag followed (when true) by its value; {@code readFields(DataInput)} reads
 * the fields back in exactly this order, so do not reorder these writes.
 *
 * @param out destination stream
 * @throws IOException if the stream fails, or if both an inline token and a
 *         token file are set (an ambiguous credential configuration)
 */
@Override
public void write(DataOutput out) throws IOException {
range.write(out);
out.writeUTF(tableName);
out.writeUTF(tableId);
// Locations are always present: count, then one UTF string per host.
out.writeInt(locations.length);
for (String location : locations) out.writeUTF(location);
out.writeBoolean(null != isolatedScan);
if (null != isolatedScan) {
out.writeBoolean(isolatedScan);
}
out.writeBoolean(null != offline);
if (null != offline) {
out.writeBoolean(offline);
}
out.writeBoolean(null != localIterators);
if (null != localIterators) {
out.writeBoolean(localIterators);
}
out.writeBoolean(null != mockInstance);
if (null != mockInstance) {
out.writeBoolean(mockInstance);
}
out.writeBoolean(null != fetchedColumns);
if (null != fetchedColumns) {
// Columns are flattened to strings by the configurator for transport.
String[] cols = InputConfigurator.serializeColumns(fetchedColumns);
out.writeInt(cols.length);
for (String col : cols) {
out.writeUTF(col);
}
}
out.writeBoolean(null != auths);
if (null != auths) {
out.writeUTF(auths.serialize());
}
out.writeBoolean(null != principal);
if (null != principal) {
out.writeUTF(principal);
}
out.writeBoolean(null != tokenSource);
if (null != tokenSource) {
// Write the source discriminator first, then the matching payload.
out.writeInt(tokenSource.ordinal());
if (null != token && null != tokenFile) {
// Having both would make readFields' switch ambiguous, so fail fast.
throw new IOException("Cannot use both inline AuthenticationToken and file-based AuthenticationToken");
} else if (null != token) {
// Inline token: class name plus Base64-encoded serialized bytes.
out.writeUTF(token.getClass().getName());
out.writeUTF(Base64.getEncoder().encodeToString(AuthenticationTokenSerializer.serialize(token)));
} else {
out.writeUTF(tokenFile);
}
}
out.writeBoolean(null != instanceName);
if (null != instanceName) {
out.writeUTF(instanceName);
}
out.writeBoolean(null != zooKeepers);
if (null != zooKeepers) {
out.writeUTF(zooKeepers);
}
out.writeBoolean(null != iterators);
if (null != iterators) {
out.writeInt(iterators.size());
for (IteratorSetting iterator : iterators) {
iterator.write(out);
}
}
out.writeBoolean(null != level);
if (null != level) {
out.writeInt(level.toInt());
}
out.writeBoolean(null != samplerConfig);
if (null != samplerConfig) {
// Convert the public sampler type to its wire form for serialization.
new SamplerConfigurationImpl(samplerConfig).write(out);
}
}
Example usage of org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl in the Apache Accumulo project:
the RFileScanner class, iterator method.
/**
 * Builds the scan iterator stack over the configured RFile sources and returns
 * an adapter exposing it as a standard {@code Iterator}.
 *
 * <p>Pipeline: one {@code RFile.Reader} per source (optionally switched to its
 * sample view), merged by a {@code MultiIterator}, then system scan iterators
 * (column filtering, authorizations) when enabled, then any configured
 * server-side iterators, and finally an initial seek to the requested range.
 *
 * @return iterator over the merged, filtered key/value entries
 * @throws RuntimeException wrapping any {@code IOException} from reader
 *         construction, iterator loading, or the initial seek
 */
@Override
public Iterator<Entry<Key, Value>> iterator() {
try {
RFileSource[] sources = opts.in.getSources();
List<SortedKeyValueIterator<Key, Value>> readers = new ArrayList<>(sources.length);
for (int i = 0; i < sources.length; i++) {
// TODO may have been a bug with multiple files and caching in older version...
FSDataInputStream inputStream = (FSDataInputStream) sources[i].getInputStream();
readers.add(new RFile.Reader(new CachableBlockFile.Reader("source-" + i, inputStream, sources[i].getLength(), opts.in.getConf(), dataCache, indexCache, DefaultConfiguration.getInstance())));
}
if (getSamplerConfiguration() != null) {
// Replace each reader with its sample view so only sampled data is scanned.
for (int i = 0; i < readers.size(); i++) {
readers.set(i, ((Reader) readers.get(i)).getSample(new SamplerConfigurationImpl(getSamplerConfiguration())));
}
}
SortedKeyValueIterator<Key, Value> iterator;
if (opts.bounds != null) {
iterator = new MultiIterator(readers, opts.bounds);
} else {
iterator = new MultiIterator(readers, false);
}
Set<ByteSequence> families = Collections.emptySet();
if (opts.useSystemIterators) {
SortedSet<Column> cols = this.getFetchedColumns();
families = LocalityGroupUtil.families(cols);
iterator = IteratorUtil.setupSystemScanIterators(iterator, cols, getAuthorizations(), EMPTY_BYTES);
}
try {
if (opts.tableConfig != null && opts.tableConfig.size() > 0) {
// Table properties supplied: load iterators against that configuration.
ConfigurationCopy conf = new ConfigurationCopy(opts.tableConfig);
iterator = IteratorUtil.loadIterators(IteratorScope.scan, iterator, null, conf, serverSideIteratorList, serverSideIteratorOptions, new IterEnv());
} else {
iterator = IteratorUtil.loadIterators(iterator, serverSideIteratorList, serverSideIteratorOptions, new IterEnv(), false, null);
}
} catch (IOException e) {
throw new RuntimeException(e);
}
// Seek to the requested range (or everything); the column-family inclusion
// flag is only meaningful when families were actually fetched.
iterator.seek(getRange() == null ? EMPTY_RANGE : getRange(), families, !families.isEmpty());
return new IteratorAdapter(iterator);
} catch (IOException e) {
throw new RuntimeException(e);
}
}
Example usage of org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl in the Apache Accumulo project:
the RFileWriterBuilder class, withSampler method.
/**
 * Configures the sampler to apply when writing the RFile.
 *
 * <p>The sampler configuration is converted to its table-property form and
 * checked against the existing table configuration so the two never define
 * conflicting sampler properties.
 *
 * @param samplerConf non-null sampler configuration
 * @return this builder, for chaining
 */
@Override
public WriterOptions withSampler(SamplerConfiguration samplerConf) {
Objects.requireNonNull(samplerConf);
Map<String, String> samplerTableProps = new SamplerConfigurationImpl(samplerConf).toTablePropertiesMap();
checkDisjoint(tableConfig, samplerTableProps, "sampler");
this.samplerProps = samplerTableProps;
return this;
}
Example usage of org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl in the Apache Accumulo project:
the AccumuloFileOutputFormatTest class, validateConfiguration method.
/**
 * Verifies that {@code AccumuloFileOutputFormat} setters round-trip through the
 * job configuration: values set on a job are read back identically via
 * {@code FileOutputConfigurator.getAccumuloConfiguration}. Uses two independent
 * jobs to confirm their configurations do not leak into each other (job2 sets
 * no summarizers and must read back zero).
 *
 * <p>Fix: long literals now use the uppercase {@code L} suffix — the lowercase
 * {@code l} is easily misread as the digit {@code 1} and is flagged by
 * standard Java linters.
 */
@Test
public void validateConfiguration() throws IOException, InterruptedException {
int a = 7;
long b = 300L;
long c = 50L;
long d = 10L;
String e = "snappy";
SamplerConfiguration samplerConfig = new SamplerConfiguration(RowSampler.class.getName());
samplerConfig.addOption("hasher", "murmur3_32");
samplerConfig.addOption("modulus", "109");
SummarizerConfiguration sc1 = SummarizerConfiguration.builder(VisibilitySummarizer.class).addOption(CountingSummarizer.MAX_COUNTERS_OPT, 2048).build();
SummarizerConfiguration sc2 = SummarizerConfiguration.builder(FamilySummarizer.class).addOption(CountingSummarizer.MAX_COUNTERS_OPT, 256).build();
Job job1 = Job.getInstance();
AccumuloFileOutputFormat.setReplication(job1, a);
AccumuloFileOutputFormat.setFileBlockSize(job1, b);
AccumuloFileOutputFormat.setDataBlockSize(job1, c);
AccumuloFileOutputFormat.setIndexBlockSize(job1, d);
AccumuloFileOutputFormat.setCompressionType(job1, e);
AccumuloFileOutputFormat.setSampler(job1, samplerConfig);
AccumuloFileOutputFormat.setSummarizers(job1, sc1, sc2);
// Read everything back out of job1's configuration and check it round-tripped.
AccumuloConfiguration acuconf = FileOutputConfigurator.getAccumuloConfiguration(AccumuloFileOutputFormat.class, job1.getConfiguration());
assertEquals(7, acuconf.getCount(Property.TABLE_FILE_REPLICATION));
assertEquals(300L, acuconf.getAsBytes(Property.TABLE_FILE_BLOCK_SIZE));
assertEquals(50L, acuconf.getAsBytes(Property.TABLE_FILE_COMPRESSED_BLOCK_SIZE));
assertEquals(10L, acuconf.getAsBytes(Property.TABLE_FILE_COMPRESSED_BLOCK_SIZE_INDEX));
assertEquals("snappy", acuconf.get(Property.TABLE_FILE_COMPRESSION_TYPE));
assertEquals(new SamplerConfigurationImpl(samplerConfig), SamplerConfigurationImpl.newSamplerConfig(acuconf));
Collection<SummarizerConfiguration> summarizerConfigs = SummarizerConfiguration.fromTableProperties(acuconf);
assertEquals(2, summarizerConfigs.size());
assertTrue(summarizerConfigs.contains(sc1));
assertTrue(summarizerConfigs.contains(sc2));
// Second pass: fresh values on a brand-new job, with no summarizers set.
a = 17;
b = 1300L;
c = 150L;
d = 110L;
e = "lzo";
samplerConfig = new SamplerConfiguration(RowSampler.class.getName());
samplerConfig.addOption("hasher", "md5");
samplerConfig.addOption("modulus", "100003");
Job job2 = Job.getInstance();
AccumuloFileOutputFormat.setReplication(job2, a);
AccumuloFileOutputFormat.setFileBlockSize(job2, b);
AccumuloFileOutputFormat.setDataBlockSize(job2, c);
AccumuloFileOutputFormat.setIndexBlockSize(job2, d);
AccumuloFileOutputFormat.setCompressionType(job2, e);
AccumuloFileOutputFormat.setSampler(job2, samplerConfig);
acuconf = FileOutputConfigurator.getAccumuloConfiguration(AccumuloFileOutputFormat.class, job2.getConfiguration());
assertEquals(17, acuconf.getCount(Property.TABLE_FILE_REPLICATION));
assertEquals(1300L, acuconf.getAsBytes(Property.TABLE_FILE_BLOCK_SIZE));
assertEquals(150L, acuconf.getAsBytes(Property.TABLE_FILE_COMPRESSED_BLOCK_SIZE));
assertEquals(110L, acuconf.getAsBytes(Property.TABLE_FILE_COMPRESSED_BLOCK_SIZE_INDEX));
assertEquals("lzo", acuconf.get(Property.TABLE_FILE_COMPRESSION_TYPE));
assertEquals(new SamplerConfigurationImpl(samplerConfig), SamplerConfigurationImpl.newSamplerConfig(acuconf));
// job2 set no summarizers; confirm none leaked over from job1.
summarizerConfigs = SummarizerConfiguration.fromTableProperties(acuconf);
assertEquals(0, summarizerConfigs.size());
}
Aggregations