Search in sources :

Example 1 with InputTableConfig

use of org.apache.accumulo.hadoopImpl.mapreduce.InputTableConfig in project accumulo by apache.

Source: class InputConfigurator, method getDefaultInputTableConfig.

/**
 * Builds the {@link InputTableConfig} for the configuration based on the properties set using
 * the single-table input methods.
 *
 * @param implementingClass
 *          the class whose name will be used as a prefix for the property configuration key
 * @param conf
 *          the Hadoop instance for which to retrieve the configuration
 * @param tableName
 *          the table name for which to retrieve the configuration
 * @return the config object built from the single input table properties set on the job, or
 *         {@code null} when no table name is set
 * @since 1.6.0
 */
protected static Map.Entry<String, InputTableConfig> getDefaultInputTableConfig(Class<?> implementingClass, Configuration conf, String tableName) {
    if (tableName == null) {
        return null;
    }
    InputTableConfig config = new InputTableConfig();
    List<IteratorSetting> iterators = getIterators(implementingClass, conf);
    if (iterators != null) {
        iterators.forEach(config::addIterator);
    }
    Set<IteratorSetting.Column> fetchedColumns = getFetchedColumns(implementingClass, conf);
    if (fetchedColumns != null) {
        config.fetchColumns(fetchedColumns);
    }
    List<Range> scanRanges;
    try {
        scanRanges = getRanges(implementingClass, conf);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    if (scanRanges != null) {
        config.setRanges(scanRanges);
    }
    SamplerConfiguration samplerConfig = getSamplerConfiguration(implementingClass, conf);
    if (samplerConfig != null) {
        config.setSamplerConfiguration(samplerConfig);
    }
    // Copy the remaining single-table scan flags straight out of the configuration.
    config.setAutoAdjustRanges(getAutoAdjustRanges(implementingClass, conf))
        .setUseIsolatedScanners(isIsolated(implementingClass, conf))
        .setUseLocalIterators(usesLocalIterators(implementingClass, conf))
        .setOfflineScan(isOfflineScan(implementingClass, conf))
        .setExecutionHints(getExecutionHints(implementingClass, conf));
    return Maps.immutableEntry(tableName, config);
}
Also used : InputTableConfig(org.apache.accumulo.hadoopImpl.mapreduce.InputTableConfig) IteratorSetting(org.apache.accumulo.core.client.IteratorSetting) SamplerConfiguration(org.apache.accumulo.core.client.sample.SamplerConfiguration) IOException(java.io.IOException) Range(org.apache.accumulo.core.data.Range)

Example 2 with InputTableConfig

use of org.apache.accumulo.hadoopImpl.mapreduce.InputTableConfig in project accumulo by apache.

Source: class InputConfigurator, method setInputTableConfigs.

/**
 * Sets configurations for multiple tables at a time.
 *
 * @param implementingClass
 *          the class whose name will be used as a prefix for the property configuration key
 * @param conf
 *          the Hadoop configuration object to configure
 * @param configs
 *          a map of table names to {@link InputTableConfig} objects to associate with the job
 * @since 1.6.0
 */
public static void setInputTableConfigs(Class<?> implementingClass, Configuration conf, Map<String, InputTableConfig> configs) {
    // Pack the table-name -> config entries into a single Writable for serialization.
    MapWritable mapWritable = new MapWritable();
    for (Map.Entry<String, InputTableConfig> tableConfig : configs.entrySet()) {
        mapWritable.put(new Text(tableConfig.getKey()), tableConfig.getValue());
    }
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    try {
        mapWritable.write(new DataOutputStream(baos));
    } catch (IOException e) {
        // Chain the cause; previously it was dropped, hiding the actual serialization failure.
        throw new IllegalStateException("Table configuration could not be serialized.", e);
    }
    // Store the serialized configs Base64-encoded under the implementing class's key prefix.
    String confKey = enumToConfKey(implementingClass, ScanOpts.TABLE_CONFIGS);
    conf.set(confKey, Base64.getEncoder().encodeToString(baos.toByteArray()));
}
Also used : InputTableConfig(org.apache.accumulo.hadoopImpl.mapreduce.InputTableConfig) DataOutputStream(java.io.DataOutputStream) Text(org.apache.hadoop.io.Text) MapWritable(org.apache.hadoop.io.MapWritable) ByteArrayOutputStream(java.io.ByteArrayOutputStream) IOException(java.io.IOException) Map(java.util.Map) HashMap(java.util.HashMap)

Example 3 with InputTableConfig

use of org.apache.accumulo.hadoopImpl.mapreduce.InputTableConfig in project accumulo by apache.

Source: class InputConfigurator, method validatePermissions.

/**
 * Validates that the user has permissions on the requested tables.
 *
 * @param implementingClass
 *          the class whose name will be used as a prefix for the property configuration key
 * @param conf
 *          the Hadoop configuration object to configure
 * @param client
 *          the Accumulo client
 * @throws IOException
 *           if no table is configured, a table is not readable by the principal, or a
 *           configured iterator class cannot be loaded server-side
 * @since 1.7.0
 */
public static void validatePermissions(Class<?> implementingClass, Configuration conf, AccumuloClient client) throws IOException {
    Map<String, InputTableConfig> inputTableConfigs = getInputTableConfigs(implementingClass, conf);
    try {
        // Reuse the configs already deserialized above instead of fetching them a second time.
        if (inputTableConfigs.isEmpty())
            throw new IOException("No table set.");
        Properties props = getClientProperties(implementingClass, conf);
        String principal = ClientProperty.AUTH_PRINCIPAL.getValue(props);
        // First pass: the principal needs READ either on the table or on its namespace.
        for (Map.Entry<String, InputTableConfig> tableConfig : inputTableConfigs.entrySet()) {
            final String tableName = tableConfig.getKey();
            final String namespace = extractNamespace(tableName);
            final boolean hasTableRead = client.securityOperations().hasTablePermission(principal, tableName, TablePermission.READ);
            final boolean hasNamespaceRead = client.securityOperations().hasNamespacePermission(principal, namespace, NamespacePermission.READ);
            if (!hasTableRead && !hasNamespaceRead) {
                // Name the offending table so multi-table failures are diagnosable.
                throw new IOException("Unable to access table " + tableName);
            }
        }
        // Second pass: unless iterators run client-side, verify the servers can load them.
        for (Map.Entry<String, InputTableConfig> tableConfigEntry : inputTableConfigs.entrySet()) {
            InputTableConfig tableConfig = tableConfigEntry.getValue();
            if (!tableConfig.shouldUseLocalIterators()) {
                if (tableConfig.getIterators() != null) {
                    for (IteratorSetting iter : tableConfig.getIterators()) {
                        if (!client.tableOperations().testClassLoad(tableConfigEntry.getKey(), iter.getIteratorClass(), SortedKeyValueIterator.class.getName()))
                            throw new AccumuloException("Servers are unable to load " + iter.getIteratorClass() + " as a " + SortedKeyValueIterator.class.getName());
                    }
                }
            }
        }
    } catch (AccumuloException | TableNotFoundException | AccumuloSecurityException e) {
        // Normalize all Accumulo-side failures to IOException for the MapReduce layer.
        throw new IOException(e);
    }
}
Also used : AccumuloException(org.apache.accumulo.core.client.AccumuloException) SortedKeyValueIterator(org.apache.accumulo.core.iterators.SortedKeyValueIterator) IOException(java.io.IOException) Properties(java.util.Properties) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) InputTableConfig(org.apache.accumulo.hadoopImpl.mapreduce.InputTableConfig) IteratorSetting(org.apache.accumulo.core.client.IteratorSetting) AccumuloSecurityException(org.apache.accumulo.core.client.AccumuloSecurityException) Map(java.util.Map) HashMap(java.util.HashMap)

Example 4 with InputTableConfig

use of org.apache.accumulo.hadoopImpl.mapreduce.InputTableConfig in project accumulo by apache.

Source: class MultiTableInputFormatTest, method testManyTables.

@Test
public void testManyTables() throws Exception {
    final int tableCount = 10_000;
    JobConf job = new JobConf();
    Properties clientProps = org.apache.accumulo.hadoop.mapreduce.AccumuloInputFormatTest.setupClientProperties();
    // Use empty auths so the client never tries to fetch them from a server.
    Authorizations auths = Authorizations.EMPTY;
    // Configure the client properties once, then register every table on the same builder.
    InputFormatBuilder.TableParams<JobConf> builder = AccumuloInputFormat.configure().clientProperties(clientProps);
    for (int i = 0; i < tableCount; i++) {
        builder.table("table" + i).auths(auths)
            .ranges(singletonList(new Range("a" + i, "b" + i)))
            .fetchColumns(singleton(new Column(new Text("CF" + i), new Text("CQ" + i))))
            .addIterator(new IteratorSetting(50, "iter" + i, "iterclass" + i));
    }
    builder.store(job);
    // Deserialize what was stored and compare each table against a freshly built expectation.
    Map<String, InputTableConfig> configs = InputConfigurator.getInputTableConfigs(CLASS, job);
    assertEquals(tableCount, configs.size());
    for (int i = 0; i < tableCount; i++) {
        InputTableConfig expected = new InputTableConfig();
        expected.setScanAuths(auths)
            .setRanges(singletonList(new Range("a" + i, "b" + i)))
            .fetchColumns(singleton(new Column(new Text("CF" + i), new Text("CQ" + i))))
            .addIterator(new IteratorSetting(50, "iter" + i, "iterclass" + i));
        assertEquals(expected, configs.get("table" + i));
    }
}
Also used : Authorizations(org.apache.accumulo.core.security.Authorizations) InputFormatBuilder(org.apache.accumulo.hadoop.mapreduce.InputFormatBuilder) Text(org.apache.hadoop.io.Text) Properties(java.util.Properties) Range(org.apache.accumulo.core.data.Range) IteratorSetting(org.apache.accumulo.core.client.IteratorSetting) InputTableConfig(org.apache.accumulo.hadoopImpl.mapreduce.InputTableConfig) Column(org.apache.accumulo.core.client.IteratorSetting.Column) JobConf(org.apache.hadoop.mapred.JobConf) Test(org.junit.Test)

Example 5 with InputTableConfig

use of org.apache.accumulo.hadoopImpl.mapreduce.InputTableConfig in project accumulo by apache.

Source: class MultiTableInputFormatTest, method testStoreTables.

/**
 * Verify {@link InputTableConfig} objects get correctly serialized in the JobContext.
 */
@Test
public void testStoreTables() throws Exception {
    String table1Name = testName.getMethodName() + "1";
    String table2Name = testName.getMethodName() + "2";
    Job job = Job.getInstance();
    Properties clientProps = org.apache.accumulo.hadoop.mapreduce.AccumuloInputFormatTest.setupClientProperties();
    // Shared fixtures: both tables use the same range and fetched column.
    List<Range> ranges = singletonList(new Range("a", "b"));
    Set<IteratorSetting.Column> cols = singleton(new IteratorSetting.Column(new Text("CF1"), new Text("CQ1")));
    IteratorSetting iter1 = new IteratorSetting(50, "iter1", "iterclass1");
    IteratorSetting iter2 = new IteratorSetting(60, "iter2", "iterclass2");
    List<IteratorSetting> allIters = new ArrayList<>();
    allIters.add(iter1);
    allIters.add(iter2);
    // if auths are not set client will try to get from server, we dont want that here
    Authorizations auths = Authorizations.EMPTY;
    // Configure table1 (both iterators, local iterators, offline scan) and table2 (iter2 only)
    // in one fluent chain, then persist everything into the Job's configuration.
    // @formatter:off
    AccumuloInputFormat.configure().clientProperties(clientProps).table(table1Name).auths(auths).ranges(ranges).fetchColumns(cols).addIterator(iter1).addIterator(iter2).localIterators(true).offlineScan(// end table 1
    true).table(table2Name).auths(auths).ranges(ranges).fetchColumns(cols).addIterator(// end
    iter2).store(job);
    // @formatter:on
    // Build the expected per-table configs by hand and compare against what round-trips
    // through the serialized job configuration.
    InputTableConfig table1 = new InputTableConfig();
    table1.setScanAuths(auths).setRanges(ranges).fetchColumns(cols).setUseLocalIterators(true).setOfflineScan(true);
    allIters.forEach(table1::addIterator);
    InputTableConfig table2 = new InputTableConfig();
    table2.setScanAuths(auths).setRanges(ranges).fetchColumns(cols).addIterator(iter2);
    Configuration jc = job.getConfiguration();
    assertEquals(table1, InputConfigurator.getInputTableConfig(CLASS, jc, table1Name));
    assertEquals(table2, InputConfigurator.getInputTableConfig(CLASS, jc, table2Name));
}
Also used : Authorizations(org.apache.accumulo.core.security.Authorizations) Configuration(org.apache.hadoop.conf.Configuration) ArrayList(java.util.ArrayList) Text(org.apache.hadoop.io.Text) Properties(java.util.Properties) Range(org.apache.accumulo.core.data.Range) IteratorSetting(org.apache.accumulo.core.client.IteratorSetting) InputTableConfig(org.apache.accumulo.hadoopImpl.mapreduce.InputTableConfig) Column(org.apache.accumulo.core.client.IteratorSetting.Column) Column(org.apache.accumulo.core.client.IteratorSetting.Column) Job(org.apache.hadoop.mapreduce.Job) Test(org.junit.Test)

Aggregations

InputTableConfig (org.apache.accumulo.hadoopImpl.mapreduce.InputTableConfig)10 IOException (java.io.IOException)6 IteratorSetting (org.apache.accumulo.core.client.IteratorSetting)6 Range (org.apache.accumulo.core.data.Range)6 Text (org.apache.hadoop.io.Text)6 Properties (java.util.Properties)5 Authorizations (org.apache.accumulo.core.security.Authorizations)5 HashMap (java.util.HashMap)4 Map (java.util.Map)4 Column (org.apache.accumulo.core.client.IteratorSetting.Column)4 Test (org.junit.Test)4 ArrayList (java.util.ArrayList)3 TableNotFoundException (org.apache.accumulo.core.client.TableNotFoundException)3 AccumuloException (org.apache.accumulo.core.client.AccumuloException)2 AccumuloSecurityException (org.apache.accumulo.core.client.AccumuloSecurityException)2 SamplerConfiguration (org.apache.accumulo.core.client.sample.SamplerConfiguration)2 MapWritable (org.apache.hadoop.io.MapWritable)2 JobConf (org.apache.hadoop.mapred.JobConf)2 Job (org.apache.hadoop.mapreduce.Job)2 ByteArrayInputStream (java.io.ByteArrayInputStream)1