Use of uk.gov.gchq.gaffer.sparkaccumulo.operation.rfilereaderrdd.RFileReaderRDD in the project Gaffer by gchq.
From the class GetRDDOfAllElementsHandler, method doOperationUsingRFileReaderRDD:
/**
 * Builds an {@code RDD<Element>} for the operation by reading the table's RFiles
 * directly via {@link RFileReaderRDD}, bypassing the usual Accumulo table-scan path.
 *
 * @param operation     the GetRDDOfAllElements operation (supplies the view and options)
 * @param context       the execution context providing the current user
 * @param accumuloStore the store whose table is read
 * @return an RDD of Elements with validation, aggregation and view iterators applied
 * @throws OperationException if an iterator cannot be configured or the
 *                            configuration cannot be serialised
 */
private RDD<Element> doOperationUsingRFileReaderRDD(final GetRDDOfAllElements operation, final Context context, final AccumuloStore accumuloStore) throws OperationException {
    final Configuration configuration = getConfiguration(operation);
    // The validation iterator is normally configured on the table and therefore
    // applied to every scan automatically. This code path bypasses the table, so
    // the iterator has to be attached to the configuration by hand.
    addValidationIterator(accumuloStore, configuration);
    // The aggregation iterator needs the same manual treatment.
    try {
        addAggregationIterator(accumuloStore, configuration);
    } catch (final IteratorSettingException e) {
        throw new OperationException("IteratorSettingException adding aggregation iterator", e);
    }
    // Attach the remaining iterators for this user and operation.
    addIterators(accumuloStore, configuration, context.getUser(), operation);
    try {
        // Store the view in the configuration so any transformations it defines
        // can be applied when entries are converted to elements.
        configuration.set(AbstractGetRDDHandler.VIEW, new String(operation.getView().toCompactJson(), CommonConstants.UTF_8));
        final byte[] serialisedConfiguration = Utils.serialiseConfiguration(configuration);
        final RDD<Map.Entry<Key, Value>> entryRdd = new RFileReaderRDD(
                SparkContextUtil.getSparkSession(context, accumuloStore.getProperties()).sparkContext(),
                accumuloStore.getProperties().getInstance(),
                accumuloStore.getProperties().getZookeepers(),
                accumuloStore.getProperties().getUser(),
                accumuloStore.getProperties().getPassword(),
                accumuloStore.getTableName(),
                context.getUser().getDataAuths(),
                serialisedConfiguration);
        return entryRdd.mapPartitions(new EntryIteratorToElementIterator(serialisedConfiguration), true, ELEMENT_CLASS_TAG);
    } catch (final IOException e) {
        throw new OperationException("IOException serialising configuration", e);
    }
}
Use of uk.gov.gchq.gaffer.sparkaccumulo.operation.rfilereaderrdd.RFileReaderRDD in the project Gaffer by gchq.
From the class RFileReaderRddIT, method throwRTX_whenRDDHasUserWithoutPermission:
@Test
public void throwRTX_whenRDDHasUserWithoutPermission() throws IOException, InterruptedException, AccumuloSecurityException, AccumuloException, TableNotFoundException, TableExistsException {
    // Given: a mini cluster holding one row, to be read as a user who has been
    // granted no permission on the table
    final MiniAccumuloCluster miniCluster = createAccumuloCluster(tableName, config, Arrays.asList("Bananas"));
    final RFileReaderRDD readerRdd = new RFileReaderRDD(
            sparkSession.sparkContext(),
            miniCluster.getInstanceName(),
            miniCluster.getZooKeepers(),
            MiniAccumuloClusterProvider.USER_NO_GRANTED_PERMISSION,
            MiniAccumuloClusterProvider.PASSWORD,
            tableName,
            new HashSet<>(),
            serialiseConfiguration(config));

    // When / Then: computing the partitions is rejected with a RuntimeException
    assertThatExceptionOfType(RuntimeException.class)
            .isThrownBy(readerRdd::getPartitions)
            .withMessage("User user2 does not have access to table " + tableName);
}
Use of uk.gov.gchq.gaffer.sparkaccumulo.operation.rfilereaderrdd.RFileReaderRDD in the project Gaffer by gchq.
From the class RFileReaderRddIT, method testRFileReaderRDDAppliesIteratorCorrectly:
@Test
public void testRFileReaderRDDAppliesIteratorCorrectly() throws IOException, InterruptedException, AccumuloSecurityException, AccumuloException, TableNotFoundException, TableExistsException {
    // Given: three rows, only one of which contains the substring "val"
    final List<String> rows = Arrays.asList("no", "not", "value");
    final Job job = Job.getInstance(config);
    final MiniAccumuloCluster miniCluster = createAccumuloCluster(tableName, job.getConfiguration(), rows);

    // Configure a grep iterator that matches the term "val"
    final Map<String, String> grepOptions = new HashMap<>();
    grepOptions.put("term", "val");
    AccumuloInputFormat.addIterator(job, new IteratorSetting(2, "NAME", GrepIterator.class.getName(), grepOptions));

    // When: the RDD is built with the iterator-carrying configuration
    final RFileReaderRDD readerRdd = new RFileReaderRDD(
            sparkSession.sparkContext(),
            miniCluster.getInstanceName(),
            miniCluster.getZooKeepers(),
            MiniAccumuloClusterProvider.USER,
            MiniAccumuloClusterProvider.PASSWORD,
            tableName,
            new HashSet<>(),
            serialiseConfiguration(job.getConfiguration()));

    // Then: only the single matching row survives the iterator
    assertThat(readerRdd.count()).isEqualTo(1L);
}
Use of uk.gov.gchq.gaffer.sparkaccumulo.operation.rfilereaderrdd.RFileReaderRDD in the project Gaffer by gchq.
From the class RFileReaderRddIT, method testRFileReaderRDDCanBeCreatedWith2TableInputs:
@Test
public void testRFileReaderRDDCanBeCreatedWith2TableInputs() throws IOException, InterruptedException, AccumuloSecurityException, AccumuloException, TableNotFoundException, TableExistsException {
    // Given: a table populated with two rows
    final List<String> rows = Arrays.asList("apples", "bananas");
    final MiniAccumuloCluster miniCluster = createAccumuloCluster(tableName, config, rows);

    // When: an RDD is created over the table's RFiles
    final RFileReaderRDD readerRdd = new RFileReaderRDD(
            sparkSession.sparkContext(),
            miniCluster.getInstanceName(),
            miniCluster.getZooKeepers(),
            MiniAccumuloClusterProvider.USER,
            MiniAccumuloClusterProvider.PASSWORD,
            tableName,
            new HashSet<>(),
            serialiseConfiguration(config));

    // Then: every row is visible and the data fits in a single partition
    assertThat(readerRdd.count()).isEqualTo(rows.size());
    assertThat(readerRdd.getPartitions()).hasSize(1);
}
Use of uk.gov.gchq.gaffer.sparkaccumulo.operation.rfilereaderrdd.RFileReaderRDD in the project Gaffer by gchq.
From the class RFileReaderRddIT, method throwRTX_whenGetPartitionsForFileReaderWithInvalidTableName:
// Fix: this method was missing the @Test annotation that every sibling test in
// this class carries, so the test runner silently skipped it and the invalid-table
// behaviour was never verified.
@Test
public void throwRTX_whenGetPartitionsForFileReaderWithInvalidTableName() throws IOException, InterruptedException, AccumuloSecurityException, AccumuloException, TableNotFoundException, TableExistsException {
    // Given: a valid cluster, but an RDD pointed at a table name that does not exist
    final MiniAccumuloCluster cluster = createAccumuloCluster(tableName, config, Arrays.asList("Bananas"));
    final RFileReaderRDD rdd = new RFileReaderRDD(
            sparkSession.sparkContext(),
            cluster.getInstanceName(),
            cluster.getZooKeepers(),
            MiniAccumuloClusterProvider.USER,
            MiniAccumuloClusterProvider.PASSWORD,
            "Invalid Table Name",
            new HashSet<>(),
            serialiseConfiguration(config));

    // When / Then: computing partitions for the unknown table fails as an access error
    assertThatExceptionOfType(RuntimeException.class)
            .isThrownBy(rdd::getPartitions)
            .withMessage("User user does not have access to table Invalid Table Name");
}
Aggregations of RFileReaderRDD usages end here.