Search in sources :

Example 6 with Partition

use of org.apache.spark.Partition in project Gaffer by gchq.

From the class RFileReaderIterator, method init.

/**
 * Builds the iterator stack used to read this partition's RFiles and seeks it to the
 * tablet's row range.
 *
 * <p>Steps, in order: open one {@code RFile.Reader} per file of the tablet, merge them in a
 * {@code MultiIterator}, wrap the merged iterator in a {@code VisibilityFilter} when
 * {@code auths} is non-null, apply the configured iterator settings in ascending priority
 * order, register {@code close()} as a task completion listener, and finally seek to
 * {@code [startRow, endRow)} restricted to the column families fetched in the configuration.
 *
 * @throws IOException if a file system or RFile reader cannot be opened, or if initialising
 *                     or seeking the iterator stack fails
 */
private void init() throws IOException {
    final AccumuloTablet accumuloTablet = (AccumuloTablet) partition;
    LOGGER.info("Initialising RFileReaderIterator for files {}", StringUtils.join(accumuloTablet.getFiles(), ','));
    final AccumuloConfiguration accumuloConfiguration = SiteConfiguration.getInstance();
    // Required column families according to the configuration
    final Set<ByteSequence> requiredColumnFamilies = InputConfigurator.getFetchedColumns(AccumuloInputFormat.class, configuration).stream().map(Pair::getFirst).map(c -> new ArrayByteSequence(c.toString())).collect(Collectors.toSet());
    LOGGER.info("RFileReaderIterator will read column families of {}", StringUtils.join(requiredColumnFamilies, ','));
    // One reader per RFile of the tablet, merged into a single iterator
    final List<SortedKeyValueIterator<Key, Value>> iterators = openRFileReaders(accumuloTablet, accumuloConfiguration);
    mergedIterator = new MultiIterator(iterators, true);
    // Apply visibility filtering iterator
    if (null != auths) {
        final Authorizations authorizations = new Authorizations(auths.toArray(new String[0]));
        final SortedKeyValueIterator<Key, Value> visibilityFilter = VisibilityFilter.wrap(mergedIterator, authorizations, new byte[] {});
        final IteratorSetting visibilityIteratorSetting = new IteratorSetting(1, "auth", VisibilityFilter.class);
        visibilityFilter.init(mergedIterator, visibilityIteratorSetting.getOptions(), null);
        iteratorAfterIterators = visibilityFilter;
        LOGGER.info("Set authorizations to {}", authorizations);
    } else {
        iteratorAfterIterators = mergedIterator;
    }
    // Apply iterator stack, lowest priority value first
    final List<IteratorSetting> iteratorSettings = getIteratorSettings();
    iteratorSettings.sort(Comparator.comparingInt(IteratorSetting::getPriority));
    for (final IteratorSetting is : iteratorSettings) {
        iteratorAfterIterators = applyIterator(iteratorAfterIterators, is);
    }
    // NOTE(review): close() only becomes registered here, so a failure between opening the
    // readers and this line is not cleaned up by the listener - confirm whether callers handle it.
    taskContext.addTaskCompletionListener(context -> close());
    // Start row inclusive, end row exclusive
    final Range range = new Range(accumuloTablet.getStartRow(), true, accumuloTablet.getEndRow(), false);
    iteratorAfterIterators.seek(range, requiredColumnFamilies, true);
    LOGGER.info("Initialised iterator");
}

/**
 * Opens an {@code RFile.Reader} for every file of the given tablet.
 *
 * <p>If opening any file fails, the readers opened so far are closed before the failure is
 * rethrown, so a partially successful open does not leak readers. Exceptions raised while
 * closing are attached to the original failure as suppressed exceptions.
 *
 * @param accumuloTablet        the tablet whose files should be opened
 * @param accumuloConfiguration the Accumulo configuration handed to each block file reader
 * @return the opened readers, in the iteration order of {@code accumuloTablet.getFiles()}
 * @throws IOException if a file system or reader cannot be opened
 */
private List<SortedKeyValueIterator<Key, Value>> openRFileReaders(final AccumuloTablet accumuloTablet, final AccumuloConfiguration accumuloConfiguration) throws IOException {
    final List<RFile.Reader> opened = new ArrayList<>();
    try {
        for (final String filename : accumuloTablet.getFiles()) {
            final Path path = new Path(filename);
            final FileSystem fs = path.getFileSystem(configuration);
            opened.add(new RFile.Reader(new CachableBlockFile.Reader(fs, path, configuration, null, null, accumuloConfiguration)));
        }
    } catch (final IOException | RuntimeException e) {
        // Nothing else holds a reference to these readers yet, so release them here
        for (final RFile.Reader reader : opened) {
            try {
                reader.close();
            } catch (final IOException | RuntimeException closeFailure) {
                e.addSuppressed(closeFailure);
            }
        }
        throw e;
    }
    return new ArrayList<>(opened);
}
Also used : ByteSequence(org.apache.accumulo.core.data.ByteSequence) Partition(org.apache.spark.Partition) CachableBlockFile(org.apache.accumulo.core.file.blockfile.impl.CachableBlockFile) FileSystem(org.apache.hadoop.fs.FileSystem) LoggerFactory(org.slf4j.LoggerFactory) ArrayByteSequence(org.apache.accumulo.core.data.ArrayByteSequence) SortedKeyValueIterator(org.apache.accumulo.core.iterators.SortedKeyValueIterator) StringUtils(org.apache.commons.lang3.StringUtils) ArrayList(java.util.ArrayList) Key(org.apache.accumulo.core.data.Key) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) Value(org.apache.accumulo.core.data.Value) InputConfigurator(org.apache.accumulo.core.client.mapreduce.lib.impl.InputConfigurator) IteratorUtil(org.apache.accumulo.core.iterators.IteratorUtil) SamplerConfiguration(org.apache.accumulo.core.client.sample.SamplerConfiguration) MultiIterator(org.apache.accumulo.core.iterators.system.MultiIterator) Logger(org.slf4j.Logger) AccumuloInputFormat(org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat) TaskContext(org.apache.spark.TaskContext) Set(java.util.Set) IOException(java.io.IOException) Authorizations(org.apache.accumulo.core.security.Authorizations) Collectors(java.util.stream.Collectors) AccumuloConfiguration(org.apache.accumulo.core.conf.AccumuloConfiguration) Range(org.apache.accumulo.core.data.Range) SiteConfiguration(org.apache.accumulo.core.conf.SiteConfiguration) IteratorSetting(org.apache.accumulo.core.client.IteratorSetting) VisibilityFilter(org.apache.accumulo.core.iterators.system.VisibilityFilter) AbstractMap(java.util.AbstractMap) List(java.util.List) RFile(org.apache.accumulo.core.file.rfile.RFile) Pair(org.apache.accumulo.core.util.Pair) Comparator(java.util.Comparator) IteratorEnvironment(org.apache.accumulo.core.iterators.IteratorEnvironment) ArrayList(java.util.ArrayList) RFile(org.apache.accumulo.core.file.rfile.RFile) 
FileSystem(org.apache.hadoop.fs.FileSystem) CachableBlockFile(org.apache.accumulo.core.file.blockfile.impl.CachableBlockFile) AccumuloConfiguration(org.apache.accumulo.core.conf.AccumuloConfiguration) Pair(org.apache.accumulo.core.util.Pair) Path(org.apache.hadoop.fs.Path) Authorizations(org.apache.accumulo.core.security.Authorizations) MultiIterator(org.apache.accumulo.core.iterators.system.MultiIterator) SortedKeyValueIterator(org.apache.accumulo.core.iterators.SortedKeyValueIterator) Range(org.apache.accumulo.core.data.Range) IteratorSetting(org.apache.accumulo.core.client.IteratorSetting) Value(org.apache.accumulo.core.data.Value) ArrayByteSequence(org.apache.accumulo.core.data.ArrayByteSequence) ByteSequence(org.apache.accumulo.core.data.ByteSequence) ArrayByteSequence(org.apache.accumulo.core.data.ArrayByteSequence) Key(org.apache.accumulo.core.data.Key)

Aggregations

Partition (org.apache.spark.Partition)6 Configuration (org.apache.hadoop.conf.Configuration)4 Test (org.junit.jupiter.api.Test)3 HashSet (java.util.HashSet)2 TaskContext (org.apache.spark.TaskContext)2 IOException (java.io.IOException)1 AbstractMap (java.util.AbstractMap)1 ArrayList (java.util.ArrayList)1 Comparator (java.util.Comparator)1 List (java.util.List)1 Map (java.util.Map)1 Set (java.util.Set)1 Collectors (java.util.stream.Collectors)1 AccumuloException (org.apache.accumulo.core.client.AccumuloException)1 AccumuloSecurityException (org.apache.accumulo.core.client.AccumuloSecurityException)1 Connector (org.apache.accumulo.core.client.Connector)1 Instance (org.apache.accumulo.core.client.Instance)1 IteratorSetting (org.apache.accumulo.core.client.IteratorSetting)1 TableNotFoundException (org.apache.accumulo.core.client.TableNotFoundException)1 ZooKeeperInstance (org.apache.accumulo.core.client.ZooKeeperInstance)1