Example 16 with FileSKVIterator

Use of org.apache.accumulo.core.file.FileSKVIterator in project accumulo by apache.

The class FileUtil, method cleanupIndexOp:

protected static void cleanupIndexOp(Path tmpDir, VolumeManager fs, ArrayList<FileSKVIterator> readers) throws IOException {
    // close all of the index sequence files
    for (FileSKVIterator r : readers) {
        try {
            if (r != null)
                r.close();
        } catch (IOException e) {
            // okay, try to close the rest anyway
            log.error("{}", e.getMessage(), e);
        }
    }
    if (tmpDir != null) {
        Volume v = fs.getVolumeByPath(tmpDir);
        if (v.getFileSystem().exists(tmpDir)) {
            fs.deleteRecursively(tmpDir);
            return;
        }
        log.error("Did not delete tmp dir because it wasn't a tmp dir {}", tmpDir);
    }
}
Also used : FileSKVIterator(org.apache.accumulo.core.file.FileSKVIterator) Volume(org.apache.accumulo.core.volume.Volume) IOException(java.io.IOException)
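
A minimal caller sketch, assuming a hypothetical surrounding block (the tmpDir path, the fs VolumeManager, and the work inside the try are illustrative, not Accumulo's actual FileUtil code). The point is that cleanupIndexOp is meant to sit in a finally block so the index readers and the temporary directory are released even when the read fails.

ArrayList<FileSKVIterator> readers = new ArrayList<>();
Path tmpDir = new Path("/accumulo/tmp/idx_0001"); // illustrative temporary directory
try {
    // ... open one FileSKVIterator per index file and add it to readers ...
    // ... inspect or merge the index entries here ...
} finally {
    // Close every reader and delete the temporary directory, even if the work above failed.
    cleanupIndexOp(tmpDir, fs, readers);
}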

Example 17 with FileSKVIterator

Use of org.apache.accumulo.core.file.FileSKVIterator in project Gaffer by gchq.

The class BloomFilter18IT, method testFilter:

private void testFilter(final AccumuloElementConverter elementConverter, final RangeFactory rangeFactory) throws AccumuloElementConversionException, RangeFactoryException, IOException {
    // Create random data to insert, and sort it
    final Random random = new Random();
    final HashSet<Key> keysSet = new HashSet<>();
    final HashSet<Entity> dataSet = new HashSet<>();
    for (int i = 0; i < 100000; i++) {
        final Entity source = new Entity(TestGroups.ENTITY);
        source.setVertex("type" + random.nextInt(Integer.MAX_VALUE));
        final Entity destination = new Entity(TestGroups.ENTITY);
        destination.setVertex("type" + random.nextInt(Integer.MAX_VALUE));
        dataSet.add(source);
        dataSet.add(destination);
        final Entity sourceEntity = new Entity(source.getGroup());
        sourceEntity.setVertex(source.getVertex());
        final Entity destinationEntity = new Entity(destination.getGroup());
        destinationEntity.setVertex(destination.getVertex());
        final Edge edge = new Edge(TestGroups.EDGE, source.getVertex(), destination.getVertex(), true);
        keysSet.add(elementConverter.getKeyFromEntity(sourceEntity));
        keysSet.add(elementConverter.getKeyFromEntity(destinationEntity));
        final Pair<Key> edgeKeys = elementConverter.getKeysFromEdge(edge);
        keysSet.add(edgeKeys.getFirst());
        keysSet.add(edgeKeys.getSecond());
    }
    final ArrayList<Key> keys = new ArrayList<>(keysSet);
    Collections.sort(keys);
    final Properties property = new Properties();
    property.put(AccumuloPropertyNames.COUNT, 10);
    final Value value = elementConverter.getValueFromProperties(TestGroups.ENTITY, property);
    final Value value2 = elementConverter.getValueFromProperties(TestGroups.EDGE, property);
    // Create Accumulo configuration
    final ConfigurationCopy accumuloConf = new ConfigurationCopy(AccumuloConfiguration.getDefaultConfiguration());
    accumuloConf.set(Property.TABLE_BLOOM_ENABLED, "true");
    accumuloConf.set(Property.TABLE_BLOOM_KEY_FUNCTOR, CoreKeyBloomFunctor.class.getName());
    accumuloConf.set(Property.TABLE_FILE_TYPE, RFile.EXTENSION);
    accumuloConf.set(Property.TABLE_BLOOM_LOAD_THRESHOLD, "1");
    accumuloConf.set(Property.TSERV_BLOOM_LOAD_MAXCONCURRENT, "1");
    // Create Hadoop configuration
    final Configuration conf = CachedConfiguration.getInstance();
    final FileSystem fs = FileSystem.get(conf);
    // Open file
    final String suffix = FileOperations.getNewFileExtension(accumuloConf);
    final String filenameTemp = tempFolder.getRoot().getAbsolutePath();
    final String filename = filenameTemp + "." + suffix;
    final File file = new File(filename);
    if (file.exists()) {
        file.delete();
    }
    final FileSKVWriter writer = FileOperations.getInstance().newWriterBuilder().forFile(filename, fs, conf).withTableConfiguration(accumuloConf).build();
    try {
        // Write data to file
        writer.startDefaultLocalityGroup();
        for (final Key key : keys) {
            if (elementConverter.getElementFromKey(key).getGroup().equals(TestGroups.ENTITY)) {
                writer.append(key, value);
            } else {
                writer.append(key, value2);
            }
        }
    } finally {
        writer.close();
    }
    // Reader
    final FileSKVIterator reader = FileOperations.getInstance().newReaderBuilder().forFile(filename, fs, conf).withTableConfiguration(accumuloConf).seekToBeginning(false).build();
    try {
        // Calculate random look-up rate - run it numTrials times and take the best
        final int numTrials = 5;
        double maxRandomRate = -1.0;
        for (int i = 0; i < numTrials; i++) {
            final double rate = calculateRandomLookUpRate(reader, dataSet, random, rangeFactory);
            if (rate > maxRandomRate) {
                maxRandomRate = rate;
            }
        }
        LOGGER.info("Max random rate = " + maxRandomRate);
        // Calculate look up rate for items that were inserted
        double maxCausalRate = -1.0;
        for (int i = 0; i < numTrials; i++) {
            double rate = calculateCausalLookUpRate(reader, dataSet, random, rangeFactory);
            if (rate > maxCausalRate) {
                maxCausalRate = rate;
            }
        }
        LOGGER.info("Max causal rate = " + maxCausalRate);
        // Random look up rate should be much faster
        assertTrue(maxRandomRate > maxCausalRate);
    } finally {
        // Close reader
        reader.close();
    }
}
Also used : Entity(uk.gov.gchq.gaffer.data.element.Entity) FileSKVIterator(org.apache.accumulo.core.file.FileSKVIterator) ConfigurationCopy(org.apache.accumulo.core.conf.ConfigurationCopy) Configuration(org.apache.hadoop.conf.Configuration) AccumuloConfiguration(org.apache.accumulo.core.conf.AccumuloConfiguration) CachedConfiguration(org.apache.accumulo.core.util.CachedConfiguration) FileSKVWriter(org.apache.accumulo.core.file.FileSKVWriter) ArrayList(java.util.ArrayList) Properties(uk.gov.gchq.gaffer.data.element.Properties) Random(java.util.Random) CoreKeyBloomFunctor(uk.gov.gchq.gaffer.accumulostore.key.core.impl.CoreKeyBloomFunctor) FileSystem(org.apache.hadoop.fs.FileSystem) Value(org.apache.accumulo.core.data.Value) Edge(uk.gov.gchq.gaffer.data.element.Edge) File(java.io.File) RFile(org.apache.accumulo.core.file.rfile.RFile) Key(org.apache.accumulo.core.data.Key) HashSet(java.util.HashSet)
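
The timing helpers calculateRandomLookUpRate and calculateCausalLookUpRate are not shown above. Below is a hedged sketch of what one timed look-up pass might look like, assuming a hypothetical lookUpRate helper (Range and ByteSequence are from org.apache.accumulo.core.data, Collections from java.util); probing with single-key ranges is what lets the Bloom filter reject absent keys without reading data blocks.

// Hypothetical helper, not part of the Gaffer test: times a batch of point look-ups.
private static double lookUpRate(final FileSKVIterator reader, final List<Key> probeKeys) throws IOException {
    final long start = System.currentTimeMillis();
    for (final Key key : probeKeys) {
        // A single-key range gives the Bloom functor a chance to reject the key cheaply.
        reader.seek(new Range(key, key), Collections.<ByteSequence>emptySet(), false);
        reader.hasTop();
    }
    final double seconds = (System.currentTimeMillis() - start) / 1000.0;
    return probeKeys.size() / seconds;
}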

Example 18 with FileSKVIterator

Use of org.apache.accumulo.core.file.FileSKVIterator in project incubator-rya by apache.

The class AccumuloHDFSFileInputFormat, method createRecordReader:

@Override
public RecordReader<Key, Value> createRecordReader(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
    return new RecordReader<Key, Value>() {

        private FileSKVIterator fileSKVIterator;

        private boolean started = false;

        @Override
        public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
            FileSplit split = (FileSplit) inputSplit;
            Configuration job = taskAttemptContext.getConfiguration();
            Path file = split.getPath();
            FileSystem fs = file.getFileSystem(job);
            Instance instance = MRUtils.AccumuloProps.getInstance(taskAttemptContext);
            fileSKVIterator = RFileOperations.getInstance().openReader(file.toString(), ALLRANGE, new HashSet<ByteSequence>(), false, fs, job, instance.getConfiguration());
        }

        @Override
        public boolean nextKeyValue() throws IOException, InterruptedException {
            if (started) {
                fileSKVIterator.next();
            } else {
                // don't move past the first record yet
                started = true;
            }
            return fileSKVIterator.hasTop();
        }

        @Override
        public Key getCurrentKey() throws IOException, InterruptedException {
            return fileSKVIterator.getTopKey();
        }

        @Override
        public Value getCurrentValue() throws IOException, InterruptedException {
            return fileSKVIterator.getTopValue();
        }

        @Override
        public float getProgress() throws IOException, InterruptedException {
            return 0;
        }

        @Override
        public void close() throws IOException {
        }
    };
}
Also used : Path(org.apache.hadoop.fs.Path) FileSKVIterator(org.apache.accumulo.core.file.FileSKVIterator) Configuration(org.apache.hadoop.conf.Configuration) Instance(org.apache.accumulo.core.client.Instance) RecordReader(org.apache.hadoop.mapreduce.RecordReader) FileSystem(org.apache.hadoop.fs.FileSystem) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) FileSplit(org.apache.hadoop.mapreduce.lib.input.FileSplit) InputSplit(org.apache.hadoop.mapreduce.InputSplit) HashSet(java.util.HashSet)
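
In a real job Hadoop drives this anonymous RecordReader itself; the hedged sketch below just makes the call order explicit (inputFormat, split and taskAttemptContext are assumed to be an AccumuloHDFSFileInputFormat instance and the framework-supplied MapReduce objects).

RecordReader<Key, Value> reader = inputFormat.createRecordReader(split, taskAttemptContext);
reader.initialize(split, taskAttemptContext);
try {
    while (reader.nextKeyValue()) {
        Key key = reader.getCurrentKey();
        Value value = reader.getCurrentValue();
        // ... process one Accumulo key/value pair read from the RFile ...
    }
} finally {
    reader.close();
}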

Example 19 with FileSKVIterator

Use of org.apache.accumulo.core.file.FileSKVIterator in project accumulo by apache.

The class Compactor, method openMapDataFiles:

private List<SortedKeyValueIterator<Key, Value>> openMapDataFiles(String lgName, ArrayList<FileSKVIterator> readers) throws IOException {
    List<SortedKeyValueIterator<Key, Value>> iters = new ArrayList<>(filesToCompact.size());
    for (FileRef mapFile : filesToCompact.keySet()) {
        try {
            FileOperations fileFactory = FileOperations.getInstance();
            FileSystem fs = this.fs.getVolumeByPath(mapFile.path()).getFileSystem();
            FileSKVIterator reader;
            reader = fileFactory.newReaderBuilder().forFile(mapFile.path().toString(), fs, fs.getConf()).withTableConfiguration(acuTableConf).withRateLimiter(env.getReadLimiter()).build();
            readers.add(reader);
            SortedKeyValueIterator<Key, Value> iter = new ProblemReportingIterator(context, extent.getTableId(), mapFile.path().toString(), false, reader);
            if (filesToCompact.get(mapFile).isTimeSet()) {
                iter = new TimeSettingIterator(iter, filesToCompact.get(mapFile).getTime());
            }
            iters.add(iter);
        } catch (Throwable e) {
            ProblemReports.getInstance(context).report(new ProblemReport(extent.getTableId(), ProblemType.FILE_READ, mapFile.path().toString(), e));
            log.warn("Some problem opening map file {} {}", mapFile, e.getMessage(), e);
            // failed to open some map file... close the ones that were opened
            for (FileSKVIterator reader : readers) {
                try {
                    reader.close();
                } catch (Throwable e2) {
                    log.warn("Failed to close map file", e2);
                }
            }
            readers.clear();
            if (e instanceof IOException)
                throw (IOException) e;
            throw new IOException("Failed to open map data files", e);
        }
    }
    return iters;
}
Also used : FileSKVIterator(org.apache.accumulo.core.file.FileSKVIterator) ArrayList(java.util.ArrayList) FileOperations(org.apache.accumulo.core.file.FileOperations) SortedKeyValueIterator(org.apache.accumulo.core.iterators.SortedKeyValueIterator) TimeSettingIterator(org.apache.accumulo.core.iterators.system.TimeSettingIterator) IOException(java.io.IOException) ProblemReport(org.apache.accumulo.server.problems.ProblemReport) FileRef(org.apache.accumulo.server.fs.FileRef) FileSystem(org.apache.hadoop.fs.FileSystem) ProblemReportingIterator(org.apache.accumulo.server.problems.ProblemReportingIterator) Value(org.apache.accumulo.core.data.Value) DataFileValue(org.apache.accumulo.core.metadata.schema.DataFileValue) Key(org.apache.accumulo.core.data.Key)
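
A hedged sketch of how a caller might pair openMapDataFiles with cleanup (illustrative, not Compactor's literal code): the readers list is populated by the method precisely so that every FileSKVIterator can be closed in a finally block once the merged compaction output has been written.

ArrayList<FileSKVIterator> readers = new ArrayList<>();
try {
    List<SortedKeyValueIterator<Key, Value>> iters = openMapDataFiles(lgName, readers);
    // ... stack the compaction's iterators on top of iters and write the merged output ...
} finally {
    // Mirror the error path inside openMapDataFiles: close whatever was opened.
    for (FileSKVIterator reader : readers) {
        try {
            reader.close();
        } catch (Throwable t) {
            log.warn("Failed to close map file", t);
        }
    }
}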

Example 20 with FileSKVIterator

Use of org.apache.accumulo.core.file.FileSKVIterator in project accumulo by apache.

The class MajorCompactionRequest, method openReader:

public FileSKVIterator openReader(FileRef ref) throws IOException {
    Preconditions.checkState(volumeManager != null, "Opening files is not supported at this time. It's only supported when CompactionStrategy.gatherInformation() is called.");
    // @TODO verify the file isn't some random file in HDFS
    // @TODO ensure these files are always closed?
    FileOperations fileFactory = FileOperations.getInstance();
    FileSystem ns = volumeManager.getVolumeByPath(ref.path()).getFileSystem();
    FileSKVIterator openReader = fileFactory.newReaderBuilder().forFile(ref.path().toString(), ns, ns.getConf()).withTableConfiguration(tableConfig).seekToBeginning().build();
    return openReader;
}
Also used : FileSKVIterator(org.apache.accumulo.core.file.FileSKVIterator) FileSystem(org.apache.hadoop.fs.FileSystem) FileOperations(org.apache.accumulo.core.file.FileOperations)
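
A hedged sketch of a compaction strategy using openReader from gatherInformation (the strategy class and its decision logic are illustrative; it assumes request.getFiles() exposes the candidate files keyed by FileRef and that FileSKVIterator provides getFirstKey()/getLastKey() accessors, while openReader and close are the calls shown above).

// Illustrative strategy, not Accumulo code: records each candidate file's key span.
@Override
public void gatherInformation(MajorCompactionRequest request) throws IOException {
    for (FileRef ref : request.getFiles().keySet()) {
        FileSKVIterator reader = request.openReader(ref);
        try {
            Key first = reader.getFirstKey();
            Key last = reader.getLastKey();
            // ... remember the span (first, last) to inform the later compaction decision ...
        } finally {
            reader.close();
        }
    }
}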

Aggregations

FileSKVIterator (org.apache.accumulo.core.file.FileSKVIterator): 32
Key (org.apache.accumulo.core.data.Key): 22
FileSystem (org.apache.hadoop.fs.FileSystem): 17
ArrayList (java.util.ArrayList): 13
PartialKey (org.apache.accumulo.core.data.PartialKey): 13
Value (org.apache.accumulo.core.data.Value): 13
IOException (java.io.IOException): 11
Configuration (org.apache.hadoop.conf.Configuration): 10
Path (org.apache.hadoop.fs.Path): 9
Range (org.apache.accumulo.core.data.Range): 7
CachedConfiguration (org.apache.accumulo.core.util.CachedConfiguration): 7
AccumuloConfiguration (org.apache.accumulo.core.conf.AccumuloConfiguration): 5
ConfigurationCopy (org.apache.accumulo.core.conf.ConfigurationCopy): 5
SortedKeyValueIterator (org.apache.accumulo.core.iterators.SortedKeyValueIterator): 5
MultiIterator (org.apache.accumulo.core.iterators.system.MultiIterator): 5
Text (org.apache.hadoop.io.Text): 5
Test (org.junit.Test): 5
File (java.io.File): 4
HashMap (java.util.HashMap): 4
CryptoTest (org.apache.accumulo.core.security.crypto.CryptoTest): 4