use of org.apache.accumulo.core.file.FileSKVIterator in project accumulo by apache.
the class FileUtil method cleanupIndexOp.
protected static void cleanupIndexOp(Path tmpDir, VolumeManager fs, ArrayList<FileSKVIterator> readers) throws IOException {
    // close all of the index sequence files
    for (FileSKVIterator r : readers) {
        try {
            if (r != null)
                r.close();
        } catch (IOException e) {
            // okay, try to close the rest anyway
            log.error("{}", e.getMessage(), e);
        }
    }
    if (tmpDir != null) {
        Volume v = fs.getVolumeByPath(tmpDir);
        if (v.getFileSystem().exists(tmpDir)) {
            fs.deleteRecursively(tmpDir);
            return;
        }
        log.error("Did not delete tmp dir because it wasn't a tmp dir {}", tmpDir);
    }
}
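A typical caller collects the readers into the shared list and invokes cleanupIndexOp in a finally block, so the index readers and the temporary directory are released even when the work fails part-way. The sketch below is illustrative only; openIndexReader, indexFiles and tmpDir are hypothetical stand-ins, not FileUtil members.
// Illustrative caller pattern (hypothetical helper and variables; not the actual FileUtil call site).
ArrayList<FileSKVIterator> readers = new ArrayList<>();
try {
    for (Path indexFile : indexFiles) {
        // openIndexReader is a hypothetical helper that builds a FileSKVIterator over one index file
        readers.add(openIndexReader(fs, indexFile));
    }
    // ... walk the index entries to estimate split points, percentages, etc. ...
} finally {
    // always close every reader and remove tmpDir, even on failure
    cleanupIndexOp(tmpDir, fs, readers);
}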
use of org.apache.accumulo.core.file.FileSKVIterator in project Gaffer by gchq.
the class BloomFilter18IT method testFilter.
private void testFilter(final AccumuloElementConverter elementConverter, final RangeFactory rangeFactory) throws AccumuloElementConversionException, RangeFactoryException, IOException {
    // Create random data to insert, and sort it
    final Random random = new Random();
    final HashSet<Key> keysSet = new HashSet<>();
    final HashSet<Entity> dataSet = new HashSet<>();
    for (int i = 0; i < 100000; i++) {
        final Entity source = new Entity(TestGroups.ENTITY);
        source.setVertex("type" + random.nextInt(Integer.MAX_VALUE));
        final Entity destination = new Entity(TestGroups.ENTITY);
        destination.setVertex("type" + random.nextInt(Integer.MAX_VALUE));
        dataSet.add(source);
        dataSet.add(destination);
        final Entity sourceEntity = new Entity(source.getGroup());
        sourceEntity.setVertex(source.getVertex());
        final Entity destinationEntity = new Entity(destination.getGroup());
        destinationEntity.setVertex(destination.getVertex());
        final Edge edge = new Edge(TestGroups.EDGE, source.getVertex(), destination.getVertex(), true);
        keysSet.add(elementConverter.getKeyFromEntity(sourceEntity));
        keysSet.add(elementConverter.getKeyFromEntity(destinationEntity));
        final Pair<Key> edgeKeys = elementConverter.getKeysFromEdge(edge);
        keysSet.add(edgeKeys.getFirst());
        keysSet.add(edgeKeys.getSecond());
    }
    final ArrayList<Key> keys = new ArrayList<>(keysSet);
    Collections.sort(keys);
    final Properties property = new Properties();
    property.put(AccumuloPropertyNames.COUNT, 10);
    final Value value = elementConverter.getValueFromProperties(TestGroups.ENTITY, property);
    final Value value2 = elementConverter.getValueFromProperties(TestGroups.EDGE, property);
    // Create Accumulo configuration
    final ConfigurationCopy accumuloConf = new ConfigurationCopy(AccumuloConfiguration.getDefaultConfiguration());
    accumuloConf.set(Property.TABLE_BLOOM_ENABLED, "true");
    accumuloConf.set(Property.TABLE_BLOOM_KEY_FUNCTOR, CoreKeyBloomFunctor.class.getName());
    accumuloConf.set(Property.TABLE_FILE_TYPE, RFile.EXTENSION);
    accumuloConf.set(Property.TABLE_BLOOM_LOAD_THRESHOLD, "1");
    accumuloConf.set(Property.TSERV_BLOOM_LOAD_MAXCONCURRENT, "1");
    // Create Hadoop configuration
    final Configuration conf = CachedConfiguration.getInstance();
    final FileSystem fs = FileSystem.get(conf);
    // Open file
    final String suffix = FileOperations.getNewFileExtension(accumuloConf);
    final String filenameTemp = tempFolder.getRoot().getAbsolutePath();
    final String filename = filenameTemp + "." + suffix;
    final File file = new File(filename);
    if (file.exists()) {
        file.delete();
    }
    final FileSKVWriter writer = FileOperations.getInstance().newWriterBuilder().forFile(filename, fs, conf).withTableConfiguration(accumuloConf).build();
    try {
        // Write data to file
        writer.startDefaultLocalityGroup();
        for (final Key key : keys) {
            if (elementConverter.getElementFromKey(key).getGroup().equals(TestGroups.ENTITY)) {
                writer.append(key, value);
            } else {
                writer.append(key, value2);
            }
        }
    } finally {
        writer.close();
    }
    // Reader
    final FileSKVIterator reader = FileOperations.getInstance().newReaderBuilder().forFile(filename, fs, conf).withTableConfiguration(accumuloConf).seekToBeginning(false).build();
    try {
        // Calculate random look-up rate - run it numTrials times and take the best
        final int numTrials = 5;
        double maxRandomRate = -1.0;
        for (int i = 0; i < numTrials; i++) {
            final double rate = calculateRandomLookUpRate(reader, dataSet, random, rangeFactory);
            if (rate > maxRandomRate) {
                maxRandomRate = rate;
            }
        }
        LOGGER.info("Max random rate = " + maxRandomRate);
        // Calculate look-up rate for items that were inserted
        double maxCausalRate = -1.0;
        for (int i = 0; i < numTrials; i++) {
            double rate = calculateCausalLookUpRate(reader, dataSet, random, rangeFactory);
            if (rate > maxCausalRate) {
                maxCausalRate = rate;
            }
        }
        LOGGER.info("Max causal rate = " + maxCausalRate);
        // Random look-up rate should be much faster
        assertTrue(maxRandomRate > maxCausalRate);
    } finally {
        // Close reader
        reader.close();
    }
}
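The calculateRandomLookUpRate and calculateCausalLookUpRate helpers are not shown in this snippet; at their core, each timed look-up seeks the reader to a single-element range produced by the RangeFactory and checks whether anything comes back, roughly as sketched below. The helper name and structure are assumptions for illustration, not part of the Gaffer test.
// Sketch of one timed look-up against the FileSKVIterator (hypothetical helper).
private boolean lookUp(final FileSKVIterator reader, final Range range) throws IOException {
    // An empty column-family set with inclusive = false means "exclude nothing", i.e. read all families.
    reader.seek(range, Collections.<ByteSequence>emptySet(), false);
    // With TABLE_BLOOM_ENABLED set, a negative look-up can usually be answered from the Bloom filter
    // without reading data blocks, which is what makes the random look-up rate higher.
    return reader.hasTop();
}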
use of org.apache.accumulo.core.file.FileSKVIterator in project incubator-rya by apache.
the class AccumuloHDFSFileInputFormat method createRecordReader.
@Override
public RecordReader<Key, Value> createRecordReader(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
    return new RecordReader<Key, Value>() {

        private FileSKVIterator fileSKVIterator;
        private boolean started = false;

        @Override
        public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
            FileSplit split = (FileSplit) inputSplit;
            Configuration job = taskAttemptContext.getConfiguration();
            Path file = split.getPath();
            FileSystem fs = file.getFileSystem(job);
            Instance instance = MRUtils.AccumuloProps.getInstance(taskAttemptContext);
            fileSKVIterator = RFileOperations.getInstance().openReader(file.toString(), ALLRANGE, new HashSet<ByteSequence>(), false, fs, job, instance.getConfiguration());
        }

        @Override
        public boolean nextKeyValue() throws IOException, InterruptedException {
            if (started) {
                fileSKVIterator.next();
            } else {
                // don't move past the first record yet
                started = true;
            }
            return fileSKVIterator.hasTop();
        }

        @Override
        public Key getCurrentKey() throws IOException, InterruptedException {
            return fileSKVIterator.getTopKey();
        }

        @Override
        public Value getCurrentValue() throws IOException, InterruptedException {
            return fileSKVIterator.getTopValue();
        }

        @Override
        public float getProgress() throws IOException, InterruptedException {
            return 0;
        }

        @Override
        public void close() throws IOException {
        }
    };
}
use of org.apache.accumulo.core.file.FileSKVIterator in project accumulo by apache.
the class Compactor method openMapDataFiles.
private List<SortedKeyValueIterator<Key, Value>> openMapDataFiles(String lgName, ArrayList<FileSKVIterator> readers) throws IOException {
    List<SortedKeyValueIterator<Key, Value>> iters = new ArrayList<>(filesToCompact.size());
    for (FileRef mapFile : filesToCompact.keySet()) {
        try {
            FileOperations fileFactory = FileOperations.getInstance();
            FileSystem fs = this.fs.getVolumeByPath(mapFile.path()).getFileSystem();
            FileSKVIterator reader;
            reader = fileFactory.newReaderBuilder().forFile(mapFile.path().toString(), fs, fs.getConf()).withTableConfiguration(acuTableConf).withRateLimiter(env.getReadLimiter()).build();
            readers.add(reader);
            SortedKeyValueIterator<Key, Value> iter = new ProblemReportingIterator(context, extent.getTableId(), mapFile.path().toString(), false, reader);
            if (filesToCompact.get(mapFile).isTimeSet()) {
                iter = new TimeSettingIterator(iter, filesToCompact.get(mapFile).getTime());
            }
            iters.add(iter);
        } catch (Throwable e) {
            ProblemReports.getInstance(context).report(new ProblemReport(extent.getTableId(), ProblemType.FILE_READ, mapFile.path().toString(), e));
            log.warn("Some problem opening map file {} {}", mapFile, e.getMessage(), e);
            // failed to open some map file... close the ones that were opened
            for (FileSKVIterator reader : readers) {
                try {
                    reader.close();
                } catch (Throwable e2) {
                    log.warn("Failed to close map file", e2);
                }
            }
            readers.clear();
            if (e instanceof IOException)
                throw (IOException) e;
            throw new IOException("Failed to open map data files", e);
        }
    }
    return iters;
}
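The surrounding compaction code (not shown here) merges the returned per-file iterators into a single sorted stream before writing the output file. A rough sketch of that merge step, assuming Accumulo's system MultiIterator and the enclosing Compactor fields (extent, lgName, readers), could look like the following; it is illustrative, not the project's exact code.
// Sketch: merging the per-file iterators into one sorted stream (assumes the system MultiIterator).
List<SortedKeyValueIterator<Key, Value>> iters = openMapDataFiles(lgName, readers);
SortedKeyValueIterator<Key, Value> merged = new MultiIterator(iters, extent);
merged.seek(extent.toDataRange(), Collections.<ByteSequence>emptySet(), false);
while (merged.hasTop()) {
    // each (key, value) here is the next smallest key across all of the input files
    Key key = merged.getTopKey();
    Value value = merged.getTopValue();
    // ... write to the compaction output file ...
    merged.next();
}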
use of org.apache.accumulo.core.file.FileSKVIterator in project accumulo by apache.
the class MajorCompactionRequest method openReader.
public FileSKVIterator openReader(FileRef ref) throws IOException {
    Preconditions.checkState(volumeManager != null, "Opening files is not supported at this time. It's only supported when CompactionStrategy.gatherInformation() is called.");
    // @TODO verify the file isn't some random file in HDFS
    // @TODO ensure these files are always closed?
    FileOperations fileFactory = FileOperations.getInstance();
    FileSystem ns = volumeManager.getVolumeByPath(ref.path()).getFileSystem();
    FileSKVIterator openReader = fileFactory.newReaderBuilder().forFile(ref.path().toString(), ns, ns.getConf()).withTableConfiguration(tableConfig).seekToBeginning().build();
    return openReader;
}
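A compaction strategy would typically call this from gatherInformation() and is responsible for closing the reader itself (as the @TODO above hints). The sketch below assumes a hypothetical class extending CompactionStrategy; it is not taken from Accumulo.
// Sketch: using openReader from a CompactionStrategy's gatherInformation (hypothetical strategy class).
@Override
public void gatherInformation(MajorCompactionRequest request) throws IOException {
    for (FileRef ref : request.getFiles().keySet()) {
        FileSKVIterator reader = request.openReader(ref);
        try {
            // FileSKVIterator exposes the file's first and last keys without a full scan
            Key firstKey = reader.getFirstKey();
            Key lastKey = reader.getLastKey();
            // ... record whatever per-file statistics the strategy needs ...
        } finally {
            // the request does not close readers for us
            reader.close();
        }
    }
}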