Use of org.apache.accumulo.core.data.Key in project hive by apache.
The class AccumuloDefaultIndexScanner, method getIndexRowRanges:
/**
 * Get a list of rowid ranges by scanning a column index.
 *
 * @param column - the hive column name
 * @param indexRange - Key range to scan on the index table
 * @return List of matching rowid ranges or null if too many matches found
 * if index values are not found a newline range is added to list to
 * short-circuit the query
 */
@Override
public List<Range> getIndexRowRanges(String column, Range indexRange) {
  List<Range> rowIds = new ArrayList<Range>();
  Scanner scan = null;
  String col = this.colMap.get(column);
  if (col != null) {
    try {
      LOG.debug("Searching tab=" + indexTable + " column=" + column + " range=" + indexRange);
      Connector conn = getConnector();
      scan = conn.createScanner(indexTable, auths);
      scan.setRange(indexRange);
      Text cf = new Text(col);
LOG.debug("Using Column Family=" + toString());
      scan.fetchColumnFamily(cf);
      for (Map.Entry<Key, Value> entry : scan) {
        rowIds.add(new Range(entry.getKey().getColumnQualifier()));
        // if we have too many results return null for a full scan
        if (rowIds.size() > maxRowIds) {
          return null;
        }
      }
      // no hits on the index so return a no match range
      if (rowIds.isEmpty()) {
        LOG.debug("Found 0 index matches");
      } else {
        LOG.debug("Found " + rowIds.size() + " index matches");
      }
      return rowIds;
    } catch (AccumuloException | AccumuloSecurityException | TableNotFoundException e) {
      LOG.error("Failed to scan index table: " + indexTable, e);
    } finally {
      if (scan != null) {
        scan.close();
      }
    }
  }
  // assume the index is bad and do a full scan
  LOG.debug("Index lookup failed for table " + indexTable);
  return null;
}
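In this index layout the indexed value is the row of the index table, the mapped column family identifies the Hive column, and the column qualifier holds the rowid of the data table, which is why each hit becomes new Range(entry.getKey().getColumnQualifier()). A minimal sketch (not from the Hive source) of how a caller might feed the returned ranges into a BatchScanner over the data table; the scanner instance, data-table name, connector and authorizations are hypothetical placeholders:

// Hedged sketch: consume the ranges returned by getIndexRowRanges with a
// BatchScanner on the data table. "indexScanner", "conn", "auths" and
// "data_table" are placeholders, not names from the Hive code.
List<Range> rowRanges = indexScanner.getIndexRowRanges("name", new Range("someValue"));
if (rowRanges == null) {
  // too many index hits or a failed lookup: fall back to a full table scan
} else if (!rowRanges.isEmpty()) {
  BatchScanner bs = conn.createBatchScanner("data_table", auths, 4);
  bs.setRanges(rowRanges);
  for (Map.Entry<Key, Value> entry : bs) {
    // each entry's Key belongs to a data-table row that matched the index
  }
  bs.close();
}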
Use of org.apache.accumulo.core.data.Key in project apex-malhar by apache.
The class AccumuloTestHelper, method getAccumuloTuple:
public static AccumuloTuple getAccumuloTuple(String row, String colFam, String colName) {
  Authorizations auths = new Authorizations();
  Scanner scan = null;
  try {
    scan = con.createScanner("tab1", auths);
  } catch (TableNotFoundException e) {
    logger.error("error in test helper");
    DTThrowable.rethrow(e);
  }
  scan.setRange(new Range(new Text(row)));
  scan.fetchColumn(new Text(colFam), new Text(colName));
  // assuming only one row
  for (Entry<Key, Value> entry : scan) {
    AccumuloTuple tuple = new AccumuloTuple();
    tuple.setRow(entry.getKey().getRow().toString());
    tuple.setColFamily(entry.getKey().getColumnFamily().toString());
    tuple.setColName(entry.getKey().getColumnQualifier().toString());
    tuple.setColValue(entry.getValue().toString());
    return tuple;
  }
  return null;
}
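The helper reads a single cell: Key exposes the row, column family and column qualifier as Text, so each part is converted with toString() before being copied onto the tuple. A hedged usage sketch follows; the row/column names are placeholders and the getters on AccumuloTuple are assumed to mirror the setters used above.

// Hypothetical usage in a test (names are placeholders, getters assumed):
AccumuloTuple tuple = AccumuloTestHelper.getAccumuloTuple("row0", "colfam0", "col-0");
assertNotNull("expected a tuple from tab1", tuple);
assertEquals("row0", tuple.getRow());
assertEquals("col-0", tuple.getColName());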
Use of org.apache.accumulo.core.data.Key in project apex-malhar by apache.
The class AccumuloWindowStore, method getCommittedWindowId:
@Override
public long getCommittedWindowId(String appId, int operatorId) {
  byte[] value = null;
  Authorizations auths = new Authorizations();
  Scanner scan = null;
  String columnKey = appId + "_" + operatorId + "_" + lastWindowColumnName;
  lastWindowColumnBytes = columnKey.getBytes();
  try {
    scan = connector.createScanner(tableName, auths);
  } catch (TableNotFoundException e) {
    logger.error("error getting committed window id", e);
    DTThrowable.rethrow(e);
  }
  scan.setRange(new Range(new Text(rowBytes)));
  scan.fetchColumn(new Text(columnFamilyBytes), new Text(lastWindowColumnBytes));
  for (Entry<Key, Value> entry : scan) {
    value = entry.getValue().get();
  }
  if (value != null) {
    long longval = toLong(value);
    return longval;
  }
  return -1;
}
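The committed window id is read from a single cell whose qualifier is built from the appId, operatorId and lastWindowColumnName; -1 signals that nothing has been committed yet. For context, a hedged sketch of the matching write path (this is not the actual AccumuloWindowStore code; connector, tableName, rowBytes and columnFamilyBytes are assumed to be the same fields used above, and the byte conversion here uses ByteBuffer rather than the store's own helpers):

// Sketch only: persist a committed window id into the cell that
// getCommittedWindowId reads back. Field names are assumed from the code above.
public void storeCommittedWindowId(String appId, int operatorId, long windowId) throws Exception {
  byte[] columnBytes = (appId + "_" + operatorId + "_" + lastWindowColumnName).getBytes();
  byte[] windowIdBytes = ByteBuffer.allocate(Long.SIZE / 8).putLong(windowId).array();
  BatchWriter writer = connector.createBatchWriter(tableName, new BatchWriterConfig());
  Mutation mutation = new Mutation(rowBytes);
  mutation.put(columnFamilyBytes, columnBytes, windowIdBytes);
  writer.addMutation(mutation);
  writer.close();
}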
Use of org.apache.accumulo.core.data.Key in project Gaffer by gchq.
The class BloomFilter18IT, method testFilter:
private void testFilter(final AccumuloElementConverter elementConverter, final RangeFactory rangeFactory) throws AccumuloElementConversionException, RangeFactoryException, IOException {
  // Create random data to insert, and sort it
  final Random random = new Random();
  final HashSet<Key> keysSet = new HashSet<>();
  final HashSet<Entity> dataSet = new HashSet<>();
  for (int i = 0; i < 100000; i++) {
    final Entity source = new Entity(TestGroups.ENTITY);
    source.setVertex("type" + random.nextInt(Integer.MAX_VALUE));
    final Entity destination = new Entity(TestGroups.ENTITY);
    destination.setVertex("type" + random.nextInt(Integer.MAX_VALUE));
    dataSet.add(source);
    dataSet.add(destination);
    final Entity sourceEntity = new Entity(source.getGroup());
    sourceEntity.setVertex(source.getVertex());
    final Entity destinationEntity = new Entity(destination.getGroup());
    destinationEntity.setVertex(destination.getVertex());
    final Edge edge = new Edge(TestGroups.EDGE, source.getVertex(), destination.getVertex(), true);
    keysSet.add(elementConverter.getKeyFromEntity(sourceEntity));
    keysSet.add(elementConverter.getKeyFromEntity(destinationEntity));
    final Pair<Key> edgeKeys = elementConverter.getKeysFromEdge(edge);
    keysSet.add(edgeKeys.getFirst());
    keysSet.add(edgeKeys.getSecond());
  }
  final ArrayList<Key> keys = new ArrayList<>(keysSet);
  Collections.sort(keys);
  final Properties property = new Properties();
  property.put(AccumuloPropertyNames.COUNT, 10);
  final Value value = elementConverter.getValueFromProperties(TestGroups.ENTITY, property);
  final Value value2 = elementConverter.getValueFromProperties(TestGroups.EDGE, property);
  // Create Accumulo configuration
  final ConfigurationCopy accumuloConf = new ConfigurationCopy(AccumuloConfiguration.getDefaultConfiguration());
  accumuloConf.set(Property.TABLE_BLOOM_ENABLED, "true");
  accumuloConf.set(Property.TABLE_BLOOM_KEY_FUNCTOR, CoreKeyBloomFunctor.class.getName());
  accumuloConf.set(Property.TABLE_FILE_TYPE, RFile.EXTENSION);
  accumuloConf.set(Property.TABLE_BLOOM_LOAD_THRESHOLD, "1");
  accumuloConf.set(Property.TSERV_BLOOM_LOAD_MAXCONCURRENT, "1");
  // Create Hadoop configuration
  final Configuration conf = CachedConfiguration.getInstance();
  final FileSystem fs = FileSystem.get(conf);
  // Open file
  final String suffix = FileOperations.getNewFileExtension(accumuloConf);
  final String filenameTemp = tempFolder.getRoot().getAbsolutePath();
  final String filename = filenameTemp + "." + suffix;
  final File file = new File(filename);
  if (file.exists()) {
    file.delete();
  }
  final FileSKVWriter writer = FileOperations.getInstance().newWriterBuilder().forFile(filename, fs, conf).withTableConfiguration(accumuloConf).build();
  try {
    // Write data to file
    writer.startDefaultLocalityGroup();
    for (final Key key : keys) {
      if (elementConverter.getElementFromKey(key).getGroup().equals(TestGroups.ENTITY)) {
        writer.append(key, value);
      } else {
        writer.append(key, value2);
      }
    }
  } finally {
    writer.close();
  }
  // Reader
  final FileSKVIterator reader = FileOperations.getInstance().newReaderBuilder().forFile(filename, fs, conf).withTableConfiguration(accumuloConf).seekToBeginning(false).build();
  try {
    // Calculate random look up rate - run numTrials passes and take the best
    final int numTrials = 5;
    double maxRandomRate = -1.0;
    for (int i = 0; i < numTrials; i++) {
      final double rate = calculateRandomLookUpRate(reader, dataSet, random, rangeFactory);
      if (rate > maxRandomRate) {
        maxRandomRate = rate;
      }
    }
    LOGGER.info("Max random rate = " + maxRandomRate);
    // Calculate look up rate for items that were inserted
    double maxCausalRate = -1.0;
    for (int i = 0; i < numTrials; i++) {
      double rate = calculateCausalLookUpRate(reader, dataSet, random, rangeFactory);
      if (rate > maxCausalRate) {
        maxCausalRate = rate;
      }
    }
    LOGGER.info("Max causal rate = " + maxCausalRate);
    // Random look up rate should be much faster
    assertTrue(maxRandomRate > maxCausalRate);
  } finally {
    // Close reader
    reader.close();
  }
}
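The two rate helpers are not shown above, but each look-up against the FileSKVIterator amounts to seeking to a single-Key range and checking whether the reader has a top entry; with the Bloom filter enabled, most seeks for keys that were never written can return without touching the data blocks, which is why the random rate is expected to beat the causal rate. A hedged sketch of one such look-up (the variable key is a placeholder for a Key produced by the element converter or range factory):

// Sketch of a single point look-up against the reader opened above.
Range lookUp = new Range(key, true, key, true);
reader.seek(lookUp, Collections.<ByteSequence>emptySet(), false);
boolean found = reader.hasTop(); // usually false for random keys, so the seek is cheap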
Use of org.apache.accumulo.core.data.Key in project Gaffer by gchq.
The class ImportKeyValueJavaPairRDDToAccumuloHandlerTest, method checkImportKeyValueJavaPairRDD:
@Test
public void checkImportKeyValueJavaPairRDD() throws OperationException, IOException, InterruptedException {
  final Graph graph1 = new Graph.Builder()
      .addSchema(getClass().getResourceAsStream("/schema/dataSchema.json"))
      .addSchema(getClass().getResourceAsStream("/schema/dataTypes.json"))
      .addSchema(getClass().getResourceAsStream("/schema/storeSchema.json"))
      .addSchema(getClass().getResourceAsStream("/schema/storeTypes.json"))
      .storeProperties(getClass().getResourceAsStream("/store.properties"))
      .build();
  final List<Element> elements = new ArrayList<>();
  for (int i = 0; i < 10; i++) {
    final Entity entity = new Entity(TestGroups.ENTITY);
    entity.setVertex("" + i);
    final Edge edge1 = new Edge(TestGroups.EDGE);
    edge1.setSource("" + i);
    edge1.setDestination("B");
    edge1.setDirected(false);
    edge1.putProperty(TestPropertyNames.COUNT, 2);
    final Edge edge2 = new Edge(TestGroups.EDGE);
    edge2.setSource("" + i);
    edge2.setDestination("C");
    edge2.setDirected(false);
    edge2.putProperty(TestPropertyNames.COUNT, 4);
    elements.add(edge1);
    elements.add(edge2);
    elements.add(entity);
  }
  final User user = new User();
  final SparkConf sparkConf = new SparkConf()
      .setMaster("local")
      .setAppName("testCheckGetCorrectElementsInJavaRDDForEntitySeed")
      .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .set("spark.kryo.registrator", "uk.gov.gchq.gaffer.spark.serialisation.kryo.Registrator")
      .set("spark.driver.allowMultipleContexts", "true");
  final JavaSparkContext sparkContext = new JavaSparkContext(sparkConf);
  // Create Hadoop configuration and serialise to a string
  final Configuration configuration = new Configuration();
  final ByteArrayOutputStream baos = new ByteArrayOutputStream();
  configuration.write(new DataOutputStream(baos));
  final String configurationString = new String(baos.toByteArray(), CommonConstants.UTF_8);
  final String outputPath = this.getClass().getResource("/").getPath().toString() + "load";
  final String failurePath = this.getClass().getResource("/").getPath().toString() + "failure";
  final File file = new File(outputPath);
  if (file.exists()) {
    FileUtils.forceDelete(file);
  }
  final ElementConverterFunction func = new ElementConverterFunction(sparkContext.broadcast(new ByteEntityAccumuloElementConverter(graph1.getSchema())));
  final JavaPairRDD<Key, Value> elementJavaRDD = sparkContext.parallelize(elements).flatMapToPair(func);
  final ImportKeyValueJavaPairRDDToAccumulo addRdd = new ImportKeyValueJavaPairRDDToAccumulo.Builder()
      .input(elementJavaRDD)
      .outputPath(outputPath)
      .failurePath(failurePath)
      .build();
  graph1.execute(addRdd, user);
  FileUtils.forceDelete(file);
  // Check all elements were added
  final GetJavaRDDOfAllElements rddQuery = new GetJavaRDDOfAllElements.Builder()
      .javaSparkContext(sparkContext)
      .option(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString)
      .build();
  final JavaRDD<Element> rdd = graph1.execute(rddQuery, user);
  if (rdd == null) {
    fail("No RDD returned");
  }
  final Set<Element> results = new HashSet<>(rdd.collect());
  assertEquals(elements.size(), results.size());
  sparkContext.stop();
}
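The Hadoop Configuration is serialised to a string and passed as an operation option so that the RDD handler can rebuild it on the Spark side; Configuration implements Writable, so the round trip is a write/readFields pair. A hedged sketch of the read-back step (this mirrors what the handler is assumed to do and is not code from the test above):

// Sketch only: rebuild a Configuration from the serialised string created above.
Configuration deserialised = new Configuration();
deserialised.readFields(new DataInputStream(
    new ByteArrayInputStream(configurationString.getBytes(CommonConstants.UTF_8))));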