use of uk.gov.gchq.gaffer.data.element.Properties in project Gaffer by gchq.
the class BloomFilter18IT method testFilter.
/**
 * Writes a large set of randomly generated entity and edge keys to a file
 * with bloom filters enabled, then asserts that the best random look-up rate
 * is higher than the best look-up rate for items that were inserted.
 *
 * @param elementConverter converter used to create the keys and values that are written
 * @param rangeFactory     handed to the look-up rate calculations
 * @throws AccumuloElementConversionException if a key or value cannot be created from an element
 * @throws RangeFactoryException              declared for the look-up rate calculations (presumably raised when a range cannot be built)
 * @throws IOException                        if a stale data file cannot be removed, or on file system failures
 */
private void testFilter(final AccumuloElementConverter elementConverter, final RangeFactory rangeFactory) throws AccumuloElementConversionException, RangeFactoryException, IOException {
    // Create random data to insert, and sort it
    final Random random = new Random();
    final HashSet<Key> keysSet = new HashSet<>();
    final HashSet<Entity> dataSet = new HashSet<>();
    for (int i = 0; i < 100000; i++) {
        final Entity source = new Entity(TestGroups.ENTITY);
        source.setVertex("type" + random.nextInt(Integer.MAX_VALUE));
        final Entity destination = new Entity(TestGroups.ENTITY);
        destination.setVertex("type" + random.nextInt(Integer.MAX_VALUE));
        dataSet.add(source);
        dataSet.add(destination);
        final Entity sourceEntity = new Entity(source.getGroup());
        sourceEntity.setVertex(source.getVertex());
        final Entity destinationEntity = new Entity(destination.getGroup());
        destinationEntity.setVertex(destination.getVertex());
        final Edge edge = new Edge(TestGroups.EDGE, source.getVertex(), destination.getVertex(), true);
        keysSet.add(elementConverter.getKeyFromEntity(sourceEntity));
        keysSet.add(elementConverter.getKeyFromEntity(destinationEntity));
        final Pair<Key> edgeKeys = elementConverter.getKeysFromEdge(edge);
        keysSet.add(edgeKeys.getFirst());
        keysSet.add(edgeKeys.getSecond());
    }
    // The keys are appended to the file in sorted order
    final ArrayList<Key> keys = new ArrayList<>(keysSet);
    Collections.sort(keys);

    // Every entity key gets the same value, as does every edge key
    final Properties property = new Properties();
    property.put(AccumuloPropertyNames.COUNT, 10);
    final Value value = elementConverter.getValueFromProperties(TestGroups.ENTITY, property);
    final Value value2 = elementConverter.getValueFromProperties(TestGroups.EDGE, property);

    // Create Accumulo configuration
    final ConfigurationCopy accumuloConf = new ConfigurationCopy(AccumuloConfiguration.getDefaultConfiguration());
    accumuloConf.set(Property.TABLE_BLOOM_ENABLED, "true");
    accumuloConf.set(Property.TABLE_BLOOM_KEY_FUNCTOR, CoreKeyBloomFunctor.class.getName());
    accumuloConf.set(Property.TABLE_FILE_TYPE, RFile.EXTENSION);
    accumuloConf.set(Property.TABLE_BLOOM_LOAD_THRESHOLD, "1");
    accumuloConf.set(Property.TSERV_BLOOM_LOAD_MAXCONCURRENT, "1");

    // Create Hadoop configuration
    final Configuration conf = CachedConfiguration.getInstance();
    final FileSystem fs = FileSystem.get(conf);

    // Open file. It is placed INSIDE the temporary folder (not next to it) so
    // that it goes away with the folder - assumes tempFolder is a JUnit
    // TemporaryFolder rule.
    final String suffix = FileOperations.getNewFileExtension(accumuloConf);
    final File file = new File(tempFolder.getRoot(), "bloomFilterTest." + suffix);
    final String filename = file.getAbsolutePath();
    // A left-over file would invalidate the measurements, so fail fast if it cannot be removed
    if (file.exists() && !file.delete()) {
        throw new IOException("Unable to delete existing file " + filename);
    }

    final FileSKVWriter writer = FileOperations.getInstance().newWriterBuilder().forFile(filename, fs, conf).withTableConfiguration(accumuloConf).build();
    try {
        // Write data to file
        writer.startDefaultLocalityGroup();
        for (final Key key : keys) {
            if (elementConverter.getElementFromKey(key).getGroup().equals(TestGroups.ENTITY)) {
                writer.append(key, value);
            } else {
                writer.append(key, value2);
            }
        }
    } finally {
        writer.close();
    }

    // Reader
    final FileSKVIterator reader = FileOperations.getInstance().newReaderBuilder().forFile(filename, fs, conf).withTableConfiguration(accumuloConf).seekToBeginning(false).build();
    try {
        // Calculate random look up rate - run it numTrials times and take best
        final int numTrials = 5;
        double maxRandomRate = -1.0;
        for (int i = 0; i < numTrials; i++) {
            final double rate = calculateRandomLookUpRate(reader, dataSet, random, rangeFactory);
            if (rate > maxRandomRate) {
                maxRandomRate = rate;
            }
        }
        LOGGER.info("Max random rate = " + maxRandomRate);

        // Calculate look up rate for items that were inserted - again take best of numTrials
        double maxCausalRate = -1.0;
        for (int i = 0; i < numTrials; i++) {
            final double rate = calculateCausalLookUpRate(reader, dataSet, random, rangeFactory);
            if (rate > maxCausalRate) {
                maxCausalRate = rate;
            }
        }
        LOGGER.info("Max causal rate = " + maxCausalRate);

        // Random look up rate should be much faster
        assertTrue(maxRandomRate > maxCausalRate);
    } finally {
        // Close reader
        reader.close();
    }
}
use of uk.gov.gchq.gaffer.data.element.Properties in project Gaffer by gchq.
the class AbstractCoreKeyAccumuloElementConverter method getPropertiesFromColumnVisibility.
/**
 * Builds the {@link Properties} derived from the column visibility bytes for
 * the given group.
 * <p>
 * The result is empty when the schema has no visibility property, or when the
 * group's definition has no type definition for that property. When the
 * supplied bytes are null or empty, the serialiser's empty-bytes value is used
 * and only stored if it is non-null; otherwise the deserialised bytes are
 * stored under the visibility property name.
 *
 * @param group            the group whose schema definition is consulted
 * @param columnVisibility the raw column visibility bytes, may be null or empty
 * @return the properties obtained from the column visibility (possibly empty)
 * @throws AccumuloElementConversionException if the group has no schema definition or deserialisation fails
 */
@Override
public Properties getPropertiesFromColumnVisibility(final String group, final byte[] columnVisibility) throws AccumuloElementConversionException {
    final Properties result = new Properties();

    final SchemaElementDefinition groupDefinition = schema.getElement(group);
    if (groupDefinition == null) {
        throw new AccumuloElementConversionException("No SchemaElementDefinition found for group " + group + ", is this group in your schema or do your table iterators need updating?");
    }

    // Nothing to extract when the schema does not name a visibility property
    final String visibilityProperty = schema.getVisibilityProperty();
    if (visibilityProperty == null) {
        return result;
    }

    // ... or when this group does not define that property
    final TypeDefinition visibilityTypeDef = groupDefinition.getPropertyTypeDef(visibilityProperty);
    if (visibilityTypeDef == null) {
        return result;
    }

    final Serialisation serialiser = visibilityTypeDef.getSerialiser();
    final boolean hasVisibility = columnVisibility != null && columnVisibility.length != 0;
    try {
        final Object deserialised = hasVisibility
                ? serialiser.deserialise(columnVisibility)
                : serialiser.deserialiseEmptyBytes();
        // A value read from real bytes is always stored; the empty-bytes value only when non-null
        if (hasVisibility || deserialised != null) {
            result.put(visibilityProperty, deserialised);
        }
    } catch (final SerialisationException e) {
        throw new AccumuloElementConversionException(e.getMessage(), e);
    }
    return result;
}
use of uk.gov.gchq.gaffer.data.element.Properties in project Gaffer by gchq.
the class AbstractCoreKeyAccumuloElementConverter method getPropertiesFromTimestamp.
/**
 * Builds the {@link Properties} that the Schema stores in the Accumulo
 * timestamp column for the given group.
 * <p>
 * The timestamp is added under the schema's timestamp property name only when
 * the schema names such a property and the group's definition contains it;
 * otherwise the returned properties are empty.
 *
 * @param group     The {@link Element} type to be queried
 * @param timestamp the element timestamp property
 * @return The Properties stored within the Timestamp part of the
 * {@link Key}
 * @throws AccumuloElementConversionException If the supplied group has not been defined
 */
@Override
public Properties getPropertiesFromTimestamp(final String group, final long timestamp) throws AccumuloElementConversionException {
    final SchemaElementDefinition groupDefinition = schema.getElement(group);
    if (groupDefinition == null) {
        throw new AccumuloElementConversionException("No SchemaElementDefinition found for group " + group + ", is this group in your schema or do your table iterators need updating?");
    }

    final Properties result = new Properties();
    final String timestampProperty = schema.getTimestampProperty();
    final boolean groupUsesTimestamp = timestampProperty != null
            && groupDefinition.containsProperty(timestampProperty);
    if (groupUsesTimestamp) {
        result.put(timestampProperty, timestamp);
    }
    return result;
}
use of uk.gov.gchq.gaffer.data.element.Properties in project Gaffer by gchq.
the class AggregatorIterator method reduce.
/**
 * Aggregates all the values supplied for a key into a single value.
 * <p>
 * A lone value is returned unchanged. Otherwise the group is read from the
 * key's column family, each value is converted to properties and fed to the
 * group's aggregator, and the aggregator's resulting state is converted back
 * into a value.
 *
 * @param key  the key the values belong to; its column family supplies the group
 * @param iter the values to combine; the first element is read without a hasNext check
 * @return the single original value, or the value built from the aggregated properties
 */
@Override
public Value reduce(final Key key, final Iterator<Value> iter) {
    // Only one value: nothing to aggregate, hand it straight back
    final Value first = iter.next();
    if (!iter.hasNext()) {
        return first;
    }

    final String group;
    try {
        group = new String(key.getColumnFamilyData().getBackingArray(), CommonConstants.UTF_8);
    } catch (final UnsupportedEncodingException e) {
        throw new AggregationException("Failed to recreate a graph element from a key and value", e);
    }

    // The first value is converted before the aggregator is looked up
    final Properties firstProperties = propertiesFromValue(group, first);
    final ElementAggregator aggregator = schema.getElement(group).getAggregator();
    aggregator.aggregate(firstProperties);
    while (iter.hasNext()) {
        aggregator.aggregate(propertiesFromValue(group, iter.next()));
    }

    // Collect the aggregator's state into a fresh Properties and serialise it
    final Properties aggregated = new Properties();
    aggregator.state(aggregated);
    try {
        return elementConverter.getValueFromProperties(group, aggregated);
    } catch (final AccumuloElementConversionException e) {
        throw new AggregationException("Failed to create an accumulo value from an elements properties", e);
    }
}

/**
 * Converts a value to properties for the group, wrapping any conversion
 * failure in an {@link AggregationException}.
 */
private Properties propertiesFromValue(final String group, final Value value) {
    try {
        return elementConverter.getPropertiesFromValue(group, value);
    } catch (final AccumuloElementConversionException e) {
        throw new AggregationException("Failed to recreate a graph element from a key and value", e);
    }
}
use of uk.gov.gchq.gaffer.data.element.Properties in project Gaffer by gchq.
the class BloomFilterIT method testFilter.
/**
 * Writes a large set of randomly generated entity and edge keys to a file
 * with bloom filters enabled, then asserts that the best random look-up rate
 * is higher than the best look-up rate for items that were inserted.
 *
 * @param elementConverter converter used to create the keys and values that are written
 * @param rangeFactory     handed to the look-up rate calculations
 * @throws AccumuloElementConversionException if a key or value cannot be created from an element
 * @throws RangeFactoryException              declared for the look-up rate calculations (presumably raised when a range cannot be built)
 * @throws IOException                        if a stale data file cannot be removed, or on file system failures
 */
private void testFilter(final AccumuloElementConverter elementConverter, final RangeFactory rangeFactory) throws AccumuloElementConversionException, RangeFactoryException, IOException {
    // Create random data to insert, and sort it
    final Random random = new Random();
    final HashSet<Key> keysSet = new HashSet<>();
    final HashSet<Entity> dataSet = new HashSet<>();
    for (int i = 0; i < 100000; i++) {
        final Entity source = new Entity(TestGroups.ENTITY);
        source.setVertex("type" + random.nextInt(Integer.MAX_VALUE));
        final Entity destination = new Entity(TestGroups.ENTITY);
        destination.setVertex("type" + random.nextInt(Integer.MAX_VALUE));
        dataSet.add(source);
        dataSet.add(destination);
        final Entity sourceEntity = new Entity(source.getGroup());
        sourceEntity.setVertex(source.getVertex());
        final Entity destinationEntity = new Entity(destination.getGroup());
        destinationEntity.setVertex(destination.getVertex());
        final Edge edge = new Edge(TestGroups.EDGE, source.getVertex(), destination.getVertex(), true);
        keysSet.add(elementConverter.getKeyFromEntity(sourceEntity));
        keysSet.add(elementConverter.getKeyFromEntity(destinationEntity));
        final Pair<Key> edgeKeys = elementConverter.getKeysFromEdge(edge);
        keysSet.add(edgeKeys.getFirst());
        keysSet.add(edgeKeys.getSecond());
    }
    // The keys are appended to the file in sorted order
    final ArrayList<Key> keys = new ArrayList<>(keysSet);
    Collections.sort(keys);

    // Every entity key gets the same value, as does every edge key
    final Properties property = new Properties();
    property.put(AccumuloPropertyNames.COUNT, 10);
    final Value value = elementConverter.getValueFromProperties(TestGroups.ENTITY, property);
    final Value value2 = elementConverter.getValueFromProperties(TestGroups.EDGE, property);

    // Create Accumulo configuration
    final ConfigurationCopy accumuloConf = new ConfigurationCopy(AccumuloConfiguration.getDefaultConfiguration());
    accumuloConf.set(Property.TABLE_BLOOM_ENABLED, "true");
    accumuloConf.set(Property.TABLE_BLOOM_KEY_FUNCTOR, CoreKeyBloomFunctor.class.getName());
    accumuloConf.set(Property.TABLE_FILE_TYPE, RFile.EXTENSION);
    accumuloConf.set(Property.TABLE_BLOOM_LOAD_THRESHOLD, "1");
    accumuloConf.set(Property.TSERV_BLOOM_LOAD_MAXCONCURRENT, "1");

    // Create Hadoop configuration
    final Configuration conf = CachedConfiguration.getInstance();
    final FileSystem fs = FileSystem.get(conf);

    // Open file. It is placed INSIDE the temporary folder (not next to it) so
    // that it goes away with the folder - assumes tempFolder is a JUnit
    // TemporaryFolder rule.
    final String suffix = FileOperations.getNewFileExtension(accumuloConf);
    final File file = new File(tempFolder.getRoot(), "bloomFilterTest." + suffix);
    final String filename = file.getAbsolutePath();
    // A left-over file would invalidate the measurements, so fail fast if it cannot be removed
    if (file.exists() && !file.delete()) {
        throw new IOException("Unable to delete existing file " + filename);
    }

    final FileSKVWriter writer = FileOperations.getInstance().openWriter(filename, fs, conf, accumuloConf);
    try {
        // Write data to file
        writer.startDefaultLocalityGroup();
        for (final Key key : keys) {
            if (elementConverter.getElementFromKey(key).getGroup().equals(TestGroups.ENTITY)) {
                writer.append(key, value);
            } else {
                writer.append(key, value2);
            }
        }
    } finally {
        writer.close();
    }

    // Reader
    final FileSKVIterator reader = FileOperations.getInstance().openReader(filename, false, fs, conf, accumuloConf);
    try {
        // Calculate random look up rate - run it numTrials times and take best
        final int numTrials = 5;
        double maxRandomRate = -1.0;
        for (int i = 0; i < numTrials; i++) {
            final double rate = calculateRandomLookUpRate(reader, dataSet, random, rangeFactory);
            if (rate > maxRandomRate) {
                maxRandomRate = rate;
            }
        }
        LOGGER.info("Max random rate = " + maxRandomRate);

        // Calculate look up rate for items that were inserted - again take best of numTrials
        double maxCausalRate = -1.0;
        for (int i = 0; i < numTrials; i++) {
            final double rate = calculateCausalLookUpRate(reader, dataSet, random, rangeFactory);
            if (rate > maxCausalRate) {
                maxCausalRate = rate;
            }
        }
        LOGGER.info("Max causal rate = " + maxCausalRate);

        // Random look up rate should be much faster
        assertTrue(maxRandomRate > maxCausalRate);
    } finally {
        // Close reader
        reader.close();
    }
}
Aggregations