Search in sources :

Example 16 with Properties

use of uk.gov.gchq.gaffer.data.element.Properties in project Gaffer by gchq.

the class BloomFilter18IT method testFilter.

/**
 * Writes randomly generated entity and edge keys to a bloom-filter-enabled file and
 * asserts that the best rate reported by {@code calculateRandomLookUpRate} exceeds the
 * best rate reported by {@code calculateCausalLookUpRate}.
 *
 * @param elementConverter converter used to build the Accumulo keys and values
 * @param rangeFactory     passed through to the look-up rate helpers
 * @throws AccumuloElementConversionException if an element cannot be converted to a key or value
 * @throws RangeFactoryException              propagated from the look-up rate helpers
 * @throws IOException                        if a stale file cannot be deleted, or on file I/O failure
 */
private void testFilter(final AccumuloElementConverter elementConverter, final RangeFactory rangeFactory) throws AccumuloElementConversionException, RangeFactoryException, IOException {
    // Create random data to insert, and sort it
    final Random random = new Random();
    final HashSet<Key> keysSet = new HashSet<>();
    final HashSet<Entity> dataSet = new HashSet<>();
    for (int i = 0; i < 100000; i++) {
        final Entity source = new Entity(TestGroups.ENTITY);
        source.setVertex("type" + random.nextInt(Integer.MAX_VALUE));
        final Entity destination = new Entity(TestGroups.ENTITY);
        destination.setVertex("type" + random.nextInt(Integer.MAX_VALUE));
        dataSet.add(source);
        dataSet.add(destination);
        // Keys are built from fresh copies carrying only the group and vertex
        final Entity sourceEntity = new Entity(source.getGroup());
        sourceEntity.setVertex(source.getVertex());
        final Entity destinationEntity = new Entity(destination.getGroup());
        destinationEntity.setVertex(destination.getVertex());
        final Edge edge = new Edge(TestGroups.EDGE, source.getVertex(), destination.getVertex(), true);
        keysSet.add(elementConverter.getKeyFromEntity(sourceEntity));
        keysSet.add(elementConverter.getKeyFromEntity(destinationEntity));
        final Pair<Key> edgeKeys = elementConverter.getKeysFromEdge(edge);
        keysSet.add(edgeKeys.getFirst());
        keysSet.add(edgeKeys.getSecond());
    }
    // The writer is fed keys in sorted order
    final ArrayList<Key> keys = new ArrayList<>(keysSet);
    Collections.sort(keys);
    final Properties property = new Properties();
    property.put(AccumuloPropertyNames.COUNT, 10);
    final Value value = elementConverter.getValueFromProperties(TestGroups.ENTITY, property);
    final Value value2 = elementConverter.getValueFromProperties(TestGroups.EDGE, property);
    // Create Accumulo configuration
    final ConfigurationCopy accumuloConf = new ConfigurationCopy(AccumuloConfiguration.getDefaultConfiguration());
    accumuloConf.set(Property.TABLE_BLOOM_ENABLED, "true");
    accumuloConf.set(Property.TABLE_BLOOM_KEY_FUNCTOR, CoreKeyBloomFunctor.class.getName());
    accumuloConf.set(Property.TABLE_FILE_TYPE, RFile.EXTENSION);
    accumuloConf.set(Property.TABLE_BLOOM_LOAD_THRESHOLD, "1");
    accumuloConf.set(Property.TSERV_BLOOM_LOAD_MAXCONCURRENT, "1");
    // Create Hadoop configuration
    final Configuration conf = CachedConfiguration.getInstance();
    final FileSystem fs = FileSystem.get(conf);
    // Open file. It is placed INSIDE the temporary folder (not next to it) so that it is
    // removed together with the folder - assumes tempFolder is cleaned up after the test.
    final String suffix = FileOperations.getNewFileExtension(accumuloConf);
    final String filename = tempFolder.getRoot().getAbsolutePath() + File.separator + "bloomFilterTest." + suffix;
    final File file = new File(filename);
    if (file.exists() && !file.delete()) {
        // Fail loudly rather than writing over a stale file that could not be removed
        throw new IOException("Unable to delete existing file " + filename);
    }
    final FileSKVWriter writer = FileOperations.getInstance().newWriterBuilder().forFile(filename, fs, conf).withTableConfiguration(accumuloConf).build();
    try {
        // Write data to file
        writer.startDefaultLocalityGroup();
        for (final Key key : keys) {
            if (elementConverter.getElementFromKey(key).getGroup().equals(TestGroups.ENTITY)) {
                writer.append(key, value);
            } else {
                writer.append(key, value2);
            }
        }
    } finally {
        writer.close();
    }
    // Reader
    final FileSKVIterator reader = FileOperations.getInstance().newReaderBuilder().forFile(filename, fs, conf).withTableConfiguration(accumuloConf).seekToBeginning(false).build();
    try {
        // Calculate random look up rate - run it numTrials times and take the best
        final int numTrials = 5;
        double maxRandomRate = -1.0;
        for (int i = 0; i < numTrials; i++) {
            final double rate = calculateRandomLookUpRate(reader, dataSet, random, rangeFactory);
            if (rate > maxRandomRate) {
                maxRandomRate = rate;
            }
        }
        LOGGER.info("Max random rate = " + maxRandomRate);
        // Calculate look up rate for items that were inserted - again best of numTrials
        double maxCausalRate = -1.0;
        for (int i = 0; i < numTrials; i++) {
            final double rate = calculateCausalLookUpRate(reader, dataSet, random, rangeFactory);
            if (rate > maxCausalRate) {
                maxCausalRate = rate;
            }
        }
        LOGGER.info("Max causal rate = " + maxCausalRate);
        // Random look up rate should be much faster
        assertTrue(maxRandomRate > maxCausalRate);
    } finally {
        // Close reader
        reader.close();
    }
}
Also used : Entity(uk.gov.gchq.gaffer.data.element.Entity) FileSKVIterator(org.apache.accumulo.core.file.FileSKVIterator) ConfigurationCopy(org.apache.accumulo.core.conf.ConfigurationCopy) Configuration(org.apache.hadoop.conf.Configuration) AccumuloConfiguration(org.apache.accumulo.core.conf.AccumuloConfiguration) CachedConfiguration(org.apache.accumulo.core.util.CachedConfiguration) FileSKVWriter(org.apache.accumulo.core.file.FileSKVWriter) ArrayList(java.util.ArrayList) Properties(uk.gov.gchq.gaffer.data.element.Properties) Random(java.util.Random) CoreKeyBloomFunctor(uk.gov.gchq.gaffer.accumulostore.key.core.impl.CoreKeyBloomFunctor) FileSystem(org.apache.hadoop.fs.FileSystem) Value(org.apache.accumulo.core.data.Value) Edge(uk.gov.gchq.gaffer.data.element.Edge) File(java.io.File) RFile(org.apache.accumulo.core.file.rfile.RFile) Key(org.apache.accumulo.core.data.Key) HashSet(java.util.HashSet)

Example 17 with Properties

use of uk.gov.gchq.gaffer.data.element.Properties in project Gaffer by gchq.

the class AbstractCoreKeyAccumuloElementConverter method getPropertiesFromColumnVisibility.

/**
 * Builds the properties carried by the column visibility of a key.
 * <p>
 * The result is empty unless the schema names a visibility property and the
 * group's element definition has a type definition for it. Empty or missing
 * visibility bytes are handed to the serialiser's empty-bytes handling, and a
 * {@code null} result from that is not stored.
 *
 * @param group            the group whose schema definition is consulted
 * @param columnVisibility the raw column visibility bytes, may be {@code null} or empty
 * @return the properties derived from the column visibility, never {@code null}
 * @throws AccumuloElementConversionException if the group is not in the schema or
 *                                            the bytes cannot be deserialised
 */
@Override
public Properties getPropertiesFromColumnVisibility(final String group, final byte[] columnVisibility) throws AccumuloElementConversionException {
    final Properties result = new Properties();
    final SchemaElementDefinition groupDefinition = schema.getElement(group);
    if (null == groupDefinition) {
        throw new AccumuloElementConversionException("No SchemaElementDefinition found for group " + group + ", is this group in your schema or do your table iterators need updating?");
    }
    // Nothing to extract when the schema has no visibility property
    if (null == schema.getVisibilityProperty()) {
        return result;
    }
    final TypeDefinition visibilityType = groupDefinition.getPropertyTypeDef(schema.getVisibilityProperty());
    // ... or when this group does not define a type for it
    if (null == visibilityType) {
        return result;
    }
    final Serialisation visibilitySerialiser = visibilityType.getSerialiser();
    final boolean noVisibilityBytes = columnVisibility == null || columnVisibility.length == 0;
    try {
        if (noVisibilityBytes) {
            final Object emptyValue = visibilitySerialiser.deserialiseEmptyBytes();
            if (emptyValue != null) {
                result.put(schema.getVisibilityProperty(), emptyValue);
            }
        } else {
            result.put(schema.getVisibilityProperty(), visibilitySerialiser.deserialise(columnVisibility));
        }
    } catch (final SerialisationException e) {
        throw new AccumuloElementConversionException(e.getMessage(), e);
    }
    return result;
}
Also used : SerialisationException(uk.gov.gchq.gaffer.exception.SerialisationException) Serialisation(uk.gov.gchq.gaffer.serialisation.Serialisation) Properties(uk.gov.gchq.gaffer.data.element.Properties) SchemaElementDefinition(uk.gov.gchq.gaffer.store.schema.SchemaElementDefinition) AccumuloElementConversionException(uk.gov.gchq.gaffer.accumulostore.key.exception.AccumuloElementConversionException) TypeDefinition(uk.gov.gchq.gaffer.store.schema.TypeDefinition)

Example 18 with Properties

use of uk.gov.gchq.gaffer.data.element.Properties in project Gaffer by gchq.

the class AbstractCoreKeyAccumuloElementConverter method getPropertiesFromTimestamp.

/**
 * Builds the properties that the Schema stores in the Accumulo timestamp
 * column for the given group.
 * <p>
 * The timestamp is added under the schema's timestamp property name only when
 * that name is set and the group's element definition contains it.
 *
 * @param group     The {@link Element} type to be queried
 * @param timestamp the element timestamp property
 * @return The Properties stored within the Timestamp part of the
 * {@link Key}; empty when no timestamp property applies to the group
 * @throws AccumuloElementConversionException If the supplied group has not been defined
 */
@Override
public Properties getPropertiesFromTimestamp(final String group, final long timestamp) throws AccumuloElementConversionException {
    final SchemaElementDefinition groupDefinition = schema.getElement(group);
    if (null == groupDefinition) {
        throw new AccumuloElementConversionException("No SchemaElementDefinition found for group " + group + ", is this group in your schema or do your table iterators need updating?");
    }
    final Properties timestampProperties = new Properties();
    // No timestamp property configured in the schema: nothing to add
    if (null == schema.getTimestampProperty()) {
        return timestampProperties;
    }
    // Only groups that declare the timestamp property receive it
    if (groupDefinition.containsProperty(schema.getTimestampProperty())) {
        timestampProperties.put(schema.getTimestampProperty(), timestamp);
    }
    return timestampProperties;
}
Also used : Properties(uk.gov.gchq.gaffer.data.element.Properties) SchemaElementDefinition(uk.gov.gchq.gaffer.store.schema.SchemaElementDefinition) AccumuloElementConversionException(uk.gov.gchq.gaffer.accumulostore.key.exception.AccumuloElementConversionException)

Example 19 with Properties

use of uk.gov.gchq.gaffer.data.element.Properties in project Gaffer by gchq.

the class AggregatorIterator method reduce.

/**
 * Collapses all values supplied for a key into a single value.
 * <p>
 * A lone value is returned untouched. Otherwise the group name is decoded
 * from the key's column family, each value is converted to properties and fed
 * to the group's aggregator, and the aggregator's final state is converted
 * back into a value.
 *
 * @param key  the key whose column family holds the group name
 * @param iter the values to combine; the first element is read without a
 *             {@code hasNext()} check
 * @return the only value if there is just one, otherwise the aggregated value
 * @throws AggregationException if the group cannot be decoded or a value
 *                              cannot be converted to or from properties
 */
@Override
public Value reduce(final Key key, final Iterator<Value> iter) {
    // A single value needs no aggregation, so hand it back as-is
    final Value first = iter.next();
    if (!iter.hasNext()) {
        return first;
    }
    final String group;
    try {
        group = new String(key.getColumnFamilyData().getBackingArray(), CommonConstants.UTF_8);
    } catch (final UnsupportedEncodingException e) {
        throw new AggregationException("Failed to recreate a graph element from a key and value", e);
    }
    // The first value is converted before the aggregator is looked up
    final Properties firstProperties = propertiesFromValue(group, first);
    final ElementAggregator aggregator = schema.getElement(group).getAggregator();
    aggregator.aggregate(firstProperties);
    while (iter.hasNext()) {
        aggregator.aggregate(propertiesFromValue(group, iter.next()));
    }
    // Read the combined state out of the aggregator into a fresh Properties
    final Properties aggregated = new Properties();
    aggregator.state(aggregated);
    try {
        return elementConverter.getValueFromProperties(group, aggregated);
    } catch (final AccumuloElementConversionException e) {
        throw new AggregationException("Failed to create an accumulo value from an elements properties", e);
    }
}

/**
 * Converts a value to properties, rethrowing conversion failures as {@link AggregationException}.
 */
private Properties propertiesFromValue(final String group, final Value value) {
    try {
        return elementConverter.getPropertiesFromValue(group, value);
    } catch (final AccumuloElementConversionException e) {
        throw new AggregationException("Failed to recreate a graph element from a key and value", e);
    }
}
Also used : AggregationException(uk.gov.gchq.gaffer.accumulostore.key.exception.AggregationException) Value(org.apache.accumulo.core.data.Value) UnsupportedEncodingException(java.io.UnsupportedEncodingException) Properties(uk.gov.gchq.gaffer.data.element.Properties) ElementAggregator(uk.gov.gchq.gaffer.data.element.function.ElementAggregator) AccumuloElementConversionException(uk.gov.gchq.gaffer.accumulostore.key.exception.AccumuloElementConversionException)

Example 20 with Properties

use of uk.gov.gchq.gaffer.data.element.Properties in project Gaffer by gchq.

the class BloomFilterIT method testFilter.

/**
 * Writes randomly generated entity and edge keys to a bloom-filter-enabled file and
 * asserts that the best rate reported by {@code calculateRandomLookUpRate} exceeds the
 * best rate reported by {@code calculateCausalLookUpRate}.
 *
 * @param elementConverter converter used to build the Accumulo keys and values
 * @param rangeFactory     passed through to the look-up rate helpers
 * @throws AccumuloElementConversionException if an element cannot be converted to a key or value
 * @throws RangeFactoryException              propagated from the look-up rate helpers
 * @throws IOException                        if a stale file cannot be deleted, or on file I/O failure
 */
private void testFilter(final AccumuloElementConverter elementConverter, final RangeFactory rangeFactory) throws AccumuloElementConversionException, RangeFactoryException, IOException {
    // Create random data to insert, and sort it
    final Random random = new Random();
    final HashSet<Key> keysSet = new HashSet<>();
    final HashSet<Entity> dataSet = new HashSet<>();
    for (int i = 0; i < 100000; i++) {
        final Entity source = new Entity(TestGroups.ENTITY);
        source.setVertex("type" + random.nextInt(Integer.MAX_VALUE));
        final Entity destination = new Entity(TestGroups.ENTITY);
        destination.setVertex("type" + random.nextInt(Integer.MAX_VALUE));
        dataSet.add(source);
        dataSet.add(destination);
        // Keys are built from fresh copies carrying only the group and vertex
        final Entity sourceEntity = new Entity(source.getGroup());
        sourceEntity.setVertex(source.getVertex());
        final Entity destinationEntity = new Entity(destination.getGroup());
        destinationEntity.setVertex(destination.getVertex());
        final Edge edge = new Edge(TestGroups.EDGE, source.getVertex(), destination.getVertex(), true);
        keysSet.add(elementConverter.getKeyFromEntity(sourceEntity));
        keysSet.add(elementConverter.getKeyFromEntity(destinationEntity));
        final Pair<Key> edgeKeys = elementConverter.getKeysFromEdge(edge);
        keysSet.add(edgeKeys.getFirst());
        keysSet.add(edgeKeys.getSecond());
    }
    // The writer is fed keys in sorted order
    final ArrayList<Key> keys = new ArrayList<>(keysSet);
    Collections.sort(keys);
    final Properties property = new Properties();
    property.put(AccumuloPropertyNames.COUNT, 10);
    final Value value = elementConverter.getValueFromProperties(TestGroups.ENTITY, property);
    final Value value2 = elementConverter.getValueFromProperties(TestGroups.EDGE, property);
    // Create Accumulo configuration
    final ConfigurationCopy accumuloConf = new ConfigurationCopy(AccumuloConfiguration.getDefaultConfiguration());
    accumuloConf.set(Property.TABLE_BLOOM_ENABLED, "true");
    accumuloConf.set(Property.TABLE_BLOOM_KEY_FUNCTOR, CoreKeyBloomFunctor.class.getName());
    accumuloConf.set(Property.TABLE_FILE_TYPE, RFile.EXTENSION);
    accumuloConf.set(Property.TABLE_BLOOM_LOAD_THRESHOLD, "1");
    accumuloConf.set(Property.TSERV_BLOOM_LOAD_MAXCONCURRENT, "1");
    // Create Hadoop configuration
    final Configuration conf = CachedConfiguration.getInstance();
    final FileSystem fs = FileSystem.get(conf);
    // Open file. It is placed INSIDE the temporary folder (not next to it) so that it is
    // removed together with the folder - assumes tempFolder is cleaned up after the test.
    final String suffix = FileOperations.getNewFileExtension(accumuloConf);
    final String filename = tempFolder.getRoot().getAbsolutePath() + File.separator + "bloomFilterTest." + suffix;
    final File file = new File(filename);
    if (file.exists() && !file.delete()) {
        // Fail loudly rather than writing over a stale file that could not be removed
        throw new IOException("Unable to delete existing file " + filename);
    }
    final FileSKVWriter writer = FileOperations.getInstance().openWriter(filename, fs, conf, accumuloConf);
    try {
        // Write data to file
        writer.startDefaultLocalityGroup();
        for (final Key key : keys) {
            if (elementConverter.getElementFromKey(key).getGroup().equals(TestGroups.ENTITY)) {
                writer.append(key, value);
            } else {
                writer.append(key, value2);
            }
        }
    } finally {
        writer.close();
    }
    // Reader
    final FileSKVIterator reader = FileOperations.getInstance().openReader(filename, false, fs, conf, accumuloConf);
    try {
        // Calculate random look up rate - run it numTrials times and take the best
        final int numTrials = 5;
        double maxRandomRate = -1.0;
        for (int i = 0; i < numTrials; i++) {
            final double rate = calculateRandomLookUpRate(reader, dataSet, random, rangeFactory);
            if (rate > maxRandomRate) {
                maxRandomRate = rate;
            }
        }
        LOGGER.info("Max random rate = " + maxRandomRate);
        // Calculate look up rate for items that were inserted - again best of numTrials
        double maxCausalRate = -1.0;
        for (int i = 0; i < numTrials; i++) {
            final double rate = calculateCausalLookUpRate(reader, dataSet, random, rangeFactory);
            if (rate > maxCausalRate) {
                maxCausalRate = rate;
            }
        }
        LOGGER.info("Max causal rate = " + maxCausalRate);
        // Random look up rate should be much faster
        assertTrue(maxRandomRate > maxCausalRate);
    } finally {
        // Close reader
        reader.close();
    }
}
Also used : Entity(uk.gov.gchq.gaffer.data.element.Entity) FileSKVIterator(org.apache.accumulo.core.file.FileSKVIterator) ConfigurationCopy(org.apache.accumulo.core.conf.ConfigurationCopy) Configuration(org.apache.hadoop.conf.Configuration) AccumuloConfiguration(org.apache.accumulo.core.conf.AccumuloConfiguration) CachedConfiguration(org.apache.accumulo.core.util.CachedConfiguration) FileSKVWriter(org.apache.accumulo.core.file.FileSKVWriter) ArrayList(java.util.ArrayList) Properties(uk.gov.gchq.gaffer.data.element.Properties) Random(java.util.Random) CoreKeyBloomFunctor(uk.gov.gchq.gaffer.accumulostore.key.core.impl.CoreKeyBloomFunctor) FileSystem(org.apache.hadoop.fs.FileSystem) Value(org.apache.accumulo.core.data.Value) Edge(uk.gov.gchq.gaffer.data.element.Edge) File(java.io.File) RFile(org.apache.accumulo.core.file.rfile.RFile) Key(org.apache.accumulo.core.data.Key) HashSet(java.util.HashSet)

Aggregations

Properties (uk.gov.gchq.gaffer.data.element.Properties)31 Test (org.junit.Test)16 Value (org.apache.accumulo.core.data.Value)11 AccumuloElementConversionException (uk.gov.gchq.gaffer.accumulostore.key.exception.AccumuloElementConversionException)8 Key (org.apache.accumulo.core.data.Key)7 Edge (uk.gov.gchq.gaffer.data.element.Edge)6 Schema (uk.gov.gchq.gaffer.store.schema.Schema)5 Entity (uk.gov.gchq.gaffer.data.element.Entity)4 SchemaElementDefinition (uk.gov.gchq.gaffer.store.schema.SchemaElementDefinition)4 Element (uk.gov.gchq.gaffer.data.element.Element)3 ElementAggregator (uk.gov.gchq.gaffer.data.element.function.ElementAggregator)3 File (java.io.File)2 UnsupportedEncodingException (java.io.UnsupportedEncodingException)2 ArrayList (java.util.ArrayList)2 HashSet (java.util.HashSet)2 Entry (java.util.Map.Entry)2 Random (java.util.Random)2 AccumuloException (org.apache.accumulo.core.client.AccumuloException)2 BatchWriter (org.apache.accumulo.core.client.BatchWriter)2 BatchWriterConfig (org.apache.accumulo.core.client.BatchWriterConfig)2