Search in sources :

Example 96 with Key

use of org.apache.accumulo.core.data.Key in project Gaffer by gchq.

the class AddElementsFromHdfsMapper method map.

/**
 * Converts the given element into its Accumulo key(s) and value and writes
 * them to the job context, then bumps a per-element-class counter in the
 * "Bulk import" counter group.
 *
 * @param element the element to convert and emit
 * @param context the context that receives the key/value pairs and counters
 * @throws IOException              if writing to the context fails
 * @throws InterruptedException     if writing to the context is interrupted
 * @throws IllegalArgumentException if the element cannot be converted into keys or a value
 */
@Override
protected void map(final Element element, final Context context) throws IOException, InterruptedException {
    final Pair<Key> keys;
    final Value elementValue;
    try {
        // Keys are converted first; the value is only converted if that succeeds.
        keys = elementConverter.getKeysFromElement(element);
        elementValue = elementConverter.getValueFromElement(element);
    } catch (final AccumuloElementConversionException e) {
        // Conversion failures are surfaced as unchecked exceptions, keeping the cause.
        throw new IllegalArgumentException(e.getMessage(), e);
    }

    // The first key is always written; the second one only when it is present.
    context.write(keys.getFirst(), elementValue);
    final Key secondKey = keys.getSecond();
    if (null != secondKey) {
        context.write(secondKey, elementValue);
    }

    final String counterName = element.getClass().getSimpleName() + " count";
    context.getCounter("Bulk import", counterName).increment(1L);
}
Also used : Value(org.apache.accumulo.core.data.Value) Key(org.apache.accumulo.core.data.Key) AccumuloElementConversionException(uk.gov.gchq.gaffer.accumulostore.key.exception.AccumuloElementConversionException)

Example 97 with Key

use of org.apache.accumulo.core.data.Key in project Gaffer by gchq.

the class BloomFilterIT method testFilter.

/**
 * Writes a sorted set of randomly generated entity and edge keys to a file
 * with bloom filters enabled, then compares the look-up rate for random
 * queries against the look-up rate for queries on data that was inserted.
 * The test asserts that the best random rate exceeds the best causal rate.
 *
 * @param elementConverter converter used to build keys and values and to read the group back from a key
 * @param rangeFactory     range factory passed through to the look-up rate helpers
 * @throws AccumuloElementConversionException if an element cannot be converted to or from a key/value
 * @throws RangeFactoryException              declared for the look-up rate helpers
 * @throws IOException                        if the file cannot be written or read
 */
private void testFilter(final AccumuloElementConverter elementConverter, final RangeFactory rangeFactory) throws AccumuloElementConversionException, RangeFactoryException, IOException {
    // Create random data to insert, and sort it
    final Random random = new Random();
    final HashSet<Key> keysSet = new HashSet<>();
    final HashSet<Entity> dataSet = new HashSet<>();
    for (int i = 0; i < 100000; i++) {
        final Entity source = new Entity(TestGroups.ENTITY);
        source.setVertex("type" + random.nextInt(Integer.MAX_VALUE));
        final Entity destination = new Entity(TestGroups.ENTITY);
        destination.setVertex("type" + random.nextInt(Integer.MAX_VALUE));
        dataSet.add(source);
        dataSet.add(destination);
        final Entity sourceEntity = new Entity(source.getGroup());
        sourceEntity.setVertex(source.getVertex());
        final Entity destinationEntity = new Entity(destination.getGroup());
        destinationEntity.setVertex(destination.getVertex());
        final Edge edge = new Edge(TestGroups.EDGE, source.getVertex(), destination.getVertex(), true);
        keysSet.add(elementConverter.getKeyFromEntity(sourceEntity));
        keysSet.add(elementConverter.getKeyFromEntity(destinationEntity));
        final Pair<Key> edgeKeys = elementConverter.getKeysFromEdge(edge);
        keysSet.add(edgeKeys.getFirst());
        // The second key of a pair may be absent (other callers null-check it);
        // a null in the set would make Collections.sort below throw a NullPointerException.
        if (edgeKeys.getSecond() != null) {
            keysSet.add(edgeKeys.getSecond());
        }
    }
    final ArrayList<Key> keys = new ArrayList<>(keysSet);
    // Keys are sorted before being appended to the file below.
    Collections.sort(keys);
    final Properties property = new Properties();
    property.put(AccumuloPropertyNames.COUNT, 10);
    final Value value = elementConverter.getValueFromProperties(TestGroups.ENTITY, property);
    final Value value2 = elementConverter.getValueFromProperties(TestGroups.EDGE, property);
    // Create Accumulo configuration
    final ConfigurationCopy accumuloConf = new ConfigurationCopy(AccumuloConfiguration.getDefaultConfiguration());
    accumuloConf.set(Property.TABLE_BLOOM_ENABLED, "true");
    accumuloConf.set(Property.TABLE_BLOOM_KEY_FUNCTOR, CoreKeyBloomFunctor.class.getName());
    accumuloConf.set(Property.TABLE_FILE_TYPE, RFile.EXTENSION);
    accumuloConf.set(Property.TABLE_BLOOM_LOAD_THRESHOLD, "1");
    accumuloConf.set(Property.TSERV_BLOOM_LOAD_MAXCONCURRENT, "1");
    // Create Hadoop configuration
    final Configuration conf = CachedConfiguration.getInstance();
    final FileSystem fs = FileSystem.get(conf);
    // Open file
    final String suffix = FileOperations.getNewFileExtension(accumuloConf);
    // NOTE(review): the file name is the temp folder's path plus an extension, so the
    // file appears to sit next to the temp folder rather than inside it - confirm intended.
    final String filenameTemp = tempFolder.getRoot().getAbsolutePath();
    final String filename = filenameTemp + "." + suffix;
    final File file = new File(filename);
    if (file.exists()) {
        file.delete();
    }
    final FileSKVWriter writer = FileOperations.getInstance().openWriter(filename, fs, conf, accumuloConf);
    try {
        // Write data to file
        writer.startDefaultLocalityGroup();
        for (final Key key : keys) {
            // Entity keys get the entity value; every other key gets the edge value.
            if (elementConverter.getElementFromKey(key).getGroup().equals(TestGroups.ENTITY)) {
                writer.append(key, value);
            } else {
                writer.append(key, value2);
            }
        }
    } finally {
        writer.close();
    }
    // Reader
    final FileSKVIterator reader = FileOperations.getInstance().openReader(filename, false, fs, conf, accumuloConf);
    try {
        // Calculate random look up rate - run it numTrials times and take best
        final int numTrials = 5;
        double maxRandomRate = -1.0;
        for (int i = 0; i < numTrials; i++) {
            final double rate = calculateRandomLookUpRate(reader, dataSet, random, rangeFactory);
            if (rate > maxRandomRate) {
                maxRandomRate = rate;
            }
        }
        LOGGER.info("Max random rate = " + maxRandomRate);
        // Calculate look up rate for items that were inserted - again best of numTrials
        double maxCausalRate = -1.0;
        for (int i = 0; i < numTrials; i++) {
            double rate = calculateCausalLookUpRate(reader, dataSet, random, rangeFactory);
            if (rate > maxCausalRate) {
                maxCausalRate = rate;
            }
        }
        LOGGER.info("Max causal rate = " + maxCausalRate);
        // Random look up rate should be much faster
        assertTrue(maxRandomRate > maxCausalRate);
    } finally {
        // Close reader
        reader.close();
    }
}
Also used : Entity(uk.gov.gchq.gaffer.data.element.Entity) FileSKVIterator(org.apache.accumulo.core.file.FileSKVIterator) ConfigurationCopy(org.apache.accumulo.core.conf.ConfigurationCopy) Configuration(org.apache.hadoop.conf.Configuration) AccumuloConfiguration(org.apache.accumulo.core.conf.AccumuloConfiguration) CachedConfiguration(org.apache.accumulo.core.util.CachedConfiguration) FileSKVWriter(org.apache.accumulo.core.file.FileSKVWriter) ArrayList(java.util.ArrayList) Properties(uk.gov.gchq.gaffer.data.element.Properties) Random(java.util.Random) CoreKeyBloomFunctor(uk.gov.gchq.gaffer.accumulostore.key.core.impl.CoreKeyBloomFunctor) FileSystem(org.apache.hadoop.fs.FileSystem) Value(org.apache.accumulo.core.data.Value) Edge(uk.gov.gchq.gaffer.data.element.Edge) File(java.io.File) RFile(org.apache.accumulo.core.file.rfile.RFile) Key(org.apache.accumulo.core.data.Key) HashSet(java.util.HashSet)

Example 98 with Key

use of org.apache.accumulo.core.data.Key in project Gaffer by gchq.

the class AbstractAccumuloElementConverterTest method shouldReturnAccumuloKeyConverterFromBasicEntity.

/**
 * Round-trips a basic entity through the converter: the entity is turned
 * into a key, the key is turned back into an element, and the vertex is
 * expected to be unchanged.
 */
@Test
public void shouldReturnAccumuloKeyConverterFromBasicEntity() throws SchemaException, AccumuloElementConversionException, IOException {
    // Given: an entity whose vertex is "3"
    final Entity originalEntity = new Entity(TestGroups.ENTITY);
    originalEntity.setVertex("3");

    // When: it is converted into a key
    final Key entityKey = converter.getKeyFromEntity(originalEntity);

    // Then: the element rebuilt from that key carries the same vertex
    final Entity rebuiltEntity = (Entity) converter.getElementFromKey(entityKey);
    assertEquals("3", rebuiltEntity.getVertex());
}
Also used : Entity(uk.gov.gchq.gaffer.data.element.Entity) Key(org.apache.accumulo.core.data.Key) Test(org.junit.Test)

Example 99 with Key

use of org.apache.accumulo.core.data.Key in project Gaffer by gchq.

the class AbstractAccumuloElementConverterTest method shouldReturnAccumuloKeyConverterFromBasicEdge.

/**
 * Checks that an element can be retrieved from a key that has been created
 * correctly: a directed edge is converted into keys, and the element rebuilt
 * from the first key must have the same source, destination and direction.
 */
@Test
public void shouldReturnAccumuloKeyConverterFromBasicEdge() throws SchemaException, AccumuloElementConversionException, IOException {
    // Given: a directed edge from "1" to "2"
    final Edge originalEdge = new Edge(TestGroups.EDGE);
    originalEdge.setSource("1");
    originalEdge.setDestination("2");
    originalEdge.setDirected(true);

    // When: the edge is converted into its pair of keys
    final Pair<Key> edgeKeys = converter.getKeysFromElement(originalEdge);
    final Key firstKey = edgeKeys.getFirst();

    // Then: the first key converts back into an equivalent edge
    final Edge rebuiltEdge = (Edge) converter.getElementFromKey(firstKey);
    assertEquals("1", rebuiltEdge.getSource());
    assertEquals("2", rebuiltEdge.getDestination());
    assertEquals(true, rebuiltEdge.isDirected());
}
Also used : Edge(uk.gov.gchq.gaffer.data.element.Edge) Key(org.apache.accumulo.core.data.Key) Test(org.junit.Test)

Example 100 with Key

use of org.apache.accumulo.core.data.Key in project Gaffer by gchq.

the class AbstractAccumuloElementConverterTest method shouldSkipNullPropertyValuesWhenCreatingAccumuloKey.

/**
 * Checks that a property explicitly set to null on an edge is still null
 * when the properties are read back from the column qualifier of the
 * first generated key.
 */
@Test
public void shouldSkipNullPropertyValuesWhenCreatingAccumuloKey() throws SchemaException, AccumuloElementConversionException, IOException {
    // Given: a directed edge with a null column-qualifier property
    final Edge edgeWithNullProperty = new Edge(TestGroups.EDGE);
    edgeWithNullProperty.setSource("1");
    edgeWithNullProperty.setDestination("2");
    edgeWithNullProperty.setDirected(true);
    edgeWithNullProperty.putProperty(AccumuloPropertyNames.COLUMN_QUALIFIER, null);

    // When: the edge is converted and the column qualifier of its first key is decoded
    final Pair<Key> edgeKeys = converter.getKeysFromElement(edgeWithNullProperty);
    final Key firstKey = edgeKeys.getFirst();
    final byte[] columnQualifierBytes = firstKey.getColumnQualifierData().getBackingArray();
    final Properties decodedProperties = converter.getPropertiesFromColumnQualifier(TestGroups.EDGE, columnQualifierBytes);

    // Then: the property is absent from the decoded properties
    assertEquals(null, decodedProperties.get(AccumuloPropertyNames.COLUMN_QUALIFIER));
}
Also used : Properties(uk.gov.gchq.gaffer.data.element.Properties) Edge(uk.gov.gchq.gaffer.data.element.Edge) Key(org.apache.accumulo.core.data.Key) Test(org.junit.Test)

Aggregations

Key (org.apache.accumulo.core.data.Key)111 Test (org.junit.Test)66 Value (org.apache.accumulo.core.data.Value)64 Edge (uk.gov.gchq.gaffer.data.element.Edge)44 Range (org.apache.accumulo.core.data.Range)32 HashMap (java.util.HashMap)28 Element (uk.gov.gchq.gaffer.data.element.Element)23 Text (org.apache.hadoop.io.Text)20 Mutation (org.apache.accumulo.core.data.Mutation)17 Authorizations (org.apache.accumulo.core.security.Authorizations)16 Scanner (org.apache.accumulo.core.client.Scanner)15 Entity (uk.gov.gchq.gaffer.data.element.Entity)15 Entry (java.util.Map.Entry)14 IteratorSetting (org.apache.accumulo.core.client.IteratorSetting)11 ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc)11 ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc)11 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)11 ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc)11 AccumuloException (org.apache.accumulo.core.client.AccumuloException)10 BatchWriter (org.apache.accumulo.core.client.BatchWriter)10