Example 26 with Key

Use of org.apache.accumulo.core.data.Key in project Gaffer by gchq.

The class CoreKeyGroupByAggregatorIteratorTest, method shouldAggregateEverythingWhenGroupByIsSetToBlank:

public void shouldAggregateEverythingWhenGroupByIsSetToBlank(final AccumuloStore store, final AccumuloElementConverter elementConverter) throws StoreException, AccumuloElementConversionException {
    final String visibilityString = "public";
    try {
        // Create edge
        final Edge edge = new Edge(TestGroups.EDGE);
        edge.setSource("1");
        edge.setDestination("2");
        edge.setDirected(true);
        edge.putProperty(AccumuloPropertyNames.COLUMN_QUALIFIER, 1);
        edge.putProperty(AccumuloPropertyNames.COLUMN_QUALIFIER_2, 1);
        edge.putProperty(AccumuloPropertyNames.PROP_1, 0);
        edge.putProperty(AccumuloPropertyNames.PROP_2, 0);
        edge.putProperty(AccumuloPropertyNames.PROP_3, 0);
        edge.putProperty(AccumuloPropertyNames.PROP_4, 0);
        edge.putProperty(AccumuloPropertyNames.COUNT, 1);
        final Edge edge2 = new Edge(TestGroups.EDGE);
        edge2.setSource("1");
        edge2.setDestination("2");
        edge2.setDirected(true);
        edge2.putProperty(AccumuloPropertyNames.COLUMN_QUALIFIER, 1);
        edge2.putProperty(AccumuloPropertyNames.COLUMN_QUALIFIER_2, 1);
        edge2.putProperty(AccumuloPropertyNames.PROP_1, 0);
        edge2.putProperty(AccumuloPropertyNames.PROP_2, 0);
        edge2.putProperty(AccumuloPropertyNames.PROP_3, 0);
        edge2.putProperty(AccumuloPropertyNames.PROP_4, 0);
        edge2.putProperty(AccumuloPropertyNames.COUNT, 1);
        final Edge edge3 = new Edge(TestGroups.EDGE);
        edge3.setSource("1");
        edge3.setDestination("2");
        edge3.setDirected(true);
        edge3.putProperty(AccumuloPropertyNames.COLUMN_QUALIFIER, 1);
        edge3.putProperty(AccumuloPropertyNames.COLUMN_QUALIFIER_2, 1);
        edge3.putProperty(AccumuloPropertyNames.PROP_1, 0);
        edge3.putProperty(AccumuloPropertyNames.PROP_2, 0);
        edge3.putProperty(AccumuloPropertyNames.PROP_3, 0);
        edge3.putProperty(AccumuloPropertyNames.PROP_4, 0);
        edge3.putProperty(AccumuloPropertyNames.COUNT, 1);
        // Edges 1-3 above share an identical key, so they are reduced into a single
        // entry: their column qualifier (CQ) contributes only once, while their values
        // (e.g. COUNT) are still combined.
        final Edge edge4 = new Edge(TestGroups.EDGE);
        edge4.setSource("1");
        edge4.setDestination("2");
        edge4.setDirected(true);
        edge4.putProperty(AccumuloPropertyNames.COLUMN_QUALIFIER, 1);
        edge4.putProperty(AccumuloPropertyNames.COLUMN_QUALIFIER_2, 4);
        edge4.putProperty(AccumuloPropertyNames.PROP_1, 0);
        edge4.putProperty(AccumuloPropertyNames.PROP_2, 0);
        edge4.putProperty(AccumuloPropertyNames.PROP_3, 0);
        edge4.putProperty(AccumuloPropertyNames.PROP_4, 0);
        edge4.putProperty(AccumuloPropertyNames.COUNT, 2);
        final Edge edge5 = new Edge(TestGroups.EDGE);
        edge5.setSource("1");
        edge5.setDestination("2");
        edge5.setDirected(true);
        edge5.putProperty(AccumuloPropertyNames.COLUMN_QUALIFIER, 3);
        edge5.putProperty(AccumuloPropertyNames.COLUMN_QUALIFIER_2, 5);
        edge5.putProperty(AccumuloPropertyNames.PROP_1, 0);
        edge5.putProperty(AccumuloPropertyNames.PROP_2, 0);
        edge5.putProperty(AccumuloPropertyNames.PROP_3, 0);
        edge5.putProperty(AccumuloPropertyNames.PROP_4, 0);
        edge5.putProperty(AccumuloPropertyNames.COUNT, 10);
        final Edge edge6 = new Edge(TestGroups.EDGE);
        edge6.setSource("1");
        edge6.setDestination("2");
        edge6.setDirected(true);
        edge6.putProperty(AccumuloPropertyNames.COLUMN_QUALIFIER, 3);
        edge6.putProperty(AccumuloPropertyNames.COLUMN_QUALIFIER_2, 5);
        edge6.putProperty(AccumuloPropertyNames.PROP_1, 0);
        edge6.putProperty(AccumuloPropertyNames.PROP_2, 0);
        edge6.putProperty(AccumuloPropertyNames.PROP_3, 0);
        edge6.putProperty(AccumuloPropertyNames.PROP_4, 0);
        edge6.putProperty(AccumuloPropertyNames.COUNT, 5);
        // Accumulo key
        final Key key = elementConverter.getKeysFromEdge(edge).getFirst();
        final Key key2 = elementConverter.getKeysFromEdge(edge2).getFirst();
        final Key key3 = elementConverter.getKeysFromEdge(edge3).getFirst();
        final Key key4 = elementConverter.getKeysFromEdge(edge4).getFirst();
        final Key key5 = elementConverter.getKeysFromEdge(edge5).getFirst();
        final Key key6 = elementConverter.getKeysFromEdge(edge6).getFirst();
        // Accumulo values
        final Value value1 = elementConverter.getValueFromProperties(TestGroups.EDGE, edge.getProperties());
        final Value value2 = elementConverter.getValueFromProperties(TestGroups.EDGE, edge2.getProperties());
        final Value value3 = elementConverter.getValueFromProperties(TestGroups.EDGE, edge3.getProperties());
        final Value value4 = elementConverter.getValueFromProperties(TestGroups.EDGE, edge4.getProperties());
        final Value value5 = elementConverter.getValueFromProperties(TestGroups.EDGE, edge5.getProperties());
        final Value value6 = elementConverter.getValueFromProperties(TestGroups.EDGE, edge6.getProperties());
        // Create mutation
        final Mutation m1 = new Mutation(key.getRow());
        m1.put(key.getColumnFamily(), key.getColumnQualifier(), new ColumnVisibility(key.getColumnVisibility()), key.getTimestamp(), value1);
        final Mutation m2 = new Mutation(key2.getRow());
        m2.put(key2.getColumnFamily(), key2.getColumnQualifier(), new ColumnVisibility(key2.getColumnVisibility()), key2.getTimestamp(), value2);
        // Each mutation uses its own key's row; all six edges share the same
        // source/destination/direction, so the rows are identical in any case.
        final Mutation m3 = new Mutation(key3.getRow());
        m3.put(key3.getColumnFamily(), key3.getColumnQualifier(), new ColumnVisibility(key3.getColumnVisibility()), key3.getTimestamp(), value3);
        final Mutation m4 = new Mutation(key4.getRow());
        m4.put(key4.getColumnFamily(), key4.getColumnQualifier(), new ColumnVisibility(key4.getColumnVisibility()), key4.getTimestamp(), value4);
        final Mutation m5 = new Mutation(key5.getRow());
        m5.put(key5.getColumnFamily(), key5.getColumnQualifier(), new ColumnVisibility(key5.getColumnVisibility()), key5.getTimestamp(), value5);
        final Mutation m6 = new Mutation(key6.getRow());
        m6.put(key6.getColumnFamily(), key6.getColumnQualifier(), new ColumnVisibility(key6.getColumnVisibility()), key6.getTimestamp(), value6);
        // Write mutation
        final BatchWriterConfig writerConfig = new BatchWriterConfig();
        writerConfig.setMaxMemory(1000000L);
        writerConfig.setMaxLatency(1000L, TimeUnit.MILLISECONDS);
        writerConfig.setMaxWriteThreads(1);
        final BatchWriter writer = store.getConnection().createBatchWriter(store.getProperties().getTable(), writerConfig);
        writer.addMutation(m1);
        writer.addMutation(m2);
        writer.addMutation(m3);
        writer.addMutation(m4);
        writer.addMutation(m5);
        writer.addMutation(m6);
        writer.close();
        Edge expectedEdge1 = new Edge(TestGroups.EDGE);
        expectedEdge1.setSource("1");
        expectedEdge1.setDestination("2");
        expectedEdge1.setDirected(true);
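        // Expected aggregates: edges 1-3 collapse to one key and edges 5-6 to another,
        // so the column qualifier properties contribute once per distinct key
        // (1 + 1 + 3 = 5 and 1 + 4 + 5 = 10), while COUNT is combined across the
        // values of all six edges (1 + 1 + 1 + 2 + 10 + 5 = 20).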
        expectedEdge1.putProperty(AccumuloPropertyNames.COLUMN_QUALIFIER, 5);
        expectedEdge1.putProperty(AccumuloPropertyNames.COLUMN_QUALIFIER_2, 10);
        expectedEdge1.putProperty(AccumuloPropertyNames.COUNT, 20);
        expectedEdge1.putProperty(AccumuloPropertyNames.PROP_1, 0);
        expectedEdge1.putProperty(AccumuloPropertyNames.PROP_2, 0);
        expectedEdge1.putProperty(AccumuloPropertyNames.PROP_3, 0);
        expectedEdge1.putProperty(AccumuloPropertyNames.PROP_4, 0);
        // Read data back and check we get one merged element
        final Authorizations authorizations = new Authorizations(visibilityString);
        final Scanner scanner = store.getConnection().createScanner(store.getProperties().getTable(), authorizations);
        final IteratorSetting iteratorSetting = new IteratorSettingBuilder(
                AccumuloStoreConstants.COLUMN_QUALIFIER_AGGREGATOR_ITERATOR_PRIORITY,
                "KeyCombiner",
                CoreKeyGroupByAggregatorIterator.class)
                .all()
                .view(new View.Builder()
                        .edge(TestGroups.EDGE, new ViewElementDefinition.Builder()
                                .groupBy()
                                .build())
                        .build())
                .schema(store.getSchema())
                .keyConverter(store.getKeyPackage().getKeyConverter())
                .build();
        scanner.addScanIterator(iteratorSetting);
        final Iterator<Entry<Key, Value>> it = scanner.iterator();
        Entry<Key, Value> entry = it.next();
        Element readEdge = elementConverter.getFullElement(entry.getKey(), entry.getValue());
        assertEquals(expectedEdge1, readEdge);
        assertEquals(5, readEdge.getProperty(AccumuloPropertyNames.COLUMN_QUALIFIER));
        assertEquals(10, readEdge.getProperty(AccumuloPropertyNames.COLUMN_QUALIFIER_2));
        assertEquals(20, readEdge.getProperty(AccumuloPropertyNames.COUNT));
        if (it.hasNext()) {
            fail("Additional row found.");
        }
    } catch (AccumuloException | TableNotFoundException e) {
        fail(this.getClass().getSimpleName() + " failed with exception: " + e);
    }
}
Also used: Scanner(org.apache.accumulo.core.client.Scanner) AccumuloException(org.apache.accumulo.core.client.AccumuloException) Authorizations(org.apache.accumulo.core.security.Authorizations) IteratorSettingBuilder(uk.gov.gchq.gaffer.accumulostore.utils.IteratorSettingBuilder) Element(uk.gov.gchq.gaffer.data.element.Element) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) Entry(java.util.Map.Entry) IteratorSetting(org.apache.accumulo.core.client.IteratorSetting) Value(org.apache.accumulo.core.data.Value) BatchWriterConfig(org.apache.accumulo.core.client.BatchWriterConfig) Mutation(org.apache.accumulo.core.data.Mutation) ColumnVisibility(org.apache.accumulo.core.security.ColumnVisibility) BatchWriter(org.apache.accumulo.core.client.BatchWriter) Edge(uk.gov.gchq.gaffer.data.element.Edge) Key(org.apache.accumulo.core.data.Key)
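
For context: this is a parameterised helper rather than a test itself, so a concrete JUnit test in the suite would presumably drive it once per key package. A minimal sketch of such drivers, assuming hypothetical byteEntityStore/gaffer1KeyStore fixtures and their matching element converters (the names are illustrative, not Gaffer's actual fields):

@Test
public void shouldAggregateEverythingWhenGroupByIsSetToBlankInByteEntityStore() throws Exception {
    // byteEntityStore and byteEntityElementConverter are assumed test fixtures.
    shouldAggregateEverythingWhenGroupByIsSetToBlank(byteEntityStore, byteEntityElementConverter);
}

@Test
public void shouldAggregateEverythingWhenGroupByIsSetToBlankInGaffer1Store() throws Exception {
    // gaffer1KeyStore and gaffer1ElementConverter are assumed test fixtures.
    shouldAggregateEverythingWhenGroupByIsSetToBlank(gaffer1KeyStore, gaffer1ElementConverter);
}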

Example 27 with Key

Use of org.apache.accumulo.core.data.Key in project Gaffer by gchq.

The class SampleDataAndCreateSplitsFileTool, method run:

@Override
public int run(final String[] strings) throws OperationException {
    try {
        LOGGER.info("Creating job using SampleDataForSplitPointsJobFactory");
        job = new SampleDataForSplitPointsJobFactory().createJob(operation, store);
    } catch (final IOException e) {
        LOGGER.error("Failed to create Hadoop job: {}", e.getMessage());
        throw new OperationException("Failed to create the Hadoop job: " + e.getMessage(), e);
    }
    try {
        LOGGER.info("Running SampleDataForSplitPoints job (job name is {})", job.getJobName());
        job.waitForCompletion(true);
    } catch (final IOException | InterruptedException | ClassNotFoundException e) {
        LOGGER.error("Exception running job: {}", e.getMessage());
        throw new OperationException("Error while waiting for job to complete: " + e.getMessage(), e);
    }
    try {
        if (!job.isSuccessful()) {
            LOGGER.error("Job was not successful (job name is {})", job.getJobName());
            throw new OperationException("Error running job");
        }
    } catch (final IOException e) {
        LOGGER.error("Exception running job: {}", e.getMessage());
        throw new OperationException("Error running job" + e.getMessage(), e);
    }
    // Find the number of records output
    // NB In the following line use mapred.Task.Counter.REDUCE_OUTPUT_RECORDS rather than
    // mapreduce.TaskCounter.REDUCE_OUTPUT_RECORDS as this is more compatible with earlier
    // versions of Hadoop.
    Counter counter;
    try {
        counter = job.getCounters().findCounter(Task.Counter.REDUCE_OUTPUT_RECORDS);
        LOGGER.info("Number of records output = {}", counter);
    } catch (final IOException e) {
        LOGGER.error("Failed to get counter org.apache.hadoop.mapred.Task.Counter.REDUCE_OUTPUT_RECORDS from job: {}", e.getMessage());
        throw new OperationException("Failed to get counter: " + Task.Counter.REDUCE_OUTPUT_RECORDS, e);
    }
    int numberTabletServers;
    try {
        numberTabletServers = store.getConnection().instanceOperations().getTabletServers().size();
        LOGGER.info("Number of tablet servers is {}", numberTabletServers);
    } catch (final StoreException e) {
        LOGGER.error("Exception thrown getting number of tablet servers: {}", e.getMessage());
        throw new OperationException(e.getMessage(), e);
    }
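    // Note: this assumes more than one tablet server; with a single tablet server
    // (numberTabletServers - 1) is zero and the division below would throw an
    // ArithmeticException.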
    long outputEveryNthRecord = counter.getValue() / (numberTabletServers - 1);
    final Path resultsFile = new Path(operation.getOutputPath(), "part-r-00000");
    LOGGER.info("Will output every {}-th record from {}", outputEveryNthRecord, resultsFile);
    // Read through resulting file, pick out the split points and write to file.
    final Configuration conf = getConf();
    final FileSystem fs;
    try {
        fs = FileSystem.get(conf);
    } catch (final IOException e) {
        LOGGER.error("Exception getting filesystem: {}", e.getMessage());
        throw new OperationException("Failed to get filesystem from configuration: " + e.getMessage(), e);
    }
    LOGGER.info("Writing splits to {}", operation.getResultingSplitsFilePath());
    final Key key = new Key();
    final Value value = new Value();
    long count = 0;
    int numberSplitPointsOutput = 0;
    try (final SequenceFile.Reader reader = new SequenceFile.Reader(fs, resultsFile, conf);
        final PrintStream splitsWriter = new PrintStream(new BufferedOutputStream(fs.create(new Path(operation.getResultingSplitsFilePath()), true)), false, CommonConstants.UTF_8)) {
        while (reader.next(key, value) && numberSplitPointsOutput < numberTabletServers - 1) {
            count++;
            if (count % outputEveryNthRecord == 0) {
                LOGGER.debug("Outputting split point number {} ({})", numberSplitPointsOutput, Base64.encodeBase64(key.getRow().getBytes()));
                numberSplitPointsOutput++;
                splitsWriter.println(new String(Base64.encodeBase64(key.getRow().getBytes()), CommonConstants.UTF_8));
            }
        }
        LOGGER.info("Total number of records read was {}", count);
    } catch (final IOException e) {
        LOGGER.error("Exception reading results file and outputting split points: {}", e.getMessage());
        throw new OperationException(e.getMessage(), e);
    }
    try {
        fs.delete(resultsFile, true);
        LOGGER.info("Deleted the results file {}", resultsFile);
    } catch (final IOException e) {
        LOGGER.error("Failed to delete the results file {}", resultsFile);
        throw new OperationException("Failed to delete the results file: " + e.getMessage(), e);
    }
    return SUCCESS_RESPONSE;
}
Also used: SampleDataForSplitPointsJobFactory(uk.gov.gchq.gaffer.accumulostore.operation.hdfs.handler.job.factory.SampleDataForSplitPointsJobFactory) Path(org.apache.hadoop.fs.Path) PrintStream(java.io.PrintStream) Configuration(org.apache.hadoop.conf.Configuration) IOException(java.io.IOException) StoreException(uk.gov.gchq.gaffer.store.StoreException) Counter(org.apache.hadoop.mapreduce.Counter) SequenceFile(org.apache.hadoop.io.SequenceFile) FileSystem(org.apache.hadoop.fs.FileSystem) Value(org.apache.accumulo.core.data.Value) BufferedOutputStream(java.io.BufferedOutputStream) OperationException(uk.gov.gchq.gaffer.operation.OperationException) Key(org.apache.accumulo.core.data.Key)
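
The tool above only writes the splits file; it does not apply it. A minimal sketch of the consuming side, reading the Base64-encoded row keys back and pre-splitting a table through the standard Accumulo API (connector, tableName and splitsPath are illustrative assumptions, not part of the Gaffer source):

// Assumed imports: java.io.BufferedReader, java.io.InputStreamReader,
// java.nio.charset.StandardCharsets, java.util.SortedSet, java.util.TreeSet,
// org.apache.accumulo.core.client.Connector, org.apache.commons.codec.binary.Base64,
// org.apache.hadoop.fs.FileSystem, org.apache.hadoop.fs.Path, org.apache.hadoop.io.Text
void applySplits(final FileSystem fs, final Path splitsPath, final Connector connector, final String tableName) throws Exception {
    final SortedSet<Text> splits = new TreeSet<>();
    try (BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(splitsPath), StandardCharsets.UTF_8))) {
        String line;
        while ((line = reader.readLine()) != null) {
            // Each line holds one Base64-encoded row key, as written by the tool above.
            splits.add(new Text(Base64.decodeBase64(line)));
        }
    }
    connector.tableOperations().addSplits(tableName, splits);
}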

Example 28 with Key

Use of org.apache.accumulo.core.data.Key in project Gaffer by gchq.

The class SampleDataForSplitPointsMapper, method map:

protected void map(final Element element, final Context context) throws IOException, InterruptedException {
    if (Math.random() < proportionToSample) {
        context.getCounter("Split points", "Number sampled").increment(1L);
        final Pair<Key> keyPair;
        try {
            keyPair = elementConverter.getKeysFromElement(element);
        } catch (final AccumuloElementConversionException e) {
            throw new IllegalArgumentException(e.getMessage(), e);
        }
        final Value value;
        try {
            value = elementConverter.getValueFromElement(element);
        } catch (final AccumuloElementConversionException e) {
            throw new IllegalArgumentException(e.getMessage(), e);
        }
        context.write(keyPair.getFirst(), value);
        if (keyPair.getSecond() != null) {
            context.write(keyPair.getSecond(), value);
        }
    } else {
        context.getCounter("Split points", "Number not sampled").increment(1L);
    }
}
Also used: Value(org.apache.accumulo.core.data.Value) Key(org.apache.accumulo.core.data.Key) AccumuloElementConversionException(uk.gov.gchq.gaffer.accumulostore.key.exception.AccumuloElementConversionException)
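
Because the mapper keeps each element with probability proportionToSample, the sample size is only expected, not exact, and each sampled edge may emit two keys. A back-of-the-envelope sketch of the arithmetic (plain Java; all values are illustrative):

// Illustrative numbers only.
final double proportionToSample = 0.001;                            // keep roughly 1 in 1000 elements
final long inputElements = 50_000_000L;
final double expectedSamples = inputElements * proportionToSample;  // ~50,000 elements
// Each sampled edge writes up to two (Key, Value) pairs, so the reducer can
// see as many as 2 * expectedSamples records.
System.out.printf("expect about %.0f sampled elements%n", expectedSamples);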

Example 29 with Key

Use of org.apache.accumulo.core.data.Key in project Gaffer by gchq.

The class ByteEntityRangeElementPropertyFilterIteratorTest, method shouldOnlyAcceptUndirectedEdges:

@Test
public void shouldOnlyAcceptUndirectedEdges() throws OperationException, AccumuloElementConversionException {
    // Given
    final ByteEntityRangeElementPropertyFilterIterator filter = new ByteEntityRangeElementPropertyFilterIterator();
    final Map<String, String> options = new HashMap<String, String>() {

        {
            put(AccumuloStoreConstants.UNDIRECTED_EDGE_ONLY, "true");
            put(AccumuloStoreConstants.OUTGOING_EDGE_ONLY, "true");
        }
    };
    filter.validateOptions(options);
    // value should not be used
    final Value value = null;
    // When / Then
    for (final Element element : ELEMENTS) {
        final boolean expectedResult = element instanceof Edge && !((Edge) element).isDirected();
        final Pair<Key> keys = converter.getKeysFromElement(element);
        assertEquals("Failed for element: " + element.toString(), expectedResult, filter.accept(keys.getFirst(), value));
        if (null != keys.getSecond()) {
            // self elements are not added the other way round
            assertEquals("Failed for element: " + element.toString(), expectedResult, filter.accept(keys.getSecond(), value));
        }
    }
}
Also used: HashMap(java.util.HashMap) ByteEntityRangeElementPropertyFilterIterator(uk.gov.gchq.gaffer.accumulostore.key.core.impl.byteEntity.ByteEntityRangeElementPropertyFilterIterator) Element(uk.gov.gchq.gaffer.data.element.Element) Value(org.apache.accumulo.core.data.Value) Edge(uk.gov.gchq.gaffer.data.element.Edge) Key(org.apache.accumulo.core.data.Key) Test(org.junit.Test)

Example 30 with Key

Use of org.apache.accumulo.core.data.Key in project Gaffer by gchq.

The class ByteEntityRangeElementPropertyFilterIteratorTest, method shouldOnlyAcceptDirectedEdges:

@Test
public void shouldOnlyAcceptDirectedEdges() throws OperationException, AccumuloElementConversionException {
    // Given
    final ByteEntityRangeElementPropertyFilterIterator filter = new ByteEntityRangeElementPropertyFilterIterator();
    final Map<String, String> options = new HashMap<String, String>() {

        {
            put(AccumuloStoreConstants.DIRECTED_EDGE_ONLY, "true");
        }
    };
    filter.validateOptions(options);
    // value should not be used
    final Value value = null;
    // When / Then
    for (final Element element : ELEMENTS) {
        final boolean expectedResult = element instanceof Edge && ((Edge) element).isDirected();
        final Pair<Key> keys = converter.getKeysFromElement(element);
        assertEquals("Failed for element: " + element.toString(), expectedResult, filter.accept(keys.getFirst(), value));
        if (null != keys.getSecond()) {
            // self elements are not added the other way round
            assertEquals("Failed for element: " + element.toString(), expectedResult, filter.accept(keys.getSecond(), value));
        }
    }
}
Also used: HashMap(java.util.HashMap) ByteEntityRangeElementPropertyFilterIterator(uk.gov.gchq.gaffer.accumulostore.key.core.impl.byteEntity.ByteEntityRangeElementPropertyFilterIterator) Element(uk.gov.gchq.gaffer.data.element.Element) Value(org.apache.accumulo.core.data.Value) Edge(uk.gov.gchq.gaffer.data.element.Edge) Key(org.apache.accumulo.core.data.Key) Test(org.junit.Test)
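
Examples 29 and 30 are identical apart from the option map and the expected directedness, so the shared body could be factored into one helper. A minimal consolidation sketch, assuming the same converter, ELEMENTS fixture and JUnit 4 assertions used above:

private void shouldOnlyAcceptEdgesWhereDirectedIs(final Map<String, String> options, final boolean directed) throws OperationException, AccumuloElementConversionException {
    final ByteEntityRangeElementPropertyFilterIterator filter = new ByteEntityRangeElementPropertyFilterIterator();
    filter.validateOptions(options);
    // The value is not used by the filter, so null is passed, as in the tests above.
    for (final Element element : ELEMENTS) {
        final boolean expectedResult = element instanceof Edge && ((Edge) element).isDirected() == directed;
        final Pair<Key> keys = converter.getKeysFromElement(element);
        assertEquals("Failed for element: " + element, expectedResult, filter.accept(keys.getFirst(), null));
        if (null != keys.getSecond()) {
            // Self edges are not added the other way round.
            assertEquals("Failed for element: " + element, expectedResult, filter.accept(keys.getSecond(), null));
        }
    }
}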

Aggregations

Usage counts across the indexed examples:

Key (org.apache.accumulo.core.data.Key): 111 usages
Test (org.junit.Test): 66 usages
Value (org.apache.accumulo.core.data.Value): 64 usages
Edge (uk.gov.gchq.gaffer.data.element.Edge): 44 usages
Range (org.apache.accumulo.core.data.Range): 32 usages
HashMap (java.util.HashMap): 28 usages
Element (uk.gov.gchq.gaffer.data.element.Element): 23 usages
Text (org.apache.hadoop.io.Text): 20 usages
Mutation (org.apache.accumulo.core.data.Mutation): 17 usages
Authorizations (org.apache.accumulo.core.security.Authorizations): 16 usages
Scanner (org.apache.accumulo.core.client.Scanner): 15 usages
Entity (uk.gov.gchq.gaffer.data.element.Entity): 15 usages
Entry (java.util.Map.Entry): 14 usages
IteratorSetting (org.apache.accumulo.core.client.IteratorSetting): 11 usages
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc): 11 usages
ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc): 11 usages
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 11 usages
ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc): 11 usages
AccumuloException (org.apache.accumulo.core.client.AccumuloException): 10 usages
BatchWriter (org.apache.accumulo.core.client.BatchWriter): 10 usages