Search in sources:

Example 6 with GenericRowWithSchema

Usage of org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema in the Gaffer project by gchq.

Taken from the class AggregateGafferRowsFunctionTest, method mergeEdgeRowsTest.

/**
 * Merges two "BasicEdge" rows that share the same source, destination and directed flag
 * through {@code AggregateGafferRowsFunction.call} and checks every column of the merged row.
 *
 * <p>The aggregation itself is driven by the ingest aggregator of the test schema, which is
 * serialised to JSON and handed to the function under test.
 *
 * @throws OperationException declared by the original test; presumably raised by the test utilities
 * @throws SerialisationException if the ingest aggregator cannot be serialised or the merge fails
 */
@Test
public void mergeEdgeRowsTest() throws OperationException, SerialisationException {
    final String group = "BasicEdge";
    final SchemaElementDefinition elementSchema = utils.getGafferSchema().getElement(group);
    final byte[] aggregatorJson = JSONSerialiser.serialise(elementSchema.getIngestAggregator());
    final GafferGroupObjectConverter converter = utils.getConverter(group);
    final String[] gafferProperties = new String[elementSchema.getProperties().size()];
    elementSchema.getProperties().toArray(gafferProperties);
    final AggregateGafferRowsFunction aggregator = new AggregateGafferRowsFunction(gafferProperties, false, elementSchema.getGroupBy(), utils.getColumnToPaths(group), aggregatorJson, converter);
    final GenericRowWithSchema row1 = DataGen.generateEdgeRow(utils, group, "src", "dst", true, (byte) 'a', 0.2, 3f, TestUtils.getTreeSet1(), 5L, (short) 6, TestUtils.DATE, TestUtils.getFreqMap1(), null);
    final GenericRowWithSchema row2 = DataGen.generateEdgeRow(utils, group, "src", "dst", true, (byte) 'c', 0.7, 4f, TestUtils.getTreeSet2(), 7L, (short) 4, TestUtils.DATE, TestUtils.getFreqMap2(), null);

    final Row merged = aggregator.call(row1, row2);

    // Flatten the merged row into a list so it can be compared column by column.
    final List<Object> actual = new ArrayList<>(merged.length());
    for (int i = 0; i < merged.length(); i++) {
        actual.add(merged.apply(i));
    }

    // Twelve expected column values, in row order.
    final List<Object> expected = new ArrayList<>(12);
    expected.add("src");
    expected.add("dst");
    expected.add(true);
    expected.add(new byte[] { (byte) 'c' });
    // 0.2 and 0.7 combined in double arithmetic; the exact literal is intentional.
    expected.add(0.8999999999999999);
    expected.add(7f);
    expected.add(new String[] { "A", "B", "C" });
    expected.add(12L);
    expected.add(10);
    expected.add(TestUtils.DATE.getTime());
    expected.add(JavaConversions$.MODULE$.mapAsScalaMap(TestUtils.MERGED_FREQMAP));
    expected.add(2);

    // The value under test goes into assertThat(...) so that failure messages
    // report the merged row as "actual" and the hand-written values as "expected".
    assertThat(actual).containsExactly(expected.toArray());
}
Also used : GenericRowWithSchema(org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema) ArrayList(java.util.ArrayList) Row(org.apache.spark.sql.Row) AggregateGafferRowsFunction(uk.gov.gchq.gaffer.parquetstore.operation.handler.utilities.AggregateGafferRowsFunction) SchemaElementDefinition(uk.gov.gchq.gaffer.store.schema.SchemaElementDefinition) Test(org.junit.jupiter.api.Test)

Example 7 with GenericRowWithSchema

Usage of org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema in the Gaffer project by gchq.

Taken from the class AggregateGafferRowsFunction, method call.

/**
 * Combines two rows into a single row that carries the schema of {@code v1}.
 *
 * <p>The identifying columns (vertex, or source/destination/directed) and every group-by
 * property are copied from {@code v1}. All remaining properties are read from both rows,
 * combined with the element aggregator, and written back in the order given by
 * {@code gafferProperties}.
 *
 * @param v1 the first row; also the source of the copied columns and of the output schema
 * @param v2 the second row; only its non-group-by properties are read
 * @return a new {@link GenericRowWithSchema} holding the merged values
 * @throws SerialisationException declared for the aggregator deserialisation and the
 *         object conversion calls made here
 */
@Override
public Row call(final Row v1, final Row v2) throws SerialisationException {
    LOGGER.trace("First Row object to be aggregated: {}", v1);
    LOGGER.trace("Second Row object to be aggregated: {}", v2);
    final ArrayList<Object> mergedValues = new ArrayList<>(v1.size());

    // Identifying columns are always taken from the first row.
    if (!isEntity) {
        for (final String sourceColumn : columnToPaths.get(ParquetStore.SOURCE)) {
            mergedValues.add(v1.getAs(sourceColumn));
        }
        for (final String destinationColumn : columnToPaths.get(ParquetStore.DESTINATION)) {
            mergedValues.add(v1.getAs(destinationColumn));
        }
        mergedValues.add(v1.getAs(ParquetStore.DIRECTED));
    } else {
        for (final String vertexColumn : columnToPaths.get(ParquetStore.VERTEX)) {
            mergedValues.add(v1.getAs(vertexColumn));
        }
    }

    // Collect, per row, only the properties that actually need aggregating.
    final Properties firstProperties = new Properties();
    final Properties secondProperties = new Properties();
    for (final String name : gafferProperties) {
        if (groupByColumns.contains(name)) {
            continue;
        }
        LOGGER.debug("Merging property: {}", name);
        firstProperties.put(name, objectConverter.sparkRowToGafferObject(name, v1));
        secondProperties.put(name, objectConverter.sparkRowToGafferObject(name, v2));
    }
    LOGGER.trace("First properties object to be aggregated: {}", firstProperties);
    LOGGER.trace("Second properties object to be aggregated: {}", secondProperties);

    // The aggregator is rebuilt from its JSON form the first time it is needed.
    if (aggregator == null) {
        aggregator = JSONSerialiser.deserialise(aggregatorJson, ElementAggregator.class);
    }
    final Properties aggregated = aggregator.apply(firstProperties, secondProperties);
    LOGGER.trace("Merged properties object after aggregation: {}", aggregated);

    // Append the properties in their declared order so the values line up with the schema.
    for (final String name : gafferProperties) {
        if (!groupByColumns.contains(name)) {
            objectConverter.addGafferObjectToSparkRow(name, aggregated.get(name), mergedValues, v1.schema());
            continue;
        }
        final String[] paths = columnToPaths.get(name);
        if (!paths[0].contains(".")) {
            for (final String path : paths) {
                mergedValues.add(v1.getAs(path));
            }
        } else {
            // A dotted first path is read as one value under the property name itself
            // (presumably a nested column - confirm against the converter).
            mergedValues.add(v1.getAs(name));
        }
    }

    final GenericRowWithSchema result = new GenericRowWithSchema(mergedValues.toArray(), v1.schema());
    LOGGER.trace("Merged row: {}", result);
    return result;
}
Also used : ArrayList(java.util.ArrayList) GenericRowWithSchema(org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema) Properties(uk.gov.gchq.gaffer.data.element.Properties) ElementAggregator(uk.gov.gchq.gaffer.data.element.function.ElementAggregator)

Aggregations

ArrayList (java.util.ArrayList): 7; GenericRowWithSchema (org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema): 7; Map (java.util.Map): 4; HashMap (java.util.HashMap): 2; Row (org.apache.spark.sql.Row): 2; Test (org.junit.jupiter.api.Test): 2; AggregateGafferRowsFunction (uk.gov.gchq.gaffer.parquetstore.operation.handler.utilities.AggregateGafferRowsFunction): 2; GafferGroupObjectConverter (uk.gov.gchq.gaffer.parquetstore.utils.GafferGroupObjectConverter): 2; SchemaElementDefinition (uk.gov.gchq.gaffer.store.schema.SchemaElementDefinition): 2; FreqMap (uk.gov.gchq.gaffer.types.FreqMap): 2; DataType (org.apache.spark.sql.types.DataType): 1; StructType (org.apache.spark.sql.types.StructType): 1; WrappedArray (scala.collection.mutable.WrappedArray): 1; Properties (uk.gov.gchq.gaffer.data.element.Properties): 1; ElementAggregator (uk.gov.gchq.gaffer.data.element.function.ElementAggregator): 1; SerialisationException (uk.gov.gchq.gaffer.exception.SerialisationException): 1