Use of org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema in project Gaffer by gchq.
The mergeEdgeRowsTest method of the AggregateGafferRowsFunctionTest class:
@Test
public void mergeEdgeRowsTest() throws OperationException, SerialisationException {
    final String group = "BasicEdge";
    final SchemaElementDefinition elementSchema = utils.getGafferSchema().getElement(group);
    final byte[] aggregatorJson = JSONSerialiser.serialise(elementSchema.getIngestAggregator());
    final GafferGroupObjectConverter converter = utils.getConverter(group);
    final String[] gafferProperties = new String[elementSchema.getProperties().size()];
    elementSchema.getProperties().toArray(gafferProperties);
    final AggregateGafferRowsFunction aggregator = new AggregateGafferRowsFunction(gafferProperties, false, elementSchema.getGroupBy(), utils.getColumnToPaths(group), aggregatorJson, converter);
    final GenericRowWithSchema row1 = DataGen.generateEdgeRow(utils, group, "src", "dst", true, (byte) 'a', 0.2, 3f, TestUtils.getTreeSet1(), 5L, (short) 6, TestUtils.DATE, TestUtils.getFreqMap1(), null);
    final GenericRowWithSchema row2 = DataGen.generateEdgeRow(utils, group, "src", "dst", true, (byte) 'c', 0.7, 4f, TestUtils.getTreeSet2(), 7L, (short) 4, TestUtils.DATE, TestUtils.getFreqMap2(), null);
    final Row merged = aggregator.call(row1, row2);
    final List<Object> actual = new ArrayList<>(13);
    for (int i = 0; i < merged.length(); i++) {
        actual.add(merged.apply(i));
    }
    final List<Object> expected = new ArrayList<>(13);
    expected.add("src");
    expected.add("dst");
    expected.add(true);
    expected.add(new byte[] { (byte) 'c' });
    expected.add(0.8999999999999999);
    expected.add(7f);
    expected.add(new String[] { "A", "B", "C" });
    expected.add(12L);
    expected.add(10);
    expected.add(TestUtils.DATE.getTime());
    expected.add(JavaConversions$.MODULE$.mapAsScalaMap(TestUtils.MERGED_FREQMAP));
    expected.add(2);
    assertThat(expected).containsExactly(actual.toArray());
}
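The test invokes the function directly, but call(Row, Row) has the shape of Spark's Function2<Row, Row, Row>, so the same aggregator can be handed to a keyed reduce. The following is a minimal sketch, assuming AggregateGafferRowsFunction implements that interface and that rows sharing the same group-by values have already been keyed; the JavaSparkContext, the key string and the RDD names are illustrative, not taken from Gaffer (requires org.apache.spark.api.java.JavaPairRDD, scala.Tuple2 and java.util.Arrays):

// Illustrative sketch only: reduce rows that share a key with the aggregator built above.
// Assumes a JavaSparkContext named 'sparkContext' is available and that the key
// "src|dst|true" stands in for the edge's group-by columns.
final JavaPairRDD<String, Row> keyedRows = sparkContext.parallelizePairs(Arrays.asList(
        new Tuple2<String, Row>("src|dst|true", row1),
        new Tuple2<String, Row>("src|dst|true", row2)));
final JavaPairRDD<String, Row> mergedRows = keyedRows.reduceByKey(aggregator);
mergedRows.values().collect().forEach(row -> System.out.println("Merged row: " + row));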
The call method of the AggregateGafferRowsFunction class:
@Override
public Row call(final Row v1, final Row v2) throws SerialisationException {
    LOGGER.trace("First Row object to be aggregated: {}", v1);
    LOGGER.trace("Second Row object to be aggregated: {}", v2);
    ArrayList<Object> outputRow = new ArrayList<>(v1.size());
    if (isEntity) {
        for (final String col : columnToPaths.get(ParquetStore.VERTEX)) {
            outputRow.add(v1.getAs(col));
        }
    } else {
        for (final String col : columnToPaths.get(ParquetStore.SOURCE)) {
            outputRow.add(v1.getAs(col));
        }
        for (final String col : columnToPaths.get(ParquetStore.DESTINATION)) {
            outputRow.add(v1.getAs(col));
        }
        outputRow.add(v1.getAs(ParquetStore.DIRECTED));
    }
    // Build up Properties object for both rows containing just the objects that need merging
    final Properties prop1 = new Properties();
    final Properties prop2 = new Properties();
    for (final String propName : gafferProperties) {
        if (!groupByColumns.contains(propName)) {
            LOGGER.debug("Merging property: {}", propName);
            prop1.put(propName, objectConverter.sparkRowToGafferObject(propName, v1));
            prop2.put(propName, objectConverter.sparkRowToGafferObject(propName, v2));
        }
    }
    LOGGER.trace("First properties object to be aggregated: {}", prop1);
    LOGGER.trace("Second properties object to be aggregated: {}", prop2);
    // merge properties
    if (null == aggregator) {
        aggregator = JSONSerialiser.deserialise(aggregatorJson, ElementAggregator.class);
    }
    Properties mergedProperties = aggregator.apply(prop1, prop2);
    LOGGER.trace("Merged properties object after aggregation: {}", mergedProperties);
    // add properties to the row maintaining the order
    for (final String propName : gafferProperties) {
        if (groupByColumns.contains(propName)) {
            final String[] paths = columnToPaths.get(propName);
            if (paths[0].contains(".")) {
                outputRow.add(v1.getAs(propName));
            } else {
                for (final String column : paths) {
                    outputRow.add(v1.getAs(column));
                }
            }
        } else {
            objectConverter.addGafferObjectToSparkRow(propName, mergedProperties.get(propName), outputRow, v1.schema());
        }
    }
    final GenericRowWithSchema mergedRow = new GenericRowWithSchema(outputRow.toArray(), v1.schema());
    LOGGER.trace("Merged row: {}", mergedRow);
    return mergedRow;
}
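The final step wraps the accumulated values in a GenericRowWithSchema using the first input row's schema. Below is a self-contained sketch of that construction, using a hypothetical two-column schema and a hand-rolled merge rather than Gaffer's generated schema and ElementAggregator:

import org.apache.spark.sql.Row;
import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;

public class MergeRowsSketch {
    public static void main(final String[] args) {
        // Hypothetical schema: a vertex column kept as-is and a count column that is summed.
        final StructType schema = new StructType(new StructField[]{
                DataTypes.createStructField("vertex", DataTypes.StringType, false),
                DataTypes.createStructField("count", DataTypes.LongType, false)
        });
        final Row row1 = new GenericRowWithSchema(new Object[]{"A", 5L}, schema);
        final Row row2 = new GenericRowWithSchema(new Object[]{"A", 7L}, schema);
        // Mirror the end of call(): build the merged values in column order,
        // then wrap them with the schema of the first row.
        final Object[] mergedValues = new Object[]{
                row1.getAs("vertex"),
                row1.<Long>getAs("count") + row2.<Long>getAs("count")
        };
        final Row mergedRow = new GenericRowWithSchema(mergedValues, row1.schema());
        System.out.println(mergedRow); // prints [A,12]
    }
}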