Search in sources :

Example 21 with FreqMap

use of uk.gov.gchq.gaffer.types.FreqMap in project Gaffer by gchq.

the class AggregateDataForGroupTest method aggregateDataForGroupTest.

/**
 * Runs {@link AggregateDataForGroup} for the entity group over two Parquet files written by
 * {@code generateData}, then checks the first 20 rows of the output (sorted by vertex).
 *
 * @param tempDir temporary directory that holds both the input files and the output folder
 * @throws Exception if generating, aggregating or reading back the data fails
 */
@Test
public void aggregateDataForGroupTest(@TempDir java.nio.file.Path tempDir) throws Exception {
    // Given
    final SchemaUtils schemaUtils = new SchemaUtils(TestUtils.gafferSchema("schemaUsingLongVertexType"));
    final String firstInput = tempDir.resolve("inputdata1.parquet").toString();
    final String secondInput = tempDir.resolve("inputdata2.parquet").toString();
    generateData(firstInput, schemaUtils);
    generateData(secondInput, schemaUtils);
    final SparkSession spark = SparkSessionProvider.getSparkSession();
    final List<String> inputPaths = new ArrayList<>(Sets.newHashSet(firstInput, secondInput));
    final String aggregatedDir = tempDir.resolve("aggregated").toString();
    final AggregateDataForGroup task = new AggregateDataForGroup(
            FileSystem.get(new Configuration()),
            schemaUtils,
            TestGroups.ENTITY,
            inputPaths,
            aggregatedDir,
            spark);

    // When
    task.call();

    // Then
    final FileSystem fileSystem = FileSystem.get(new Configuration());
    assertTrue(fileSystem.exists(new Path(aggregatedDir)));

    // The expected frequency map is the same for every row, so it is built once up front.
    final FreqMap expectedFreqMap = new FreqMap();
    expectedFreqMap.put("A", 4L);
    expectedFreqMap.put("B", 2L);
    expectedFreqMap.put("C", 2L);

    final Row[] rows = (Row[]) spark.read().parquet(aggregatedDir).sort(ParquetStore.VERTEX).collect();
    for (int index = 0; index < 20; index++) {
        final Row row = rows[index];
        assertEquals((long) index, (long) row.getAs(ParquetStore.VERTEX));
        assertEquals('b', ((byte[]) row.getAs("byte"))[0]);
        assertEquals(14f, row.getAs("float"), 0.01f);
        assertEquals(11L * 2 * index, (long) row.getAs("long"));
        assertEquals(26, (int) row.getAs("short"));
        assertEquals(TestUtils.DATE.getTime(), (long) row.getAs("date"));
        assertEquals(4, (int) row.getAs("count"));
        assertArrayEquals(new String[] { "A", "B", "C" }, (String[]) ((WrappedArray<String>) row.getAs("treeSet")).array());
        assertEquals(JavaConversions$.MODULE$.mapAsScalaMap(expectedFreqMap), row.getAs("freqMap"));
    }
}
Also used : Path(org.apache.hadoop.fs.Path) SparkSession(org.apache.spark.sql.SparkSession) Configuration(org.apache.hadoop.conf.Configuration) FreqMap(uk.gov.gchq.gaffer.types.FreqMap) AggregateDataForGroup(uk.gov.gchq.gaffer.parquetstore.operation.handler.utilities.AggregateDataForGroup) ArrayList(java.util.ArrayList) WrappedArray(scala.collection.mutable.WrappedArray) FileSystem(org.apache.hadoop.fs.FileSystem) Row(org.apache.spark.sql.Row) Test(org.junit.jupiter.api.Test)

Example 22 with FreqMap

use of uk.gov.gchq.gaffer.types.FreqMap in project Gaffer by gchq.

the class WriteUnsortedDataTest method createEdgeForEdgeGroup.

/**
 * Creates an edge through the four-argument overload and then multiplies its
 * "float", "long", "short" and "count" properties, and every value of its
 * "freqMap" property, by {@code multiplier}.
 *
 * @param source      source vertex passed to the four-argument overload
 * @param destination destination vertex passed to the four-argument overload
 * @param directed    directed flag passed to the four-argument overload
 * @param date        date passed to the four-argument overload
 * @param multiplier  factor applied to each numeric property and each frequency-map value
 * @return the edge with its scaled properties
 */
public static Edge createEdgeForEdgeGroup(final long source, final long destination, final boolean directed, final Date date, final short multiplier) {
    final Edge edge = createEdgeForEdgeGroup(source, destination, directed, date);
    edge.putProperty("float", ((float) edge.getProperty("float")) * multiplier);
    edge.putProperty("long", ((long) edge.getProperty("long")) * multiplier);
    // A cast binds tighter than '*', and short * short is promoted to int. The cast therefore
    // has to wrap the whole product, otherwise the property is stored as an Integer, not a Short.
    edge.putProperty("short", (short) (((short) edge.getProperty("short")) * multiplier));
    edge.putProperty("count", ((int) edge.getProperty("count")) * multiplier);
    final FreqMap freqMap = (FreqMap) edge.getProperty("freqMap");
    for (final Map.Entry<String, Long> entry : freqMap.entrySet()) {
        // Update through the entry rather than calling put() on the map being iterated.
        entry.setValue(entry.getValue() * multiplier);
    }
    edge.putProperty("freqMap", freqMap);
    return edge;
}
Also used : FreqMap(uk.gov.gchq.gaffer.types.FreqMap) Edge(uk.gov.gchq.gaffer.data.element.Edge) Map(java.util.Map)

Example 23 with FreqMap

use of uk.gov.gchq.gaffer.types.FreqMap in project Gaffer by gchq.

the class GetDataFrameOfElementsHandlerTest method getElementsWithNonStandardProperties.

/**
 * Builds a two-element list: an entity on vertex "A" and a directed edge from "B" to "C",
 * each carrying a "freqMap" and an "hllpp" property.
 *
 * @return a list holding the entity followed by the edge
 */
private static List<Element> getElementsWithNonStandardProperties() {
    final List<Element> result = new ArrayList<>();

    // Entity: properties are supplied through the builder.
    final FreqMap entityFrequencies = new FreqMap();
    entityFrequencies.put("W", 10L);
    entityFrequencies.put("X", 100L);
    final HyperLogLogPlus entitySketch = new HyperLogLogPlus(5, 5);
    entitySketch.offer("AAA");
    final Entity entity = new Entity.Builder()
            .group(TestGroups.ENTITY)
            .vertex("A")
            .property("freqMap", entityFrequencies)
            .property("hllpp", entitySketch)
            .build();
    result.add(entity);

    // Edge: properties are attached after the edge has been built.
    final Edge edge = new Edge.Builder()
            .group(TestGroups.EDGE)
            .source("B")
            .dest("C")
            .directed(true)
            .build();
    final FreqMap edgeFrequencies = new FreqMap();
    edgeFrequencies.put("Y", 1000L);
    edgeFrequencies.put("Z", 10000L);
    edge.putProperty("freqMap", edgeFrequencies);
    final HyperLogLogPlus edgeSketch = new HyperLogLogPlus(5, 5);
    edgeSketch.offer("AAA");
    edgeSketch.offer("BBB");
    edge.putProperty("hllpp", edgeSketch);
    result.add(edge);

    return result;
}
Also used : Entity(uk.gov.gchq.gaffer.data.element.Entity) FreqMap(uk.gov.gchq.gaffer.types.FreqMap) HyperLogLogPlus(com.clearspring.analytics.stream.cardinality.HyperLogLogPlus) Element(uk.gov.gchq.gaffer.data.element.Element) ArrayList(java.util.ArrayList) Edge(uk.gov.gchq.gaffer.data.element.Edge)

Example 24 with FreqMap

use of uk.gov.gchq.gaffer.types.FreqMap in project Gaffer by gchq.

the class GetDataFrameOfElementsHandlerTest method getElementsForUserDefinedConversion.

/**
 * Builds a two-element list: an entity on vertex "A" and a directed edge from "B" to "C".
 * Each carries a "freqMap", an "hllpp" and a "myProperty" property.
 *
 * @return a list holding the entity followed by the edge
 */
private static List<Element> getElementsForUserDefinedConversion() {
    final List<Element> result = new ArrayList<>();

    // Entity: all three properties are supplied through the builder.
    final FreqMap entityFrequencies = new FreqMap();
    entityFrequencies.put("W", 10L);
    entityFrequencies.put("X", 100L);
    final HyperLogLogPlus entitySketch = new HyperLogLogPlus(5, 5);
    entitySketch.offer("AAA");
    final Entity entity = new Entity.Builder()
            .group(TestGroups.ENTITY)
            .vertex("A")
            .property("freqMap", entityFrequencies)
            .property("hllpp", entitySketch)
            .property("myProperty", new MyProperty(10))
            .build();
    result.add(entity);

    // Edge: properties are attached after the edge has been built.
    final Edge edge = new Edge.Builder()
            .group(TestGroups.EDGE)
            .source("B")
            .dest("C")
            .directed(true)
            .build();
    final FreqMap edgeFrequencies = new FreqMap();
    edgeFrequencies.put("Y", 1000L);
    edgeFrequencies.put("Z", 10000L);
    edge.putProperty("freqMap", edgeFrequencies);
    final HyperLogLogPlus edgeSketch = new HyperLogLogPlus(5, 5);
    edgeSketch.offer("AAA");
    edgeSketch.offer("BBB");
    edge.putProperty("hllpp", edgeSketch);
    edge.putProperty("myProperty", new MyProperty(50));
    result.add(edge);

    return result;
}
Also used : Entity(uk.gov.gchq.gaffer.data.element.Entity) FreqMap(uk.gov.gchq.gaffer.types.FreqMap) HyperLogLogPlus(com.clearspring.analytics.stream.cardinality.HyperLogLogPlus) Element(uk.gov.gchq.gaffer.data.element.Element) ArrayList(java.util.ArrayList) Edge(uk.gov.gchq.gaffer.data.element.Edge)

Example 25 with FreqMap

use of uk.gov.gchq.gaffer.types.FreqMap in project Gaffer by gchq.

the class AbstractAccumuloElementConverterTest method shouldDeserialiseEdgeIdWithQueriedDestVertex.

/**
 * Takes the second key the converter produces for a directed edge and checks that
 * {@code getElementId} turns it into an {@link EdgeSeed} matched on the destination vertex.
 */
@Test
public void shouldDeserialiseEdgeIdWithQueriedDestVertex() {
    // Given
    final EdgeId expected = new EdgeSeed("vertex1", "vertex2", true, EdgeId.MatchedVertex.DESTINATION);
    final Edge directedEdge = new Edge.Builder()
            .source("vertex1")
            .dest("vertex2")
            .directed(true)
            .group(TestGroups.ENTITY)
            .property(TestPropertyNames.PROP_1, new FreqMap())
            .property(TestPropertyNames.PROP_2, new FreqMap())
            .build();
    final Key secondKey = converter.getKeysFromEdge(directedEdge).getSecond();

    // When
    final ElementId actual = converter.getElementId(secondKey, false);

    // Then
    assertEquals(expected, actual);
}
Also used : FreqMap(uk.gov.gchq.gaffer.types.FreqMap) EdgeId(uk.gov.gchq.gaffer.data.element.id.EdgeId) EdgeSeed(uk.gov.gchq.gaffer.operation.data.EdgeSeed) Edge(uk.gov.gchq.gaffer.data.element.Edge) Key(org.apache.accumulo.core.data.Key) ElementId(uk.gov.gchq.gaffer.data.element.id.ElementId) Test(org.junit.jupiter.api.Test)

Aggregations

FreqMap (uk.gov.gchq.gaffer.types.FreqMap): 62, Test (org.junit.jupiter.api.Test): 29, ArrayList (java.util.ArrayList): 9, Edge (uk.gov.gchq.gaffer.data.element.Edge): 9, Entity (uk.gov.gchq.gaffer.data.element.Entity): 9, Date (java.util.Date): 6, Schema (uk.gov.gchq.gaffer.store.schema.Schema): 6, FunctionTest (uk.gov.gchq.koryphe.function.FunctionTest): 6, Configuration (org.apache.hadoop.conf.Configuration): 5, FileSystem (org.apache.hadoop.fs.FileSystem): 5, Path (org.apache.hadoop.fs.Path): 5, Row (org.apache.spark.sql.Row): 5, SparkSession (org.apache.spark.sql.SparkSession): 5, WrappedArray (scala.collection.mutable.WrappedArray): 5, Element (uk.gov.gchq.gaffer.data.element.Element): 5, HashMap (java.util.HashMap): 4, Key (org.apache.accumulo.core.data.Key): 4, Test (org.junit.Test): 4, EdgeId (uk.gov.gchq.gaffer.data.element.id.EdgeId): 4, ElementId (uk.gov.gchq.gaffer.data.element.id.ElementId): 4