Example 26 with FreqMap

Use of uk.gov.gchq.gaffer.types.FreqMap in project Gaffer by gchq.

From class AbstractAccumuloElementConverterTest, method shouldDeserialiseEdgeIdWithQueriedSourceVertex:

@Test
public void shouldDeserialiseEdgeIdWithQueriedSourceVertex() {
    // Given
    final EdgeId expectedElementId = new EdgeSeed("source1", "dest1", true);
    final Edge edge = new Edge.Builder()
            .source("source1")
            .dest("dest1")
            .directed(true)
            .group(TestGroups.ENTITY)
            .property(TestPropertyNames.PROP_1, new FreqMap())
            .property(TestPropertyNames.PROP_2, new FreqMap())
            .build();
    final Key key = converter.getKeysFromEdge(edge).getSecond();
    // When
    final ElementId elementId = converter.getElementId(key, false);
    // Then
    assertEquals(expectedElementId, elementId);
}
Also used: FreqMap (uk.gov.gchq.gaffer.types.FreqMap), EdgeId (uk.gov.gchq.gaffer.data.element.id.EdgeId), EdgeSeed (uk.gov.gchq.gaffer.operation.data.EdgeSeed), Edge (uk.gov.gchq.gaffer.data.element.Edge), Key (org.apache.accumulo.core.data.Key), ElementId (uk.gov.gchq.gaffer.data.element.id.ElementId), Test (org.junit.jupiter.api.Test)
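
For context, FreqMap is Gaffer's frequency-map type: it extends HashMap<String, Long> and adds upsert methods for incrementing counts. A minimal sketch of that behaviour, assuming the upsert signatures shown (the wrapping class and main method are illustrative only, not from the Gaffer sources):

import uk.gov.gchq.gaffer.types.FreqMap;

public class FreqMapSketch {
    public static void main(final String[] args) {
        final FreqMap freqMap = new FreqMap();
        // upsert(key) increments the count for the key, initialising it to 1
        freqMap.upsert("car");
        freqMap.upsert("car");
        // upsert(key, value) adds the given value to any existing count
        freqMap.upsert("bus", 5L);
        System.out.println(freqMap); // {car=2, bus=5} (iteration order not guaranteed)
    }
}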

Example 27 with FreqMap

Use of uk.gov.gchq.gaffer.types.FreqMap in project Gaffer by gchq.

From class AddElementsHandlerTest, method testOnePartitionAllGroups:

@Test
public void testOnePartitionAllGroups(@TempDir java.nio.file.Path tempDir) throws IOException, OperationException, StoreException {
    // Given
    final List<Element> elementsToAdd = new ArrayList<>();
    // - Data for TestGroups.ENTITY
    elementsToAdd.addAll(AggregateAndSortDataTest.generateData());
    elementsToAdd.addAll(AggregateAndSortDataTest.generateData());
    // - Data for TestGroups.ENTITY_2
    elementsToAdd.add(WriteUnsortedDataTest.createEntityForEntityGroup_2(10000L));
    elementsToAdd.add(WriteUnsortedDataTest.createEntityForEntityGroup_2(100L));
    elementsToAdd.add(WriteUnsortedDataTest.createEntityForEntityGroup_2(10L));
    elementsToAdd.add(WriteUnsortedDataTest.createEntityForEntityGroup_2(1L));
    // - Data for TestGroups.EDGE
    elementsToAdd.add(WriteUnsortedDataTest.createEdgeForEdgeGroup(10000L, 1000L, true, new Date(100L)));
    elementsToAdd.add(WriteUnsortedDataTest.createEdgeForEdgeGroup(100L, 100000L, false, new Date(200L)));
    elementsToAdd.add(WriteUnsortedDataTest.createEdgeForEdgeGroup(1L, 10L, true, new Date(300L)));
    elementsToAdd.add(WriteUnsortedDataTest.createEdgeForEdgeGroup(1L, 10L, true, new Date(400L)));
    elementsToAdd.add(WriteUnsortedDataTest.createEdgeForEdgeGroup(1L, 10L, false, new Date(400L)));
    elementsToAdd.add(WriteUnsortedDataTest.createEdgeForEdgeGroup(1L, 2L, false, new Date(400L)));
    // - Data for TestGroups.EDGE_2
    elementsToAdd.add(WriteUnsortedDataTest.createEdgeForEdgeGroup_2(10000L, 20L, true));
    elementsToAdd.add(WriteUnsortedDataTest.createEdgeForEdgeGroup_2(100L, 200L, false));
    elementsToAdd.add(WriteUnsortedDataTest.createEdgeForEdgeGroup_2(10L, 50L, true));
    elementsToAdd.add(WriteUnsortedDataTest.createEdgeForEdgeGroup_2(1L, 2000L, false));
    // - Shuffle the list so that the order is random
    Collections.shuffle(elementsToAdd);
    final AddElements add = new AddElements.Builder().input(elementsToAdd).build();
    final Context context = new Context();
    final Schema schema = TestUtils.gafferSchema("schemaUsingLongVertexType");
    final ParquetStoreProperties storeProperties = new ParquetStoreProperties();
    final String testDir = tempDir.toString();
    storeProperties.setDataDir(testDir + "/data");
    storeProperties.setTempFilesDir(testDir + "/tmpdata");
    final ParquetStore store = (ParquetStore) ParquetStore.createStore("graphId", schema, storeProperties);
    final FileSystem fs = FileSystem.get(new Configuration());
    final SparkSession sparkSession = SparkSessionProvider.getSparkSession();
    // When
    new AddElementsHandler().doOperation(add, context, store);
    // Then
    // - New snapshot directory should have been created.
    final long snapshotId = store.getLatestSnapshot();
    final Path snapshotPath = new Path(testDir + "/data", ParquetStore.getSnapshotPath(snapshotId));
    assertTrue(fs.exists(snapshotPath));
    // - There should be 1 file named partition-0.parquet (and an associated .crc file) in the "group=BasicEntity"
    // directory.
    assertTrue(fs.exists(new Path(snapshotPath, ParquetStore.getGroupSubDir(TestGroups.ENTITY, false) + "/" + ParquetStore.getFile(0))));
    assertTrue(fs.exists(new Path(snapshotPath, ParquetStore.getGroupSubDir(TestGroups.ENTITY, false) + "/." + ParquetStore.getFile(0) + ".crc")));
    // - The files should contain the data sorted by vertex and date.
    Row[] results = (Row[]) sparkSession.read().parquet(new Path(snapshotPath, ParquetStore.getGroupSubDir(TestGroups.ENTITY, false) + "/" + ParquetStore.getFile(0)).toString()).collect();
    assertThat(results).hasSize(40);
    for (int i = 0; i < 40; i++) {
        assertEquals((long) i / 2, (long) results[i].getAs(ParquetStore.VERTEX));
        assertEquals(i % 2 == 0 ? 'b' : 'a', ((byte[]) results[i].getAs("byte"))[0]);
        assertEquals(i % 2 == 0 ? 8f : 6f, results[i].getAs("float"), 0.01f);
        assertEquals(11L * 2 * (i / 2), (long) results[i].getAs("long"));
        assertEquals(i % 2 == 0 ? 14 : 12, (int) results[i].getAs("short"));
        assertEquals(i % 2 == 0 ? 100000L : 200000L, (long) results[i].getAs("date"));
        assertEquals(2, (int) results[i].getAs("count"));
        assertArrayEquals(i % 2 == 0 ? new String[] { "A", "C" } : new String[] { "A", "B" }, (String[]) ((WrappedArray<String>) results[i].getAs("treeSet")).array());
        final FreqMap mergedFreqMap1 = new FreqMap();
        mergedFreqMap1.put("A", 2L);
        mergedFreqMap1.put("B", 2L);
        final FreqMap mergedFreqMap2 = new FreqMap();
        mergedFreqMap2.put("A", 2L);
        mergedFreqMap2.put("C", 2L);
        assertEquals(JavaConversions$.MODULE$.mapAsScalaMap(i % 2 == 0 ? mergedFreqMap2 : mergedFreqMap1), results[i].getAs("freqMap"));
    }
    // - There should be 1 file named partition-0.parquet (and an associated .crc file) in the "group=BasicEntity2"
    // directory.
    assertTrue(fs.exists(new Path(snapshotPath, ParquetStore.getGroupSubDir(TestGroups.ENTITY_2, false) + "/" + ParquetStore.getFile(0))));
    assertTrue(fs.exists(new Path(snapshotPath, ParquetStore.getGroupSubDir(TestGroups.ENTITY_2, false) + "/." + ParquetStore.getFile(0) + ".crc")));
    // - The files should contain the data sorted by vertex.
    results = (Row[]) sparkSession.read().parquet(new Path(snapshotPath, "graph/group=BasicEntity2/" + ParquetStore.getFile(0)).toString()).collect();
    assertThat(results).hasSize(4);
    checkEntityGroup2(WriteUnsortedDataTest.createEntityForEntityGroup_2(1L), results[0]);
    checkEntityGroup2(WriteUnsortedDataTest.createEntityForEntityGroup_2(10L), results[1]);
    checkEntityGroup2(WriteUnsortedDataTest.createEntityForEntityGroup_2(100L), results[2]);
    checkEntityGroup2(WriteUnsortedDataTest.createEntityForEntityGroup_2(10000L), results[3]);
    // - There should be 1 file named partition-0.parquet (and an associated .crc file) in the "group=BasicEdge"
    // directory and in the "reversed-group=BasicEdge" directory.
    assertTrue(fs.exists(new Path(snapshotPath, ParquetStore.getGroupSubDir(TestGroups.EDGE, false) + "/" + ParquetStore.getFile(0))));
    assertTrue(fs.exists(new Path(snapshotPath, ParquetStore.getGroupSubDir(TestGroups.EDGE, false) + "/." + ParquetStore.getFile(0) + ".crc")));
    assertTrue(fs.exists(new Path(snapshotPath, ParquetStore.getGroupSubDir(TestGroups.EDGE, true) + "/" + ParquetStore.getFile(0))));
    assertTrue(fs.exists(new Path(snapshotPath, ParquetStore.getGroupSubDir(TestGroups.EDGE, true) + "/." + ParquetStore.getFile(0) + ".crc")));
    // - The files should contain the data sorted by source, destination, directed, date
    results = (Row[]) sparkSession.read().parquet(new Path(snapshotPath, ParquetStore.getGroupSubDir(TestGroups.EDGE, false) + "/" + ParquetStore.getFile(0)).toString()).collect();
    assertThat(results).hasSize(6);
    checkEdge(WriteUnsortedDataTest.createEdgeForEdgeGroup(1L, 2L, false, new Date(400L)), results[0]);
    checkEdge(WriteUnsortedDataTest.createEdgeForEdgeGroup(1L, 10L, false, new Date(400L)), results[1]);
    checkEdge(WriteUnsortedDataTest.createEdgeForEdgeGroup(1L, 10L, true, new Date(300L)), results[2]);
    checkEdge(WriteUnsortedDataTest.createEdgeForEdgeGroup(1L, 10L, true, new Date(400L)), results[3]);
    checkEdge(WriteUnsortedDataTest.createEdgeForEdgeGroup(100L, 100000L, false, new Date(200L)), results[4]);
    checkEdge(WriteUnsortedDataTest.createEdgeForEdgeGroup(10000L, 1000L, true, new Date(100L)), results[5]);
    results = (Row[]) sparkSession.read().parquet(new Path(snapshotPath, ParquetStore.getGroupSubDir(TestGroups.EDGE, true) + "/" + ParquetStore.getFile(0)).toString()).collect();
    assertThat(results).hasSize(6);
    checkEdge(WriteUnsortedDataTest.createEdgeForEdgeGroup(1L, 2L, false, new Date(400L)), results[0]);
    checkEdge(WriteUnsortedDataTest.createEdgeForEdgeGroup(1L, 10L, false, new Date(400L)), results[1]);
    checkEdge(WriteUnsortedDataTest.createEdgeForEdgeGroup(1L, 10L, true, new Date(300L)), results[2]);
    checkEdge(WriteUnsortedDataTest.createEdgeForEdgeGroup(1L, 10L, true, new Date(400L)), results[3]);
    checkEdge(WriteUnsortedDataTest.createEdgeForEdgeGroup(10000L, 1000L, true, new Date(100L)), results[4]);
    checkEdge(WriteUnsortedDataTest.createEdgeForEdgeGroup(100L, 100000L, false, new Date(200L)), results[5]);
    // - There should be 1 file named partition-0.parquet (and an associated .crc file) in the "group=BasicEdge2"
    // directory and in the "reversed-group=BasicEdge2" directory.
    assertTrue(fs.exists(new Path(snapshotPath, ParquetStore.getGroupSubDir(TestGroups.EDGE_2, false) + "/" + ParquetStore.getFile(0))));
    assertTrue(fs.exists(new Path(snapshotPath, ParquetStore.getGroupSubDir(TestGroups.EDGE_2, false) + "/." + ParquetStore.getFile(0) + ".crc")));
    // - The files should contain the data sorted by source, destination, directed
    results = (Row[]) sparkSession.read().parquet(new Path(snapshotPath, ParquetStore.getGroupSubDir(TestGroups.EDGE_2, false) + "/" + ParquetStore.getFile(0)).toString()).collect();
    assertThat(results).hasSize(4);
    checkEdge(WriteUnsortedDataTest.createEdgeForEdgeGroup_2(1L, 2000L, false), results[0]);
    checkEdge(WriteUnsortedDataTest.createEdgeForEdgeGroup_2(10L, 50L, true), results[1]);
    checkEdge(WriteUnsortedDataTest.createEdgeForEdgeGroup_2(100L, 200L, false), results[2]);
    checkEdge(WriteUnsortedDataTest.createEdgeForEdgeGroup_2(10000L, 20L, true), results[3]);
    results = (Row[]) sparkSession.read().parquet(new Path(snapshotPath, ParquetStore.getGroupSubDir(TestGroups.EDGE_2, true) + "/" + ParquetStore.getFile(0)).toString()).collect();
    assertThat(results).hasSize(4);
    checkEdge(WriteUnsortedDataTest.createEdgeForEdgeGroup_2(10000L, 20L, true), results[0]);
    checkEdge(WriteUnsortedDataTest.createEdgeForEdgeGroup_2(10L, 50L, true), results[1]);
    checkEdge(WriteUnsortedDataTest.createEdgeForEdgeGroup_2(100L, 200L, false), results[2]);
    checkEdge(WriteUnsortedDataTest.createEdgeForEdgeGroup_2(1L, 2000L, false), results[3]);
}
Also used: AddElements (uk.gov.gchq.gaffer.operation.impl.add.AddElements), Context (uk.gov.gchq.gaffer.store.Context), ParquetStore (uk.gov.gchq.gaffer.parquetstore.ParquetStore), Path (org.apache.hadoop.fs.Path), SparkSession (org.apache.spark.sql.SparkSession), Configuration (org.apache.hadoop.conf.Configuration), FreqMap (uk.gov.gchq.gaffer.types.FreqMap), Element (uk.gov.gchq.gaffer.data.element.Element), Schema (uk.gov.gchq.gaffer.store.schema.Schema), ArrayList (java.util.ArrayList), Date (java.util.Date), WrappedArray (scala.collection.mutable.WrappedArray), ParquetStoreProperties (uk.gov.gchq.gaffer.parquetstore.ParquetStoreProperties), FileSystem (org.apache.hadoop.fs.FileSystem), Row (org.apache.spark.sql.Row), WriteUnsortedDataTest (uk.gov.gchq.gaffer.parquetstore.utils.WriteUnsortedDataTest), Test (org.junit.jupiter.api.Test), AggregateAndSortDataTest (uk.gov.gchq.gaffer.parquetstore.utils.AggregateAndSortDataTest)
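
The mergedFreqMap assertions above reflect that the same data was added twice and then aggregated, so each per-key count sums to 2. A minimal sketch of that merge semantics, written as a plain per-key sum (in a real Gaffer schema the merge is configured as a binary operator on the freqMap property; this standalone helper is illustrative):

import java.util.Map;

import uk.gov.gchq.gaffer.types.FreqMap;

public class FreqMapMergeSketch {
    // Merges b into a by summing the counts per key, mirroring the
    // aggregated values asserted in the test above.
    public static FreqMap merge(final FreqMap a, final FreqMap b) {
        for (final Map.Entry<String, Long> entry : b.entrySet()) {
            a.upsert(entry.getKey(), entry.getValue());
        }
        return a;
    }

    public static void main(final String[] args) {
        final FreqMap first = new FreqMap();
        first.upsert("A");
        first.upsert("B");
        final FreqMap second = new FreqMap();
        second.upsert("A");
        second.upsert("B");
        System.out.println(merge(first, second)); // {A=2, B=2}
    }
}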

Example 28 with FreqMap

Use of uk.gov.gchq.gaffer.types.FreqMap in project Gaffer by gchq.

From class DataGen, method generateEntityRow:

public static GenericRowWithSchema generateEntityRow(final SchemaUtils utils, final String group, final String vertex,
        final Byte aByte, final Double aDouble, final Float aFloat, final TreeSet<String> treeSet, final Long aLong,
        final Short aShort, final Date date, final FreqMap freqMap, final String visibility) throws SerialisationException {
    final GafferGroupObjectConverter entityConverter = new GafferGroupObjectConverter(
            group,
            utils.getCoreProperties(group),
            utils.getCorePropertiesForReversedEdges(),
            utils.getColumnToSerialiser(group),
            utils.getSerialisers(),
            utils.getColumnToPaths(group));
    final List<Object> list = new ArrayList<>();
    final scala.collection.mutable.Map<String, Long> map = new scala.collection.mutable.HashMap<>();
    for (final Map.Entry<String, Long> entry : freqMap.entrySet()) {
        map.put(entry.getKey(), entry.getValue());
    }
    list.addAll(Arrays.asList(entityConverter.gafferObjectToParquetObjects(ParquetStore.VERTEX, vertex)));
    list.addAll(Arrays.asList(entityConverter.gafferObjectToParquetObjects("byte", aByte)));
    list.addAll(Arrays.asList(entityConverter.gafferObjectToParquetObjects("double", aDouble)));
    list.addAll(Arrays.asList(entityConverter.gafferObjectToParquetObjects("float", aFloat)));
    list.add(WrappedArray$.MODULE$.make(entityConverter.gafferObjectToParquetObjects("treeSet", treeSet)[0]));
    list.addAll(Arrays.asList(entityConverter.gafferObjectToParquetObjects("long", aLong)));
    list.addAll(Arrays.asList(entityConverter.gafferObjectToParquetObjects("short", aShort)));
    list.addAll(Arrays.asList(entityConverter.gafferObjectToParquetObjects("date", date)));
    list.add(map);
    list.addAll(Arrays.asList(entityConverter.gafferObjectToParquetObjects("count", 1)));
    if (null != visibility) {
        list.addAll(Arrays.asList(entityConverter.gafferObjectToParquetObjects(TestTypes.VISIBILITY, visibility)));
    }
    final Object[] objects = new Object[list.size()];
    list.toArray(objects);
    return new GenericRowWithSchema(objects, utils.getSparkSchema(group));
}
Also used: ArrayList (java.util.ArrayList), GafferGroupObjectConverter (uk.gov.gchq.gaffer.parquetstore.utils.GafferGroupObjectConverter), GenericRowWithSchema (org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema), FreqMap (uk.gov.gchq.gaffer.types.FreqMap), Map (java.util.Map)
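
The loop copying the FreqMap into a scala.collection.mutable.HashMap exists because Spark's Row API expects Scala collections for map-typed columns; the earlier test compares the "freqMap" column via JavaConversions for the same reason. Here is that conversion isolated as a helper, assuming nothing beyond the Scala library on the classpath (the helper class name is illustrative):

import java.util.Map;

import uk.gov.gchq.gaffer.types.FreqMap;

public final class FreqMapConversions {
    // Copies a FreqMap into a Scala mutable map, as done inline in
    // generateEntityRow above.
    public static scala.collection.mutable.Map<String, Long> toScalaMap(final FreqMap freqMap) {
        final scala.collection.mutable.Map<String, Long> map =
                new scala.collection.mutable.HashMap<>();
        for (final Map.Entry<String, Long> entry : freqMap.entrySet()) {
            map.put(entry.getKey(), entry.getValue());
        }
        return map;
    }
}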

Example 29 with FreqMap

Use of uk.gov.gchq.gaffer.types.FreqMap in project Gaffer by gchq.

From class RoadTrafficStringElementGenerator, method _apply:

@Override
public Iterable<Element> _apply(final String line) {
    final String[] fields = extractFields(line);
    if (null == fields) {
        return Collections.emptyList();
    }
    // Extract required fields
    final FreqMap vehicleCountsByType = getVehicleCounts(fields);
    final Date startDate = getDate(fields[dCount.ordinal()], fields[Hour.ordinal()]);
    final Date endDate = null != startDate ? DateUtils.addMilliseconds(DateUtils.addHours(startDate, 1), -1) : null;
    final String region = fields[Region_Name.ordinal()];
    final String location = fields[ONS_LA_Name.ordinal()];
    final String road = fields[Road.ordinal()];
    final String junctionA = road + ":" + fields[A_Junction.ordinal()];
    final String junctionB = road + ":" + fields[B_Junction.ordinal()];
    final String junctionALocation = fields[A_Ref_E.ordinal()] + "," + fields[A_Ref_N.ordinal()];
    final String junctionBLocation = fields[B_Ref_E.ordinal()] + "," + fields[B_Ref_N.ordinal()];
    final List<Edge> edges = Arrays.asList(
            new Edge.Builder().group(ElementGroup.REGION_CONTAINS_LOCATION).source(region).dest(location).directed(true).build(),
            new Edge.Builder().group(ElementGroup.LOCATION_CONTAINS_ROAD).source(location).dest(road).directed(true).build(),
            new Edge.Builder().group(ElementGroup.ROAD_HAS_JUNCTION).source(road).dest(junctionA).directed(true).build(),
            new Edge.Builder().group(ElementGroup.ROAD_HAS_JUNCTION).source(road).dest(junctionB).directed(true).build(),
            new Edge.Builder().group(ElementGroup.JUNCTION_LOCATED_AT).source(junctionA).dest(junctionALocation).directed(true).build(),
            new Edge.Builder().group(ElementGroup.JUNCTION_LOCATED_AT).source(junctionB).dest(junctionBLocation).directed(true).build(),
            new Edge.Builder().group(ElementGroup.ROAD_USE)
                    .source(junctionA)
                    .dest(junctionB)
                    .directed(true)
                    .property("startDate", startDate)
                    .property("endDate", endDate)
                    .property("count", getTotalCount(vehicleCountsByType))
                    .property("countByVehicleType", vehicleCountsByType)
                    .build());
    final List<Entity> entities = Arrays.asList(
            new Entity.Builder().group(ElementGroup.JUNCTION_USE)
                    .vertex(junctionA)
                    .property("countByVehicleType", vehicleCountsByType)
                    .property("endDate", endDate)
                    .property("startDate", startDate)
                    .property("count", getTotalCount(vehicleCountsByType))
                    .build(),
            new Entity.Builder().group(ElementGroup.JUNCTION_USE)
                    .vertex(junctionB)
                    .property("countByVehicleType", vehicleCountsByType)
                    .property("endDate", endDate)
                    .property("startDate", startDate)
                    .property("count", getTotalCount(vehicleCountsByType))
                    .build());
    final List<Entity> cardinalityEntities = createCardinalities(edges);
    return new ChainedIterable<>(edges, entities, cardinalityEntities);
}
Also used: Entity (uk.gov.gchq.gaffer.data.element.Entity), FreqMap (uk.gov.gchq.gaffer.types.FreqMap), ChainedIterable (uk.gov.gchq.gaffer.commonutil.iterable.ChainedIterable), Edge (uk.gov.gchq.gaffer.data.element.Edge), Date (java.util.Date)
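
getVehicleCounts is referenced but not shown above. A hypothetical sketch of how such a helper might build the countByVehicleType FreqMap with upsert follows; the idea of a contiguous run of count columns and the parameter names are assumptions for illustration, not details taken from the real generator:

import uk.gov.gchq.gaffer.types.FreqMap;

public class VehicleCountsSketch {
    // Hypothetical helper: one count column per vehicle type, starting at
    // firstCountField, accumulated into a FreqMap keyed by type name.
    static FreqMap getVehicleCounts(final String[] fields,
                                    final String[] vehicleTypes,
                                    final int firstCountField) {
        final FreqMap counts = new FreqMap();
        for (int i = 0; i < vehicleTypes.length; i++) {
            counts.upsert(vehicleTypes[i], Long.parseLong(fields[firstCountField + i]));
        }
        return counts;
    }
}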

Example 30 with FreqMap

Use of uk.gov.gchq.gaffer.types.FreqMap in project Gaffer by gchq.

From class MapFilterExample, method freqMapIsMoreThan2:

public void freqMapIsMoreThan2() {
    // ---------------------------------------------------------
    final MapFilter function = new MapFilter("key1", new IsMoreThan(2L));
    // ---------------------------------------------------------
    final FreqMap map1 = new FreqMap();
    map1.put("key1", 1L);
    final FreqMap map2 = new FreqMap();
    map2.put("key1", 2L);
    final FreqMap map3 = new FreqMap();
    map3.put("key1", 3L);
    final FreqMap map4 = new FreqMap();
    map4.put("key1", 3L);
    map4.put("key2", 0L);
    final FreqMap map5 = new FreqMap();
    map5.put("key2", 3L);
    runExample(function, map1, map2, map3, map4, map5);
}
Also used: FreqMap (uk.gov.gchq.gaffer.types.FreqMap), MapFilter (uk.gov.gchq.gaffer.function.MapFilter), IsMoreThan (uk.gov.gchq.gaffer.function.filter.IsMoreThan)
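
With IsMoreThan(2L) applied to the value under "key1", we would expect only map3 and map4 to pass: 1L and 2L fail the strict comparison, and map5 has no "key1" entry at all. A plain re-implementation of that check for illustration (not the MapFilter internals):

import uk.gov.gchq.gaffer.types.FreqMap;

public class MapFilterSketch {
    // Illustrative: true only if the map holds a value for key that is
    // strictly greater than the threshold.
    static boolean isMoreThan(final FreqMap map, final String key, final long threshold) {
        final Long value = map.get(key);
        return null != value && value > threshold;
    }

    public static void main(final String[] args) {
        final FreqMap map3 = new FreqMap();
        map3.upsert("key1", 3L);
        System.out.println(isMoreThan(map3, "key1", 2L)); // true

        final FreqMap map5 = new FreqMap();
        map5.upsert("key2", 3L);
        System.out.println(isMoreThan(map5, "key1", 2L)); // false, no "key1" entry
    }
}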

Aggregations

FreqMap (uk.gov.gchq.gaffer.types.FreqMap): 62 usages
Test (org.junit.jupiter.api.Test): 29
ArrayList (java.util.ArrayList): 9
Edge (uk.gov.gchq.gaffer.data.element.Edge): 9
Entity (uk.gov.gchq.gaffer.data.element.Entity): 9
Date (java.util.Date): 6
Schema (uk.gov.gchq.gaffer.store.schema.Schema): 6
FunctionTest (uk.gov.gchq.koryphe.function.FunctionTest): 6
Configuration (org.apache.hadoop.conf.Configuration): 5
FileSystem (org.apache.hadoop.fs.FileSystem): 5
Path (org.apache.hadoop.fs.Path): 5
Row (org.apache.spark.sql.Row): 5
SparkSession (org.apache.spark.sql.SparkSession): 5
WrappedArray (scala.collection.mutable.WrappedArray): 5
Element (uk.gov.gchq.gaffer.data.element.Element): 5
HashMap (java.util.HashMap): 4
Key (org.apache.accumulo.core.data.Key): 4
Test (org.junit.Test): 4
EdgeId (uk.gov.gchq.gaffer.data.element.id.EdgeId): 4
ElementId (uk.gov.gchq.gaffer.data.element.id.ElementId): 4