Search in sources:

Example 1 with GafferGroupObjectConverter

Use of uk.gov.gchq.gaffer.parquetstore.utils.GafferGroupObjectConverter in project Gaffer by gchq.

From the class DataGen, method generateEntityRow:

public static GenericRowWithSchema generateEntityRow(final SchemaUtils utils, final String group, final String vertex, final Byte aByte, final Double aDouble, final Float aFloat, final TreeSet<String> treeSet, final Long aLong, final Short aShort, final Date date, final FreqMap freqMap, final String visibility) throws SerialisationException {
    final GafferGroupObjectConverter entityConverter = new GafferGroupObjectConverter(group, utils.getCoreProperties(group), utils.getCorePropertiesForReversedEdges(), utils.getColumnToSerialiser(group), utils.getSerialisers(), utils.getColumnToPaths(group));
    final List<Object> list = new ArrayList<>();
    // Copy the FreqMap entries into a Scala mutable Map for use in the Spark row
    final scala.collection.mutable.Map<String, Long> map = new scala.collection.mutable.HashMap<>();
    for (final Map.Entry<String, Long> entry : freqMap.entrySet()) {
        map.put(entry.getKey(), entry.getValue());
    }
    list.addAll(Arrays.asList(entityConverter.gafferObjectToParquetObjects(ParquetStore.VERTEX, vertex)));
    list.addAll(Arrays.asList(entityConverter.gafferObjectToParquetObjects("byte", aByte)));
    list.addAll(Arrays.asList(entityConverter.gafferObjectToParquetObjects("double", aDouble)));
    list.addAll(Arrays.asList(entityConverter.gafferObjectToParquetObjects("float", aFloat)));
    list.add(WrappedArray$.MODULE$.make(entityConverter.gafferObjectToParquetObjects("treeSet", treeSet)[0]));
    list.addAll(Arrays.asList(entityConverter.gafferObjectToParquetObjects("long", aLong)));
    list.addAll(Arrays.asList(entityConverter.gafferObjectToParquetObjects("short", aShort)));
    list.addAll(Arrays.asList(entityConverter.gafferObjectToParquetObjects("date", date)));
    list.add(map);
    list.addAll(Arrays.asList(entityConverter.gafferObjectToParquetObjects("count", 1)));
    if (null != visibility) {
        list.addAll(Arrays.asList(entityConverter.gafferObjectToParquetObjects(TestTypes.VISIBILITY, visibility)));
    }
    final Object[] objects = new Object[list.size()];
    list.toArray(objects);
    return new GenericRowWithSchema(objects, utils.getSparkSchema(group));
}
Also used: ArrayList (java.util.ArrayList), GafferGroupObjectConverter (uk.gov.gchq.gaffer.parquetstore.utils.GafferGroupObjectConverter), GenericRowWithSchema (org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema), FreqMap (uk.gov.gchq.gaffer.types.FreqMap), Map (java.util.Map)
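
A minimal sketch of how generateEntityRow might be invoked from a test. Only the method signature above is taken from the example; the group name "BasicEntity", the property values and the availability of a SchemaUtils instance `utils` for the test schema are assumptions for illustration.

// Hedged usage sketch: `utils` (a SchemaUtils for the test schema) is assumed to exist;
// the group name "BasicEntity" and the property values are illustrative.
final FreqMap freqMap = new FreqMap();
freqMap.put("a", 1L);
freqMap.put("b", 2L);
final TreeSet<String> treeSet = new TreeSet<>(Arrays.asList("x", "y"));
final GenericRowWithSchema entityRow = DataGen.generateEntityRow(
        utils, "BasicEntity", "vertex1",
        (byte) 1, 2.0, 3.0f, treeSet,
        4L, (short) 5, new Date(), freqMap, "public");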

Example 2 with GafferGroupObjectConverter

Use of uk.gov.gchq.gaffer.parquetstore.utils.GafferGroupObjectConverter in project Gaffer by gchq.

From the class QueryGenerator, method seedToParquetObject:

private ParquetElementSeed seedToParquetObject(final ElementId seed, final String group, final boolean isEntityGroup) throws SerialisationException {
    final GafferGroupObjectConverter converter = schemaUtils.getConverter(group);
    final String column;
    if (isEntityGroup) {
        column = ParquetStore.VERTEX;
    } else {
        column = ParquetStore.SOURCE;
    }
    if (seed instanceof EntityId) {
        return new ParquetEntitySeed(seed, converter.gafferObjectToParquetObjects(column, ((EntityId) seed).getVertex()));
    } else {
        return converter.edgeIdToParquetObjects((EdgeId) seed);
    }
}
Also used: EntityId (uk.gov.gchq.gaffer.data.element.id.EntityId), GafferGroupObjectConverter (uk.gov.gchq.gaffer.parquetstore.utils.GafferGroupObjectConverter)
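
A hedged sketch of the entity branch above. The group name "BasicEntity" and the seed value are illustrative, and EntitySeed (uk.gov.gchq.gaffer.operation.data.EntitySeed) is assumed as the concrete EntityId implementation; the converter calls are the ones shown in the example.

// schemaUtils is assumed to be in scope, as in QueryGenerator above.
final GafferGroupObjectConverter converter = schemaUtils.getConverter("BasicEntity");
final EntityId entitySeed = new EntitySeed("vertex1");
// An entity group is matched on the vertex column...
final Object[] vertexObjects = converter.gafferObjectToParquetObjects(ParquetStore.VERTEX, entitySeed.getVertex());
// ...whereas an EdgeId seed would go through converter.edgeIdToParquetObjects(edgeId).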

Example 3 with GafferGroupObjectConverter

Use of uk.gov.gchq.gaffer.parquetstore.utils.GafferGroupObjectConverter in project Gaffer by gchq.

From the class WriteUnsortedData, method writeElement:

private void writeElement(final Element element) throws IOException {
    final String group = element.getGroup();
    final GafferGroupObjectConverter converter = schemaUtils.getConverter(group);
    // Get partition
    final PartitionKey partitionKey = new PartitionKey(converter.corePropertiesToParquetObjects(element));
    final int partition = graphPartitioner.getGroupPartitioner(group).getPartitionId(partitionKey);
    // Get writer
    final ParquetWriter<Element> writer = getWriter(partition, group, false);
    if (null != writer) {
        writer.write(element);
    } else {
        LOGGER.warn("Skipped the addition of an Element of group {} as that group does not exist in the schema.", group);
    }
}
Also used: Element (uk.gov.gchq.gaffer.data.element.Element), GafferGroupObjectConverter (uk.gov.gchq.gaffer.parquetstore.utils.GafferGroupObjectConverter), PartitionKey (uk.gov.gchq.gaffer.parquetstore.partitioner.PartitionKey)
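
For context, a hedged sketch of the kind of Element that writeElement routes to a partition. Edge.Builder is Gaffer's standard element builder; the group, vertices and property here are illustrative and not taken from the example.

// Illustrative only: the group, vertices and property are assumptions.
final Edge edge = new Edge.Builder()
        .group("BasicEdge")
        .source("A")
        .dest("B")
        .directed(true)
        .property("count", 1)
        .build();
// writeElement(edge) would then build a PartitionKey from the edge's core properties,
// ask the group partitioner for the partition id and write the edge with that partition's writer.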

Example 4 with GafferGroupObjectConverter

Use of uk.gov.gchq.gaffer.parquetstore.utils.GafferGroupObjectConverter in project Gaffer by gchq.

From the class WriteUnsortedData, method writeEdgeReversed:

private void writeEdgeReversed(final Edge edge) throws IOException {
    // Also write out edges partitioned as in the directory sorted by destination
    final String group = edge.getGroup();
    final GafferGroupObjectConverter converter = schemaUtils.getConverter(group);
    // Get partition
    final PartitionKey partitionKey = new PartitionKey(converter.corePropertiesToParquetObjectsForReversedEdge(edge));
    final int partition = graphPartitioner.getGroupPartitionerForReversedEdges(group).getPartitionId(partitionKey);
    // Get writer
    final ParquetWriter<Element> writer = getWriter(partition, group, true);
    if (null != writer) {
        writer.write(edge);
    } else {
        LOGGER.warn("Skipped the addition of an Element of group {} as that group does not exist in the schema.", group);
    }
}
Also used: Element (uk.gov.gchq.gaffer.data.element.Element), GafferGroupObjectConverter (uk.gov.gchq.gaffer.parquetstore.utils.GafferGroupObjectConverter), PartitionKey (uk.gov.gchq.gaffer.parquetstore.partitioner.PartitionKey)
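
A hedged sketch of how the forward and reversed writes might pair up for each edge. Only the two private methods above are taken from WriteUnsortedData; the driving loop and the `elements` iterable are illustrative assumptions.

// Illustrative pairing of the two write paths for a collection of elements.
for (final Element element : elements) {
    // Every element gets the forward write, partitioned by vertex/source.
    writeElement(element);
    if (element instanceof Edge) {
        // Edges are additionally written under the partitioning sorted by destination.
        writeEdgeReversed((Edge) element);
    }
}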

Example 5 with GafferGroupObjectConverter

Use of uk.gov.gchq.gaffer.parquetstore.utils.GafferGroupObjectConverter in project Gaffer by gchq.

From the class DataGen, method generateEdgeRow:

public static GenericRowWithSchema generateEdgeRow(final SchemaUtils utils, final String group, final String src, final String dst, final Boolean directed, final Byte aByte, final Double aDouble, final Float aFloat, final TreeSet<String> treeSet, final Long aLong, final Short aShort, final Date date, final FreqMap freqMap, final String visibility) throws SerialisationException {
    final GafferGroupObjectConverter edgeConverter = new GafferGroupObjectConverter(group, utils.getCoreProperties(group), utils.getCorePropertiesForReversedEdges(), utils.getColumnToSerialiser(group), utils.getSerialisers(), utils.getColumnToPaths(group));
    final List<Object> list = new ArrayList<>();
    // Copy the FreqMap entries into a Scala mutable Map for use in the Spark row
    final scala.collection.mutable.Map<String, Long> map = new scala.collection.mutable.HashMap<>();
    for (final Map.Entry<String, Long> entry : freqMap.entrySet()) {
        map.put(entry.getKey(), entry.getValue());
    }
    list.addAll(Arrays.asList(edgeConverter.gafferObjectToParquetObjects(ParquetStore.SOURCE, src)));
    list.addAll(Arrays.asList(edgeConverter.gafferObjectToParquetObjects(ParquetStore.DESTINATION, dst)));
    list.addAll(Arrays.asList(edgeConverter.gafferObjectToParquetObjects(ParquetStore.DIRECTED, directed)));
    list.addAll(Arrays.asList(edgeConverter.gafferObjectToParquetObjects("byte", aByte)));
    list.addAll(Arrays.asList(edgeConverter.gafferObjectToParquetObjects("double", aDouble)));
    list.addAll(Arrays.asList(edgeConverter.gafferObjectToParquetObjects("float", aFloat)));
    list.add(WrappedArray$.MODULE$.make(edgeConverter.gafferObjectToParquetObjects("treeSet", treeSet)[0]));
    list.addAll(Arrays.asList(edgeConverter.gafferObjectToParquetObjects("long", aLong)));
    list.addAll(Arrays.asList(edgeConverter.gafferObjectToParquetObjects("short", aShort)));
    list.addAll(Arrays.asList(edgeConverter.gafferObjectToParquetObjects("date", date)));
    list.add(map);
    list.addAll(Arrays.asList(edgeConverter.gafferObjectToParquetObjects("count", 1)));
    if (null != visibility) {
        list.addAll(Arrays.asList(edgeConverter.gafferObjectToParquetObjects(TestTypes.VISIBILITY, visibility)));
    }
    final Object[] objects = new Object[list.size()];
    list.toArray(objects);
    return new GenericRowWithSchema(objects, utils.getSparkSchema(group));
}
Also used: ArrayList (java.util.ArrayList), GafferGroupObjectConverter (uk.gov.gchq.gaffer.parquetstore.utils.GafferGroupObjectConverter), GenericRowWithSchema (org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema), FreqMap (uk.gov.gchq.gaffer.types.FreqMap), Map (java.util.Map)
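
As with the entity example, a minimal sketch of invoking generateEdgeRow. A SchemaUtils instance `utils` for the test schema is assumed to be available, and the group name "BasicEdge" and the property values are illustrative; passing a null visibility is permitted by the guard in the method.

// Hedged usage sketch mirroring the entity example earlier.
final FreqMap freqMap = new FreqMap();
freqMap.put("a", 1L);
final GenericRowWithSchema edgeRow = DataGen.generateEdgeRow(
        utils, "BasicEdge", "src1", "dst1", true,
        (byte) 1, 2.0, 3.0f, new TreeSet<>(Arrays.asList("x", "y")),
        4L, (short) 5, new Date(), freqMap, null);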

Aggregations

GafferGroupObjectConverter (uk.gov.gchq.gaffer.parquetstore.utils.GafferGroupObjectConverter): 7 usages
Element (uk.gov.gchq.gaffer.data.element.Element): 4 usages
ArrayList (java.util.ArrayList): 3 usages
PartitionKey (uk.gov.gchq.gaffer.parquetstore.partitioner.PartitionKey): 3 usages
Map (java.util.Map): 2 usages
GenericRowWithSchema (org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema): 2 usages
ParquetElementReader (uk.gov.gchq.gaffer.parquetstore.io.reader.ParquetElementReader): 2 usages
FreqMap (uk.gov.gchq.gaffer.types.FreqMap): 2 usages
IOException (java.io.IOException): 1 usage
Arrays (java.util.Arrays): 1 usage
List (java.util.List): 1 usage
SortedSet (java.util.SortedSet): 1 usage
TreeSet (java.util.TreeSet): 1 usage
FileStatus (org.apache.hadoop.fs.FileStatus): 1 usage
FileSystem (org.apache.hadoop.fs.FileSystem): 1 usage
Path (org.apache.hadoop.fs.Path): 1 usage
ParquetReader (org.apache.parquet.hadoop.ParquetReader): 1 usage
Logger (org.slf4j.Logger): 1 usage
LoggerFactory (org.slf4j.LoggerFactory): 1 usage
Edge (uk.gov.gchq.gaffer.data.element.Edge): 1 usage