Search in sources :

Example 1 with ParquetSerialiser

Use of uk.gov.gchq.gaffer.parquetstore.serialisation.ParquetSerialiser in project Gaffer by gchq.

From the class SchemaUtils, method buildParquetSchema.

/**
 * Builds the Parquet message schema for a single Gaffer group.
 *
 * <p>The schema text is assembled as {@code message Element { ... }}. A group
 * listed in the Gaffer schema's entity groups contributes the vertex column(s);
 * any other group is treated as an edge and contributes the source, destination
 * and directed column(s). One set of columns per property of the group follows.
 * Every column is also registered through {@code addGroupColumnToSerialiser}
 * before the finished text is parsed into a {@link MessageType}.
 *
 * @param group the name of the Gaffer group to build the schema for
 * @return the parsed Parquet schema for the group
 * @throws SerialisationException if the vertex serialiser is a
 *         {@link ParquetSerialiser} whose Parquet schema contains {@code " group "},
 *         i.e. the vertex would be stored as nested data
 * @throws SchemaException if a property name contains {@code '_'} or {@code '.'}
 */
private MessageType buildParquetSchema(final String group) throws SerialisationException {
    final boolean entityGroup = gafferSchema.getEntityGroups().contains(group);
    final StringBuilder schemaText = new StringBuilder("message Element {\n");
    final Serialiser vertexSerialiser = gafferSchema.getVertexSerialiser();

    // Reject vertices that would be written as nested data: the serialiser is probed
    // with a placeholder column name and its schema searched for a " group " section.
    if (vertexSerialiser instanceof ParquetSerialiser) {
        final String probedSchema = ((ParquetSerialiser) vertexSerialiser).getParquetSchema("test");
        if (probedSchema.contains(" group ")) {
            throw new SerialisationException("Can not have a vertex that is serialised as nested data as it can not be indexed");
        }
    }

    final SchemaElementDefinition elementDefinition;
    if (!entityGroup) {
        // Edge: source and destination share the vertex serialiser, then the directed flag.
        elementDefinition = gafferSchema.getEdge(group);

        schemaText.append(convertColumnSerialiserToParquetColumns(vertexSerialiser, ParquetStore.SOURCE));
        schemaText.append("\n");
        addGroupColumnToSerialiser(group, ParquetStore.SOURCE, vertexSerialiser);

        schemaText.append(convertColumnSerialiserToParquetColumns(vertexSerialiser, ParquetStore.DESTINATION));
        schemaText.append("\n");
        addGroupColumnToSerialiser(group, ParquetStore.DESTINATION, vertexSerialiser);

        final String directedSerialiserClass = BooleanParquetSerialiser.class.getCanonicalName();
        addGroupColumnToSerialiser(group, ParquetStore.DIRECTED, directedSerialiserClass);
        schemaText.append(convertColumnSerialiserToParquetColumns(getSerialiser(directedSerialiserClass), ParquetStore.DIRECTED));
        schemaText.append("\n");
    } else {
        // Entity: a single vertex column set.
        elementDefinition = gafferSchema.getEntity(group);

        schemaText.append(convertColumnSerialiserToParquetColumns(vertexSerialiser, ParquetStore.VERTEX));
        schemaText.append("\n");
        addGroupColumnToSerialiser(group, ParquetStore.VERTEX, vertexSerialiser);
    }

    // Append the columns for each property declared on the group.
    final Map<String, String> properties = elementDefinition.getPropertyMap();
    for (final Map.Entry<String, String> property : properties.entrySet()) {
        final String propertyName = property.getKey();
        if (propertyName.contains("_") || propertyName.contains(".")) {
            throw new SchemaException("The ParquetStore does not support properties which contain the characters '_' or '.'");
        }
        final TypeDefinition propertyType = gafferSchema.getType(property.getValue());
        addGroupColumnToSerialiser(group, propertyName, propertyType.getSerialiserClass());
        schemaText.append(convertColumnSerialiserToParquetColumns(getSerialiser(propertyType.getSerialiserClass()), propertyName));
        schemaText.append("\n");
    }
    schemaText.append("}");

    final String schemaAsString = schemaText.toString();
    final MessageType parsedSchema = MessageTypeParser.parseMessageType(schemaAsString);
    LOGGER.debug("Generated Parquet schema: " + schemaAsString);
    return parsedSchema;
}
Also used : SchemaException(uk.gov.gchq.gaffer.data.elementdefinition.exception.SchemaException) BooleanParquetSerialiser(uk.gov.gchq.gaffer.parquetstore.serialisation.impl.BooleanParquetSerialiser) ParquetSerialiser(uk.gov.gchq.gaffer.parquetstore.serialisation.ParquetSerialiser) SerialisationException(uk.gov.gchq.gaffer.exception.SerialisationException) BooleanParquetSerialiser(uk.gov.gchq.gaffer.parquetstore.serialisation.impl.BooleanParquetSerialiser) ParquetSerialiser(uk.gov.gchq.gaffer.parquetstore.serialisation.ParquetSerialiser) Serialiser(uk.gov.gchq.gaffer.serialisation.Serialiser) BooleanParquetSerialiser(uk.gov.gchq.gaffer.parquetstore.serialisation.impl.BooleanParquetSerialiser) TypeDefinition(uk.gov.gchq.gaffer.store.schema.TypeDefinition) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map) SchemaElementDefinition(uk.gov.gchq.gaffer.store.schema.SchemaElementDefinition) MessageType(org.apache.parquet.schema.MessageType)

Aggregations

HashMap (java.util.HashMap)1 LinkedHashMap (java.util.LinkedHashMap)1 Map (java.util.Map)1 MessageType (org.apache.parquet.schema.MessageType)1 SchemaException (uk.gov.gchq.gaffer.data.elementdefinition.exception.SchemaException)1 SerialisationException (uk.gov.gchq.gaffer.exception.SerialisationException)1 ParquetSerialiser (uk.gov.gchq.gaffer.parquetstore.serialisation.ParquetSerialiser)1 BooleanParquetSerialiser (uk.gov.gchq.gaffer.parquetstore.serialisation.impl.BooleanParquetSerialiser)1 Serialiser (uk.gov.gchq.gaffer.serialisation.Serialiser)1 SchemaElementDefinition (uk.gov.gchq.gaffer.store.schema.SchemaElementDefinition)1 TypeDefinition (uk.gov.gchq.gaffer.store.schema.TypeDefinition)1