Use of uk.gov.gchq.gaffer.parquetstore.serialisation.ParquetSerialiser in project Gaffer by gchq.
The class SchemaUtils, method buildParquetSchema.
/**
 * Builds the Parquet {@link MessageType} describing the columns of one Gaffer group.
 *
 * <p>The schema text starts with {@code message Element {}, followed by the identifier
 * columns (the vertex column for an entity group; source, destination and directed
 * columns otherwise), then one column definition per property of the group, and a
 * closing brace. Every column is also passed to {@code addGroupColumnToSerialiser}
 * together with the serialiser (or serialiser class name) used for it.
 *
 * @param group the name of the Gaffer group to build the schema for
 * @return the {@link MessageType} parsed from the generated schema text
 * @throws SerialisationException if the vertex serialiser is a {@code ParquetSerialiser}
 *         whose Parquet schema contains a {@code group} (nested) definition
 * @throws SchemaException if a property name of the group contains {@code '_'} or {@code '.'}
 */
private MessageType buildParquetSchema(final String group) throws SerialisationException {
    final boolean entityGroup = gafferSchema.getEntityGroups().contains(group);
    final StringBuilder columns = new StringBuilder("message Element {\n");
    final Serialiser vertexSerialiser = gafferSchema.getVertexSerialiser();

    // A vertex must map to flat column(s): probe the serialiser's schema with a dummy
    // column name and reject it if the result declares a nested " group ".
    if (vertexSerialiser instanceof ParquetSerialiser) {
        final ParquetSerialiser parquetVertexSerialiser = (ParquetSerialiser) vertexSerialiser;
        if (parquetVertexSerialiser.getParquetSchema("test").contains(" group ")) {
            throw new SerialisationException("Can not have a vertex that is serialised as nested data as it can not be indexed");
        }
    }

    // Identifier columns: these differ between entity groups and edge groups.
    final SchemaElementDefinition elementDefinition;
    if (entityGroup) {
        elementDefinition = gafferSchema.getEntity(group);

        columns.append(convertColumnSerialiserToParquetColumns(vertexSerialiser, ParquetStore.VERTEX));
        columns.append("\n");
        addGroupColumnToSerialiser(group, ParquetStore.VERTEX, vertexSerialiser);
    } else {
        elementDefinition = gafferSchema.getEdge(group);

        columns.append(convertColumnSerialiserToParquetColumns(vertexSerialiser, ParquetStore.SOURCE));
        columns.append("\n");
        addGroupColumnToSerialiser(group, ParquetStore.SOURCE, vertexSerialiser);

        columns.append(convertColumnSerialiserToParquetColumns(vertexSerialiser, ParquetStore.DESTINATION));
        columns.append("\n");
        addGroupColumnToSerialiser(group, ParquetStore.DESTINATION, vertexSerialiser);

        // The directed flag always uses the Boolean Parquet serialiser, referenced by class name.
        final String booleanSerialiserClass = BooleanParquetSerialiser.class.getCanonicalName();
        addGroupColumnToSerialiser(group, ParquetStore.DIRECTED, booleanSerialiserClass);
        columns.append(convertColumnSerialiserToParquetColumns(getSerialiser(booleanSerialiserClass), ParquetStore.DIRECTED));
        columns.append("\n");
    }

    // Property columns: one per entry of the group's property map (property name -> type name).
    for (final Map.Entry<String, String> property : elementDefinition.getPropertyMap().entrySet()) {
        final String propertyName = property.getKey();
        if (propertyName.contains("_") || propertyName.contains(".")) {
            throw new SchemaException("The ParquetStore does not support properties which contain the characters '_' or '.'");
        }

        final TypeDefinition typeDefinition = gafferSchema.getType(property.getValue());
        addGroupColumnToSerialiser(group, propertyName, typeDefinition.getSerialiserClass());
        columns.append(convertColumnSerialiserToParquetColumns(getSerialiser(typeDefinition.getSerialiserClass()), propertyName));
        columns.append("\n");
    }

    columns.append("}");

    final String schemaText = columns.toString();
    final MessageType messageType = MessageTypeParser.parseMessageType(schemaText);
    LOGGER.debug("Generated Parquet schema: " + schemaText);
    return messageType;
}
Aggregations