Search in sources :

Example 1 with SchemaElementDefinition

use of uk.gov.gchq.gaffer.store.schema.SchemaElementDefinition in project Gaffer by gchq.

the class ElementValidatorTest method shouldReturnFalseWhenSchemaValidateWithInvalidElement.

@Test
public void shouldReturnFalseWhenSchemaValidateWithInvalidElement() {
    // Given
    final Schema schema = mock(Schema.class);
    final String group = TestGroups.EDGE;
    final Element elm = mock(Element.class);
    final SchemaElementDefinition elementDef = mock(SchemaElementDefinition.class);
    final ElementFilter filter = mock(ElementFilter.class);
    final boolean includeIsA = true;
    final ElementValidator validator = new ElementValidator(schema, includeIsA);
    given(elm.getGroup()).willReturn(group);
    given(schema.getElement(group)).willReturn(elementDef);
    given(elementDef.getValidator(includeIsA)).willReturn(filter);
    given(filter.filter(elm)).willReturn(false);
    // When
    final boolean isValid = validator.validate(elm);
    // Then
    assertFalse(isValid);
}
Also used : Schema(uk.gov.gchq.gaffer.store.schema.Schema) Element(uk.gov.gchq.gaffer.data.element.Element) ElementFilter(uk.gov.gchq.gaffer.data.element.function.ElementFilter) SchemaElementDefinition(uk.gov.gchq.gaffer.store.schema.SchemaElementDefinition) Test(org.junit.Test)

Example 2 with SchemaElementDefinition

use of uk.gov.gchq.gaffer.store.schema.SchemaElementDefinition in project Gaffer by gchq.

the class ElementValidatorTest method shouldReturnTrueWhenSchemaValidateWithValidElement.

@Test
public void shouldReturnTrueWhenSchemaValidateWithValidElement() {
    // Given
    final Schema schema = mock(Schema.class);
    final String group = TestGroups.EDGE;
    final Element elm = mock(Element.class);
    final SchemaElementDefinition elementDef = mock(SchemaElementDefinition.class);
    final ElementFilter filter = mock(ElementFilter.class);
    final boolean includeIsA = true;
    final ElementValidator validator = new ElementValidator(schema, includeIsA);
    given(elm.getGroup()).willReturn(group);
    given(schema.getElement(group)).willReturn(elementDef);
    given(elementDef.getValidator(includeIsA)).willReturn(filter);
    given(filter.filter(elm)).willReturn(true);
    // When
    final boolean isValid = validator.validate(elm);
    // Then
    assertTrue(isValid);
}
Also used : Schema(uk.gov.gchq.gaffer.store.schema.Schema) Element(uk.gov.gchq.gaffer.data.element.Element) ElementFilter(uk.gov.gchq.gaffer.data.element.function.ElementFilter) SchemaElementDefinition(uk.gov.gchq.gaffer.store.schema.SchemaElementDefinition) Test(org.junit.Test)

Example 3 with SchemaElementDefinition

use of uk.gov.gchq.gaffer.store.schema.SchemaElementDefinition in project Gaffer by gchq.

the class SchemaToStructTypeConverter method buildSchema.

private void buildSchema() {
    LOGGER.info("Building Spark SQL schema for groups {}", StringUtils.join(groups, ','));
    for (final String group : groups) {
        final SchemaElementDefinition elementDefn = schema.getElement(group);
        final List<StructField> structFieldList = new ArrayList<>();
        if (elementDefn instanceof SchemaEntityDefinition) {
            entityOrEdgeByGroup.put(group, EntityOrEdge.ENTITY);
            final SchemaEntityDefinition entityDefinition = (SchemaEntityDefinition) elementDefn;
            final String vertexClass = schema.getType(entityDefinition.getVertex()).getClassString();
            final DataType vertexType = getType(vertexClass);
            if (vertexType == null) {
                throw new RuntimeException("Vertex must be a recognised type: found " + vertexClass);
            }
            LOGGER.info("Group {} is an entity group - {} is of type {}", group, VERTEX_COL_NAME, vertexType);
            structFieldList.add(new StructField(VERTEX_COL_NAME, vertexType, true, Metadata.empty()));
        } else {
            entityOrEdgeByGroup.put(group, EntityOrEdge.EDGE);
            final SchemaEdgeDefinition edgeDefinition = (SchemaEdgeDefinition) elementDefn;
            final String srcClass = schema.getType(edgeDefinition.getSource()).getClassString();
            final String dstClass = schema.getType(edgeDefinition.getDestination()).getClassString();
            final DataType srcType = getType(srcClass);
            final DataType dstType = getType(dstClass);
            if (srcType == null || dstType == null) {
                throw new RuntimeException("Both source and destination must be recognised types: source was " + srcClass + " destination was " + dstClass);
            }
            LOGGER.info("Group {} is an edge group - {} is of type {}, {} is of type {}", group, SRC_COL_NAME, srcType, DST_COL_NAME, dstType);
            structFieldList.add(new StructField(SRC_COL_NAME, srcType, true, Metadata.empty()));
            structFieldList.add(new StructField(DST_COL_NAME, dstType, true, Metadata.empty()));
        }
        final Set<String> properties = elementDefn.getProperties();
        for (final String property : properties) {
            // Check if property is of a known type that can be handled by default
            final String propertyClass = elementDefn.getPropertyClass(property).getCanonicalName();
            DataType propertyType = getType(propertyClass);
            if (propertyType != null) {
                propertyNeedsConversion.put(property, needsConversion(propertyClass));
                structFieldList.add(new StructField(property, propertyType, true, Metadata.empty()));
                LOGGER.info("Property {} is of type {}", property, propertyType);
            } else {
                // Check if any of the provided converters can handle it
                if (converters != null) {
                    for (final Converter converter : converters) {
                        if (converter.canHandle(elementDefn.getPropertyClass(property))) {
                            propertyNeedsConversion.put(property, true);
                            propertyType = converter.convertedType();
                            converterByProperty.put(property, converter);
                            structFieldList.add(new StructField(property, propertyType, true, Metadata.empty()));
                            LOGGER.info("Property {} of type {} will be converted by {} to {}", property, propertyClass, converter.getClass().getName(), propertyType);
                            break;
                        }
                    }
                    if (propertyType == null) {
                        LOGGER.warn("Ignoring property {} as it is not a recognised type and none of the provided " + "converters can handle it", property);
                    }
                }
            }
        }
        structTypeByGroup.put(group, new StructType(structFieldList.toArray(new StructField[structFieldList.size()])));
    }
    // Create reverse map of field name to StructField
    final Map<String, Set<StructField>> fieldToStructs = new HashMap<>();
    for (final String group : groups) {
        final StructType groupSchema = structTypeByGroup.get(group);
        for (final String field : groupSchema.fieldNames()) {
            if (fieldToStructs.get(field) == null) {
                fieldToStructs.put(field, new HashSet<StructField>());
            }
            fieldToStructs.get(field).add(groupSchema.apply(field));
        }
    }
    // Check consistency, i.e. if the same field appears in multiple groups then the types are consistent
    for (final Entry<String, Set<StructField>> entry : fieldToStructs.entrySet()) {
        final Set<StructField> schemas = entry.getValue();
        if (schemas.size() > 1) {
            throw new IllegalArgumentException("Inconsistent fields: the field " + entry.getKey() + " has more than one definition: " + StringUtils.join(schemas, ','));
        }
    }
    // Merge schemas for groups together - fields should appear in the order the groups were provided
    final LinkedHashSet<StructField> fields = new LinkedHashSet<>();
    fields.add(new StructField(GROUP, DataTypes.StringType, false, Metadata.empty()));
    usedProperties.add(GROUP);
    for (final String group : groups) {
        final StructType groupSchema = structTypeByGroup.get(group);
        for (final String field : groupSchema.fieldNames()) {
            final StructField struct = groupSchema.apply(field);
            // Add struct to fields unless it has already been added
            if (!fields.contains(struct)) {
                fields.add(struct);
                usedProperties.add(field);
            }
        }
    }
    structType = new StructType(fields.toArray(new StructField[fields.size()]));
    LOGGER.info("Schema is {}", structType);
    LOGGER.debug("properties -> conversion: {}", StringUtils.join(propertyNeedsConversion.entrySet(), ','));
}
Also used : LinkedHashSet(java.util.LinkedHashSet) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet) Set(java.util.Set) StructType(org.apache.spark.sql.types.StructType) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) SchemaEntityDefinition(uk.gov.gchq.gaffer.store.schema.SchemaEntityDefinition) StructField(org.apache.spark.sql.types.StructField) DataType(org.apache.spark.sql.types.DataType) SchemaEdgeDefinition(uk.gov.gchq.gaffer.store.schema.SchemaEdgeDefinition) Converter(uk.gov.gchq.gaffer.spark.operation.dataframe.converter.property.Converter) HyperLogLogPlusConverter(uk.gov.gchq.gaffer.spark.operation.dataframe.converter.property.impl.HyperLogLogPlusConverter) FreqMapConverter(uk.gov.gchq.gaffer.spark.operation.dataframe.converter.property.impl.FreqMapConverter) UnionConverter(uk.gov.gchq.gaffer.spark.operation.dataframe.converter.property.impl.datasketches.theta.UnionConverter) SchemaElementDefinition(uk.gov.gchq.gaffer.store.schema.SchemaElementDefinition)

Example 4 with SchemaElementDefinition

use of uk.gov.gchq.gaffer.store.schema.SchemaElementDefinition in project Gaffer by gchq.

the class AbstractCoreKeyAccumuloElementConverter method buildColumnQualifier.

@Override
public byte[] buildColumnQualifier(final String group, final Properties properties) throws AccumuloElementConversionException {
    final ByteArrayOutputStream out = new ByteArrayOutputStream();
    final SchemaElementDefinition elementDefinition = schema.getElement(group);
    if (null == elementDefinition) {
        throw new AccumuloElementConversionException("No SchemaElementDefinition found for group " + group + ", is this group in your schema or do your table iterators need updating?");
    }
    final Iterator<String> propertyNames = elementDefinition.getGroupBy().iterator();
    while (propertyNames.hasNext()) {
        String propertyName = propertyNames.next();
        final TypeDefinition typeDefinition = elementDefinition.getPropertyTypeDef(propertyName);
        final Serialisation serialiser = (typeDefinition != null) ? typeDefinition.getSerialiser() : null;
        try {
            if (null != serialiser) {
                Object value = properties.get(propertyName);
                if (null != value) {
                    final byte[] bytes = serialiser.serialise(value);
                    writeBytes(bytes, out);
                } else {
                    final byte[] bytes = serialiser.serialiseNull();
                    writeBytes(bytes, out);
                }
            } else {
                writeBytes(AccumuloStoreConstants.EMPTY_BYTES, out);
            }
        } catch (final IOException e) {
            throw new AccumuloElementConversionException("Failed to write serialise property to ByteArrayOutputStream" + propertyName, e);
        }
    }
    return out.toByteArray();
}
Also used : Serialisation(uk.gov.gchq.gaffer.serialisation.Serialisation) ByteArrayOutputStream(java.io.ByteArrayOutputStream) IOException(java.io.IOException) SchemaElementDefinition(uk.gov.gchq.gaffer.store.schema.SchemaElementDefinition) AccumuloElementConversionException(uk.gov.gchq.gaffer.accumulostore.key.exception.AccumuloElementConversionException) TypeDefinition(uk.gov.gchq.gaffer.store.schema.TypeDefinition)

Example 5 with SchemaElementDefinition

use of uk.gov.gchq.gaffer.store.schema.SchemaElementDefinition in project Gaffer by gchq.

the class AbstractCoreKeyAccumuloElementConverter method getPropertiesFromValue.

@Override
public Properties getPropertiesFromValue(final String group, final Value value) throws AccumuloElementConversionException {
    final Properties properties = new Properties();
    if (value == null || value.getSize() == 0) {
        return properties;
    }
    final byte[] bytes = value.get();
    int lastDelimiter = 0;
    final int arrayLength = bytes.length;
    long currentPropLength;
    final SchemaElementDefinition elementDefinition = schema.getElement(group);
    if (null == elementDefinition) {
        throw new AccumuloElementConversionException("No SchemaElementDefinition found for group " + group + ", is this group in your schema or do your table iterators need updating?");
    }
    final Iterator<String> propertyNames = elementDefinition.getProperties().iterator();
    while (propertyNames.hasNext() && lastDelimiter < arrayLength) {
        final String propertyName = propertyNames.next();
        if (isStoredInValue(propertyName, elementDefinition)) {
            final TypeDefinition typeDefinition = elementDefinition.getPropertyTypeDef(propertyName);
            final Serialisation<?> serialiser = (typeDefinition != null) ? typeDefinition.getSerialiser() : null;
            if (null != serialiser) {
                final int numBytesForLength = CompactRawSerialisationUtils.decodeVIntSize(bytes[lastDelimiter]);
                final byte[] length = new byte[numBytesForLength];
                System.arraycopy(bytes, lastDelimiter, length, 0, numBytesForLength);
                try {
                    currentPropLength = CompactRawSerialisationUtils.readLong(length);
                } catch (final SerialisationException e) {
                    throw new AccumuloElementConversionException("Exception reading length of property", e);
                }
                lastDelimiter += numBytesForLength;
                if (currentPropLength > 0) {
                    try {
                        properties.put(propertyName, serialiser.deserialise(Arrays.copyOfRange(bytes, lastDelimiter, lastDelimiter += currentPropLength)));
                    } catch (SerialisationException e) {
                        throw new AccumuloElementConversionException("Failed to deserialise property " + propertyName, e);
                    }
                } else {
                    try {
                        properties.put(propertyName, serialiser.deserialiseEmptyBytes());
                    } catch (SerialisationException e) {
                        throw new AccumuloElementConversionException("Failed to deserialise property " + propertyName, e);
                    }
                }
            }
        }
    }
    return properties;
}
Also used : SerialisationException(uk.gov.gchq.gaffer.exception.SerialisationException) Properties(uk.gov.gchq.gaffer.data.element.Properties) SchemaElementDefinition(uk.gov.gchq.gaffer.store.schema.SchemaElementDefinition) AccumuloElementConversionException(uk.gov.gchq.gaffer.accumulostore.key.exception.AccumuloElementConversionException) TypeDefinition(uk.gov.gchq.gaffer.store.schema.TypeDefinition)

Aggregations

SchemaElementDefinition (uk.gov.gchq.gaffer.store.schema.SchemaElementDefinition)19 AccumuloElementConversionException (uk.gov.gchq.gaffer.accumulostore.key.exception.AccumuloElementConversionException)7 Element (uk.gov.gchq.gaffer.data.element.Element)5 ElementFilter (uk.gov.gchq.gaffer.data.element.function.ElementFilter)5 Schema (uk.gov.gchq.gaffer.store.schema.Schema)5 TypeDefinition (uk.gov.gchq.gaffer.store.schema.TypeDefinition)5 Test (org.junit.Test)4 Properties (uk.gov.gchq.gaffer.data.element.Properties)4 SerialisationException (uk.gov.gchq.gaffer.exception.SerialisationException)4 Serialisation (uk.gov.gchq.gaffer.serialisation.Serialisation)4 ByteArrayOutputStream (java.io.ByteArrayOutputStream)2 IOException (java.io.IOException)2 ArrayList (java.util.ArrayList)2 HashMap (java.util.HashMap)2 HashSet (java.util.HashSet)1 LinkedHashMap (java.util.LinkedHashMap)1 LinkedHashSet (java.util.LinkedHashSet)1 Set (java.util.Set)1 Value (org.apache.accumulo.core.data.Value)1 DataType (org.apache.spark.sql.types.DataType)1