Search in sources :

Example 26 with ResourceFieldSchema

use of org.apache.pig.ResourceSchema.ResourceFieldSchema in project gora by apache.

the class SchemaUtils method checkStoreSchema.

/* ============================= CHECK PIG SCHEMA =================================== */
/**
 * Checks the Pig schema of a relation that is about to be stored. That is, when executing:
 * <pre>
 * STORE relation_data INTO '.' USING org.apache.gora.pig.GoraStorage(
 *                     'java.lang.String',
 *                     'admin.WebPage',
 *                     'baseUrl,status')
 * </pre>
 * the relation's schema is checked before storing.
 *
 * <p>The check consists of three steps:
 * <ol>
 *   <li>the Pig relation must contain a field named "key";</li>
 *   <li>every field declared in the STORE command must be present in the Pig relation;</li>
 *   <li>every Pig field that is also a declared field must exist in the persistent schema, and is
 *       then handed to {@code checkEqualSchema} together with the persistent field's schema.</li>
 * </ol>
 *
 * @param pigSchema - The Pig Schema to check.
 * @param queryFields - Declared storing fields ("baseUrl" and "status" in the example).
 * @param persistentSchema - The schema of the Persistent entity ("admin.WebPage" in the example).
 * @throws IOException if the "key" field is missing, if a declared field is missing from the Pig
 *         relation, or if a declared field does not exist in the persistent schema. Errors raised
 *         by {@code checkEqualSchema} propagate unchanged.
 */
public static void checkStoreSchema(ResourceSchema pigSchema, List<String> queryFields, Schema persistentSchema) throws IOException {
    // Expected pig schema: tuple (key, recordfield, recordfield, recordfi...)
    ResourceFieldSchema[] pigFieldsSchemas = pigSchema.getFields();
    String persistentClassName = persistentSchema.getFullName();
    // Read-only view is enough: the names are only searched, never modified.
    List<String> pigFieldSchemasNames = Arrays.asList(pigSchema.fieldNames());
    if (!pigFieldSchemasNames.contains("key")) {
        LOG.info("Declared Pig fields: " + String.join(",", pigFieldSchemasNames));
        throw new IOException("Expected a field called \"key\", but not found.");
    }
    // All fields are mandatory: fail fast when any declared field is absent from the relation.
    if (!pigFieldSchemasNames.containsAll(queryFields)) {
        throw new IOException("Some fields declared in the constructor (" + String.join(",", queryFields) + ") are missing in the tuples to be saved (" + Arrays.toString(pigSchema.fieldNames()) + ")");
    }
    for (ResourceFieldSchema pigFieldSchema : pigFieldsSchemas) {
        String pigFieldName = pigFieldSchema.getName();
        // Fields not declared in the STORE command (e.g. "key") are not validated here.
        if (!queryFields.contains(pigFieldName)) {
            continue;
        }
        Field persistentField = persistentSchema.getField(pigFieldName);
        if (persistentField == null) {
            throw new IOException("Declared field in Pig [" + pigFieldName + "] to store does not exist in " + persistentClassName + ".");
        }
        // Reuse the field already looked up instead of querying the persistent schema again.
        checkEqualSchema(pigFieldSchema, persistentField.schema());
    }
}
Also used : Field(org.apache.avro.Schema.Field) ResourceFieldSchema(org.apache.pig.ResourceSchema.ResourceFieldSchema) ArrayList(java.util.ArrayList) IOException(java.io.IOException)

Example 27 with ResourceFieldSchema

use of org.apache.pig.ResourceSchema.ResourceFieldSchema in project phoenix by apache.

the class PhoenixPigSchemaUtil method getResourceSchema.

/**
 * Builds a Pig {@link ResourceSchema} with one field per column.
 *
 * <p>When the configured schema type is {@code SchemaType.QUERY}, the columns are derived from the
 * configured select statement through {@code SqlQueryToColumnInfoFunction}; otherwise they are
 * obtained from {@code dependencies.getSelectColumnMetadataList(configuration)}. Each column's SQL
 * type id is converted to a Phoenix data type and then to a Pig type, and the field is named after
 * the column's display name.
 *
 * @param configuration configuration the schema type, select statement and column metadata are read from
 * @param dependencies provider of the select column metadata for the non-query case
 * @return the schema populated with one field per column, in column order
 * @throws IOException wrapping any {@link SQLException} raised while resolving the columns
 */
public static ResourceSchema getResourceSchema(final Configuration configuration, Dependencies dependencies) throws IOException {
    final ResourceSchema resourceSchema = new ResourceSchema();
    try {
        final SchemaType configuredType = PhoenixConfigurationUtil.getSchemaType(configuration);
        final List<ColumnInfo> columnInfos;
        if (!SchemaType.QUERY.equals(configuredType)) {
            columnInfos = dependencies.getSelectColumnMetadataList(configuration);
        } else {
            final String selectStatement = PhoenixConfigurationUtil.getSelectStatement(configuration);
            Preconditions.checkNotNull(selectStatement, "No Sql Query exists within the configuration");
            final SqlQueryToColumnInfoFunction toColumnInfos = new SqlQueryToColumnInfoFunction(configuration);
            columnInfos = toColumnInfos.apply(selectStatement);
        }
        final ResourceFieldSchema[] fieldSchemas = new ResourceFieldSchema[columnInfos.size()];
        int position = 0;
        for (ColumnInfo columnInfo : columnInfos) {
            // SQL type id -> Phoenix data type -> Pig type byte.
            final PDataType phoenixType = PDataType.fromTypeId(columnInfo.getSqlType());
            final byte pigTypeCode = TypeUtil.getPigDataTypeForPhoenixType(phoenixType);
            final ResourceFieldSchema fieldSchema = new ResourceFieldSchema();
            fieldSchema.setType(pigTypeCode).setName(columnInfo.getDisplayName());
            fieldSchemas[position] = fieldSchema;
            position++;
        }
        resourceSchema.setFields(fieldSchemas);
    } catch (SQLException sqlFailure) {
        LOG.error(String.format("Error: SQLException [%s] ", sqlFailure.getMessage()));
        throw new IOException(sqlFailure);
    }
    return resourceSchema;
}
Also used : ResourceSchema(org.apache.pig.ResourceSchema) SQLException(java.sql.SQLException) ColumnInfo(org.apache.phoenix.util.ColumnInfo) IOException(java.io.IOException) SchemaType(org.apache.phoenix.mapreduce.util.PhoenixConfigurationUtil.SchemaType) PDataType(org.apache.phoenix.schema.types.PDataType) ResourceFieldSchema(org.apache.pig.ResourceSchema.ResourceFieldSchema)

Example 28 with ResourceFieldSchema

use of org.apache.pig.ResourceSchema.ResourceFieldSchema in project elephant-bird by twitter.

the class BytesWritableConverter method getLoadSchema.

/**
 * Describes the field produced when loading with this converter.
 *
 * @return a new field schema whose type is {@code DataType.BYTEARRAY}
 * @throws IOException declared by the overridden method; not thrown by this implementation's own statements
 */
@Override
public ResourceFieldSchema getLoadSchema() throws IOException {
    final ResourceFieldSchema byteArrayField = new ResourceFieldSchema();
    byteArrayField.setType(DataType.BYTEARRAY);
    return byteArrayField;
}
Also used : ResourceFieldSchema(org.apache.pig.ResourceSchema.ResourceFieldSchema)

Example 29 with ResourceFieldSchema

use of org.apache.pig.ResourceSchema.ResourceFieldSchema in project elephant-bird by twitter.

the class IntWritableConverter method getLoadSchema.

/**
 * Describes the field produced when loading with this converter.
 *
 * @return a new field schema whose type is {@code DataType.INTEGER}
 * @throws IOException declared by the overridden method; not thrown by this implementation's own statements
 */
@Override
public ResourceFieldSchema getLoadSchema() throws IOException {
    final ResourceFieldSchema integerField = new ResourceFieldSchema();
    integerField.setType(DataType.INTEGER);
    return integerField;
}
Also used : ResourceFieldSchema(org.apache.pig.ResourceSchema.ResourceFieldSchema)

Example 30 with ResourceFieldSchema

use of org.apache.pig.ResourceSchema.ResourceFieldSchema in project elephant-bird by twitter.

the class LongWritableConverter method getLoadSchema.

/**
 * Describes the field produced when loading with this converter.
 *
 * @return a new field schema whose type is {@code DataType.LONG}
 * @throws IOException declared by the overridden method; not thrown by this implementation's own statements
 */
@Override
public ResourceFieldSchema getLoadSchema() throws IOException {
    final ResourceFieldSchema longField = new ResourceFieldSchema();
    longField.setType(DataType.LONG);
    return longField;
}
Also used : ResourceFieldSchema(org.apache.pig.ResourceSchema.ResourceFieldSchema)

Aggregations

ResourceFieldSchema (org.apache.pig.ResourceSchema.ResourceFieldSchema)42 ResourceSchema (org.apache.pig.ResourceSchema)22 IOException (java.io.IOException)16 ArrayList (java.util.ArrayList)7 Map (java.util.Map)7 Tuple (org.apache.pig.data.Tuple)7 HCatFieldSchema (org.apache.hive.hcatalog.data.schema.HCatFieldSchema)5 BasicDBObjectBuilder (com.mongodb.BasicDBObjectBuilder)4 SQLException (java.sql.SQLException)4 HashMap (java.util.HashMap)4 Field (org.apache.avro.Schema.Field)4 Schema (org.apache.pig.impl.logicalLayer.schema.Schema)4 BasicBSONObject (org.bson.BasicBSONObject)4 Test (org.junit.Test)4 List (java.util.List)3 DataBag (org.apache.pig.data.DataBag)3 FieldSchema (org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema)3 BSONObject (org.bson.BSONObject)3 DateTime (org.joda.time.DateTime)3 LinkedHashMap (java.util.LinkedHashMap)2