Search in sources :

Example 31 with ResourceFieldSchema

use of org.apache.pig.ResourceSchema.ResourceFieldSchema in project elephant-bird by twitter.

the class ProtobufWritableConverter method checkStoreSchema.

/**
 * Validates a store schema: it must be non-null, be of type TUPLE, carry a
 * non-null nested schema, and that nested schema (converted to a Pig
 * {@code Schema}) must compare equal to {@code expectedSchema}.
 * All validation is delegated to {@code Preconditions}.
 *
 * @param schema the field schema to validate
 * @throws IOException declared by the overridden method signature
 */
@Override
public void checkStoreSchema(ResourceFieldSchema schema) throws IOException {
    Preconditions.checkNotNull(schema, "Schema is null");
    // The outer field has to be a tuple; both type names go into the message.
    Preconditions.checkArgument(
        schema.getType() == DataType.TUPLE,
        "Expected schema type '%s' but found type '%s'",
        DataType.findTypeName(DataType.TUPLE),
        DataType.findTypeName(schema.getType()));
    ResourceSchema nested = schema.getSchema();
    Preconditions.checkNotNull(nested, "Child schema is null");
    // Convert to a Pig Schema so it can be compared with the expected schema.
    Schema found = Schema.getPigSchema(nested);
    boolean matchesExpected = Schema.equals(expectedSchema, found, false, true);
    Preconditions.checkArgument(
        matchesExpected,
        "Expected store schema '%s' but found schema '%s'",
        expectedSchema,
        found);
}
Also used : ResourceSchema(org.apache.pig.ResourceSchema) ResourceFieldSchema(org.apache.pig.ResourceSchema.ResourceFieldSchema) Schema(org.apache.pig.impl.logicalLayer.schema.Schema) FieldSchema(org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema)

Example 32 with ResourceFieldSchema

use of org.apache.pig.ResourceSchema.ResourceFieldSchema in project elephant-bird by twitter.

the class TextConverter method getLoadSchema.

/**
 * Builds the load schema for this converter: a fresh field schema whose type
 * is set to {@code DataType.CHARARRAY}.
 *
 * @return the newly created field schema
 * @throws IOException declared by the overridden method signature
 */
@Override
public ResourceFieldSchema getLoadSchema() throws IOException {
    final ResourceFieldSchema charArrayField = new ResourceFieldSchema();
    charArrayField.setType(DataType.CHARARRAY);
    return charArrayField;
}
Also used : ResourceFieldSchema(org.apache.pig.ResourceSchema.ResourceFieldSchema)

Example 33 with ResourceFieldSchema

use of org.apache.pig.ResourceSchema.ResourceFieldSchema in project elephant-bird by twitter.

the class VectorWritableConverter method checkSparseVectorEntriesSchema.

/**
 * Checks the schema of the "entries" part of a sparse vector. The expected
 * shape is a single tuple field holding exactly two fields: an integer index
 * followed by a numeric value. Each step is delegated to the assert* helpers
 * in the order written below.
 *
 * @param entriesSchema schema describing the entries
 * @throws IOException declared by this method's signature
 */
private void checkSparseVectorEntriesSchema(ResourceSchema entriesSchema) throws IOException {
    // The entries schema must exist and expose exactly one field.
    assertNotNull(entriesSchema, "ResourceSchema of entries is null");
    ResourceFieldSchema[] outerFields = entriesSchema.getFields();
    assertNotNull(outerFields, "Tuple field schemas are null");
    assertTupleLength(1, outerFields.length, "entries");

    // That single field must itself be a tuple.
    ResourceFieldSchema entryField = outerFields[0];
    assertFieldTypeEquals(DataType.TUPLE, entryField.getType(), "entries[0]");

    // The entry tuple must exist and expose exactly two fields.
    ResourceSchema entrySchema = entryField.getSchema();
    assertNotNull(entrySchema, "ResourceSchema of entries[0] is null");
    ResourceFieldSchema[] entryFields = entrySchema.getFields();
    assertNotNull(entryFields, "Tuple field schemas are null");
    assertTupleLength(2, entryFields.length, "entries[0]");

    // First entry field is the index and must be an integer.
    ResourceFieldSchema indexField = entryFields[0];
    assertFieldTypeEquals(DataType.INTEGER, indexField.getType(), "entries[0][0]");

    // Second entry field is the value; any numeric type is accepted.
    ResourceFieldSchema valueField = entryFields[1];
    assertFieldTypeIsNumeric(valueField.getType(), "entries[0][1]");
}
Also used : ResourceSchema(org.apache.pig.ResourceSchema) ResourceFieldSchema(org.apache.pig.ResourceSchema.ResourceFieldSchema)

Example 34 with ResourceFieldSchema

use of org.apache.pig.ResourceSchema.ResourceFieldSchema in project elephant-bird by twitter.

the class VectorWritableConverter method checkStoreSchema.

/**
 * Validates the schema of a tuple that is about to be stored as a vector.
 * Three layouts are accepted:
 * <ul>
 *   <li>one field of type BAG: sparse format; {@code cardinality} must
 *       already be defined;</li>
 *   <li>two fields with a BAG in second position: sparse format with the
 *       cardinality as an integer first field; {@code cardinality} must not
 *       already be defined;</li>
 *   <li>anything else: dense format; every field must be numeric.</li>
 * </ul>
 *
 * @param schema the field schema to validate; must not be null
 * @throws IOException declared by the overridden method signature
 */
@Override
public void checkStoreSchema(ResourceFieldSchema schema) throws IOException {
    // Fail with a descriptive message instead of a bare NPE on schema.getType().
    Preconditions.checkNotNull(schema, "Schema is null");
    assertFieldTypeEquals(DataType.TUPLE, schema.getType(), "tuple");
    ResourceSchema vectorSchema = schema.getSchema();
    assertNotNull(vectorSchema, "ResourceSchema for tuple is null");
    ResourceFieldSchema[] vectorFieldSchemas = vectorSchema.getFields();
    assertNotNull(vectorFieldSchemas, "Tuple field schemas are null");
    if (vectorFieldSchemas.length == 1 && vectorFieldSchemas[0].getType() == DataType.BAG) {
        // has to be sparse format, with the cardinality supplied outside the tuple
        Preconditions.checkNotNull(cardinality, "Cardinality undefined");
        checkSparseVectorEntriesSchema(vectorFieldSchemas[0].getSchema());
    } else if (vectorFieldSchemas.length == 2 && vectorFieldSchemas[1].getType() == DataType.BAG) {
        // has to be sparse format, with the cardinality carried in the tuple itself
        Preconditions.checkState(cardinality == null, "Cardinality already defined");
        // check tuple[0] == cardinality:int
        assertFieldTypeEquals(DataType.INTEGER, vectorFieldSchemas[0].getType(), "tuple[0]");
        checkSparseVectorEntriesSchema(vectorFieldSchemas[1].getSchema());
    } else {
        // has to be dense format: every field is checked for a numeric type
        for (int i = 0; i < vectorFieldSchemas.length; ++i) {
            assertFieldTypeIsNumeric(vectorFieldSchemas[i].getType(), "tuple[" + i + "]");
        }
    }
}
Also used : ResourceSchema(org.apache.pig.ResourceSchema) ResourceFieldSchema(org.apache.pig.ResourceSchema.ResourceFieldSchema)

Example 35 with ResourceFieldSchema

use of org.apache.pig.ResourceSchema.ResourceFieldSchema in project elephant-bird by twitter.

the class SequenceFileLoader method getSchema.

/**
 * Assembles a two-field schema from the load schemas reported by the
 * configured key and value converters. The fields are named "key" and
 * "value". If either converter reports no load schema, {@code null} is
 * returned.
 *
 * @param location not read by this method
 * @param job not read by this method
 * @return the combined schema, or {@code null} if the key or value schema is
 *     unavailable
 * @throws IOException declared by the overridden method signature
 */
@Override
public ResourceSchema getSchema(String location, Job job) throws IOException {
    // Key side first: give up immediately when the converter has no schema.
    final ResourceFieldSchema keyField = config.keyConverter.getLoadSchema();
    if (keyField == null) {
        return null;
    }
    keyField.setName("key");

    // Value side, handled the same way.
    final ResourceFieldSchema valueField = config.valueConverter.getLoadSchema();
    if (valueField == null) {
        return null;
    }
    valueField.setName("value");

    // Combine both fields into the resulting schema.
    final ResourceFieldSchema[] keyAndValue = { keyField, valueField };
    final ResourceSchema combined = new ResourceSchema();
    combined.setFields(keyAndValue);
    return combined;
}
Also used : ResourceSchema(org.apache.pig.ResourceSchema) ResourceFieldSchema(org.apache.pig.ResourceSchema.ResourceFieldSchema)

Aggregations

ResourceFieldSchema (org.apache.pig.ResourceSchema.ResourceFieldSchema): 42, ResourceSchema (org.apache.pig.ResourceSchema): 22, IOException (java.io.IOException): 16, ArrayList (java.util.ArrayList): 7, Map (java.util.Map): 7, Tuple (org.apache.pig.data.Tuple): 7, HCatFieldSchema (org.apache.hive.hcatalog.data.schema.HCatFieldSchema): 5, BasicDBObjectBuilder (com.mongodb.BasicDBObjectBuilder): 4, SQLException (java.sql.SQLException): 4, HashMap (java.util.HashMap): 4, Field (org.apache.avro.Schema.Field): 4, Schema (org.apache.pig.impl.logicalLayer.schema.Schema): 4, BasicBSONObject (org.bson.BasicBSONObject): 4, Test (org.junit.Test): 4, List (java.util.List): 3, DataBag (org.apache.pig.data.DataBag): 3, FieldSchema (org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema): 3, BSONObject (org.bson.BSONObject): 3, DateTime (org.joda.time.DateTime): 3, LinkedHashMap (java.util.LinkedHashMap): 2