Search in sources :

Example 31 with ResourceSchema

use of org.apache.pig.ResourceSchema in project elephant-bird by twitter.

the class ProtobufWritableConverter method checkStoreSchema.

/**
 * Verifies that the schema handed to the store function is a tuple whose
 * nested schema matches {@code expectedSchema}.
 *
 * <p>The checks run in order: the field schema must be non-null, its type
 * must be {@code DataType.TUPLE}, its nested schema must be non-null, and the
 * nested schema converted to a Pig {@code Schema} must compare equal to
 * {@code expectedSchema} via {@code Schema.equals(expected, actual, false, true)}.
 *
 * @param schema field schema of the data about to be stored
 * @throws IOException declared by the overridden method; may be raised while
 *         converting the nested schema (NOTE(review): confirm against
 *         {@code Schema.getPigSchema})
 */
@Override
public void checkStoreSchema(ResourceFieldSchema schema) throws IOException {
    Preconditions.checkNotNull(schema, "Schema is null");

    // The outer field has to be a tuple; both type names are resolved for the error message.
    final boolean isTuple = DataType.TUPLE == schema.getType();
    final String wantedTypeName = DataType.findTypeName(DataType.TUPLE);
    final String foundTypeName = DataType.findTypeName(schema.getType());
    Preconditions.checkArgument(isTuple, "Expected schema type '%s' but found type '%s'", wantedTypeName, foundTypeName);

    // The tuple must carry a nested schema that can be compared against the expected one.
    final ResourceSchema nested = schema.getSchema();
    Preconditions.checkNotNull(nested, "Child schema is null");

    final Schema found = Schema.getPigSchema(nested);
    final boolean schemasMatch = Schema.equals(expectedSchema, found, false, true);
    Preconditions.checkArgument(schemasMatch, "Expected store schema '%s' but found schema '%s'", expectedSchema, found);
}
Also used : ResourceSchema(org.apache.pig.ResourceSchema) ResourceFieldSchema(org.apache.pig.ResourceSchema.ResourceFieldSchema) Schema(org.apache.pig.impl.logicalLayer.schema.Schema) FieldSchema(org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema)

Example 32 with ResourceSchema

use of org.apache.pig.ResourceSchema in project elephant-bird by twitter.

the class SequenceFileLoader method getSchema.

/**
 * Builds the two-field load schema ({@code key}, {@code value}) from the
 * schemas reported by the configured key and value converters.
 *
 * <p>If either converter reports no load schema ({@code null}), this method
 * returns {@code null}. Note that the field schema objects returned by the
 * converters are renamed in place to {@code "key"} and {@code "value"}.
 *
 * @param location input location (not consulted by this method)
 * @param job job context (not consulted by this method)
 * @return a schema with fields {@code key} and {@code value}, or {@code null}
 *         when either converter has no load schema
 * @throws IOException declared by the overridden method
 */
@Override
public ResourceSchema getSchema(String location, Job job) throws IOException {
    // Key side: without a key schema there is nothing to report.
    final ResourceFieldSchema keyField = config.keyConverter.getLoadSchema();
    if (keyField == null) {
        return null;
    }
    keyField.setName("key");

    // Value side: same rule as for the key.
    final ResourceFieldSchema valueField = config.valueConverter.getLoadSchema();
    if (valueField == null) {
        return null;
    }
    valueField.setName("value");

    // Assemble the (key, value) schema.
    final ResourceFieldSchema[] fields = { keyField, valueField };
    final ResourceSchema tupleSchema = new ResourceSchema();
    tupleSchema.setFields(fields);
    return tupleSchema;
}
Also used : ResourceSchema(org.apache.pig.ResourceSchema) ResourceFieldSchema(org.apache.pig.ResourceSchema.ResourceFieldSchema)

Example 33 with ResourceSchema

use of org.apache.pig.ResourceSchema in project elephant-bird by twitter.

the class VectorWritableConverter method checkSparseVectorEntriesSchema.

/**
 * Validates the schema of the {@code entries} field of a sparse vector.
 *
 * <p>The expected shape is a schema with exactly one field of type tuple,
 * where that tuple has exactly two fields: an integer at position 0 and a
 * numeric type at position 1.
 *
 * @param entriesSchema schema of the entries field; must not be null
 * @throws IOException declared for the assertion helpers
 *         (NOTE(review): confirm the exact failure type raised by the
 *         {@code assert*} helpers)
 */
private void checkSparseVectorEntriesSchema(ResourceSchema entriesSchema) throws IOException {
    // entries must have exactly one field
    assertNotNull(entriesSchema, "ResourceSchema of entries is null");
    final ResourceFieldSchema[] outerFields = entriesSchema.getFields();
    assertNotNull(outerFields, "Tuple field schemas are null");
    assertTupleLength(1, outerFields.length, "entries");

    // entries[0] must be a tuple
    final ResourceFieldSchema entryField = outerFields[0];
    assertFieldTypeEquals(DataType.TUPLE, entryField.getType(), "entries[0]");

    // entries[0] must have exactly two fields
    final ResourceSchema entrySchema = entryField.getSchema();
    assertNotNull(entrySchema, "ResourceSchema of entries[0] is null");
    final ResourceFieldSchema[] entryFields = entrySchema.getFields();
    assertNotNull(entryFields, "Tuple field schemas are null");
    assertTupleLength(2, entryFields.length, "entries[0]");

    // entries[0][0] is the index and must be an integer
    assertFieldTypeEquals(DataType.INTEGER, entryFields[0].getType(), "entries[0][0]");

    // entries[0][1] is the value and must be of a numeric type
    assertFieldTypeIsNumeric(entryFields[1].getType(), "entries[0][1]");
}
Also used : ResourceSchema(org.apache.pig.ResourceSchema) ResourceFieldSchema(org.apache.pig.ResourceSchema.ResourceFieldSchema)

Example 34 with ResourceSchema

use of org.apache.pig.ResourceSchema in project elephant-bird by twitter.

the class VectorWritableConverter method checkStoreSchema.

/**
 * Validates that the schema handed to the store function describes a vector
 * in one of the accepted tuple layouts.
 *
 * <ul>
 *   <li>One field of type bag: sparse format; {@code cardinality} must
 *       already be set, and the bag schema is checked by
 *       {@code checkSparseVectorEntriesSchema}.</li>
 *   <li>Two fields with a bag at position 1: sparse format; {@code cardinality}
 *       must not be set, position 0 must be an integer, and the bag schema is
 *       checked by {@code checkSparseVectorEntriesSchema}.</li>
 *   <li>Anything else: dense format; every field must be of a numeric type.</li>
 * </ul>
 *
 * @param schema field schema of the data about to be stored; must be a tuple
 * @throws IOException declared by the overridden method and the assertion
 *         helpers (NOTE(review): confirm the exact failure type raised by the
 *         {@code assert*} helpers)
 */
@Override
public void checkStoreSchema(ResourceFieldSchema schema) throws IOException {
    assertFieldTypeEquals(DataType.TUPLE, schema.getType(), "tuple");
    final ResourceSchema tupleSchema = schema.getSchema();
    assertNotNull(tupleSchema, "ResourceSchema for tuple is null");
    final ResourceFieldSchema[] fields = tupleSchema.getFields();
    assertNotNull(fields, "Tuple field schemas are null");

    final int fieldCount = fields.length;

    // Sparse format, cardinality supplied outside the tuple: (entries:bag)
    if (fieldCount == 1 && fields[0].getType() == DataType.BAG) {
        Preconditions.checkNotNull(cardinality, "Cardinality undefined");
        checkSparseVectorEntriesSchema(fields[0].getSchema());
        return;
    }

    // Sparse format, cardinality carried in the tuple: (cardinality:int, entries:bag)
    if (fieldCount == 2 && fields[1].getType() == DataType.BAG) {
        Preconditions.checkState(cardinality == null, "Cardinality already defined");
        assertFieldTypeEquals(DataType.INTEGER, fields[0].getType(), "tuple[0]");
        checkSparseVectorEntriesSchema(fields[1].getSchema());
        return;
    }

    // Dense format: every position must hold a numeric type.
    int position = 0;
    for (final ResourceFieldSchema field : fields) {
        assertFieldTypeIsNumeric(field.getType(), "tuple[" + position + "]");
        position++;
    }
}
Also used : ResourceSchema(org.apache.pig.ResourceSchema) ResourceFieldSchema(org.apache.pig.ResourceSchema.ResourceFieldSchema)

Example 35 with ResourceSchema

use of org.apache.pig.ResourceSchema in project parquet-mr by apache.

the class ParquetLoader method getSchema.

/**
 * Returns the Pig resource schema for the given location.
 *
 * <p>When debug logging is enabled, the call is logged together with the
 * job's id and name. The method then calls {@code setInput(location, job)}
 * and wraps the {@code schema} field in a new {@code ResourceSchema}.
 * NOTE(review): presumably {@code setInput} is what populates {@code schema};
 * confirm against its implementation.
 *
 * @param location input location passed on to {@code setInput}
 * @param job job passed on to {@code setInput}
 * @return a new {@code ResourceSchema} built from the {@code schema} field
 * @throws IOException declared by the overridden method
 */
@Override
public ResourceSchema getSchema(String location, Job job) throws IOException {
    if (LOG.isDebugEnabled()) {
        // Describe the job by id and name only, and only when debug output is wanted.
        LOG.debug("LoadMetadata.getSchema({}, {})", location,
                String.format("job[id=%s, name=%s]", job.getJobID(), job.getJobName()));
    }

    setInput(location, job);

    final ResourceSchema resourceSchema = new ResourceSchema(schema);
    return resourceSchema;
}
Also used : ResourceSchema(org.apache.pig.ResourceSchema) PigSchemaConverter.pigSchemaToString(org.apache.parquet.pig.PigSchemaConverter.pigSchemaToString)

Aggregations

ResourceSchema (org.apache.pig.ResourceSchema)35 ResourceFieldSchema (org.apache.pig.ResourceSchema.ResourceFieldSchema)20 Schema (org.apache.pig.impl.logicalLayer.schema.Schema)11 Test (org.junit.Test)10 IOException (java.io.IOException)9 ArrayList (java.util.ArrayList)6 HCatFieldSchema (org.apache.hive.hcatalog.data.schema.HCatFieldSchema)6 Properties (java.util.Properties)5 FieldSchema (org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema)4 UDFContext (org.apache.pig.impl.util.UDFContext)4 HashMap (java.util.HashMap)3 List (java.util.List)3 Map (java.util.Map)3 HCatSchema (org.apache.hive.hcatalog.data.schema.HCatSchema)3 Tuple (org.apache.pig.data.Tuple)3 Descriptors (com.google.protobuf.Descriptors)2 BasicDBObjectBuilder (com.mongodb.BasicDBObjectBuilder)2 SQLException (java.sql.SQLException)2 LinkedHashMap (java.util.LinkedHashMap)2 Field (org.apache.avro.Schema.Field)2