use of org.apache.pig.ResourceSchema in project elephant-bird by twitter.
the class ProtobufWritableConverter method checkStoreSchema.
/**
 * Validates the schema of the field about to be stored against the schema this converter expects.
 *
 * <p>The field must be of type tuple and carry a nested schema; that nested schema is converted to
 * a Pig {@code Schema} and compared with {@code expectedSchema}.
 *
 * @param schema field schema to validate; must not be null
 * @throws IOException declared by the overridden method; the checks themselves fail through
 *     {@code Preconditions} (presumably Guava's, i.e. unchecked exceptions -- confirm against imports)
 */
@Override
public void checkStoreSchema(ResourceFieldSchema schema) throws IOException {
  Preconditions.checkNotNull(schema, "Schema is null");

  // The field itself has to be a tuple.
  boolean isTuple = DataType.TUPLE == schema.getType();
  Preconditions.checkArgument(
      isTuple,
      "Expected schema type '%s' but found type '%s'",
      DataType.findTypeName(DataType.TUPLE),
      DataType.findTypeName(schema.getType()));

  // The tuple must describe its fields.
  ResourceSchema nestedSchema = schema.getSchema();
  Preconditions.checkNotNull(nestedSchema, "Child schema is null");

  // Compare in Pig's Schema representation; the flags (false, true) are passed through unchanged.
  Schema actualSchema = Schema.getPigSchema(nestedSchema);
  boolean schemasMatch = Schema.equals(expectedSchema, actualSchema, false, true);
  Preconditions.checkArgument(
      schemasMatch,
      "Expected store schema '%s' but found schema '%s'",
      expectedSchema,
      actualSchema);
}
use of org.apache.pig.ResourceSchema in project elephant-bird by twitter.
the class SequenceFileLoader method getSchema.
/**
 * Assembles the load schema from the schemas reported by the key and value converters.
 *
 * <p>The key converter is consulted first; the value converter is only consulted when the key
 * converter reported a schema.
 *
 * @param location not read by this method
 * @param job not read by this method
 * @return a schema with two fields named {@code "key"} and {@code "value"}, or {@code null} when
 *     either converter returns no load schema
 * @throws IOException declared by the overridden method
 */
@Override
public ResourceSchema getSchema(String location, Job job) throws IOException {
  // Key field: give up when the key converter has no schema to offer.
  ResourceFieldSchema keyField = config.keyConverter.getLoadSchema();
  if (keyField == null) {
    return null;
  }
  keyField.setName("key");

  // Value field: same rule as for the key.
  ResourceFieldSchema valueField = config.valueConverter.getLoadSchema();
  if (valueField == null) {
    return null;
  }
  valueField.setName("value");

  // Combine both fields, key first, into the resulting schema.
  ResourceFieldSchema[] fields = new ResourceFieldSchema[] { keyField, valueField };
  ResourceSchema loadSchema = new ResourceSchema();
  loadSchema.setFields(fields);
  return loadSchema;
}
use of org.apache.pig.ResourceSchema in project elephant-bird by twitter.
the class VectorWritableConverter method checkSparseVectorEntriesSchema.
/**
 * Checks that the schema of a sparse vector's entries has the shape
 * {@code (entry: (index: int, value: numeric))}.
 *
 * <p>All checks are delegated to this class's {@code assert*} helpers, in order: the entries
 * schema has exactly one field, that field is a tuple, the tuple has exactly two fields, the first
 * is an integer and the second passes {@code assertFieldTypeIsNumeric}.
 *
 * @param entriesSchema schema of the entries; reported through {@code assertNotNull} when null
 * @throws IOException presumably raised by the assert helpers on a failed check -- confirm
 *     against their definitions
 */
private void checkSparseVectorEntriesSchema(ResourceSchema entriesSchema) throws IOException {
  // entries must consist of exactly one field
  assertNotNull(entriesSchema, "ResourceSchema of entries is null");
  ResourceFieldSchema[] outerFields = entriesSchema.getFields();
  assertNotNull(outerFields, "Tuple field schemas are null");
  assertTupleLength(1, outerFields.length, "entries");

  // that single field must be a tuple (the entry)
  ResourceFieldSchema entryField = outerFields[0];
  assertFieldTypeEquals(DataType.TUPLE, entryField.getType(), "entries[0]");

  // the entry tuple must consist of exactly two fields
  ResourceSchema entrySchema = entryField.getSchema();
  assertNotNull(entrySchema, "ResourceSchema of entries[0] is null");
  ResourceFieldSchema[] entryFields = entrySchema.getFields();
  assertNotNull(entryFields, "Tuple field schemas are null");
  assertTupleLength(2, entryFields.length, "entries[0]");

  // first entry field: the index, which must be an int
  assertFieldTypeEquals(DataType.INTEGER, entryFields[0].getType(), "entries[0][0]");

  // second entry field: the value, which must be numeric
  assertFieldTypeIsNumeric(entryFields[1].getType(), "entries[0][1]");
}
use of org.apache.pig.ResourceSchema in project elephant-bird by twitter.
the class VectorWritableConverter method checkStoreSchema.
/**
 * Validates the schema of a vector tuple about to be stored.
 *
 * <p>Three layouts are accepted:
 * <ul>
 *   <li>{@code (entries: bag)} -- sparse; {@code cardinality} must already be set on this
 *       converter;</li>
 *   <li>{@code (cardinality: int, entries: bag)} -- sparse; {@code cardinality} must not be set on
 *       this converter;</li>
 *   <li>anything else -- dense; every field must pass {@code assertFieldTypeIsNumeric}.</li>
 * </ul>
 *
 * @param schema field schema to validate; it is dereferenced without a null check
 * @throws IOException presumably raised by the assert helpers on a failed check -- confirm
 *     against their definitions
 */
@Override
public void checkStoreSchema(ResourceFieldSchema schema) throws IOException {
  assertFieldTypeEquals(DataType.TUPLE, schema.getType(), "tuple");

  ResourceSchema tupleSchema = schema.getSchema();
  assertNotNull(tupleSchema, "ResourceSchema for tuple is null");
  ResourceFieldSchema[] fields = tupleSchema.getFields();
  assertNotNull(fields, "Tuple field schemas are null");

  final int fieldCount = fields.length;

  // Sparse layout without cardinality: a lone bag of entries.
  if (fieldCount == 1 && fields[0].getType() == DataType.BAG) {
    Preconditions.checkNotNull(cardinality, "Cardinality undefined");
    checkSparseVectorEntriesSchema(fields[0].getSchema());
    return;
  }

  // Sparse layout with cardinality: an int followed by the bag of entries.
  if (fieldCount == 2 && fields[1].getType() == DataType.BAG) {
    Preconditions.checkState(cardinality == null, "Cardinality already defined");
    assertFieldTypeEquals(DataType.INTEGER, fields[0].getType(), "tuple[0]");
    checkSparseVectorEntriesSchema(fields[1].getSchema());
    return;
  }

  // Dense layout: each position has to hold a numeric value.
  for (int pos = 0; pos < fieldCount; pos++) {
    assertFieldTypeIsNumeric(fields[pos].getType(), "tuple[" + pos + "]");
  }
}
use of org.apache.pig.ResourceSchema in project parquet-mr by apache.
the class ParquetLoader method getSchema.
/**
 * Returns the schema for the given location.
 *
 * <p>Logs the call at debug level, calls {@code setInput(location, job)} and then wraps this
 * loader's {@code schema} field in a new {@code ResourceSchema}. Presumably {@code setInput} is
 * what populates {@code schema} -- confirm against its definition.
 *
 * @param location input location, forwarded to {@code setInput}
 * @param job job, forwarded to {@code setInput}; its id and name appear in the debug log line
 * @return a new {@code ResourceSchema} built from the {@code schema} field
 * @throws IOException declared by the overridden method
 */
@Override
public ResourceSchema getSchema(String location, Job job) throws IOException {
  // Only build the job description when it will actually be logged.
  if (LOG.isDebugEnabled()) {
    final String jobDescription =
        String.format("job[id=%s, name=%s]", job.getJobID(), job.getJobName());
    LOG.debug("LoadMetadata.getSchema({}, {})", location, jobDescription);
  }

  setInput(location, job);

  final ResourceSchema result = new ResourceSchema(schema);
  return result;
}
Aggregations