use of org.apache.pig.ResourceSchema.ResourceFieldSchema in project gora by apache.
the class SchemaUtils method checkStoreSchema.
/* ============================= CHECK PIG SCHEMA =================================== */
/**
 * Checks the Pig schema of a relation that is about to be stored. That is, when executing:
 * <pre>
 * STORE relation_data INTO '.' USING org.apache.gora.pig.GoraStorage(
 * 'java.lang.String',
 * 'admin.WebPage',
 * 'baseUrl,status')
 * </pre>
 * the relation's schema is checked before storing.
 *
 * <p>The check consists of verifying that:
 * <ul>
 * <li>the Pig relation has a field called "key",</li>
 * <li>every field declared in the STORE command is present in the Pig relation,</li>
 * <li>every declared field also exists in the persistent entity's schema.</li>
 * </ul>
 * Each declared field is then handed, together with the schema of the matching persistent field,
 * to {@code checkEqualSchema}.
 *
 * @param pigSchema - The Pig Schema to check.
 * @param queryFields - Declared storing fields ("baseUrl" and "status" in the example).
 * @param persistentSchema - The schema of the Persistent entity ("admin.WebPage" in the example).
 * @throws IOException if the "key" field is missing, if a declared field is missing from the Pig relation
 *         or from the persistent schema, or if {@code checkEqualSchema} rejects a field.
 */
public static void checkStoreSchema(ResourceSchema pigSchema, List<String> queryFields, Schema persistentSchema) throws IOException {
    // Expected pig schema: tuple (key, recordfield, recordfield, recordfi...)
    List<String> pigFieldNames = Arrays.asList(pigSchema.fieldNames());

    if (!pigFieldNames.contains("key")) {
        // Log what was actually declared to make the failure easier to diagnose.
        LOG.info("Declared Pig fields: " + String.join(",", pigFieldNames));
        throw new IOException("Expected a field called \"key\", but not found.");
    }

    // All fields are mandatory
    if (!pigFieldNames.containsAll(queryFields)) {
        throw new IOException("Some fields declared in the constructor (" + String.join(",", queryFields) + ") are missing in the tuples to be saved (" + Arrays.toString(pigSchema.fieldNames()) + ")");
    }

    String persistentClassName = persistentSchema.getFullName();
    for (ResourceFieldSchema pigFieldSchema : pigSchema.getFields()) {
        String fieldName = pigFieldSchema.getName();
        if (!queryFields.contains(fieldName)) {
            // Fields of the relation that were not declared in the STORE command are not checked.
            continue;
        }
        Field persistentField = persistentSchema.getField(fieldName);
        if (persistentField == null) {
            throw new IOException("Declared field in Pig [" + fieldName + "] to store does not exist in " + persistentClassName + ".");
        }
        // Reuse the field that was already looked up instead of querying the schema a second time.
        checkEqualSchema(pigFieldSchema, persistentField.schema());
    }
}
use of org.apache.pig.ResourceSchema.ResourceFieldSchema in project phoenix by apache.
the class PhoenixPigSchemaUtil method getResourceSchema.
/**
 * Builds a Pig {@link ResourceSchema} with one field per Phoenix column.
 *
 * <p>When the configured schema type is {@code SchemaType.QUERY}, the columns are derived from the
 * configured select statement; otherwise they are obtained from {@code dependencies}. Each column's
 * SQL type is mapped to a Pig data type and its display name is used as the field name.
 *
 * @param configuration the configuration that the schema type and select statement are read from
 * @param dependencies supplier of the select column metadata when the schema type is not QUERY
 * @return the schema whose fields mirror the resolved columns, in the same order
 * @throws IOException wrapping any {@link SQLException} raised while resolving the columns
 */
public static ResourceSchema getResourceSchema(final Configuration configuration, Dependencies dependencies) throws IOException {
    final ResourceSchema resourceSchema = new ResourceSchema();
    try {
        final List<ColumnInfo> columnInfos;
        if (SchemaType.QUERY.equals(PhoenixConfigurationUtil.getSchemaType(configuration))) {
            final String selectStatement = PhoenixConfigurationUtil.getSelectStatement(configuration);
            Preconditions.checkNotNull(selectStatement, "No Sql Query exists within the configuration");
            columnInfos = new SqlQueryToColumnInfoFunction(configuration).apply(selectStatement);
        } else {
            columnInfos = dependencies.getSelectColumnMetadataList(configuration);
        }

        final ResourceFieldSchema[] fieldSchemas = new ResourceFieldSchema[columnInfos.size()];
        int position = 0;
        for (final ColumnInfo columnInfo : columnInfos) {
            // SQL type id -> Phoenix data type -> Pig data type
            final byte pigType = TypeUtil.getPigDataTypeForPhoenixType(PDataType.fromTypeId(columnInfo.getSqlType()));
            final ResourceFieldSchema fieldSchema = new ResourceFieldSchema();
            fieldSchema.setType(pigType).setName(columnInfo.getDisplayName());
            fieldSchemas[position] = fieldSchema;
            position++;
        }
        resourceSchema.setFields(fieldSchemas);
        return resourceSchema;
    } catch (SQLException sqle) {
        LOG.error(String.format("Error: SQLException [%s] ", sqle.getMessage()));
        throw new IOException(sqle);
    }
}
use of org.apache.pig.ResourceSchema.ResourceFieldSchema in project elephant-bird by twitter.
the class BytesWritableConverter method getLoadSchema.
/**
 * Describes the Pig field produced by this converter on load.
 *
 * @return a field schema whose type is {@code DataType.BYTEARRAY}
 */
@Override
public ResourceFieldSchema getLoadSchema() throws IOException {
    // setType returns the field schema itself, so the configured instance can be returned directly.
    return new ResourceFieldSchema().setType(DataType.BYTEARRAY);
}
use of org.apache.pig.ResourceSchema.ResourceFieldSchema in project elephant-bird by twitter.
the class IntWritableConverter method getLoadSchema.
/**
 * Describes the Pig field produced by this converter on load.
 *
 * @return a field schema whose type is {@code DataType.INTEGER}
 */
@Override
public ResourceFieldSchema getLoadSchema() throws IOException {
    // setType returns the field schema itself, so the configured instance can be returned directly.
    return new ResourceFieldSchema().setType(DataType.INTEGER);
}
use of org.apache.pig.ResourceSchema.ResourceFieldSchema in project elephant-bird by twitter.
the class LongWritableConverter method getLoadSchema.
/**
 * Describes the Pig field produced by this converter on load.
 *
 * @return a field schema whose type is {@code DataType.LONG}
 */
@Override
public ResourceFieldSchema getLoadSchema() throws IOException {
    // setType returns the field schema itself, so the configured instance can be returned directly.
    return new ResourceFieldSchema().setType(DataType.LONG);
}
Aggregations