Use of org.apache.pig.ResourceSchema.ResourceFieldSchema in project hive by apache.
From the class PigHCatUtil, method getBagSubSchema.
protected static ResourceSchema getBagSubSchema(HCatFieldSchema hfs) throws IOException {
  // there are two cases - array<Type> and array<struct<...>>
  // in either case the element type of the array is represented in a
  // tuple field schema in the bag's field schema - the second case (struct)
  // more naturally translates to the tuple - in the first case (array<Type>)
  // we simulate the tuple by putting the single field in a tuple
  Properties props = UDFContext.getUDFContext().getClientSystemProps();
  String innerTupleName = HCatConstants.HCAT_PIG_INNER_TUPLE_NAME_DEFAULT;
  if (props != null && props.containsKey(HCatConstants.HCAT_PIG_INNER_TUPLE_NAME)) {
    innerTupleName = props.getProperty(HCatConstants.HCAT_PIG_INNER_TUPLE_NAME)
        .replaceAll("FIELDNAME", hfs.getName());
  }
  String innerFieldName = HCatConstants.HCAT_PIG_INNER_FIELD_NAME_DEFAULT;
  if (props != null && props.containsKey(HCatConstants.HCAT_PIG_INNER_FIELD_NAME)) {
    innerFieldName = props.getProperty(HCatConstants.HCAT_PIG_INNER_FIELD_NAME)
        .replaceAll("FIELDNAME", hfs.getName());
  }
  ResourceFieldSchema[] bagSubFieldSchemas = new ResourceFieldSchema[1];
  bagSubFieldSchemas[0] = new ResourceFieldSchema()
      .setName(innerTupleName)
      .setDescription("The tuple in the bag")
      .setType(DataType.TUPLE);
  HCatFieldSchema arrayElementFieldSchema = hfs.getArrayElementSchema().get(0);
  if (arrayElementFieldSchema.getType() == Type.STRUCT) {
    bagSubFieldSchemas[0].setSchema(getTupleSubSchema(arrayElementFieldSchema));
  } else if (arrayElementFieldSchema.getType() == Type.ARRAY) {
    ResourceSchema s = new ResourceSchema();
    List<ResourceFieldSchema> lrfs = Arrays.asList(getResourceSchemaFromFieldSchema(arrayElementFieldSchema));
    s.setFields(lrfs.toArray(new ResourceFieldSchema[lrfs.size()]));
    bagSubFieldSchemas[0].setSchema(s);
  } else {
    // the element type is not a tuple - so no subschema
    ResourceFieldSchema[] innerTupleFieldSchemas = new ResourceFieldSchema[1];
    innerTupleFieldSchemas[0] = new ResourceFieldSchema()
        .setName(innerFieldName)
        .setDescription("The inner field in the tuple in the bag")
        .setType(getPigType(arrayElementFieldSchema))
        .setSchema(null);
    bagSubFieldSchemas[0].setSchema(new ResourceSchema().setFields(innerTupleFieldSchemas));
  }
  return new ResourceSchema().setFields(bagSubFieldSchemas);
}
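For orientation, here is a minimal sketch of how this method might be exercised for an array<string> column. The HCatFieldSchema constructors mirror the ones used in the test below; the package declaration and import paths assume the current org.apache.hive.hcatalog layout, and a same-package class is used because getBagSubSchema is protected. The "tags"/"tag" names are made up for illustration.

// Hypothetical sketch, not part of the Hive sources.
package org.apache.hive.hcatalog.pig;

import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
import org.apache.hive.hcatalog.data.schema.HCatSchema;
import org.apache.pig.ResourceSchema;
import com.google.common.collect.Lists;

public class BagSubSchemaSketch {
  public static void main(String[] args) throws Exception {
    // An HCat column "tags" of type array<string>; the element schema holds the single inner field.
    HCatSchema elementSchema = new HCatSchema(Lists.newArrayList(
        new HCatFieldSchema("tag", HCatFieldSchema.Type.STRING, null)));
    HCatFieldSchema arrayColumn =
        new HCatFieldSchema("tags", HCatFieldSchema.Type.ARRAY, elementSchema, null);

    // Falls into the final else-branch above: a bag of single-field tuples.
    ResourceSchema bagSchema = PigHCatUtil.getBagSubSchema(arrayColumn);
    System.out.println(bagSchema);
  }
}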
Use of org.apache.pig.ResourceSchema.ResourceFieldSchema in project hive by apache.
From the class TestPigHCatUtil, method testGetBagSubSchemaConfigured.
@Test
public void testGetBagSubSchemaConfigured() throws Exception {
  // NOTE: pig-0.8 sets client system properties by actually getting the client
  // system properties. Starting in pig-0.9 you must pass the properties in.
  // When updating our pig dependency this will need to be updated.
  System.setProperty(HCatConstants.HCAT_PIG_INNER_TUPLE_NAME, "t");
  System.setProperty(HCatConstants.HCAT_PIG_INNER_FIELD_NAME, "FIELDNAME_tuple");
  UDFContext.getUDFContext().setClientSystemProps(System.getProperties());

  // Define the expected schema.
  ResourceFieldSchema[] bagSubFieldSchemas = new ResourceFieldSchema[1];
  bagSubFieldSchemas[0] = new ResourceFieldSchema()
      .setName("t")
      .setDescription("The tuple in the bag")
      .setType(DataType.TUPLE);
  ResourceFieldSchema[] innerTupleFieldSchemas = new ResourceFieldSchema[1];
  innerTupleFieldSchemas[0] = new ResourceFieldSchema().setName("llama_tuple").setType(DataType.CHARARRAY);
  bagSubFieldSchemas[0].setSchema(new ResourceSchema().setFields(innerTupleFieldSchemas));
  ResourceSchema expected = new ResourceSchema().setFields(bagSubFieldSchemas);

  // Get the actual converted schema.
  HCatSchema actualHCatSchema = new HCatSchema(Lists.newArrayList(
      new HCatFieldSchema("innerLlama", HCatFieldSchema.Type.STRING, null)));
  HCatFieldSchema actualHCatFieldSchema =
      new HCatFieldSchema("llama", HCatFieldSchema.Type.ARRAY, actualHCatSchema, null);
  ResourceSchema actual = PigHCatUtil.getBagSubSchema(actualHCatFieldSchema);

  Assert.assertEquals(expected.toString(), actual.toString());

  // Clean up System properties that were set by this test.
  System.clearProperty(HCatConstants.HCAT_PIG_INNER_TUPLE_NAME);
  System.clearProperty(HCatConstants.HCAT_PIG_INNER_FIELD_NAME);
}
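The configured names in this test rely on the same FIELDNAME substitution that getBagSubSchema performs. A tiny sketch of that mechanic in isolation, using the values from the test ("FIELDNAME_tuple" template, column "llama"):

// Hypothetical sketch of the placeholder substitution used above.
public class FieldNameSubstitutionSketch {
  public static void main(String[] args) {
    String template = "FIELDNAME_tuple"; // value set for HCAT_PIG_INNER_FIELD_NAME in the test
    String columnName = "llama";         // name of the HCat array column
    // Same call as in getBagSubSchema: the FIELDNAME placeholder is replaced by the column name.
    String innerFieldName = template.replaceAll("FIELDNAME", columnName);
    System.out.println(innerFieldName);  // prints "llama_tuple", matching the expected schema
  }
}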
Use of org.apache.pig.ResourceSchema.ResourceFieldSchema in project mongo-hadoop by mongodb.
From the class JSONPigReplace, method substitute.
/**
 * Returns the result of substituting Pig objects from Tuple t into initStr.
 *
 * @param t  Pig tuple containing Pig objects
 * @param s  schema representing Tuple t (a String, Schema, or ResourceSchema)
 * @param un string used to name un-named schema fields
 * @return array of BasicBSONObjects that contain all replacements for "marked" strings
 */
public BasicBSONObject[] substitute(final Tuple t, final Object s, final String un) throws Exception {
  unnamedStr = un;
  final ResourceFieldSchema[] fields;
  try {
    final ResourceSchema schema;
    if (s instanceof String) {
      schema = new ResourceSchema(Utils.getSchemaFromString((String) s));
    } else if (s instanceof Schema) {
      schema = new ResourceSchema((Schema) s);
    } else if (s instanceof ResourceSchema) {
      schema = (ResourceSchema) s;
    } else {
      throw new IllegalArgumentException(
          "Schema must be represented either by a string or a Schema object, not " + s);
    }
    fields = schema.getFields();
  } catch (Exception e) {
    throw new IllegalArgumentException("Invalid Schema Format", e);
  }

  // Turn Tuple t into a BSONObject using the schema provided and store the result in pObj
  final BasicDBObjectBuilder builder = BasicDBObjectBuilder.start();
  for (int i = 0; i < fields.length; i++) {
    writeField(builder, fields[i], t.get(i));
  }
  // BSONObject that represents the Pig Tuple input using the Pig Schema
  BasicBSONObject pObj = (BasicBSONObject) builder.get();
  // Fill the map of replacement strings to the corresponding objects that replace them
  fillReplacementMap(pObj);
  // Now replace the replacement strings (of the form $elem) with the corresponding objects in pObj
  return replaceAll(initBSONs, reps);
}
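A hedged sketch of the three schema representations this method accepts, showing that a string schema and a Pig Schema object both reduce to the same ResourceFieldSchema[] that the loop above iterates over. Utils.getSchemaFromString is Pig's standard helper in org.apache.pig.impl.util; the field names are made up for illustration.

// Hypothetical sketch, independent of JSONPigReplace itself.
import org.apache.pig.ResourceSchema;
import org.apache.pig.ResourceSchema.ResourceFieldSchema;
import org.apache.pig.data.DataType;
import org.apache.pig.impl.logicalLayer.schema.Schema;
import org.apache.pig.impl.util.Utils;

public class SchemaCoercionSketch {
  public static void main(String[] args) throws Exception {
    // 1) Schema described as a string.
    ResourceSchema fromString = new ResourceSchema(Utils.getSchemaFromString("name:chararray, age:int"));

    // 2) Schema described as a Pig Schema object.
    Schema pigSchema = Utils.getSchemaFromString("name:chararray, age:int");
    ResourceSchema fromSchema = new ResourceSchema(pigSchema);

    // Both representations end up as the same field array that substitute() walks over.
    for (ResourceFieldSchema f : fromString.getFields()) {
      System.out.println(f.getName() + " -> " + DataType.findTypeName(f.getType()));
    }
    System.out.println(fromSchema.getFields().length + " fields from the Schema object");
  }
}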
Use of org.apache.pig.ResourceSchema.ResourceFieldSchema in project eiger by wlloyd.
From the class CassandraStorage, method getSchema.
public ResourceSchema getSchema(String location, Job job) throws IOException {
  setLocation(location, job);
  CfDef cfDef = getCfDef(loadSignature);
  if (cfDef.column_type.equals("Super"))
    return null;

  // top-level schema, no type
  ResourceSchema schema = new ResourceSchema();

  // get default marshallers and validators
  List<AbstractType> marshallers = getDefaultMarshallers(cfDef);
  Map<ByteBuffer, AbstractType> validators = getValidatorMap(cfDef);

  // add key
  ResourceFieldSchema keyFieldSchema = new ResourceFieldSchema();
  keyFieldSchema.setName("key");
  keyFieldSchema.setType(getPigType(marshallers.get(2)));

  // will become the bag of tuples
  ResourceFieldSchema bagFieldSchema = new ResourceFieldSchema();
  bagFieldSchema.setName("columns");
  bagFieldSchema.setType(DataType.BAG);
  ResourceSchema bagSchema = new ResourceSchema();
  List<ResourceFieldSchema> tupleFields = new ArrayList<ResourceFieldSchema>();

  // default comparator/validator
  ResourceSchema innerTupleSchema = new ResourceSchema();
  ResourceFieldSchema tupleField = new ResourceFieldSchema();
  tupleField.setType(DataType.TUPLE);
  tupleField.setSchema(innerTupleSchema);
  ResourceFieldSchema colSchema = new ResourceFieldSchema();
  colSchema.setName("name");
  colSchema.setType(getPigType(marshallers.get(0)));
  tupleFields.add(colSchema);
  ResourceFieldSchema valSchema = new ResourceFieldSchema();
  AbstractType validator = marshallers.get(1);
  valSchema.setName("value");
  valSchema.setType(getPigType(validator));
  tupleFields.add(valSchema);

  // defined validators/indexes
  for (ColumnDef cdef : cfDef.column_metadata) {
    colSchema = new ResourceFieldSchema();
    colSchema.setName(new String(cdef.getName()));
    colSchema.setType(getPigType(marshallers.get(0)));
    tupleFields.add(colSchema);
    valSchema = new ResourceFieldSchema();
    validator = validators.get(cdef.getName());
    if (validator == null)
      validator = marshallers.get(1);
    valSchema.setName("value");
    valSchema.setType(getPigType(validator));
    tupleFields.add(valSchema);
  }
  innerTupleSchema.setFields(tupleFields.toArray(new ResourceFieldSchema[tupleFields.size()]));

  // a bag can contain only one tuple, but that tuple can contain anything
  bagSchema.setFields(new ResourceFieldSchema[] { tupleField });
  bagFieldSchema.setSchema(bagSchema);

  // top level schema contains everything
  schema.setFields(new ResourceFieldSchema[] { keyFieldSchema, bagFieldSchema });
  return schema;
}
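For orientation, a small sketch that builds the same top-level shape by hand: (key, columns:{(name, value)}). The chararray types are placeholders only, since in the real method the types come from the column family's comparator and validators via getPigType.

// Hypothetical sketch of the schema shape returned above.
import org.apache.pig.ResourceSchema;
import org.apache.pig.ResourceSchema.ResourceFieldSchema;
import org.apache.pig.data.DataType;

public class CassandraSchemaShapeSketch {
  public static void main(String[] args) throws Exception {
    // Inner tuple: one (name, value) pair; types are placeholders.
    ResourceFieldSchema name = new ResourceFieldSchema().setName("name").setType(DataType.CHARARRAY);
    ResourceFieldSchema value = new ResourceFieldSchema().setName("value").setType(DataType.CHARARRAY);
    ResourceSchema innerTuple = new ResourceSchema()
        .setFields(new ResourceFieldSchema[] { name, value });

    // The bag holds a single tuple field whose schema is the inner tuple.
    ResourceFieldSchema tupleField = new ResourceFieldSchema().setType(DataType.TUPLE).setSchema(innerTuple);
    ResourceFieldSchema columns = new ResourceFieldSchema().setName("columns").setType(DataType.BAG)
        .setSchema(new ResourceSchema().setFields(new ResourceFieldSchema[] { tupleField }));

    // Top level: the row key plus the bag of column tuples.
    ResourceFieldSchema key = new ResourceFieldSchema().setName("key").setType(DataType.CHARARRAY);
    ResourceSchema topLevel = new ResourceSchema()
        .setFields(new ResourceFieldSchema[] { key, columns });
    System.out.println(topLevel);
  }
}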
Use of org.apache.pig.ResourceSchema.ResourceFieldSchema in project phoenix by apache.
From the class PhoenixPigSchemaUtil, method getResourceSchema.
public static ResourceSchema getResourceSchema(final Configuration configuration, Dependencies dependencies) throws IOException {
  final ResourceSchema schema = new ResourceSchema();
  try {
    List<ColumnInfo> columns = null;
    final SchemaType schemaType = PhoenixConfigurationUtil.getSchemaType(configuration);
    if (SchemaType.QUERY.equals(schemaType)) {
      final String sqlQuery = PhoenixConfigurationUtil.getSelectStatement(configuration);
      Preconditions.checkNotNull(sqlQuery, "No Sql Query exists within the configuration");
      final SqlQueryToColumnInfoFunction function = new SqlQueryToColumnInfoFunction(configuration);
      columns = function.apply(sqlQuery);
    } else {
      columns = dependencies.getSelectColumnMetadataList(configuration);
    }
    ResourceFieldSchema[] fields = new ResourceFieldSchema[columns.size()];
    int i = 0;
    for (ColumnInfo cinfo : columns) {
      int sqlType = cinfo.getSqlType();
      PDataType phoenixDataType = PDataType.fromTypeId(sqlType);
      byte pigType = TypeUtil.getPigDataTypeForPhoenixType(phoenixDataType);
      ResourceFieldSchema field = new ResourceFieldSchema();
      field.setType(pigType).setName(cinfo.getDisplayName());
      fields[i++] = field;
    }
    schema.setFields(fields);
  } catch (SQLException sqle) {
    LOG.error(String.format("Error: SQLException [%s] ", sqle.getMessage()));
    throw new IOException(sqle);
  }
  return schema;
}
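The per-column loop above delegates the SQL-to-Pig type mapping to Phoenix's TypeUtil. As a hedged illustration of what it produces, here is a hand-built equivalent for a hypothetical query "SELECT ID, NAME FROM T" where ID is a BIGINT and NAME a VARCHAR; the DataType.LONG and DataType.CHARARRAY constants stand in for TypeUtil.getPigDataTypeForPhoenixType's result and are an assumption, not output of the Phoenix code.

// Hypothetical sketch, not part of the Phoenix sources.
import org.apache.pig.ResourceSchema;
import org.apache.pig.ResourceSchema.ResourceFieldSchema;
import org.apache.pig.data.DataType;

public class PhoenixSchemaSketch {
  public static void main(String[] args) {
    // Hand-built equivalent of the loop above for two hypothetical columns.
    ResourceFieldSchema id = new ResourceFieldSchema();
    id.setType(DataType.LONG).setName("ID");        // BIGINT column, assumed to map to Pig long
    ResourceFieldSchema name = new ResourceFieldSchema();
    name.setType(DataType.CHARARRAY).setName("NAME"); // VARCHAR column, assumed to map to Pig chararray

    ResourceSchema schema = new ResourceSchema()
        .setFields(new ResourceFieldSchema[] { id, name });
    System.out.println(schema); // e.g. ID:long,NAME:chararray
  }
}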