Use of org.apache.pig.ResourceSchema.ResourceFieldSchema in project mongo-hadoop by mongodb.
Class JSONPigReplace, method substitute.
/**
 * Substitutes the Pig objects held in tuple {@code t} into the stored initial
 * BSON objects ({@code initBSONs}).
 *
 * @param t  Pig tuple containing the Pig objects
 * @param s  schema describing {@code t}; accepted as a schema string, a
 *           {@code Schema} or a {@code ResourceSchema}
 * @param un string used to represent un-named schema fields
 * @return array of BasicBSONObjects that contain all replacements for "marked" strings
 * @throws IllegalArgumentException if {@code s} cannot be resolved to a {@code ResourceSchema}
 * @throws Exception if reading the tuple or one of the helper calls fails
 */
public BasicBSONObject[] substitute(final Tuple t, final Object s, final String un) throws Exception {
    unnamedStr = un;

    // Resolve whatever schema representation was supplied into its field list.
    // Any failure here (including an unsupported type) is reported as an invalid schema.
    final ResourceFieldSchema[] fieldSchemas;
    try {
        final ResourceSchema resolved;
        if (s instanceof ResourceSchema) {
            resolved = (ResourceSchema) s;
        } else if (s instanceof Schema) {
            resolved = new ResourceSchema((Schema) s);
        } else if (s instanceof String) {
            resolved = new ResourceSchema(Utils.getSchemaFromString((String) s));
        } else {
            throw new IllegalArgumentException(
                "Schema must be represented either by a string or a Schema object, not " + s);
        }
        fieldSchemas = resolved.getFields();
    } catch (Exception e) {
        throw new IllegalArgumentException("Invalid Schema Format", e);
    }

    // Write each tuple element into a BSON document, pairing it with the
    // schema field at the same position.
    final BasicDBObjectBuilder tupleDoc = BasicDBObjectBuilder.start();
    int position = 0;
    for (final ResourceFieldSchema fieldSchema : fieldSchemas) {
        writeField(tupleDoc, fieldSchema, t.get(position++));
    }

    // Populate the map of replacement strings -> objects from the tuple's BSON form.
    fillReplacementMap((BasicBSONObject) tupleDoc.get());

    // Swap replacement strings (of form $elem) for their corresponding objects.
    return replaceAll(initBSONs, reps);
}
Use of org.apache.pig.ResourceSchema.ResourceFieldSchema in project eiger by wlloyd.
Class CassandraStorage, method getSchema.
/**
 * Builds the Pig schema for rows loaded from the configured column family.
 *
 * <p>The resulting schema has two top-level fields: {@code key}, and a bag named
 * {@code columns} that holds a single tuple. That tuple starts with a default
 * {@code name}/{@code value} pair and is followed by one name/{@code value} pair
 * for every entry in the column family's {@code column_metadata}.
 *
 * <p>As used here, the default marshaller list supplies the column-name type at
 * index 0, the default value validator at index 1 and the key type at index 2.
 *
 * @param location location handed to {@code setLocation}
 * @param job      job handed to {@code setLocation}
 * @return the schema, or {@code null} when the column family's type is "Super"
 * @throws IOException declared by the signature; NOTE(review): presumably raised
 *                     by the helper calls made here - confirm
 */
public ResourceSchema getSchema(String location, Job job) throws IOException {
    setLocation(location, job);
    CfDef cfDef = getCfDef(loadSignature);
    // No schema is produced for super column families.
    if (cfDef.column_type.equals("Super")) {
        return null;
    }
    // top-level schema, no type
    ResourceSchema schema = new ResourceSchema();
    // get default marshallers and validators
    List<AbstractType> marshallers = getDefaultMarshallers(cfDef);
    Map<ByteBuffer, AbstractType> validators = getValidatorMap(cfDef);
    // add key
    ResourceFieldSchema keyFieldSchema = new ResourceFieldSchema();
    keyFieldSchema.setName("key");
    keyFieldSchema.setType(getPigType(marshallers.get(2)));
    // will become the bag of tuples
    ResourceFieldSchema bagFieldSchema = new ResourceFieldSchema();
    bagFieldSchema.setName("columns");
    bagFieldSchema.setType(DataType.BAG);
    ResourceSchema bagSchema = new ResourceSchema();
    List<ResourceFieldSchema> tupleFields = new ArrayList<ResourceFieldSchema>();
    // default comparator/validator
    ResourceSchema innerTupleSchema = new ResourceSchema();
    ResourceFieldSchema tupleField = new ResourceFieldSchema();
    tupleField.setType(DataType.TUPLE);
    tupleField.setSchema(innerTupleSchema);
    ResourceFieldSchema colSchema = new ResourceFieldSchema();
    colSchema.setName("name");
    colSchema.setType(getPigType(marshallers.get(0)));
    tupleFields.add(colSchema);
    ResourceFieldSchema valSchema = new ResourceFieldSchema();
    AbstractType validator = marshallers.get(1);
    valSchema.setName("value");
    valSchema.setType(getPigType(validator));
    tupleFields.add(valSchema);
    // defined validators/indexes
    for (ColumnDef cdef : cfDef.column_metadata) {
        colSchema = new ResourceFieldSchema();
        // NOTE(review): decodes the name bytes with the platform default charset,
        // so the field name can differ between machines - confirm the intended encoding.
        colSchema.setName(new String(cdef.getName()));
        colSchema.setType(getPigType(marshallers.get(0)));
        tupleFields.add(colSchema);
        valSchema = new ResourceFieldSchema();
        // The validator map is keyed by ByteBuffer while getName() yields a byte[];
        // a raw array never equals a ByteBuffer key, so wrap it to look up by content.
        validator = validators.get(ByteBuffer.wrap(cdef.getName()));
        if (validator == null) {
            // No column-specific validator: fall back to the default one.
            validator = marshallers.get(1);
        }
        valSchema.setName("value");
        valSchema.setType(getPigType(validator));
        tupleFields.add(valSchema);
    }
    innerTupleSchema.setFields(tupleFields.toArray(new ResourceFieldSchema[tupleFields.size()]));
    // a bag can contain only one tuple, but that tuple can contain anything
    bagSchema.setFields(new ResourceFieldSchema[] { tupleField });
    bagFieldSchema.setSchema(bagSchema);
    // top level schema contains everything
    schema.setFields(new ResourceFieldSchema[] { keyFieldSchema, bagFieldSchema });
    return schema;
}
Aggregations