Use of org.apache.pig.ResourceSchema in project mongo-hadoop by mongodb: class MongoStorage, method writeField.
protected void writeField(final BasicDBObjectBuilder builder, final ResourceSchema.ResourceFieldSchema field, final Object d) throws IOException {
    // If the field is missing or the value is null, write a null
    if (d == null) {
        builder.add(field.getName(), null);
        return;
    }
    ResourceSchema s = field.getSchema();
    // Based on the field's type, write it out
    byte i = field.getType();
    if (i == DataType.INTEGER) {
        builder.add(field.getName(), d);
    } else if (i == DataType.LONG) {
        builder.add(field.getName(), d);
    } else if (i == DataType.FLOAT) {
        builder.add(field.getName(), d);
    } else if (i == DataType.DOUBLE) {
        builder.add(field.getName(), d);
    } else if (i == DataType.BYTEARRAY) {
        builder.add(field.getName(), d.toString());
    } else if (i == DataType.CHARARRAY) {
        builder.add(field.getName(), d);
    } else if (i == DataType.TUPLE) {
        // Given a TUPLE, create a Map so BSONEncoder will eat it
        if (s == null) {
            throw new IOException("Schemas must be fully specified to use this storage function. No schema found for field " + field.getName());
        }
        ResourceFieldSchema[] fs = s.getFields();
        Map<String, Object> m = new LinkedHashMap<String, Object>();
        for (int j = 0; j < fs.length; j++) {
            m.put(fs[j].getName(), ((Tuple) d).get(j));
        }
        builder.add(field.getName(), (Map) m);
    } else if (i == DataType.BAG) {
        // Given a BAG, create an Array so BSONEncoder will eat it.
        ResourceFieldSchema[] fs;
        if (s == null) {
            throw new IOException("Schemas must be fully specified to use this storage function. No schema found for field " + field.getName());
        }
        fs = s.getFields();
        if (fs.length != 1 || fs[0].getType() != DataType.TUPLE) {
            throw new IOException("Found a bag without a tuple inside!");
        }
        // Drill down the next level to the tuple's schema.
        s = fs[0].getSchema();
        if (s == null) {
            throw new IOException("Schemas must be fully specified to use this storage function. No schema found for field " + field.getName());
        }
        fs = s.getFields();
        List<Map<String, Object>> a = new ArrayList<Map<String, Object>>();
        for (Tuple t : (DataBag) d) {
            Map<String, Object> ma = new LinkedHashMap<String, Object>();
            for (int j = 0; j < fs.length; j++) {
                ma.put(fs[j].getName(), t.get(j));
            }
            a.add(ma);
        }
        builder.add(field.getName(), a);
    } else if (i == DataType.MAP) {
        // Given a MAP, add each entry directly to the enclosing builder.
        Map map = (Map) d;
        for (Object key : map.keySet()) {
            builder.add(key.toString(), map.get(key));
        }
    }
}
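The MAP branch is exercised by testMap further down; by way of illustration, here is a minimal, hypothetical sketch of the TUPLE branch. The field and alias names (t, a, b) are made up, and the sketch assumes the caller lives in the same package as MongoStorage (writeField is protected), just as MongoStorageTest below does.

// Hypothetical sketch of the TUPLE branch; not part of the source above.
private static DBObject writeTupleExample() throws Exception {
    MongoStorage ms = new MongoStorage();
    BasicDBObjectBuilder builder = BasicDBObjectBuilder.start();
    // A tuple field must carry a fully specified inner schema.
    ResourceSchema schema = new ResourceSchema(Utils.getSchemaFromString("t:tuple(a:int,b:chararray)"));
    Tuple value = TupleFactory.getInstance().newTuple(Arrays.<Object>asList(1, "x"));
    ms.writeField(builder, schema.getFields()[0], value);
    // Expected shape: { "t" : { "a" : 1, "b" : "x" } }
    return builder.get();
}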
Use of org.apache.pig.ResourceSchema in project mongo-hadoop by mongodb: class MongoStorage, method prepareToWrite.
public void prepareToWrite(final RecordWriter writer) throws IOException {
    recordWriter = (MongoRecordWriter) writer;
    LOG.info("Preparing to write to " + recordWriter);
    if (recordWriter == null) {
        throw new IOException("Invalid Record Writer");
    }
    // Retrieve the schema string stored in the UDF context properties.
    UDFContext udfc = UDFContext.getUDFContext();
    Properties p = udfc.getUDFProperties(this.getClass(), new String[] { udfContextSignature });
    String strSchema = p.getProperty(PIG_OUTPUT_SCHEMA_UDF_CONTEXT);
    if (strSchema == null) {
        throw new IOException("Could not find schema in UDF context");
    }
    try {
        // Parse the schema from the string stored in the properties object.
        this.schema = new ResourceSchema(Utils.getSchemaFromString(strSchema));
    } catch (Exception e) {
        LOG.error(e.getMessage(), e);
    }
    if (options != null) {
        // If we are ensuring any indexes, do so now.
        for (MongoStorageOptions.Index in : options.getIndexes()) {
            recordWriter.ensureIndex(in.index, in.options);
        }
    }
}
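For prepareToWrite to find anything, the schema string has to have been placed under the same UDF-context key on the Pig front end when the store function's schema is checked. The following is a minimal, hypothetical sketch of that producer side, assuming the same property key and signature; the exact serialized form MongoStorage writes is not shown here, so `schemaString` is a stand-in.

// Hypothetical front-end counterpart (sketch only): stash the output schema
// string so the backend's prepareToWrite can parse it again.
void storeSchemaInUdfContext(final String udfContextSignature, final String schemaString) {
    Properties p = UDFContext.getUDFContext()
        .getUDFProperties(MongoStorage.class, new String[] { udfContextSignature });
    p.setProperty(PIG_OUTPUT_SCHEMA_UDF_CONTEXT, schemaString);
}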
Use of org.apache.pig.ResourceSchema in project mongo-hadoop by mongodb: class MongoStorageTest, method testMap.
@Test
public void testMap() throws Exception {
    MongoStorage ms = new MongoStorage();
    BasicDBObjectBuilder builder = BasicDBObjectBuilder.start();
    ResourceSchema schema = new ResourceSchema(Utils.getSchemaFromString("m:map[]"));
    Map<String, Object> val = new HashMap<String, Object>();
    val.put("f1", 1);
    val.put("f2", "2");
    ms.writeField(builder, schema.getFields()[0], val);
    DBObject out = builder.get();
    Set<String> outKeySet = out.keySet();
    assertEquals(2, outKeySet.size());
    assertEquals(1, out.get("f1"));
    assertEquals("2", out.get("f2"));
}
Use of org.apache.pig.ResourceSchema in project phoenix by apache: class PhoenixPigSchemaUtilTest, method testSchema.
@Test
public void testSchema() throws SQLException, IOException {
    final Configuration configuration = mock(Configuration.class);
    when(configuration.get(PhoenixConfigurationUtil.SCHEMA_TYPE)).thenReturn(SchemaType.TABLE.name());
    final ResourceSchema actual = PhoenixPigSchemaUtil.getResourceSchema(configuration, new Dependencies() {
        List<ColumnInfo> getSelectColumnMetadataList(Configuration configuration) throws SQLException {
            return Lists.newArrayList(ID_COLUMN, NAME_COLUMN);
        }
    });
    // expected schema
    final ResourceFieldSchema[] fields = new ResourceFieldSchema[2];
    fields[0] = new ResourceFieldSchema().setName("ID").setType(DataType.LONG);
    fields[1] = new ResourceFieldSchema().setName("NAME").setType(DataType.CHARARRAY);
    final ResourceSchema expected = new ResourceSchema().setFields(fields);
    assertEquals(expected.toString(), actual.toString());
}
Use of org.apache.pig.ResourceSchema in project hive by apache: class HCatBaseStorer, method checkSchema.
@Override
public void checkSchema(ResourceSchema resourceSchema) throws IOException {
    /* Schema provided by user and the schema computed by Pig
     * at the time of calling store must match.
     */
    Schema runtimeSchema = Schema.getPigSchema(resourceSchema);
    if (pigSchema != null) {
        if (!Schema.equals(runtimeSchema, pigSchema, false, true)) {
            throw new FrontendException("Schema provided in store statement doesn't match with the Schema "
                + "returned by Pig run-time. Schema provided in HCatStorer: " + pigSchema.toString()
                + " Schema received from Pig runtime: " + runtimeSchema.toString(), PigHCatUtil.PIG_EXCEPTION_CODE);
        }
    } else {
        pigSchema = runtimeSchema;
    }
    UDFContext.getUDFContext().getUDFProperties(this.getClass(), new String[] { sign })
        .setProperty(PIG_SCHEMA, ObjectSerializer.serialize(pigSchema));
}
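The schema serialized here is typically recovered on the backend with the matching deserialize call. Below is a hedged sketch of that read-back, assuming the same `sign` value and `PIG_SCHEMA` key as above; the real HCatBaseStorer read-back code is not reproduced here.

// Hypothetical read-back of the schema serialized in checkSchema above (sketch only).
Schema loadStoredPigSchema(final String sign) throws IOException {
    Properties props = UDFContext.getUDFContext()
        .getUDFProperties(getClass(), new String[] { sign });
    return (Schema) ObjectSerializer.deserialize(props.getProperty(PIG_SCHEMA));
}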