Search in sources :

Example 6 with ResourceSchema

use of org.apache.pig.ResourceSchema in project mongo-hadoop by mongodb.

In the class MongoStorage, the method writeField:

/**
 * Writes one Pig field value into the BSON document being built.
 *
 * <p>Scalars are passed straight to the builder; TUPLEs become nested maps,
 * BAGs become arrays of maps, and MAPs are flattened into the enclosing
 * document (each entry becomes a top-level key).
 *
 * @param builder the BSON object builder to append to
 * @param field   the Pig schema for this field (name, type, nested schema)
 * @param d       the field's value; {@code null} is written as a BSON null
 * @throws IOException if a nested schema required for TUPLE/BAG is missing,
 *                     or a bag does not contain tuples
 */
protected void writeField(final BasicDBObjectBuilder builder, final ResourceSchema.ResourceFieldSchema field, final Object d) throws IOException {
    // If the field is missing or the value is null, write a null
    if (d == null) {
        builder.add(field.getName(), null);
        return;
    }
    ResourceSchema s = field.getSchema();
    // Based on the field's type, write it out
    byte type = field.getType();
    switch (type) {
        case DataType.INTEGER:
        case DataType.LONG:
        case DataType.FLOAT:
        case DataType.DOUBLE:
        case DataType.CHARARRAY:
            // Simple scalar types: the BSON encoder handles these directly.
            builder.add(field.getName(), d);
            break;
        case DataType.BYTEARRAY:
            // Byte arrays are stored via their string form.
            builder.add(field.getName(), d.toString());
            break;
        case DataType.TUPLE: {
            // Given a TUPLE, create a Map so BSONEncoder will eat it
            if (s == null) {
                throw missingSchema(field.getName());
            }
            ResourceFieldSchema[] fs = s.getFields();
            Map<String, Object> m = new LinkedHashMap<String, Object>();
            for (int j = 0; j < fs.length; j++) {
                m.put(fs[j].getName(), ((Tuple) d).get(j));
            }
            builder.add(field.getName(), m);
            break;
        }
        case DataType.BAG: {
            // Given a BAG, create an Array so BSONEncoder will eat it.
            if (s == null) {
                throw missingSchema(field.getName());
            }
            ResourceFieldSchema[] fs = s.getFields();
            if (fs.length != 1 || fs[0].getType() != DataType.TUPLE) {
                throw new IOException("Found a bag without a tuple inside!");
            }
            // Drill down the next level to the tuple's schema.
            s = fs[0].getSchema();
            if (s == null) {
                throw missingSchema(field.getName());
            }
            fs = s.getFields();
            List<Map<String, Object>> a = new ArrayList<Map<String, Object>>();
            for (Tuple t : (DataBag) d) {
                Map<String, Object> ma = new LinkedHashMap<String, Object>();
                for (int j = 0; j < fs.length; j++) {
                    ma.put(fs[j].getName(), t.get(j));
                }
                a.add(ma);
            }
            builder.add(field.getName(), a);
            break;
        }
        case DataType.MAP: {
            // Maps are flattened into the enclosing document: each entry
            // becomes a top-level key rather than a nested sub-document.
            Map<?, ?> map = (Map<?, ?>) d;
            for (Map.Entry<?, ?> entry : map.entrySet()) {
                builder.add(entry.getKey().toString(), entry.getValue());
            }
            break;
        }
        default:
            // Unknown/unsupported types fall through silently, matching the
            // original if/else chain. NOTE(review): consider throwing here
            // so unsupported data is not dropped without warning.
            break;
    }
}

/**
 * Builds the standard "schema missing" error for the named field.
 */
private static IOException missingSchema(final String fieldName) {
    return new IOException("Schemas must be fully specified to use this storage function.  No schema found for field " + fieldName);
}
Also used : ResourceSchema(org.apache.pig.ResourceSchema) IOException(java.io.IOException) LinkedHashMap(java.util.LinkedHashMap) ResourceFieldSchema(org.apache.pig.ResourceSchema.ResourceFieldSchema) ArrayList(java.util.ArrayList) List(java.util.List) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map) Tuple(org.apache.pig.data.Tuple)

Example 7 with ResourceSchema

use of org.apache.pig.ResourceSchema in project mongo-hadoop by mongodb.

In the class MongoStorage, the method prepareToWrite:

/**
 * Prepares this storer for writing: captures the record writer, restores the
 * output schema previously serialized into the UDF context, and ensures any
 * configured indexes exist.
 *
 * @param writer the Hadoop record writer; must be a MongoRecordWriter
 * @throws IOException if the writer is null, the schema is absent from the
 *                     UDF context, or the stored schema string cannot be parsed
 */
public void prepareToWrite(final RecordWriter writer) throws IOException {
    // Validate before casting/logging so a null writer fails cleanly.
    if (writer == null) {
        throw new IOException("Invalid Record Writer");
    }
    recordWriter = (MongoRecordWriter) writer;
    LOG.info("Preparing to write to " + recordWriter);
    // Parse the schema from the string stored in the properties object.
    UDFContext udfc = UDFContext.getUDFContext();
    Properties p = udfc.getUDFProperties(this.getClass(), new String[] { udfContextSignature });
    String strSchema = p.getProperty(PIG_OUTPUT_SCHEMA_UDF_CONTEXT);
    if (strSchema == null) {
        throw new IOException("Could not find schema in UDF context");
    }
    try {
        this.schema = new ResourceSchema(Utils.getSchemaFromString(strSchema));
    } catch (Exception e) {
        LOG.error(e.getMessage(), e);
        // Previously this failure was only logged, leaving 'schema' null and
        // deferring an NPE to write time; fail fast and preserve the cause.
        throw new IOException("Could not parse schema: " + strSchema, e);
    }
    if (options != null) {
        // If we are ensuring any indexes, do so now:
        for (MongoStorageOptions.Index in : options.getIndexes()) {
            recordWriter.ensureIndex(in.index, in.options);
        }
    }
}
Also used : ResourceSchema(org.apache.pig.ResourceSchema) UDFContext(org.apache.pig.impl.util.UDFContext) IOException(java.io.IOException) Properties(java.util.Properties) ParseException(java.text.ParseException) IOException(java.io.IOException)

Example 8 with ResourceSchema

use of org.apache.pig.ResourceSchema in project mongo-hadoop by mongodb.

In the class MongoStorageTest, the method testMap:

@Test
public void testMap() throws Exception {
    // Build a Pig map value holding one integer and one string entry.
    Map<String, Object> mapValue = new HashMap<String, Object>();
    mapValue.put("f1", 1);
    mapValue.put("f2", "2");
    ResourceSchema mapSchema = new ResourceSchema(Utils.getSchemaFromString("m:map[]"));
    MongoStorage storage = new MongoStorage();
    BasicDBObjectBuilder bob = BasicDBObjectBuilder.start();
    storage.writeField(bob, mapSchema.getFields()[0], mapValue);
    // The map's entries should be flattened into top-level document keys.
    DBObject result = bob.get();
    assertEquals(2, result.keySet().size());
    assertEquals(1, result.get("f1"));
    assertEquals("2", result.get("f2"));
}
Also used : BasicDBObjectBuilder(com.mongodb.BasicDBObjectBuilder) ResourceSchema(org.apache.pig.ResourceSchema) HashMap(java.util.HashMap) DBObject(com.mongodb.DBObject) DBObject(com.mongodb.DBObject) Test(org.junit.Test)

Example 9 with ResourceSchema

use of org.apache.pig.ResourceSchema in project phoenix by apache.

In the class PhoenixPigSchemaUtilTest, the method testSchema:

@Test
public void testSchema() throws SQLException, IOException {
    // Stub a configuration that selects TABLE-based schema resolution.
    final Configuration conf = mock(Configuration.class);
    when(conf.get(PhoenixConfigurationUtil.SCHEMA_TYPE)).thenReturn(SchemaType.TABLE.name());
    final ResourceSchema actualSchema = PhoenixPigSchemaUtil.getResourceSchema(conf, new Dependencies() {

        List<ColumnInfo> getSelectColumnMetadataList(Configuration configuration) throws SQLException {
            return Lists.newArrayList(ID_COLUMN, NAME_COLUMN);
        }
    });
    // Hand-build the schema we expect back: (ID: long, NAME: chararray).
    final ResourceSchema expectedSchema = new ResourceSchema().setFields(new ResourceFieldSchema[] {
        new ResourceFieldSchema().setName("ID").setType(DataType.LONG),
        new ResourceFieldSchema().setName("NAME").setType(DataType.CHARARRAY)
    });
    assertEquals(expectedSchema.toString(), actualSchema.toString());
}
Also used : ResourceSchema(org.apache.pig.ResourceSchema) Configuration(org.apache.hadoop.conf.Configuration) SQLException(java.sql.SQLException) ResourceFieldSchema(org.apache.pig.ResourceSchema.ResourceFieldSchema) List(java.util.List) Dependencies(org.apache.phoenix.pig.util.PhoenixPigSchemaUtil.Dependencies) Test(org.junit.Test)

Example 10 with ResourceSchema

use of org.apache.pig.ResourceSchema in project hive by apache.

In the class HCatBaseStorer, the method checkSchema:

/**
 * Validates that the schema Pig computes at store time matches the schema
 * the user supplied (if any), then serializes the resolved schema into the
 * UDF context so backend tasks can recover it.
 *
 * @param resourceSchema the schema Pig derived for the data being stored
 * @throws IOException (FrontendException) if the user-provided schema and
 *                     the runtime schema disagree
 */
@Override
public void checkSchema(ResourceSchema resourceSchema) throws IOException {
    /*  Schema provided by user and the schema computed by Pig
    * at the time of calling store must match.
    */
    Schema runtimeSchema = Schema.getPigSchema(resourceSchema);
    if (pigSchema != null) {
        if (!Schema.equals(runtimeSchema, pigSchema, false, true)) {
            // Message fixed: the original concatenation produced
            // "...the Schemareturned by Pig run-time..." (missing space).
            throw new FrontendException("Schema provided in store statement doesn't match with the Schema "
                + "returned by Pig run-time. Schema provided in HCatStorer: " + pigSchema.toString()
                + " Schema received from Pig runtime: " + runtimeSchema.toString(),
                PigHCatUtil.PIG_EXCEPTION_CODE);
        }
    } else {
        pigSchema = runtimeSchema;
    }
    // Persist the resolved schema for deserialization on the backend.
    UDFContext.getUDFContext().getUDFProperties(this.getClass(), new String[] { sign }).setProperty(PIG_SCHEMA, ObjectSerializer.serialize(pigSchema));
}
Also used : HCatFieldSchema(org.apache.hive.hcatalog.data.schema.HCatFieldSchema) Schema(org.apache.pig.impl.logicalLayer.schema.Schema) HCatSchema(org.apache.hive.hcatalog.data.schema.HCatSchema) ResourceSchema(org.apache.pig.ResourceSchema) FieldSchema(org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema) FrontendException(org.apache.pig.impl.logicalLayer.FrontendException)

Aggregations

ResourceSchema (org.apache.pig.ResourceSchema)18 ResourceFieldSchema (org.apache.pig.ResourceSchema.ResourceFieldSchema)11 IOException (java.io.IOException)8 HCatFieldSchema (org.apache.hive.hcatalog.data.schema.HCatFieldSchema)6 ArrayList (java.util.ArrayList)5 Properties (java.util.Properties)5 Test (org.junit.Test)5 UDFContext (org.apache.pig.impl.util.UDFContext)4 HashMap (java.util.HashMap)3 List (java.util.List)3 Map (java.util.Map)3 HCatSchema (org.apache.hive.hcatalog.data.schema.HCatSchema)3 Tuple (org.apache.pig.data.Tuple)3 BasicDBObjectBuilder (com.mongodb.BasicDBObjectBuilder)2 SQLException (java.sql.SQLException)2 LinkedHashMap (java.util.LinkedHashMap)2 DataBag (org.apache.pig.data.DataBag)2 Schema (org.apache.pig.impl.logicalLayer.schema.Schema)2 BasicBSONObject (org.bson.BasicBSONObject)2 DateTime (org.joda.time.DateTime)2