Search in sources :

Example 11 with Serializer

use of org.apache.hadoop.hive.serde2.Serializer in project hive by apache.

the class TestHiveAccumuloTableOutputFormat method testWriteToMockInstanceWithVisibility.

@Test
public void testWriteToMockInstanceWithVisibility() throws Exception {
    Instance inst = new MockInstance(test.getMethodName());
    Connector conn = inst.getConnector("root", new PasswordToken(""));
    Authorizations auths = new Authorizations("foo");
    conn.securityOperations().changeUserAuthorizations("root", auths);
    HiveAccumuloTableOutputFormat outputFormat = new HiveAccumuloTableOutputFormat();
    String table = test.getMethodName();
    conn.tableOperations().create(table);
    JobConf conf = new JobConf();
    conf.set(AccumuloConnectionParameters.INSTANCE_NAME, inst.getInstanceName());
    conf.set(AccumuloConnectionParameters.USER_NAME, "root");
    conf.set(AccumuloConnectionParameters.USER_PASS, "");
    conf.setBoolean(AccumuloConnectionParameters.USE_MOCK_INSTANCE, true);
    conf.set(AccumuloConnectionParameters.TABLE_NAME, test.getMethodName());
    FileSystem local = FileSystem.getLocal(conf);
    outputFormat.checkOutputSpecs(local, conf);
    RecordWriter<Text, Mutation> recordWriter = outputFormat.getRecordWriter(local, conf, null, null);
    List<String> names = Arrays.asList("row", "col1", "col2");
    List<TypeInfo> types = Arrays.<TypeInfo>asList(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo);
    Properties tableProperties = new Properties();
    tableProperties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowID,cf:cq1,cf:cq2");
    tableProperties.setProperty(serdeConstants.FIELD_DELIM, " ");
    tableProperties.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(names));
    tableProperties.setProperty(serdeConstants.LIST_COLUMN_TYPES, Joiner.on(',').join(types));
    AccumuloSerDeParameters accumuloSerDeParams = new AccumuloSerDeParameters(new Configuration(), tableProperties, AccumuloSerDe.class.getSimpleName());
    LazySerDeParameters serDeParams = accumuloSerDeParams.getSerDeParameters();
    AccumuloRowSerializer serializer = new AccumuloRowSerializer(0, serDeParams, accumuloSerDeParams.getColumnMappings(), new ColumnVisibility("foo"), accumuloSerDeParams.getRowIdFactory());
    LazySimpleStructObjectInspector structOI = (LazySimpleStructObjectInspector) LazyFactory.createLazyStructInspector(Arrays.asList("row", "cq1", "cq2"), Arrays.<TypeInfo>asList(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo), serDeParams.getSeparators(), serDeParams.getNullSequence(), serDeParams.isLastColumnTakesRest(), serDeParams.isEscaped(), serDeParams.getEscapeChar());
    LazyStruct struct = (LazyStruct) LazyFactory.createLazyObject(structOI);
    ByteArrayRef bytes = new ByteArrayRef();
    bytes.setData("row value1 value2".getBytes());
    struct.init(bytes, 0, bytes.getData().length);
    // Serialize the struct into a mutation
    Mutation m = serializer.serialize(struct, structOI);
    // Write the mutation
    recordWriter.write(new Text(table), m);
    // Close the writer
    recordWriter.close(null);
    Iterator<Entry<Key, Value>> iter = conn.createScanner(table, auths).iterator();
    Assert.assertTrue("Iterator did not have an element as expected", iter.hasNext());
    Entry<Key, Value> entry = iter.next();
    Key k = entry.getKey();
    Value v = entry.getValue();
    Assert.assertEquals("row", k.getRow().toString());
    Assert.assertEquals("cf", k.getColumnFamily().toString());
    Assert.assertEquals("cq1", k.getColumnQualifier().toString());
    Assert.assertEquals("foo", k.getColumnVisibility().toString());
    Assert.assertEquals("value1", new String(v.get()));
    Assert.assertTrue("Iterator did not have an element as expected", iter.hasNext());
    entry = iter.next();
    k = entry.getKey();
    v = entry.getValue();
    Assert.assertEquals("row", k.getRow().toString());
    Assert.assertEquals("cf", k.getColumnFamily().toString());
    Assert.assertEquals("cq2", k.getColumnQualifier().toString());
    Assert.assertEquals("foo", k.getColumnVisibility().toString());
    Assert.assertEquals("value2", new String(v.get()));
    Assert.assertFalse("Iterator unexpectedly had more data", iter.hasNext());
}
Also used : Connector(org.apache.accumulo.core.client.Connector) Configuration(org.apache.hadoop.conf.Configuration) MockInstance(org.apache.accumulo.core.client.mock.MockInstance) Instance(org.apache.accumulo.core.client.Instance) LazySerDeParameters(org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters) Properties(java.util.Properties) PasswordToken(org.apache.accumulo.core.client.security.tokens.PasswordToken) Entry(java.util.Map.Entry) MockInstance(org.apache.accumulo.core.client.mock.MockInstance) FileSystem(org.apache.hadoop.fs.FileSystem) ColumnVisibility(org.apache.accumulo.core.security.ColumnVisibility) JobConf(org.apache.hadoop.mapred.JobConf) AccumuloRowSerializer(org.apache.hadoop.hive.accumulo.serde.AccumuloRowSerializer) LazyStruct(org.apache.hadoop.hive.serde2.lazy.LazyStruct) Authorizations(org.apache.accumulo.core.security.Authorizations) LazySimpleStructObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector) Text(org.apache.hadoop.io.Text) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) AccumuloSerDe(org.apache.hadoop.hive.accumulo.serde.AccumuloSerDe) AccumuloSerDeParameters(org.apache.hadoop.hive.accumulo.serde.AccumuloSerDeParameters) ByteArrayRef(org.apache.hadoop.hive.serde2.lazy.ByteArrayRef) Value(org.apache.accumulo.core.data.Value) Mutation(org.apache.accumulo.core.data.Mutation) Key(org.apache.accumulo.core.data.Key) Test(org.junit.Test)

Example 12 with Serializer

use of org.apache.hadoop.hive.serde2.Serializer in project hive by apache.

the class TestHiveAccumuloTableOutputFormat method testBinarySerializationOnStringFallsBackToUtf8.

@Test
public void testBinarySerializationOnStringFallsBackToUtf8() throws Exception {
    Instance inst = new MockInstance(test.getMethodName());
    Connector conn = inst.getConnector("root", new PasswordToken(""));
    HiveAccumuloTableOutputFormat outputFormat = new HiveAccumuloTableOutputFormat();
    String table = test.getMethodName();
    conn.tableOperations().create(table);
    JobConf conf = new JobConf();
    conf.set(AccumuloConnectionParameters.INSTANCE_NAME, inst.getInstanceName());
    conf.set(AccumuloConnectionParameters.USER_NAME, "root");
    conf.set(AccumuloConnectionParameters.USER_PASS, "");
    conf.setBoolean(AccumuloConnectionParameters.USE_MOCK_INSTANCE, true);
    conf.set(AccumuloConnectionParameters.TABLE_NAME, test.getMethodName());
    FileSystem local = FileSystem.getLocal(conf);
    outputFormat.checkOutputSpecs(local, conf);
    RecordWriter<Text, Mutation> recordWriter = outputFormat.getRecordWriter(local, conf, null, null);
    List<String> names = Arrays.asList("row", "col1", "col2");
    List<TypeInfo> types = Arrays.<TypeInfo>asList(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo);
    Properties tableProperties = new Properties();
    tableProperties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowID,cf:cq1,cf:cq2");
    tableProperties.setProperty(serdeConstants.FIELD_DELIM, " ");
    tableProperties.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(names));
    tableProperties.setProperty(serdeConstants.LIST_COLUMN_TYPES, Joiner.on(',').join(types));
    tableProperties.setProperty(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE, ColumnEncoding.BINARY.getName());
    AccumuloSerDeParameters accumuloSerDeParams = new AccumuloSerDeParameters(new Configuration(), tableProperties, AccumuloSerDe.class.getSimpleName());
    LazySerDeParameters serDeParams = accumuloSerDeParams.getSerDeParameters();
    AccumuloRowSerializer serializer = new AccumuloRowSerializer(0, serDeParams, accumuloSerDeParams.getColumnMappings(), AccumuloSerDeParameters.DEFAULT_VISIBILITY_LABEL, accumuloSerDeParams.getRowIdFactory());
    TypeInfo stringTypeInfo = TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.STRING_TYPE_NAME);
    LazySimpleStructObjectInspector structOI = (LazySimpleStructObjectInspector) LazyFactory.createLazyStructInspector(Arrays.asList("row", "cq1", "cq2"), Arrays.asList(stringTypeInfo, stringTypeInfo, stringTypeInfo), serDeParams.getSeparators(), serDeParams.getNullSequence(), serDeParams.isLastColumnTakesRest(), serDeParams.isEscaped(), serDeParams.getEscapeChar());
    LazyStruct struct = (LazyStruct) LazyFactory.createLazyObject(structOI);
    ByteArrayRef bytes = new ByteArrayRef();
    bytes.setData("row value1 value2".getBytes());
    struct.init(bytes, 0, bytes.getData().length);
    // Serialize the struct into a mutation
    Mutation m = serializer.serialize(struct, structOI);
    // Write the mutation
    recordWriter.write(new Text(table), m);
    // Close the writer
    recordWriter.close(null);
    Iterator<Entry<Key, Value>> iter = conn.createScanner(table, new Authorizations()).iterator();
    Assert.assertTrue("Iterator did not have an element as expected", iter.hasNext());
    Entry<Key, Value> entry = iter.next();
    Key k = entry.getKey();
    Value v = entry.getValue();
    Assert.assertEquals("row", k.getRow().toString());
    Assert.assertEquals("cf", k.getColumnFamily().toString());
    Assert.assertEquals("cq1", k.getColumnQualifier().toString());
    Assert.assertEquals("", k.getColumnVisibility().toString());
    Assert.assertEquals("value1", new String(v.get()));
    Assert.assertTrue("Iterator did not have an element as expected", iter.hasNext());
    entry = iter.next();
    k = entry.getKey();
    v = entry.getValue();
    Assert.assertEquals("row", k.getRow().toString());
    Assert.assertEquals("cf", k.getColumnFamily().toString());
    Assert.assertEquals("cq2", k.getColumnQualifier().toString());
    Assert.assertEquals("", k.getColumnVisibility().toString());
    Assert.assertEquals("value2", new String(v.get()));
    Assert.assertFalse("Iterator unexpectedly had more data", iter.hasNext());
}
Also used : Connector(org.apache.accumulo.core.client.Connector) Configuration(org.apache.hadoop.conf.Configuration) MockInstance(org.apache.accumulo.core.client.mock.MockInstance) Instance(org.apache.accumulo.core.client.Instance) LazySerDeParameters(org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters) Properties(java.util.Properties) PasswordToken(org.apache.accumulo.core.client.security.tokens.PasswordToken) Entry(java.util.Map.Entry) MockInstance(org.apache.accumulo.core.client.mock.MockInstance) FileSystem(org.apache.hadoop.fs.FileSystem) JobConf(org.apache.hadoop.mapred.JobConf) AccumuloRowSerializer(org.apache.hadoop.hive.accumulo.serde.AccumuloRowSerializer) LazyStruct(org.apache.hadoop.hive.serde2.lazy.LazyStruct) Authorizations(org.apache.accumulo.core.security.Authorizations) LazySimpleStructObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector) Text(org.apache.hadoop.io.Text) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) AccumuloSerDe(org.apache.hadoop.hive.accumulo.serde.AccumuloSerDe) AccumuloSerDeParameters(org.apache.hadoop.hive.accumulo.serde.AccumuloSerDeParameters) ByteArrayRef(org.apache.hadoop.hive.serde2.lazy.ByteArrayRef) Value(org.apache.accumulo.core.data.Value) Mutation(org.apache.accumulo.core.data.Mutation) Key(org.apache.accumulo.core.data.Key) Test(org.junit.Test)

Example 13 with Serializer

use of org.apache.hadoop.hive.serde2.Serializer in project presto by prestodb.

the class RecordFileWriter method initializeSerializer.

@SuppressWarnings("deprecation")
private static Serializer initializeSerializer(Configuration conf, Properties properties, String serializerName) {
    try {
        Serializer result = (Serializer) Class.forName(serializerName).getConstructor().newInstance();
        result.initialize(conf, properties);
        return result;
    } catch (SerDeException | ReflectiveOperationException e) {
        throw Throwables.propagate(e);
    }
}
Also used : SerDeException(org.apache.hadoop.hive.serde2.SerDeException) Serializer(org.apache.hadoop.hive.serde2.Serializer)

Example 14 with Serializer

use of org.apache.hadoop.hive.serde2.Serializer in project phoenix by apache.

the class PhoenixSerDe method initialize.

@Override
public void initialize(Configuration conf, Properties tbl) throws SerDeException {
    tableProperties = tbl;
    if (LOG.isDebugEnabled()) {
        LOG.debug("SerDe initialize : " + tbl.getProperty("name"));
    }
    serdeParams = new LazySerDeParameters(conf, tbl, getClass().getName());
    objectInspector = createLazyPhoenixInspector(conf, tbl);
    String inOutWork = tbl.getProperty(PhoenixStorageHandlerConstants.IN_OUT_WORK);
    if (inOutWork == null) {
        return;
    }
    serializer = new PhoenixSerializer(conf, tbl);
    row = new PhoenixRow(serdeParams.getColumnNames());
}
Also used : LazySerDeParameters(org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters)

Example 15 with Serializer

use of org.apache.hadoop.hive.serde2.Serializer in project hive by apache.

the class AvroSerializer method serialize.

// Hive is pretty simple (read: stupid) in writing out values via the serializer.
// We're just going to go through, matching indices.  Hive formats normally
// handle mismatches with null.  We don't have that option, so instead we'll
// end up throwing an exception for invalid records.
public Writable serialize(Object o, ObjectInspector objectInspector, List<String> columnNames, List<TypeInfo> columnTypes, Schema schema) throws AvroSerdeException {
    StructObjectInspector soi = (StructObjectInspector) objectInspector;
    GenericData.Record record = new GenericData.Record(schema);
    List<? extends StructField> outputFieldRefs = soi.getAllStructFieldRefs();
    if (outputFieldRefs.size() != columnNames.size()) {
        throw new AvroSerdeException("Number of input columns was different than output columns (in = " + columnNames.size() + " vs out = " + outputFieldRefs.size());
    }
    int size = schema.getFields().size();
    if (outputFieldRefs.size() != size) {
        throw new AvroSerdeException("Hive passed in a different number of fields than the schema expected: (Hive wanted " + outputFieldRefs.size() + ", Avro expected " + schema.getFields().size());
    }
    List<? extends StructField> allStructFieldRefs = soi.getAllStructFieldRefs();
    List<Object> structFieldsDataAsList = soi.getStructFieldsDataAsList(o);
    for (int i = 0; i < size; i++) {
        Field field = schema.getFields().get(i);
        TypeInfo typeInfo = columnTypes.get(i);
        StructField structFieldRef = allStructFieldRefs.get(i);
        Object structFieldData = structFieldsDataAsList.get(i);
        ObjectInspector fieldOI = structFieldRef.getFieldObjectInspector();
        Object val = serialize(typeInfo, fieldOI, structFieldData, field.schema());
        record.put(field.name(), val);
    }
    if (!GenericData.get().validate(schema, record)) {
        throw new SerializeToAvroException(schema, record);
    }
    cache.setRecord(record);
    return cache;
}
Also used : ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) UnionObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) TimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector) DateObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector) GenericData(org.apache.avro.generic.GenericData) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) UnionTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo) Field(org.apache.avro.Schema.Field) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)

Aggregations

TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)11 LazySerDeParameters (org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters)10 LazySimpleStructObjectInspector (org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector)10 Properties (java.util.Properties)9 Test (org.junit.Test)9 Mutation (org.apache.accumulo.core.data.Mutation)8 Configuration (org.apache.hadoop.conf.Configuration)8 ByteArrayRef (org.apache.hadoop.hive.serde2.lazy.ByteArrayRef)8 LazyStruct (org.apache.hadoop.hive.serde2.lazy.LazyStruct)8 JobConf (org.apache.hadoop.mapred.JobConf)8 SerDeException (org.apache.hadoop.hive.serde2.SerDeException)7 Serializer (org.apache.hadoop.hive.serde2.Serializer)7 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)6 ArrayList (java.util.ArrayList)5 ColumnVisibility (org.apache.accumulo.core.security.ColumnVisibility)5 StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField)5 Text (org.apache.hadoop.io.Text)5 Entry (java.util.Map.Entry)4 Connector (org.apache.accumulo.core.client.Connector)4 Instance (org.apache.accumulo.core.client.Instance)4