Example 16 with Serializer

Use of org.apache.hadoop.hive.serde2.Serializer in project hive by apache.

The class AccumuloSerDe, method initialize.

public void initialize(Configuration conf, Properties properties) throws SerDeException {
    // Parse the Accumulo column mapping and the Hive column names/types from the table properties
    accumuloSerDeParameters = new AccumuloSerDeParameters(conf, properties, getClass().getName());
    final LazySerDeParameters serDeParams = accumuloSerDeParameters.getSerDeParameters();
    final List<ColumnMapping> mappings = accumuloSerDeParameters.getColumnMappings();
    final List<TypeInfo> columnTypes = accumuloSerDeParameters.getHiveColumnTypes();
    final AccumuloRowIdFactory factory = accumuloSerDeParameters.getRowIdFactory();
    // One ObjectInspector per mapped column, wrapped in a lazy struct inspector for the whole row
    ArrayList<ObjectInspector> columnObjectInspectors = getColumnObjectInspectors(columnTypes, serDeParams, mappings, factory);
    cachedObjectInspector = LazyObjectInspectorFactory.getLazySimpleStructObjectInspector(serDeParams.getColumnNames(), columnObjectInspectors, serDeParams.getSeparators()[0], serDeParams.getNullSequence(), serDeParams.isLastColumnTakesRest(), serDeParams.isEscaped(), serDeParams.getEscapeChar());
    cachedRow = new LazyAccumuloRow((LazySimpleStructObjectInspector) cachedObjectInspector);
    // The serializer turns Hive rows back into Accumulo Mutations on write
    serializer = new AccumuloRowSerializer(accumuloSerDeParameters.getRowIdOffset(), accumuloSerDeParameters.getSerDeParameters(), accumuloSerDeParameters.getColumnMappings(), accumuloSerDeParameters.getTableVisibilityLabel(), accumuloSerDeParameters.getRowIdFactory());
    if (log.isInfoEnabled()) {
        log.info("Initialized with {} type: {}", accumuloSerDeParameters.getSerDeParameters().getColumnNames(), accumuloSerDeParameters.getSerDeParameters().getColumnTypes());
    }
}
Also used : LazySimpleStructObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) LazySerDeParameters(org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters) LazyAccumuloRow(org.apache.hadoop.hive.accumulo.LazyAccumuloRow) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) HiveAccumuloRowIdColumnMapping(org.apache.hadoop.hive.accumulo.columns.HiveAccumuloRowIdColumnMapping) ColumnMapping(org.apache.hadoop.hive.accumulo.columns.ColumnMapping)
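
For context, here is a minimal sketch (not taken from the Hive source) of what this initialize method expects to find in the table properties: the Accumulo column mapping plus the usual Hive column names and types. The mapping, column names and types below are made-up examples.

import java.util.Properties;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.accumulo.serde.AccumuloSerDe;
import org.apache.hadoop.hive.accumulo.serde.AccumuloSerDeParameters;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;

public class AccumuloSerDeInitSketch {
    public static void main(String[] args) throws SerDeException {
        Properties props = new Properties();
        // ":rowid" marks the Hive column carrying the Accumulo row id; the rest are cf:cq pairs
        props.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowid,cf:cq1,cf:cq2");
        props.setProperty(serdeConstants.LIST_COLUMNS, "row,cq1,cq2");
        props.setProperty(serdeConstants.LIST_COLUMN_TYPES, "string,int,int");

        AccumuloSerDe serDe = new AccumuloSerDe();
        // Runs the initialize method shown above: builds the lazy struct inspector and the row serializer
        serDe.initialize(new Configuration(), props);
        ObjectInspector rowInspector = serDe.getObjectInspector();
        System.out.println(rowInspector.getTypeName());
    }
}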

Example 17 with Serializer

Use of org.apache.hadoop.hive.serde2.Serializer in project hive by apache.

The class TestAccumuloRowSerializer, method testVisibilityLabel.

@Test
public void testVisibilityLabel() throws IOException, SerDeException {
    List<String> columns = Arrays.asList("row", "cq1", "cq2", "cq3");
    List<TypeInfo> types = Arrays.<TypeInfo>asList(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.intTypeInfo, TypeInfoFactory.intTypeInfo, TypeInfoFactory.stringTypeInfo);
    List<String> typeNames = new ArrayList<String>(types.size());
    for (TypeInfo type : types) {
        typeNames.add(type.getTypeName());
    }
    Properties tableProperties = new Properties();
    tableProperties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowid,cf:cq1#b,cf:cq2#b,cf:cq3");
    tableProperties.setProperty(serdeConstants.FIELD_DELIM, " ");
    tableProperties.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(columns));
    tableProperties.setProperty(serdeConstants.LIST_COLUMN_TYPES, Joiner.on(',').join(typeNames));
    AccumuloSerDeParameters accumuloSerDeParams = new AccumuloSerDeParameters(new Configuration(), tableProperties, AccumuloSerDe.class.getSimpleName());
    LazySerDeParameters serDeParams = accumuloSerDeParams.getSerDeParameters();
    LazySimpleStructObjectInspector oi = (LazySimpleStructObjectInspector) LazyFactory.createLazyStructInspector(columns, types, serDeParams.getSeparators(), serDeParams.getNullSequence(), serDeParams.isLastColumnTakesRest(), serDeParams.isEscaped(), serDeParams.getEscapeChar());
    AccumuloRowSerializer serializer = new AccumuloRowSerializer(0, serDeParams, accumuloSerDeParams.getColumnMappings(), new ColumnVisibility("foo"), accumuloSerDeParams.getRowIdFactory());
    // Create the LazyStruct from the LazyStruct...Inspector
    LazyStruct obj = (LazyStruct) LazyFactory.createLazyObject(oi);
    ByteArrayRef byteRef = new ByteArrayRef();
    byteRef.setData(new byte[] { 'r', 'o', 'w', '1', ' ', '1', '0', ' ', '2', '0', ' ', 'v', 'a', 'l', 'u', 'e' });
    obj.init(byteRef, 0, byteRef.getData().length);
    Mutation m = (Mutation) serializer.serialize(obj, oi);
    Assert.assertArrayEquals("row1".getBytes(), m.getRow());
    List<ColumnUpdate> updates = m.getUpdates();
    Assert.assertEquals(3, updates.size());
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    DataOutputStream out = new DataOutputStream(baos);
    ColumnUpdate update = updates.get(0);
    Assert.assertEquals("cf", new String(update.getColumnFamily()));
    Assert.assertEquals("cq1", new String(update.getColumnQualifier()));
    Assert.assertEquals("foo", new String(update.getColumnVisibility()));
    out.writeInt(10);
    Assert.assertArrayEquals(baos.toByteArray(), update.getValue());
    update = updates.get(1);
    Assert.assertEquals("cf", new String(update.getColumnFamily()));
    Assert.assertEquals("cq2", new String(update.getColumnQualifier()));
    Assert.assertEquals("foo", new String(update.getColumnVisibility()));
    baos.reset();
    out.writeInt(20);
    Assert.assertArrayEquals(baos.toByteArray(), update.getValue());
    update = updates.get(2);
    Assert.assertEquals("cf", new String(update.getColumnFamily()));
    Assert.assertEquals("cq3", new String(update.getColumnQualifier()));
    Assert.assertEquals("foo", new String(update.getColumnVisibility()));
    Assert.assertEquals("value", new String(update.getValue()));
}
Also used : LazySimpleStructObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector) ColumnUpdate(org.apache.accumulo.core.data.ColumnUpdate) Configuration(org.apache.hadoop.conf.Configuration) LazySerDeParameters(org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters) DataOutputStream(java.io.DataOutputStream) ArrayList(java.util.ArrayList) ByteArrayOutputStream(java.io.ByteArrayOutputStream) Properties(java.util.Properties) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) ByteArrayRef(org.apache.hadoop.hive.serde2.lazy.ByteArrayRef) ColumnVisibility(org.apache.accumulo.core.security.ColumnVisibility) Mutation(org.apache.accumulo.core.data.Mutation) LazyStruct(org.apache.hadoop.hive.serde2.lazy.LazyStruct) Test(org.junit.Test)
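
A brief aside on the expected values built with DataOutputStream above: the #b suffix in the column mapping marks cq1 and cq2 as binary-encoded, and writeInt emits an int as four big-endian bytes, which is what the test compares against the mutation's value. A JDK-only sketch of that encoding:

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.Arrays;

public class BinaryIntEncodingSketch {
    public static void main(String[] args) throws IOException {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        DataOutputStream out = new DataOutputStream(baos);
        // writeInt stores the value as four big-endian bytes
        out.writeInt(10);
        System.out.println(Arrays.toString(baos.toByteArray())); // [0, 0, 0, 10]
    }
}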

Example 18 with Serializer

Use of org.apache.hadoop.hive.serde2.Serializer in project hive by apache.

The class ScriptOperator, method initializeOp.

@Override
protected void initializeOp(Configuration hconf) throws HiveException {
    super.initializeOp(hconf);
    firstRow = true;
    statsMap.put(Counter.DESERIALIZE_ERRORS.toString(), deserialize_error_count);
    statsMap.put(Counter.SERIALIZE_ERRORS.toString(), serialize_error_count);
    try {
        this.hconf = hconf;
        scriptOutputDeserializer = conf.getScriptOutputInfo().getDeserializerClass().newInstance();
        SerDeUtils.initializeSerDe(scriptOutputDeserializer, hconf, conf.getScriptOutputInfo().getProperties(), null);
        scriptInputSerializer = (Serializer) conf.getScriptInputInfo().getDeserializerClass().newInstance();
        scriptInputSerializer.initialize(hconf, conf.getScriptInputInfo().getProperties());
        outputObjInspector = scriptOutputDeserializer.getObjectInspector();
    } catch (Exception e) {
        throw new HiveException(ErrorMsg.SCRIPT_INIT_ERROR.getErrorCodedMsg(), e);
    }
}
Also used : HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) IOException(java.io.IOException) SerDeException(org.apache.hadoop.hive.serde2.SerDeException)
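
The cast above leans on the org.apache.hadoop.hive.serde2.Serializer contract: initialize the SerDe with the table properties, then serialize each row against an ObjectInspector describing its layout. Below is a minimal sketch of that pattern, using LazySimpleSerDe as an illustrative implementation; the column names and row values are invented.

import java.util.Arrays;
import java.util.Properties;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.Serializer;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.Writable;

public class SerializerContractSketch {
    public static void main(String[] args) throws SerDeException {
        // Table properties describing the row layout, as ScriptOperator would get them from the plan
        Properties props = new Properties();
        props.setProperty(serdeConstants.LIST_COLUMNS, "key,value");
        props.setProperty(serdeConstants.LIST_COLUMN_TYPES, "string,string");

        // Instantiate the SerDe class and use it through its Serializer interface
        Serializer serializer = new LazySimpleSerDe();
        serializer.initialize(new Configuration(), props);

        // The row's shape is described by an ObjectInspector
        ObjectInspector rowOI = ObjectInspectorFactory.getStandardStructObjectInspector(
                Arrays.asList("key", "value"),
                Arrays.<ObjectInspector>asList(
                        PrimitiveObjectInspectorFactory.javaStringObjectInspector,
                        PrimitiveObjectInspectorFactory.javaStringObjectInspector));

        // Serialize one row; LazySimpleSerDe produces a Text with the default ^A field delimiter
        Writable serialized = serializer.serialize(Arrays.asList("k1", "v1"), rowOI);
        System.out.println(serialized);
    }
}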

Example 19 with Serializer

Use of org.apache.hadoop.hive.serde2.Serializer in project hive by apache.

The class TestHiveAccumuloTableOutputFormat, method testWriteMap.

@Test
public void testWriteMap() throws Exception {
    Instance inst = new MockInstance(test.getMethodName());
    Connector conn = inst.getConnector("root", new PasswordToken(""));
    HiveAccumuloTableOutputFormat outputFormat = new HiveAccumuloTableOutputFormat();
    String table = test.getMethodName();
    conn.tableOperations().create(table);
    JobConf conf = new JobConf();
    conf.set(AccumuloConnectionParameters.INSTANCE_NAME, inst.getInstanceName());
    conf.set(AccumuloConnectionParameters.USER_NAME, "root");
    conf.set(AccumuloConnectionParameters.USER_PASS, "");
    conf.setBoolean(AccumuloConnectionParameters.USE_MOCK_INSTANCE, true);
    conf.set(AccumuloConnectionParameters.TABLE_NAME, test.getMethodName());
    FileSystem local = FileSystem.getLocal(conf);
    outputFormat.checkOutputSpecs(local, conf);
    RecordWriter<Text, Mutation> recordWriter = outputFormat.getRecordWriter(local, conf, null, null);
    List<String> names = Arrays.asList("row", "col1");
    List<TypeInfo> types = Arrays.<TypeInfo>asList(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo);
    Properties tableProperties = new Properties();
    tableProperties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowID,cf:*");
    tableProperties.setProperty(serdeConstants.FIELD_DELIM, " ");
    tableProperties.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(names));
    tableProperties.setProperty(serdeConstants.LIST_COLUMN_TYPES, Joiner.on(',').join(types));
    AccumuloSerDeParameters accumuloSerDeParams = new AccumuloSerDeParameters(new Configuration(), tableProperties, AccumuloSerDe.class.getSimpleName());
    LazySerDeParameters serDeParams = accumuloSerDeParams.getSerDeParameters();
    AccumuloRowSerializer serializer = new AccumuloRowSerializer(0, serDeParams, accumuloSerDeParams.getColumnMappings(), AccumuloSerDeParameters.DEFAULT_VISIBILITY_LABEL, accumuloSerDeParams.getRowIdFactory());
    TypeInfo stringTypeInfo = TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.STRING_TYPE_NAME);
    LazyStringObjectInspector stringOI = (LazyStringObjectInspector) LazyFactory.createLazyObjectInspector(stringTypeInfo, new byte[] { 0 }, 0, serDeParams.getNullSequence(), serDeParams.isEscaped(), serDeParams.getEscapeChar());
    LazyMapObjectInspector mapOI = LazyObjectInspectorFactory.getLazySimpleMapObjectInspector(stringOI, stringOI, (byte) ',', (byte) ':', serDeParams.getNullSequence(), serDeParams.isEscaped(), serDeParams.getEscapeChar());
    LazySimpleStructObjectInspector structOI = (LazySimpleStructObjectInspector) LazyObjectInspectorFactory.getLazySimpleStructObjectInspector(Arrays.asList("row", "data"), Arrays.asList(stringOI, mapOI), (byte) ' ', serDeParams.getNullSequence(), serDeParams.isLastColumnTakesRest(), serDeParams.isEscaped(), serDeParams.getEscapeChar());
    LazyStruct struct = (LazyStruct) LazyFactory.createLazyObject(structOI);
    ByteArrayRef bytes = new ByteArrayRef();
    bytes.setData("row cq1:value1,cq2:value2".getBytes());
    struct.init(bytes, 0, bytes.getData().length);
    // Serialize the struct into a mutation
    Mutation m = serializer.serialize(struct, structOI);
    // Write the mutation
    recordWriter.write(new Text(table), m);
    // Close the writer
    recordWriter.close(null);
    Iterator<Entry<Key, Value>> iter = conn.createScanner(table, new Authorizations()).iterator();
    Assert.assertTrue("Iterator did not have an element as expected", iter.hasNext());
    Entry<Key, Value> entry = iter.next();
    Key k = entry.getKey();
    Value v = entry.getValue();
    Assert.assertEquals("row", k.getRow().toString());
    Assert.assertEquals("cf", k.getColumnFamily().toString());
    Assert.assertEquals("cq1", k.getColumnQualifier().toString());
    Assert.assertEquals(AccumuloSerDeParameters.DEFAULT_VISIBILITY_LABEL, k.getColumnVisibilityParsed());
    Assert.assertEquals("value1", new String(v.get()));
    Assert.assertTrue("Iterator did not have an element as expected", iter.hasNext());
    entry = iter.next();
    k = entry.getKey();
    v = entry.getValue();
    Assert.assertEquals("row", k.getRow().toString());
    Assert.assertEquals("cf", k.getColumnFamily().toString());
    Assert.assertEquals("cq2", k.getColumnQualifier().toString());
    Assert.assertEquals(AccumuloSerDeParameters.DEFAULT_VISIBILITY_LABEL, k.getColumnVisibilityParsed());
    Assert.assertEquals("value2", new String(v.get()));
    Assert.assertFalse("Iterator unexpectedly had more data", iter.hasNext());
}
Also used : Connector(org.apache.accumulo.core.client.Connector) Configuration(org.apache.hadoop.conf.Configuration) MockInstance(org.apache.accumulo.core.client.mock.MockInstance) Instance(org.apache.accumulo.core.client.Instance) LazySerDeParameters(org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters) Properties(java.util.Properties) PasswordToken(org.apache.accumulo.core.client.security.tokens.PasswordToken) Entry(java.util.Map.Entry) FileSystem(org.apache.hadoop.fs.FileSystem) JobConf(org.apache.hadoop.mapred.JobConf) AccumuloRowSerializer(org.apache.hadoop.hive.accumulo.serde.AccumuloRowSerializer) LazyStruct(org.apache.hadoop.hive.serde2.lazy.LazyStruct) Authorizations(org.apache.accumulo.core.security.Authorizations) LazyStringObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyStringObjectInspector) LazySimpleStructObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector) Text(org.apache.hadoop.io.Text) LazyMapObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) AccumuloSerDe(org.apache.hadoop.hive.accumulo.serde.AccumuloSerDe) AccumuloSerDeParameters(org.apache.hadoop.hive.accumulo.serde.AccumuloSerDeParameters) ByteArrayRef(org.apache.hadoop.hive.serde2.lazy.ByteArrayRef) Value(org.apache.accumulo.core.data.Value) Mutation(org.apache.accumulo.core.data.Mutation) Key(org.apache.accumulo.core.data.Key) Test(org.junit.Test)
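
To make the effect of the cf:* map mapping concrete, here is a small read-back sketch (assuming the conn and table values from the test above are passed in): each entry of the Hive map becomes its own Accumulo cell under family cf, with the map key as the column qualifier.

import java.util.Map;

import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.client.Scanner;
import org.apache.accumulo.core.client.TableNotFoundException;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.security.Authorizations;

public class DumpAccumuloTableSketch {
    // With the test data above this prints:
    //   row cf:cq1 -> value1
    //   row cf:cq2 -> value2
    public static void dump(Connector conn, String table) throws TableNotFoundException {
        Scanner scanner = conn.createScanner(table, new Authorizations());
        for (Map.Entry<Key, Value> entry : scanner) {
            Key k = entry.getKey();
            System.out.println(k.getRow() + " " + k.getColumnFamily() + ":"
                    + k.getColumnQualifier() + " -> " + new String(entry.getValue().get()));
        }
    }
}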

Example 20 with Serializer

Use of org.apache.hadoop.hive.serde2.Serializer in project presto by prestodb.

The class HiveWriteUtils, method initializeSerializer.

@SuppressWarnings("deprecation")
public static Serializer initializeSerializer(Configuration conf, Properties properties, String serializerName) {
    try {
        Serializer result = (Serializer) Class.forName(serializerName).getConstructor().newInstance();
        result.initialize(conf, properties);
        return result;
    } catch (SerDeException | ReflectiveOperationException e) {
        throw Throwables.propagate(e);
    }
}
Also used : SerDeException(org.apache.hadoop.hive.serde2.SerDeException) Serializer(org.apache.hadoop.hive.serde2.Serializer)
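
A hypothetical call site for this helper, assuming a LazySimpleSerDe-backed table: the property keys come from serdeConstants, but the column names and types are made up, and the import of the Presto HiveWriteUtils class shown above is omitted.

import java.util.Properties;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.Serializer;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;

public class InitializeSerializerSketch {
    public static void main(String[] args) {
        Properties tableProperties = new Properties();
        tableProperties.setProperty(serdeConstants.LIST_COLUMNS, "id,name");
        tableProperties.setProperty(serdeConstants.LIST_COLUMN_TYPES, "bigint,string");

        // Reflectively instantiates the SerDe class, initializes it with the table
        // properties and returns it through the Serializer interface
        Serializer serializer = HiveWriteUtils.initializeSerializer(
                new Configuration(), tableProperties, LazySimpleSerDe.class.getName());
        System.out.println(serializer.getSerializedClass()); // class org.apache.hadoop.io.Text
    }
}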

Aggregations

TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo): 11 uses
LazySerDeParameters (org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters): 10 uses
LazySimpleStructObjectInspector (org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector): 10 uses
Properties (java.util.Properties): 9 uses
Test (org.junit.Test): 9 uses
Mutation (org.apache.accumulo.core.data.Mutation): 8 uses
Configuration (org.apache.hadoop.conf.Configuration): 8 uses
ByteArrayRef (org.apache.hadoop.hive.serde2.lazy.ByteArrayRef): 8 uses
LazyStruct (org.apache.hadoop.hive.serde2.lazy.LazyStruct): 8 uses
JobConf (org.apache.hadoop.mapred.JobConf): 8 uses
SerDeException (org.apache.hadoop.hive.serde2.SerDeException): 7 uses
Serializer (org.apache.hadoop.hive.serde2.Serializer): 7 uses
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 6 uses
ArrayList (java.util.ArrayList): 5 uses
ColumnVisibility (org.apache.accumulo.core.security.ColumnVisibility): 5 uses
StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField): 5 uses
Text (org.apache.hadoop.io.Text): 5 uses
Entry (java.util.Map.Entry): 4 uses
Connector (org.apache.accumulo.core.client.Connector): 4 uses
Instance (org.apache.accumulo.core.client.Instance): 4 uses