
Example 31 with LazySerDeParameters

use of org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters in project hive by apache.

the class TestAccumuloRowSerializer method testVisibilityLabel.

@Test
public void testVisibilityLabel() throws IOException, SerDeException {
    List<String> columns = Arrays.asList("row", "cq1", "cq2", "cq3");
    List<TypeInfo> types = Arrays.<TypeInfo>asList(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.intTypeInfo, TypeInfoFactory.intTypeInfo, TypeInfoFactory.stringTypeInfo);
    List<String> typeNames = new ArrayList<String>(types.size());
    for (TypeInfo type : types) {
        typeNames.add(type.getTypeName());
    }
    Properties tableProperties = new Properties();
    tableProperties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowid,cf:cq1#b,cf:cq2#b,cf:cq3");
    tableProperties.setProperty(serdeConstants.FIELD_DELIM, " ");
    tableProperties.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(columns));
    tableProperties.setProperty(serdeConstants.LIST_COLUMN_TYPES, Joiner.on(',').join(typeNames));
    AccumuloSerDeParameters accumuloSerDeParams = new AccumuloSerDeParameters(new Configuration(), tableProperties, AccumuloSerDe.class.getSimpleName());
    LazySerDeParameters serDeParams = accumuloSerDeParams.getSerDeParameters();
    LazySimpleStructObjectInspector oi = (LazySimpleStructObjectInspector) LazyFactory.createLazyStructInspector(columns, types, serDeParams.getSeparators(), serDeParams.getNullSequence(), serDeParams.isLastColumnTakesRest(), serDeParams.isEscaped(), serDeParams.getEscapeChar());
    AccumuloRowSerializer serializer = new AccumuloRowSerializer(0, serDeParams, accumuloSerDeParams.getColumnMappings(), new ColumnVisibility("foo"), accumuloSerDeParams.getRowIdFactory());
    // Create the LazyStruct from the LazyStruct...Inspector
    LazyStruct obj = (LazyStruct) LazyFactory.createLazyObject(oi);
    ByteArrayRef byteRef = new ByteArrayRef();
    byteRef.setData(new byte[] { 'r', 'o', 'w', '1', ' ', '1', '0', ' ', '2', '0', ' ', 'v', 'a', 'l', 'u', 'e' });
    obj.init(byteRef, 0, byteRef.getData().length);
    Mutation m = (Mutation) serializer.serialize(obj, oi);
    Assert.assertArrayEquals("row1".getBytes(), m.getRow());
    List<ColumnUpdate> updates = m.getUpdates();
    Assert.assertEquals(3, updates.size());
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    DataOutputStream out = new DataOutputStream(baos);
    ColumnUpdate update = updates.get(0);
    Assert.assertEquals("cf", new String(update.getColumnFamily()));
    Assert.assertEquals("cq1", new String(update.getColumnQualifier()));
    Assert.assertEquals("foo", new String(update.getColumnVisibility()));
    out.writeInt(10);
    Assert.assertArrayEquals(baos.toByteArray(), update.getValue());
    update = updates.get(1);
    Assert.assertEquals("cf", new String(update.getColumnFamily()));
    Assert.assertEquals("cq2", new String(update.getColumnQualifier()));
    Assert.assertEquals("foo", new String(update.getColumnVisibility()));
    baos.reset();
    out.writeInt(20);
    Assert.assertArrayEquals(baos.toByteArray(), update.getValue());
    update = updates.get(2);
    Assert.assertEquals("cf", new String(update.getColumnFamily()));
    Assert.assertEquals("cq3", new String(update.getColumnQualifier()));
    Assert.assertEquals("foo", new String(update.getColumnVisibility()));
    Assert.assertEquals("value", new String(update.getValue()));
}
Also used : LazySimpleStructObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector) ColumnUpdate(org.apache.accumulo.core.data.ColumnUpdate) Configuration(org.apache.hadoop.conf.Configuration) LazySerDeParameters(org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters) DataOutputStream(java.io.DataOutputStream) ArrayList(java.util.ArrayList) ByteArrayOutputStream(java.io.ByteArrayOutputStream) Properties(java.util.Properties) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) ByteArrayRef(org.apache.hadoop.hive.serde2.lazy.ByteArrayRef) ColumnVisibility(org.apache.accumulo.core.security.ColumnVisibility) Mutation(org.apache.accumulo.core.data.Mutation) LazyStruct(org.apache.hadoop.hive.serde2.lazy.LazyStruct) Test(org.junit.Test)
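
For reference, the LazySerDeParameters that this test obtains through AccumuloSerDeParameters can also be built directly from table properties. A minimal sketch, assuming only the Hive serde2 classes already shown in this example; the two-column layout and delimiter below are illustrative, not from the test:

import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;

public class LazySerDeParametersSketch {
    public static void main(String[] args) throws Exception {
        Properties tbl = new Properties();
        // Hypothetical two-column, space-delimited table
        tbl.setProperty(serdeConstants.LIST_COLUMNS, "id,name");
        tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, "int,string");
        tbl.setProperty(serdeConstants.FIELD_DELIM, " ");
        // Same constructor used by TestVectorSerDeRow.getSerDeParams (Example 34)
        LazySerDeParameters params = new LazySerDeParameters(new Configuration(), tbl, LazySimpleSerDe.class.getName());
        // Accessors the Accumulo test relies on when building its lazy struct inspector
        byte[] separators = params.getSeparators();
        System.out.println("first separator byte: " + separators[0] + ", escaped: " + params.isEscaped());
    }
}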

Example 32 with LazySerDeParameters

use of org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters in project phoenix by apache.

the class PhoenixObjectInspectorFactory method createStructObjectInspector.

public static LazySimpleStructObjectInspector createStructObjectInspector(TypeInfo type, LazySerDeParameters serdeParams) {
    StructTypeInfo structTypeInfo = (StructTypeInfo) type;
    List<String> fieldNames = structTypeInfo.getAllStructFieldNames();
    List<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
    List<ObjectInspector> fieldObjectInspectors = new ArrayList<ObjectInspector>(fieldTypeInfos.size());
    for (int i = 0; i < fieldTypeInfos.size(); i++) {
        fieldObjectInspectors.add(createObjectInspector(fieldTypeInfos.get(i), serdeParams));
    }
    return LazyObjectInspectorFactory.getLazySimpleStructObjectInspector(fieldNames, fieldObjectInspectors, null, serdeParams.getSeparators()[1], serdeParams, ObjectInspectorOptions.JAVA);
}
Also used : LazySimpleStructObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) ArrayList(java.util.ArrayList) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)
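
A hedged usage sketch for this factory method, assuming a struct TypeInfo parsed with TypeInfoUtils and a LazySerDeParameters built from table properties; the column layout is illustrative, and the PhoenixObjectInspectorFactory import is assumed to live in the phoenix-hive objectinspector package:

import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
// assumed package for the phoenix-hive module
import org.apache.phoenix.hive.objectinspector.PhoenixObjectInspectorFactory;

public class StructInspectorSketch {
    public static void main(String[] args) throws Exception {
        Properties tbl = new Properties();
        tbl.setProperty(serdeConstants.LIST_COLUMNS, "point");
        tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, "struct<x:int,y:int>");
        LazySerDeParameters serdeParams = new LazySerDeParameters(new Configuration(), tbl, LazySimpleSerDe.class.getName());
        // Parse the struct type and hand it, plus the serde params, to the factory
        TypeInfo structType = TypeInfoUtils.getTypeInfoFromTypeString("struct<x:int,y:int>");
        LazySimpleStructObjectInspector oi = PhoenixObjectInspectorFactory.createStructObjectInspector(structType, serdeParams);
        System.out.println(oi.getAllStructFieldRefs().size() + " struct fields");
    }
}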

Example 33 with LazySerDeParameters

use of org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters in project phoenix by apache.

the class PhoenixObjectInspectorFactory method createObjectInspector.

public static ObjectInspector createObjectInspector(TypeInfo type, LazySerDeParameters serdeParams) {
    ObjectInspector oi = null;
    if (LOG.isDebugEnabled()) {
        LOG.debug("Type : " + type);
    }
    switch(type.getCategory()) {
        case PRIMITIVE:
            switch(((PrimitiveTypeInfo) type).getPrimitiveCategory()) {
                case BOOLEAN:
                    oi = new PhoenixBooleanObjectInspector();
                    break;
                case BYTE:
                    oi = new PhoenixByteObjectInspector();
                    break;
                case SHORT:
                    oi = new PhoenixShortObjectInspector();
                    break;
                case INT:
                    oi = new PhoenixIntObjectInspector();
                    break;
                case LONG:
                    oi = new PhoenixLongObjectInspector();
                    break;
                case FLOAT:
                    oi = new PhoenixFloatObjectInspector();
                    break;
                case DOUBLE:
                    oi = new PhoenixDoubleObjectInspector();
                    break;
                case VARCHAR:
                // same string
                case STRING:
                    oi = new PhoenixStringObjectInspector(serdeParams.isEscaped(), serdeParams.getEscapeChar());
                    break;
                case CHAR:
                    oi = new PhoenixCharObjectInspector((PrimitiveTypeInfo) type);
                    break;
                case DATE:
                    oi = new PhoenixDateObjectInspector();
                    break;
                case TIMESTAMP:
                    oi = new PhoenixTimestampObjectInspector();
                    break;
                case DECIMAL:
                    oi = new PhoenixDecimalObjectInspector((PrimitiveTypeInfo) type);
                    break;
                case BINARY:
                    oi = new PhoenixBinaryObjectInspector();
                    break;
                default:
                    throw new RuntimeException("Hive internal error. not supported data type " + ": " + type);
            }
            break;
        case LIST:
            if (LOG.isDebugEnabled()) {
                LOG.debug("List type started");
            }
            ObjectInspector listElementObjectInspector = createObjectInspector(((ListTypeInfo) type).getListElementTypeInfo(), serdeParams);
            if (LOG.isDebugEnabled()) {
                LOG.debug("List type ended");
            }
            oi = new PhoenixListObjectInspector(listElementObjectInspector, serdeParams.getSeparators()[0], serdeParams);
            break;
        default:
            throw new RuntimeException("Hive internal error. not supported data type : " + type);
    }
    return oi;
}
Also used : LazySimpleStructObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)
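
To make the dispatch concrete, a small sketch of calling the factory for a primitive and a list type. The TypeInfoFactory calls are standard Hive serde2 API, the expected inspector classes follow from the switch above, and the demo method plus the PhoenixObjectInspectorFactory import are illustrative assumptions:

import org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
// assumed package for the phoenix-hive module
import org.apache.phoenix.hive.objectinspector.PhoenixObjectInspectorFactory;

public class InspectorDispatchSketch {
    // serdeParams built e.g. as in the sketch after Example 32
    static void demo(LazySerDeParameters serdeParams) {
        // PRIMITIVE / INT branch -> PhoenixIntObjectInspector
        ObjectInspector intOi = PhoenixObjectInspectorFactory.createObjectInspector(TypeInfoFactory.intTypeInfo, serdeParams);
        // LIST branch -> PhoenixListObjectInspector wrapping a string element inspector,
        // using separators[0] as the list delimiter (see the method body above)
        ObjectInspector listOi = PhoenixObjectInspectorFactory.createObjectInspector(TypeInfoFactory.getListTypeInfo(TypeInfoFactory.stringTypeInfo), serdeParams);
        System.out.println(intOi.getCategory() + " / " + listOi.getCategory());
    }
}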

Example 34 with LazySerDeParameters

use of org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters in project hive by apache.

the class TestVectorSerDeRow method getSerDeParams.

private LazySerDeParameters getSerDeParams(Configuration conf, Properties tbl, StructObjectInspector rowObjectInspector, byte[] separators) throws SerDeException {
    String fieldNames = ObjectInspectorUtils.getFieldNames(rowObjectInspector);
    String fieldTypes = ObjectInspectorUtils.getFieldTypes(rowObjectInspector);
    addToProperties(tbl, fieldNames, fieldTypes);
    LazySerDeParameters lazySerDeParams = new LazySerDeParameters(conf, tbl, LazySimpleSerDe.class.getName());
    for (int i = 0; i < separators.length; i++) {
        lazySerDeParams.setSeparator(i, separators[i]);
    }
    return lazySerDeParams;
}
Also used : LazySerDeParameters(org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters) LazySimpleSerDe(org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe)
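
The same override pattern works outside the test harness: build the parameters from table properties, then replace the leading separator bytes with setSeparator. A minimal sketch with illustrative column names, types, and separator values:

import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;

public class SeparatorOverrideSketch {
    public static void main(String[] args) throws Exception {
        Properties tbl = new Properties();
        tbl.setProperty(serdeConstants.LIST_COLUMNS, "c0,c1,c2");
        tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, "int,string,array<int>");
        LazySerDeParameters params = new LazySerDeParameters(new Configuration(), tbl, LazySimpleSerDe.class.getName());
        // Override the first few separator levels, mirroring the loop in getSerDeParams above
        byte[] separators = new byte[] { (byte) '\t', (byte) 2, (byte) 3 };
        for (int i = 0; i < separators.length; i++) {
            params.setSeparator(i, separators[i]);
        }
        System.out.println("top-level separator is now: " + params.getSeparators()[0]);
    }
}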

Example 35 with LazySerDeParameters

use of org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters in project hive by apache.

the class TestVectorSerDeRow method innerTestVectorSerializeRow.

void innerTestVectorSerializeRow(Random r, SerializationType serializationType) throws HiveException, IOException, SerDeException {
    String[] emptyScratchTypeNames = new String[0];
    VectorRandomRowSource source = new VectorRandomRowSource();
    source.init(r, VectorRandomRowSource.SupportedTypes.ALL, 4, false);
    VectorizedRowBatchCtx batchContext = new VectorizedRowBatchCtx();
    batchContext.init(source.rowStructObjectInspector(), emptyScratchTypeNames);
    VectorizedRowBatch batch = batchContext.createVectorizedRowBatch();
    VectorAssignRow vectorAssignRow = new VectorAssignRow();
    vectorAssignRow.init(source.typeNames());
    int fieldCount = source.typeNames().size();
    DeserializeRead deserializeRead;
    SerializeWrite serializeWrite;
    switch(serializationType) {
        case BINARY_SORTABLE:
            deserializeRead = new BinarySortableDeserializeRead(source.typeInfos(), /* useExternalBuffer */ false);
            serializeWrite = new BinarySortableSerializeWrite(fieldCount);
            break;
        case LAZY_BINARY:
            deserializeRead = new LazyBinaryDeserializeRead(source.typeInfos(), /* useExternalBuffer */ false);
            serializeWrite = new LazyBinarySerializeWrite(fieldCount);
            break;
        case LAZY_SIMPLE:
            {
                StructObjectInspector rowObjectInspector = source.rowStructObjectInspector();
                // Use different separator values.
                byte[] separators = new byte[] { (byte) 9, (byte) 2, (byte) 3, (byte) 4, (byte) 5, (byte) 6, (byte) 7, (byte) 8 };
                LazySerDeParameters lazySerDeParams = getSerDeParams(rowObjectInspector, separators);
                deserializeRead = new LazySimpleDeserializeRead(source.typeInfos(), /* useExternalBuffer */ false, lazySerDeParams);
                serializeWrite = new LazySimpleSerializeWrite(fieldCount, lazySerDeParams);
            }
            break;
        default:
            throw new Error("Unknown serialization type " + serializationType);
    }
    VectorSerializeRow vectorSerializeRow = new VectorSerializeRow(serializeWrite);
    vectorSerializeRow.init(source.typeNames());
    Object[][] randomRows = source.randomRows(2000);
    int firstRandomRowIndex = 0;
    for (int i = 0; i < randomRows.length; i++) {
        Object[] row = randomRows[i];
        vectorAssignRow.assignRow(batch, batch.size, row);
        batch.size++;
        if (batch.size == batch.DEFAULT_SIZE) {
            serializeBatch(batch, vectorSerializeRow, deserializeRead, source, randomRows, firstRandomRowIndex);
            firstRandomRowIndex = i + 1;
            batch.reset();
        }
    }
    if (batch.size > 0) {
        serializeBatch(batch, vectorSerializeRow, deserializeRead, source, randomRows, firstRandomRowIndex);
    }
}
Also used : LazySerDeParameters(org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters) DeserializeRead(org.apache.hadoop.hive.serde2.fast.DeserializeRead) LazyBinaryDeserializeRead(org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead) BinarySortableDeserializeRead(org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead) LazySimpleDeserializeRead(org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleDeserializeRead) BinarySortableDeserializeRead(org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead) LazyBinarySerializeWrite(org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite) LazySimpleDeserializeRead(org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleDeserializeRead) BinarySortableSerializeWrite(org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite) LazySimpleSerializeWrite(org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleSerializeWrite) UnionObject(org.apache.hadoop.hive.serde2.objectinspector.UnionObject) LazyBinaryDeserializeRead(org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead) SerializeWrite(org.apache.hadoop.hive.serde2.fast.SerializeWrite) BinarySortableSerializeWrite(org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite) LazyBinarySerializeWrite(org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite) LazySimpleSerializeWrite(org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleSerializeWrite) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)
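
Of the three branches, only LAZY_SIMPLE needs a LazySerDeParameters instance. A construction-only sketch of that pairing, reusing one parameters object for both writer and reader so separators, null sequence, and escaping agree; the types and field count are illustrative:

import org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters;
import org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleDeserializeRead;
import org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleSerializeWrite;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class LazySimplePairSketch {
    // lazySerDeParams built as in getSerDeParams (Example 34)
    static void build(LazySerDeParameters lazySerDeParams) throws Exception {
        TypeInfo[] typeInfos = new TypeInfo[] { TypeInfoFactory.intTypeInfo, TypeInfoFactory.stringTypeInfo };
        // Writer and reader share the same parameters, exactly as in the LAZY_SIMPLE case above
        LazySimpleSerializeWrite serializeWrite = new LazySimpleSerializeWrite(typeInfos.length, lazySerDeParams);
        LazySimpleDeserializeRead deserializeRead = new LazySimpleDeserializeRead(typeInfos, /* useExternalBuffer */ false, lazySerDeParams);
        System.out.println(serializeWrite.getClass().getSimpleName() + " / " + deserializeRead.getClass().getSimpleName());
    }
}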

Aggregations

LazySerDeParameters (org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters): 26
Text (org.apache.hadoop.io.Text): 20
Configuration (org.apache.hadoop.conf.Configuration): 19
Properties (java.util.Properties): 18
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo): 16
LazySimpleStructObjectInspector (org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector): 14
Test (org.junit.Test): 12
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 10
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 10
Mutation (org.apache.accumulo.core.data.Mutation): 8
ByteArrayRef (org.apache.hadoop.hive.serde2.lazy.ByteArrayRef): 8
LazyStruct (org.apache.hadoop.hive.serde2.lazy.LazyStruct): 8
ArrayList (java.util.ArrayList): 7
ByteStream (org.apache.hadoop.hive.serde2.ByteStream): 7
LazySimpleSerDe (org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe): 7
LazySimpleDeserializeRead (org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleDeserializeRead): 6
LazySimpleSerializeWrite (org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleSerializeWrite): 6
Entry (java.util.Map.Entry): 5
PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo): 5
Connector (org.apache.accumulo.core.client.Connector): 4