Search in sources :

Example 11 with LazySerDeParameters

use of org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters in project hive by apache.

the class TestAccumuloRowSerializer method testMapSerialization.

@Test
public void testMapSerialization() throws IOException, SerDeException {
    List<String> columns = Arrays.asList("row", "col");
    List<TypeInfo> types = Arrays.<TypeInfo>asList(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.getMapTypeInfo(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo));
    List<String> typeNames = new ArrayList<String>(types.size());
    for (TypeInfo type : types) {
        typeNames.add(type.getTypeName());
    }
    Properties tableProperties = new Properties();
    tableProperties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowid,cf:*");
    tableProperties.setProperty(serdeConstants.FIELD_DELIM, " ");
    tableProperties.setProperty(serdeConstants.COLLECTION_DELIM, ",");
    tableProperties.setProperty(serdeConstants.MAPKEY_DELIM, ":");
    tableProperties.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(columns));
    tableProperties.setProperty(serdeConstants.LIST_COLUMN_TYPES, Joiner.on(',').join(typeNames));
    AccumuloSerDeParameters accumuloSerDeParams = new AccumuloSerDeParameters(new Configuration(), tableProperties, AccumuloSerDe.class.getSimpleName());
    LazySerDeParameters serDeParams = accumuloSerDeParams.getSerDeParameters();
    TypeInfo stringTypeInfo = TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.STRING_TYPE_NAME);
    LazyStringObjectInspector stringOI = (LazyStringObjectInspector) LazyFactory.createLazyObjectInspector(stringTypeInfo, new byte[] { 0 }, 0, serDeParams.getNullSequence(), serDeParams.isEscaped(), serDeParams.getEscapeChar());
    LazyMapObjectInspector mapOI = LazyObjectInspectorFactory.getLazySimpleMapObjectInspector(stringOI, stringOI, (byte) ',', (byte) ':', serDeParams.getNullSequence(), serDeParams.isEscaped(), serDeParams.getEscapeChar());
    LazySimpleStructObjectInspector structOI = (LazySimpleStructObjectInspector) LazyObjectInspectorFactory.getLazySimpleStructObjectInspector(columns, Arrays.asList(stringOI, mapOI), (byte) ' ', serDeParams.getNullSequence(), serDeParams.isLastColumnTakesRest(), serDeParams.isEscaped(), serDeParams.getEscapeChar());
    AccumuloRowSerializer serializer = new AccumuloRowSerializer(0, serDeParams, accumuloSerDeParams.getColumnMappings(), new ColumnVisibility(), accumuloSerDeParams.getRowIdFactory());
    // Create the LazyStruct from the LazyStruct...Inspector
    LazyStruct obj = (LazyStruct) LazyFactory.createLazyObject(structOI);
    ByteArrayRef byteRef = new ByteArrayRef();
    byteRef.setData("row1 cq1:10,cq2:20,cq3:value".getBytes());
    obj.init(byteRef, 0, byteRef.getData().length);
    Mutation m = (Mutation) serializer.serialize(obj, structOI);
    Assert.assertArrayEquals("row1".getBytes(), m.getRow());
    List<ColumnUpdate> updates = m.getUpdates();
    Assert.assertEquals(3, updates.size());
    ColumnUpdate update = updates.get(0);
    Assert.assertEquals("cf", new String(update.getColumnFamily()));
    Assert.assertEquals("cq1", new String(update.getColumnQualifier()));
    Assert.assertEquals("10", new String(update.getValue()));
    update = updates.get(1);
    Assert.assertEquals("cf", new String(update.getColumnFamily()));
    Assert.assertEquals("cq2", new String(update.getColumnQualifier()));
    Assert.assertEquals("20", new String(update.getValue()));
    update = updates.get(2);
    Assert.assertEquals("cf", new String(update.getColumnFamily()));
    Assert.assertEquals("cq3", new String(update.getColumnQualifier()));
    Assert.assertEquals("value", new String(update.getValue()));
}
Also used : LazyStringObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyStringObjectInspector) LazySimpleStructObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector) ColumnUpdate(org.apache.accumulo.core.data.ColumnUpdate) Configuration(org.apache.hadoop.conf.Configuration) LazySerDeParameters(org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters) ArrayList(java.util.ArrayList) LazyMapObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector) Properties(java.util.Properties) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) ByteArrayRef(org.apache.hadoop.hive.serde2.lazy.ByteArrayRef) ColumnVisibility(org.apache.accumulo.core.security.ColumnVisibility) Mutation(org.apache.accumulo.core.data.Mutation) LazyStruct(org.apache.hadoop.hive.serde2.lazy.LazyStruct) Test(org.junit.Test)

Example 12 with LazySerDeParameters

use of org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters in project hive by apache.

the class TestDefaultAccumuloRowIdFactory method testCorrectComplexInspectors.

@Test
public void testCorrectComplexInspectors() throws SerDeException {
    AccumuloSerDe accumuloSerDe = new AccumuloSerDe();
    Properties properties = new Properties();
    Configuration conf = new Configuration();
    properties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowID,cf:cq");
    properties.setProperty(serdeConstants.LIST_COLUMNS, "row,col");
    properties.setProperty(serdeConstants.LIST_COLUMN_TYPES, "struct<col1:int,col2:int>,map<string,string>");
    accumuloSerDe.initialize(conf, properties);
    AccumuloRowIdFactory factory = accumuloSerDe.getParams().getRowIdFactory();
    List<TypeInfo> columnTypes = accumuloSerDe.getParams().getHiveColumnTypes();
    ColumnMapper mapper = accumuloSerDe.getParams().getColumnMapper();
    LazySerDeParameters serDeParams = accumuloSerDe.getParams().getSerDeParameters();
    List<ObjectInspector> OIs = accumuloSerDe.getColumnObjectInspectors(columnTypes, serDeParams, mapper.getColumnMappings(), factory);
    // Expect the correct OIs
    Assert.assertEquals(2, OIs.size());
    Assert.assertEquals(LazySimpleStructObjectInspector.class, OIs.get(0).getClass());
    Assert.assertEquals(LazyMapObjectInspector.class, OIs.get(1).getClass());
    LazySimpleStructObjectInspector structOI = (LazySimpleStructObjectInspector) OIs.get(0);
    Assert.assertEquals(2, (int) structOI.getSeparator());
    LazyMapObjectInspector mapOI = (LazyMapObjectInspector) OIs.get(1);
    Assert.assertEquals(2, (int) mapOI.getItemSeparator());
    Assert.assertEquals(3, (int) mapOI.getKeyValueSeparator());
}
Also used : LazyMapObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector) LazySimpleStructObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector) LazyIntObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyIntObjectInspector) LazyStringObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyStringObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) LazySimpleStructObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector) Configuration(org.apache.hadoop.conf.Configuration) LazySerDeParameters(org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters) LazyMapObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector) Properties(java.util.Properties) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) ColumnMapper(org.apache.hadoop.hive.accumulo.columns.ColumnMapper) Test(org.junit.Test)

Example 13 with LazySerDeParameters

use of org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters in project hive by apache.

the class TestDefaultAccumuloRowIdFactory method testCorrectPrimitiveInspectors.

@Test
public void testCorrectPrimitiveInspectors() throws SerDeException {
    AccumuloSerDe accumuloSerDe = new AccumuloSerDe();
    Properties properties = new Properties();
    Configuration conf = new Configuration();
    properties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowID,cf:cq");
    properties.setProperty(serdeConstants.LIST_COLUMNS, "row,col");
    properties.setProperty(serdeConstants.LIST_COLUMN_TYPES, "string,int");
    accumuloSerDe.initialize(conf, properties);
    AccumuloRowIdFactory factory = accumuloSerDe.getParams().getRowIdFactory();
    List<TypeInfo> columnTypes = accumuloSerDe.getParams().getHiveColumnTypes();
    ColumnMapper mapper = accumuloSerDe.getParams().getColumnMapper();
    LazySerDeParameters serDeParams = accumuloSerDe.getParams().getSerDeParameters();
    List<ObjectInspector> OIs = accumuloSerDe.getColumnObjectInspectors(columnTypes, serDeParams, mapper.getColumnMappings(), factory);
    Assert.assertEquals(2, OIs.size());
    Assert.assertEquals(LazyStringObjectInspector.class, OIs.get(0).getClass());
    Assert.assertEquals(LazyIntObjectInspector.class, OIs.get(1).getClass());
}
Also used : LazyMapObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector) LazySimpleStructObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector) LazyIntObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyIntObjectInspector) LazyStringObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyStringObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) Configuration(org.apache.hadoop.conf.Configuration) LazySerDeParameters(org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters) Properties(java.util.Properties) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) ColumnMapper(org.apache.hadoop.hive.accumulo.columns.ColumnMapper) Test(org.junit.Test)

Example 14 with LazySerDeParameters

use of org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters in project phoenix by apache.

the class PhoenixSerDe method initialize.

@Override
public void initialize(Configuration conf, Properties tbl) throws SerDeException {
    tableProperties = tbl;
    if (LOG.isDebugEnabled()) {
        LOG.debug("SerDe initialize : " + tbl.getProperty("name"));
    }
    serdeParams = new LazySerDeParameters(conf, tbl, getClass().getName());
    objectInspector = createLazyPhoenixInspector(conf, tbl);
    String inOutWork = tbl.getProperty(PhoenixStorageHandlerConstants.IN_OUT_WORK);
    if (inOutWork == null) {
        return;
    }
    serializer = new PhoenixSerializer(conf, tbl);
    row = new PhoenixRow(serdeParams.getColumnNames());
}
Also used : LazySerDeParameters(org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters)

Example 15 with LazySerDeParameters

use of org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters in project hive by apache.

the class AccumuloRowSerializer method writeWithLevel.

/**
 * Recursively serialize an Object using its {@link ObjectInspector}, respecting the
 * separators defined by the {@link LazySerDeParameters}.
 * @param oi ObjectInspector for the current object
 * @param value The current object
 * @param output A buffer output is written to
 * @param mapping The mapping for this Hive column
 * @param level The current level/offset for the SerDe separator
 * @throws IOException
 */
protected void writeWithLevel(ObjectInspector oi, Object value, ByteStream.Output output, ColumnMapping mapping, int level) throws IOException {
    switch(oi.getCategory()) {
        case PRIMITIVE:
            if (mapping.getEncoding() == ColumnEncoding.BINARY) {
                this.writeBinary(output, value, (PrimitiveObjectInspector) oi);
            } else {
                this.writeString(output, value, (PrimitiveObjectInspector) oi);
            }
            return;
        case LIST:
            char separator = (char) serDeParams.getSeparators()[level];
            ListObjectInspector loi = (ListObjectInspector) oi;
            List<?> list = loi.getList(value);
            ObjectInspector eoi = loi.getListElementObjectInspector();
            if (list == null) {
                log.debug("No objects found when serializing list");
                return;
            } else {
                for (int i = 0; i < list.size(); i++) {
                    if (i > 0) {
                        output.write(separator);
                    }
                    writeWithLevel(eoi, list.get(i), output, mapping, level + 1);
                }
            }
            return;
        case MAP:
            char sep = (char) serDeParams.getSeparators()[level];
            char keyValueSeparator = (char) serDeParams.getSeparators()[level + 1];
            MapObjectInspector moi = (MapObjectInspector) oi;
            ObjectInspector koi = moi.getMapKeyObjectInspector();
            ObjectInspector voi = moi.getMapValueObjectInspector();
            Map<?, ?> map = moi.getMap(value);
            if (map == null) {
                log.debug("No object found when serializing map");
                return;
            } else {
                boolean first = true;
                for (Map.Entry<?, ?> entry : map.entrySet()) {
                    if (first) {
                        first = false;
                    } else {
                        output.write(sep);
                    }
                    writeWithLevel(koi, entry.getKey(), output, mapping, level + 2);
                    output.write(keyValueSeparator);
                    writeWithLevel(voi, entry.getValue(), output, mapping, level + 2);
                }
            }
            return;
        case STRUCT:
            sep = (char) serDeParams.getSeparators()[level];
            StructObjectInspector soi = (StructObjectInspector) oi;
            List<? extends StructField> fields = soi.getAllStructFieldRefs();
            list = soi.getStructFieldsDataAsList(value);
            if (list == null) {
                log.debug("No object found when serializing struct");
                return;
            } else {
                for (int i = 0; i < list.size(); i++) {
                    if (i > 0) {
                        output.write(sep);
                    }
                    writeWithLevel(fields.get(i).getFieldObjectInspector(), list.get(i), output, mapping, level + 1);
                }
            }
            return;
        default:
            throw new RuntimeException("Unknown category type: " + oi.getCategory());
    }
}
Also used : ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) Map(java.util.Map) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)

Aggregations

LazySerDeParameters (org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters)26 Text (org.apache.hadoop.io.Text)20 Configuration (org.apache.hadoop.conf.Configuration)19 Properties (java.util.Properties)18 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)16 LazySimpleStructObjectInspector (org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector)14 Test (org.junit.Test)12 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)10 StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)10 Mutation (org.apache.accumulo.core.data.Mutation)8 ByteArrayRef (org.apache.hadoop.hive.serde2.lazy.ByteArrayRef)8 LazyStruct (org.apache.hadoop.hive.serde2.lazy.LazyStruct)8 ArrayList (java.util.ArrayList)7 ByteStream (org.apache.hadoop.hive.serde2.ByteStream)7 LazySimpleSerDe (org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe)7 LazySimpleDeserializeRead (org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleDeserializeRead)6 LazySimpleSerializeWrite (org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleSerializeWrite)6 Entry (java.util.Map.Entry)5 PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)5 Connector (org.apache.accumulo.core.client.Connector)4