Search in sources :

Example 1 with LazyArray

use of org.apache.hadoop.hive.serde2.lazy.LazyArray in project hive by apache.

the class TestLazyArrayMapStruct method testNestedinArrayAtLevel.

/**
   * Round-trips a value of category {@code dtype} that is nested inside
   * {@code nestingLevel - 1} levels of {@code array<...>} through {@link LazySimpleSerDe}.
   *
   * <p>The method builds the nested column type, serializes a small value by hand using the
   * separator bytes reported by {@link LazySerDeParameters} for the requested nesting level,
   * loads it into a {@link LazyStruct}, walks down to the innermost object to verify its
   * contents, and finally checks that serializing the struct reproduces the original bytes.
   *
   * @param nestingLevel total nesting depth; the inner type is wrapped in
   *          {@code nestingLevel - 1} array layers, and {@code separators[nestingLevel]}
   *          (plus {@code separators[nestingLevel + 1]} for map key/value) delimits the
   *          innermost value. NOTE(review): the array casts below assume
   *          {@code nestingLevel >= 2} - confirm against callers.
   * @param dtype category of the innermost type; LIST, MAP, STRUCT and UNION are handled,
   *          anything else fails the test
   * @param tableProp table properties handed to the SerDe; this method sets
   *          {@code "columns"} and {@code "columns.types"} on it
   * @throws SerDeException propagated from the SerDe calls made by this method
   */
private void testNestedinArrayAtLevel(int nestingLevel, ObjectInspector.Category dtype, Properties tableProp) throws SerDeException {
    //create type with nestingLevel levels of nesting
    //set inner schema for dtype
    String inSchema = null;
    switch(dtype) {
        case LIST:
            inSchema = "array<tinyint>";
            break;
        case MAP:
            inSchema = "map<string,int>";
            break;
        case STRUCT:
            inSchema = "struct<s:string,i:tinyint>";
            break;
        case UNION:
            inSchema = "uniontype<string,tinyint>";
            break;
        default:
            fail("type not supported by test case");
    }
    // Wrap the inner type in (nestingLevel - 1) array<...> layers.
    StringBuilder schema = new StringBuilder(inSchema);
    for (int i = 0; i < nestingLevel - 1; i++) {
        schema.insert(0, "array<");
        schema.append(">");
    }
    System.err.println("Testing nesting level " + nestingLevel + ". Using schema " + schema);
    // Create the SerDe with a single column "narray" of the nested type
    LazySimpleSerDe serDe = new LazySimpleSerDe();
    Configuration conf = new Configuration();
    tableProp.setProperty("columns", "narray");
    tableProp.setProperty("columns.types", schema.toString());
    SerDeUtils.initializeSerDe(serDe, conf, tableProp, null);
    LazySerDeParameters serdeParams = new LazySerDeParameters(conf, tableProp, LazySimpleSerDe.class.getName());
    //create the serialized string for type
    byte[] separators = serdeParams.getSeparators();
    System.err.println("Using separator " + (char) separators[nestingLevel]);
    byte[] serializedRow = null;
    switch(dtype) {
        case LIST:
            serializedRow = new byte[] { '8', separators[nestingLevel], '9' };
            break;
        case MAP:
            // Key/value pairs are split at this level; key and value one level deeper.
            byte kvSep = separators[nestingLevel + 1];
            byte kvPairSep = separators[nestingLevel];
            serializedRow = new byte[] { '1', kvSep, '1', kvPairSep, '2', kvSep, '2' };
            break;
        case STRUCT:
            serializedRow = new byte[] { '8', separators[nestingLevel], '9' };
            break;
        case UNION:
            // Leading '0' is the union tag; the assertion below expects the string alternative.
            serializedRow = new byte[] { '0', separators[nestingLevel], '9' };
            break;
        default:
            fail("type not supported by test case");
    }
    //create LazyStruct with serialized string with expected separators
    StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector();
    LazyStruct struct = (LazyStruct) LazyFactory.createLazyObject(oi);
    TestLazyPrimitive.initLazyObject(struct, serializedRow, 0, serializedRow.length);
    //Get fields out of the lazy struct and check if they match expected
    // results
    //Get first level (outermost) array
    LazyArray array = (LazyArray) struct.getField(0);
    // Descend through the remaining (nestingLevel - 2) array layers so that 'array'
    // is the innermost wrapper, i.e. the array whose element 0 is the dtype value.
    for (int i = 0; i < nestingLevel - 2; i++) {
        array = (LazyArray) array.getListElementObject(0);
    }
    //verify the serialized format for dtype
    switch(dtype) {
        case LIST:
            LazyArray array1 = (LazyArray) array.getListElementObject(0);
            //check elements of the innermost array
            assertEquals(2, array1.getListLength());
            assertEquals(new ByteWritable((byte) 8), ((LazyByte) array1.getListElementObject(0)).getWritableObject());
            assertEquals(new ByteWritable((byte) 9), ((LazyByte) array1.getListElementObject(1)).getWritableObject());
            break;
        case MAP:
            LazyMap lazyMap = (LazyMap) array.getListElementObject(0);
            Map map = lazyMap.getMap();
            System.err.println(map);
            assertEquals(2, map.size());
            // The entries are checked in iteration order: ("1" -> 1) first, then ("2" -> 2).
            Iterator<Map.Entry<LazyString, LazyInteger>> it = map.entrySet().iterator();
            Entry<LazyString, LazyInteger> e1 = it.next();
            assertEquals(new Text(new byte[] { '1' }), e1.getKey().getWritableObject());
            assertEquals(new IntWritable(1), e1.getValue().getWritableObject());
            Entry<LazyString, LazyInteger> e2 = it.next();
            assertEquals(new Text(new byte[] { '2' }), e2.getKey().getWritableObject());
            assertEquals(new IntWritable(2), e2.getValue().getWritableObject());
            break;
        case STRUCT:
            LazyStruct innerStruct = (LazyStruct) array.getListElementObject(0);
            //check elements of the innermost struct
            assertEquals(2, innerStruct.getFieldsAsList().size());
            assertEquals(new Text(new byte[] { '8' }), ((LazyString) innerStruct.getField(0)).getWritableObject());
            assertEquals(new ByteWritable((byte) 9), ((LazyByte) innerStruct.getField(1)).getWritableObject());
            break;
        case UNION:
            LazyUnion lazyUnion = (LazyUnion) array.getListElementObject(0);
            //check elements of the innermost union
            assertEquals(new Text(new byte[] { '9' }), ((LazyString) lazyUnion.getField()).getWritableObject());
            break;
        default:
            fail("type not supported by test case");
    }
    //test serialization
    Text serializedText = (Text) serDe.serialize(struct.getObject(), serDe.getObjectInspector());
    // Text.getBytes() exposes the backing buffer, which may be longer than the valid
    // data; compare only the first getLength() bytes.
    byte[] actualRow = java.util.Arrays.copyOf(serializedText.getBytes(), serializedText.getLength());
    org.junit.Assert.assertArrayEquals(serializedRow, actualRow);
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) Text(org.apache.hadoop.io.Text) Entry(java.util.Map.Entry) ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable) LazyBinaryMap(org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryMap) Map(java.util.Map) IntWritable(org.apache.hadoop.io.IntWritable) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)

Example 2 with LazyArray

use of org.apache.hadoop.hive.serde2.lazy.LazyArray in project hive by apache.

the class TestLazyArrayMapStruct method testLazyArray.

/**
   * Exercises {@link LazyArray} element access through both
   * {@code getListElementObject(int)} and {@code getList()}: a tinyint array,
   * a string array initialized from a sub-range of a buffer, and the
   * HIVE-4149 case where one lazy array is re-initialized with new data.
   */
public void testLazyArray() throws Throwable {
    try {
        Text nullMarker = new Text("\\N");

        // ---- array<tinyint>, elements separated by byte 0x01 ----
        ObjectInspector byteArrayOI = LazyFactory.createLazyObjectInspector(
            TypeInfoUtils.getTypeInfosFromTypeString("array<tinyint>").get(0),
            new byte[] { (byte) 1 }, 0, nullMarker, false, (byte) 0);
        LazyArray lazyArray = (LazyArray) LazyFactory.createLazyObject(byteArrayOI);
        // Encodes three elements: "-1", the null sequence, "8"
        byte[] raw = new byte[] { '-', '1', 1, '\\', 'N', 1, '8' };
        TestLazyPrimitive.initLazyObject(lazyArray, raw, 0, raw.length);

        ByteWritable minusOne = new ByteWritable((byte) -1);
        ByteWritable eight = new ByteWritable((byte) 8);
        // index below the range
        assertNull(lazyArray.getListElementObject(-1));
        assertEquals(minusOne, ((LazyByte) lazyArray.getListElementObject(0)).getWritableObject());
        assertEquals(minusOne, ((LazyByte) lazyArray.getList().get(0)).getWritableObject());
        // the null sequence is surfaced as a null element
        assertNull(lazyArray.getListElementObject(1));
        assertNull(lazyArray.getList().get(1));
        assertEquals(eight, ((LazyByte) lazyArray.getListElementObject(2)).getWritableObject());
        assertEquals(eight, ((LazyByte) lazyArray.getList().get(2)).getWritableObject());
        // index past the end
        assertNull(lazyArray.getListElementObject(3));
        assertEquals(3, lazyArray.getList().size());

        // ---- array<string>, elements separated by tab ----
        ObjectInspector stringArrayOI = LazyFactory.createLazyObjectInspector(
            TypeInfoUtils.getTypeInfosFromTypeString("array<string>").get(0),
            new byte[] { (byte) '\t' }, 0, nullMarker, false, (byte) 0);
        lazyArray = (LazyArray) LazyFactory.createLazyObject(stringArrayOI);
        raw = new byte[] { 'a', 'b', '\t', 'c', '\t', '\\', 'N', '\t', '\t', 'd' };
        // Only the window [1, length - 1) is handed over: the leading 'a' and
        // the trailing 'd' are NOT part of the array data.
        TestLazyPrimitive.initLazyObject(lazyArray, raw, 1, raw.length - 2);

        Text textB = new Text("b");
        Text textC = new Text("c");
        Text emptyText = new Text("");
        assertNull(lazyArray.getListElementObject(-1));
        assertEquals(textB, ((LazyString) lazyArray.getListElementObject(0)).getWritableObject());
        assertEquals(textB, ((LazyString) lazyArray.getList().get(0)).getWritableObject());
        assertEquals(textC, ((LazyString) lazyArray.getListElementObject(1)).getWritableObject());
        assertEquals(textC, ((LazyString) lazyArray.getList().get(1)).getWritableObject());
        assertNull(lazyArray.getListElementObject(2));
        assertNull(lazyArray.getList().get(2));
        // the two trailing separators yield two empty strings
        assertEquals(emptyText, ((LazyString) lazyArray.getListElementObject(3)).getWritableObject());
        assertEquals(emptyText, ((LazyString) lazyArray.getList().get(3)).getWritableObject());
        assertEquals(emptyText, ((LazyString) lazyArray.getListElementObject(4)).getWritableObject());
        assertEquals(emptyText, ((LazyString) lazyArray.getList().get(4)).getWritableObject());
        assertNull(lazyArray.getListElementObject(5));
        assertEquals(5, lazyArray.getList().size());

        // ---- HIVE-4149: same lazy array object initialized twice ----
        lazyArray = (LazyArray) LazyFactory.createLazyObject(stringArrayOI);
        raw = new byte[] { 'a', '\t', '\\', 'N' };
        TestLazyPrimitive.initLazyObject(lazyArray, raw, 0, raw.length);
        assertEquals(new Text("a"), ((LazyString) lazyArray.getListElementObject(0)).getWritableObject());
        assertNull(lazyArray.getListElementObject(1));

        raw = new byte[] { '\\', 'N', '\t', 'a' };
        TestLazyPrimitive.initLazyObject(lazyArray, raw, 0, raw.length);
        assertNull(lazyArray.getListElementObject(0));
        // asked a second time on purpose (returns not cleaned cache)
        assertNull(lazyArray.getListElementObject(0));
        assertEquals(new Text("a"), ((LazyString) lazyArray.getListElementObject(1)).getWritableObject());
    } catch (Throwable failure) {
        // Dump the trace before propagating the failure unchanged.
        failure.printStackTrace();
        throw failure;
    }
}
Also used : StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) Text(org.apache.hadoop.io.Text) ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable)

Example 3 with LazyArray

use of org.apache.hadoop.hive.serde2.lazy.LazyArray in project hive by apache.

the class TestAccumuloSerDe method testArraySerialization.

/**
 * Deserializes a row whose {@code cf:vals} cell holds three separator-joined strings
 * and checks that column 0 comes back as the row id and column 1 as a
 * {@link LazyArray} of three {@link LazyString} elements.
 */
@Test
public void testArraySerialization() throws Exception {
    // The collection delimiter is configured explicitly from this value, so the
    // data built below and the SerDe configuration cannot drift apart.
    char separator = ':';
    Properties properties = new Properties();
    Configuration conf = new Configuration();
    properties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowID,cf:vals");
    properties.setProperty(serdeConstants.LIST_COLUMNS, "row,values");
    properties.setProperty(serdeConstants.LIST_COLUMN_TYPES, "string,array<string>");
    properties.setProperty(serdeConstants.COLLECTION_DELIM, String.valueOf(separator));
    serde.initialize(conf, properties);
    AccumuloHiveRow row = new AccumuloHiveRow();
    row.setRowId("r1");
    // Encode with an explicit charset rather than the platform default.
    String joinedValues = "value1" + separator + "value2" + separator + "value3";
    row.add("cf", "vals", joinedValues.getBytes(java.nio.charset.StandardCharsets.UTF_8));
    Object obj = serde.deserialize(row);
    assertNotNull(obj);
    assertTrue(obj instanceof LazyAccumuloRow);
    LazyAccumuloRow lazyRow = (LazyAccumuloRow) obj;
    // Column 0: the row id as a lazy string
    Object field0 = lazyRow.getField(0);
    assertNotNull(field0);
    assertTrue(field0 instanceof LazyString);
    assertEquals(row.getRowId(), ((LazyString) field0).getWritableObject().toString());
    // Column 1: the array split on the separator
    Object field1 = lazyRow.getField(1);
    assertNotNull(field1);
    assertTrue(field1 instanceof LazyArray);
    LazyArray array = (LazyArray) field1;
    List<Object> values = array.getList();
    assertEquals(3, values.size());
    for (int i = 0; i < values.size(); i++) {
        Object o = values.get(i);
        assertNotNull(o);
        assertTrue(o instanceof LazyString);
        assertEquals("value" + (i + 1), ((LazyString) o).getWritableObject().toString());
    }
}
Also used : LazyString(org.apache.hadoop.hive.serde2.lazy.LazyString) Configuration(org.apache.hadoop.conf.Configuration) LazyAccumuloRow(org.apache.hadoop.hive.accumulo.LazyAccumuloRow) LazyArray(org.apache.hadoop.hive.serde2.lazy.LazyArray) Properties(java.util.Properties) AccumuloHiveRow(org.apache.hadoop.hive.accumulo.AccumuloHiveRow) Test(org.junit.Test)

Example 4 with LazyArray

use of org.apache.hadoop.hive.serde2.lazy.LazyArray in project hive by apache.

the class AvroLazyObjectInspector method toLazyListObject.

/**
   * Builds a {@link LazyArray} for the given inspector and fills its list with the
   * elements of {@code obj}, each converted through {@code toLazyObject} using the
   * inspector's list-element {@link ObjectInspector}.
   *
   * @param obj the value to convert; it is cast to {@link List}, and {@code null} is
   *          passed straight through
   * @param objectInspector inspector used to create the lazy array; it is cast to
   *          {@link ListObjectInspector} to obtain the element inspector
   * @return the populated {@link LazyArray}, or {@code null} when {@code obj} is {@code null}
   * */
private Object toLazyListObject(Object obj, ObjectInspector objectInspector) {
    if (obj == null) {
        return null;
    }
    List<?> sourceElements = (List<?>) obj;
    LazyArray lazyArray = (LazyArray) LazyFactory.createLazyObject(objectInspector);
    // Converted elements are appended directly to the list handed out by the lazy array.
    List<Object> target = lazyArray.getList();
    ObjectInspector elementInspector = ((ListObjectInspector) objectInspector).getListElementObjectInspector();
    for (Object element : sourceElements) {
        target.add(toLazyObject(element, elementInspector));
    }
    return lazyArray;
}
Also used : LazyUnionObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyUnionObjectInspector) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) LazyListObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyListObjectInspector) LazySimpleStructObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) LazyMapObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) ArrayList(java.util.ArrayList) List(java.util.List) LazyArray(org.apache.hadoop.hive.serde2.lazy.LazyArray) LazyObject(org.apache.hadoop.hive.serde2.lazy.LazyObject)

Example 5 with LazyArray

use of org.apache.hadoop.hive.serde2.lazy.LazyArray in project presto by prestodb.

the class RcFileTester method decodeRecordReaderValue.

/**
 * Converts a value produced by the record reader into the form the test compares
 * against: lazy primitives are unwrapped to their writable, writables are mapped to
 * boxed Java values or SQL wrapper objects, and struct/list/map containers are handed
 * to the matching {@code decodeRecordReader*} helper. Values of any other type are
 * returned as they are.
 *
 * @param type the type passed on to the container helpers; cast to
 *          {@link DecimalType} when the value is a {@link HiveDecimalWritable}
 * @param actualValue the value to decode
 * @return the decoded value, or the (unwrapped) input when no case applies
 */
private static Object decodeRecordReaderValue(Type type, Object actualValue) {
    // Unwrap first so that the checks below operate on the underlying writable.
    Object value = actualValue;
    if (value instanceof LazyPrimitive) {
        value = ((LazyPrimitive<?, ?>) value).getWritableObject();
    }

    // Simple writables map to their boxed Java value or SQL wrapper.
    if (value instanceof BooleanWritable) {
        return ((BooleanWritable) value).get();
    }
    if (value instanceof ByteWritable) {
        return ((ByteWritable) value).get();
    }
    if (value instanceof BytesWritable) {
        return new SqlVarbinary(((BytesWritable) value).copyBytes());
    }
    if (value instanceof DateWritable) {
        return new SqlDate(((DateWritable) value).getDays());
    }
    if (value instanceof DoubleWritable) {
        return ((DoubleWritable) value).get();
    }
    if (value instanceof FloatWritable) {
        return ((FloatWritable) value).get();
    }
    if (value instanceof IntWritable) {
        return ((IntWritable) value).get();
    }
    if (value instanceof LongWritable) {
        return ((LongWritable) value).get();
    }
    if (value instanceof ShortWritable) {
        return ((ShortWritable) value).get();
    }
    if (value instanceof HiveDecimalWritable) {
        HiveDecimalWritable decimalWritable = (HiveDecimalWritable) value;
        DecimalType decimalType = (DecimalType) type;
        // writable messes with the scale so rescale the values to the Presto type
        BigInteger unscaled = rescale(
                decimalWritable.getHiveDecimal().unscaledValue(),
                decimalWritable.getScale(),
                decimalType.getScale());
        return new SqlDecimal(unscaled, decimalType.getPrecision(), decimalType.getScale());
    }
    if (value instanceof Text) {
        return value.toString();
    }
    if (value instanceof TimestampWritable) {
        TimestampWritable timestampWritable = (TimestampWritable) value;
        // seconds scaled by 1000 plus nanos divided by 1,000,000
        long millis = (timestampWritable.getSeconds() * 1000) + (timestampWritable.getNanos() / 1000000L);
        return new SqlTimestamp(millis, UTC_KEY);
    }

    // Containers are decoded by the dedicated helpers; the check order is significant
    // and mirrors the original chain.
    if (value instanceof StructObject) {
        return decodeRecordReaderStruct(type, ((StructObject) value).getFieldsAsList());
    }
    if (value instanceof LazyBinaryArray) {
        return decodeRecordReaderList(type, ((LazyBinaryArray) value).getList());
    }
    if (value instanceof LazyBinaryMap) {
        return decodeRecordReaderMap(type, ((LazyBinaryMap) value).getMap());
    }
    if (value instanceof LazyArray) {
        return decodeRecordReaderList(type, ((LazyArray) value).getList());
    }
    if (value instanceof LazyMap) {
        return decodeRecordReaderMap(type, ((LazyMap) value).getMap());
    }
    if (value instanceof List) {
        return decodeRecordReaderList(type, ((List<?>) value));
    }
    return value;
}
Also used : SqlVarbinary(com.facebook.presto.spi.type.SqlVarbinary) TimestampWritable(org.apache.hadoop.hive.serde2.io.TimestampWritable) DoubleWritable(org.apache.hadoop.io.DoubleWritable) SqlTimestamp(com.facebook.presto.spi.type.SqlTimestamp) LazyBinaryArray(org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryArray) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) LazyPrimitive(org.apache.hadoop.hive.serde2.lazy.LazyPrimitive) StructObject(org.apache.hadoop.hive.serde2.StructObject) ArrayList(java.util.ArrayList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) Collectors.toList(java.util.stream.Collectors.toList) LongWritable(org.apache.hadoop.io.LongWritable) ByteWritable(org.apache.hadoop.io.ByteWritable) IntWritable(org.apache.hadoop.io.IntWritable) DateWritable(org.apache.hadoop.hive.serde2.io.DateWritable) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) LazyMap(org.apache.hadoop.hive.serde2.lazy.LazyMap) BytesWritable(org.apache.hadoop.io.BytesWritable) SqlDecimal(com.facebook.presto.spi.type.SqlDecimal) Text(org.apache.hadoop.io.Text) FloatWritable(org.apache.hadoop.io.FloatWritable) BooleanWritable(org.apache.hadoop.io.BooleanWritable) SqlDate(com.facebook.presto.spi.type.SqlDate) DecimalType(com.facebook.presto.spi.type.DecimalType) BigInteger(java.math.BigInteger) LazyArray(org.apache.hadoop.hive.serde2.lazy.LazyArray) LazyBinaryMap(org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryMap)

Aggregations

LazyArray (org.apache.hadoop.hive.serde2.lazy.LazyArray)4 ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable)3 IntWritable (org.apache.hadoop.io.IntWritable)3 ArrayList (java.util.ArrayList)2 List (java.util.List)2 Map (java.util.Map)2 Configuration (org.apache.hadoop.conf.Configuration)2 DateWritable (org.apache.hadoop.hive.serde2.io.DateWritable)2 HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable)2 TimestampWritable (org.apache.hadoop.hive.serde2.io.TimestampWritable)2 LazyMap (org.apache.hadoop.hive.serde2.lazy.LazyMap)2 LazyPrimitive (org.apache.hadoop.hive.serde2.lazy.LazyPrimitive)2 LazyBinaryMap (org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryMap)2 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)2 StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)2 BooleanWritable (org.apache.hadoop.io.BooleanWritable)2 BytesWritable (org.apache.hadoop.io.BytesWritable)2 DoubleWritable (org.apache.hadoop.io.DoubleWritable)2 FloatWritable (org.apache.hadoop.io.FloatWritable)2 LongWritable (org.apache.hadoop.io.LongWritable)2