
Example 1 with TypeInfoUtils.getTypeInfosFromTypeString

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfosFromTypeString in project hive by apache.

From the class TestLazyAccumuloMap, method testMixedSerializationMap:

@Test
public void testMixedSerializationMap() throws SerDeException, IOException {
    AccumuloHiveRow row = new AccumuloHiveRow("row");
    row.add(new Text("cf1"), new Text(toBytes(1)), "2".getBytes());
    row.add(new Text("cf1"), new Text(toBytes(2)), "4".getBytes());
    row.add(new Text("cf1"), new Text(toBytes(3)), "6".getBytes());
    HiveAccumuloMapColumnMapping mapping = new HiveAccumuloMapColumnMapping("cf1", null, ColumnEncoding.BINARY, ColumnEncoding.STRING, "column", TypeInfoFactory.getMapTypeInfo(TypeInfoFactory.intTypeInfo, TypeInfoFactory.intTypeInfo).toString());
    // Map of int to int (binary-encoded keys, string-encoded values)
    Text nullSequence = new Text("\\N");
    ObjectInspector oi = LazyFactory.createLazyObjectInspector(TypeInfoUtils.getTypeInfosFromTypeString("map<int,int>").get(0), new byte[] { (byte) 1, (byte) 2 }, 0, nullSequence, false, (byte) 0);
    LazyAccumuloMap map = new LazyAccumuloMap((LazyMapObjectInspector) oi);
    map.init(row, mapping);
    Assert.assertEquals(3, map.getMapSize());
    Object o = map.getMapValueElement(new IntWritable(1));
    Assert.assertNotNull(o);
    Assert.assertEquals(new IntWritable(2), ((LazyInteger) o).getWritableObject());
    o = map.getMapValueElement(new IntWritable(2));
    Assert.assertNotNull(o);
    Assert.assertEquals(new IntWritable(4), ((LazyInteger) o).getWritableObject());
    o = map.getMapValueElement(new IntWritable(3));
    Assert.assertNotNull(o);
    Assert.assertEquals(new IntWritable(6), ((LazyInteger) o).getWritableObject());
}
Also used: LazyMapObjectInspector (org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector), ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector), HiveAccumuloMapColumnMapping (org.apache.hadoop.hive.accumulo.columns.HiveAccumuloMapColumnMapping), Text (org.apache.hadoop.io.Text), IntWritable (org.apache.hadoop.io.IntWritable), Test (org.junit.Test)
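
For context, a minimal sketch of what getTypeInfosFromTypeString does on its own, independent of Accumulo; the type string below is made up for illustration. The method parses a comma-separated Hive type string into one TypeInfo per type, which is why the tests call .get(0) on the result of a single-type string such as "map<int,int>".

import java.util.List;

import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class TypeStringSketch {
    public static void main(String[] args) {
        // Parse a comma-separated type string into one TypeInfo per column type.
        List<TypeInfo> infos = TypeInfoUtils.getTypeInfosFromTypeString("int,string,map<string,int>");
        for (TypeInfo info : infos) {
            System.out.println(info.getTypeName()); // prints: int, string, map<string,int>
        }
    }
}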

Example 2 with TypeInfoUtils.getTypeInfosFromTypeString

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfosFromTypeString in project hive by apache.

From the class TestLazyAccumuloMap, method testStringMapWithProjection:

@Test
public void testStringMapWithProjection() throws SerDeException {
    AccumuloHiveRow row = new AccumuloHiveRow("row");
    row.add("cf1", "foo", "bar".getBytes());
    row.add("cf1", "bar", "foo".getBytes());
    row.add("cf2", "foo1", "bar1".getBytes());
    row.add("cf3", "bar1", "foo1".getBytes());
    HiveAccumuloMapColumnMapping mapping = new HiveAccumuloMapColumnMapping("cf1", null, ColumnEncoding.STRING, ColumnEncoding.STRING, "column", TypeInfoFactory.getMapTypeInfo(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo).toString());
    // Map of String to String
    Text nullSequence = new Text("\\N");
    ObjectInspector oi = LazyFactory.createLazyObjectInspector(TypeInfoUtils.getTypeInfosFromTypeString("map<string,string>").get(0), new byte[] { (byte) 1, (byte) 2 }, 0, nullSequence, false, (byte) 0);
    LazyAccumuloMap map = new LazyAccumuloMap((LazyMapObjectInspector) oi);
    map.init(row, mapping);
    Assert.assertEquals(2, map.getMapSize());
    Object o = map.getMapValueElement(new Text("foo"));
    Assert.assertNotNull(o);
    Assert.assertEquals(new Text("bar"), ((LazyString) o).getWritableObject());
    o = map.getMapValueElement(new Text("bar"));
    Assert.assertNotNull(o);
    Assert.assertEquals(new Text("foo"), ((LazyString) o).getWritableObject());
}
Also used: LazyMapObjectInspector (org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector), ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector), HiveAccumuloMapColumnMapping (org.apache.hadoop.hive.accumulo.columns.HiveAccumuloMapColumnMapping), Text (org.apache.hadoop.io.Text), Test (org.junit.Test)
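
A hedged annotation of the createLazyObjectInspector call shared by both tests; the comments are my reading of the positional arguments, not documented parameter names:

import org.apache.hadoop.hive.serde2.lazy.LazyFactory;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.io.Text;

Text nullSequence = new Text("\\N");
ObjectInspector oi = LazyFactory.createLazyObjectInspector(
    TypeInfoUtils.getTypeInfosFromTypeString("map<string,string>").get(0), // the parsed map TypeInfo
    new byte[] { (byte) 1, (byte) 2 }, // separators: \1 between map entries, \2 between key and value
    0,              // index of the first separator to use at this nesting level
    nullSequence,   // byte sequence that represents NULL ("\N")
    false,          // fields are not escaped...
    (byte) 0);      // ...so the escape character is unused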

Example 3 with TypeInfoUtils.getTypeInfosFromTypeString

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfosFromTypeString in project hive by apache.

From the class ColumnarStorageBench, method getArrayWritableObjectInspector:

private ObjectInspector getArrayWritableObjectInspector(final String columnTypes) {
    List<TypeInfo> columnTypeList = TypeInfoUtils.getTypeInfosFromTypeString(columnTypes);
    List<String> columnNameList = Arrays.asList(getColumnNames(columnTypes).split(","));
    StructTypeInfo rowTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNameList, columnTypeList);
    return new ArrayWritableObjectInspector(rowTypeInfo);
}
Also used: ArrayWritableObjectInspector (org.apache.hadoop.hive.ql.io.parquet.serde.ArrayWritableObjectInspector), StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo), MapTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo), PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo), ListTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo), TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)
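
The same pattern works outside the benchmark. A minimal sketch, using hypothetical column names since the private getColumnNames helper is not shown:

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class StructTypeSketch {
    public static void main(String[] args) {
        // Parallel lists: parsed column types and matching column names.
        List<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString("int,string,double");
        List<String> columnNames = Arrays.asList("id", "name", "score"); // hypothetical names
        // Combine both lists into a struct type describing a full row.
        StructTypeInfo rowType = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
        System.out.println(rowType.getTypeName()); // struct<id:int,name:string,score:double>
    }
}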

Example 4 with TypeInfoUtils.getTypeInfosFromTypeString

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfosFromTypeString in project hive by apache.

From the class MapWork, method checkVectorizerSupportedTypes:

private boolean checkVectorizerSupportedTypes(boolean hasLlap) {
    for (Map.Entry<String, Operator<? extends OperatorDesc>> entry : aliasToWork.entrySet()) {
        final String alias = entry.getKey();
        Operator<? extends OperatorDesc> op = entry.getValue();
        PartitionDesc partitionDesc = aliasToPartnInfo.get(alias);
        if (op instanceof TableScanOperator && partitionDesc != null && partitionDesc.getTableDesc() != null) {
            final TableScanOperator tsOp = (TableScanOperator) op;
            final List<String> readColumnNames = tsOp.getNeededColumns();
            final Properties props = partitionDesc.getTableDesc().getProperties();
            final List<TypeInfo> typeInfos = TypeInfoUtils.getTypeInfosFromTypeString(props.getProperty(serdeConstants.LIST_COLUMN_TYPES));
            final List<String> allColumnTypes = TypeInfoUtils.getTypeStringsFromTypeInfo(typeInfos);
            final List<String> allColumnNames = Utilities.getColumnNames(props);
            hasLlap = Utilities.checkVectorizerSupportedTypes(readColumnNames, allColumnNames, allColumnTypes);
        }
    }
    return hasLlap;
}
Also used: FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator), TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator), Operator (org.apache.hadoop.hive.ql.exec.Operator), Properties (java.util.Properties), HashMap (java.util.HashMap), LinkedHashMap (java.util.LinkedHashMap), Map (java.util.Map), TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)
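
The parse-and-stringify round trip above can be sketched in isolation; the columns.types-style value here is made up, and the expected output is my assumption about getTypeStringsFromTypeInfo, hedged accordingly:

import java.util.List;

import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

// Parse a columns.types-style property value into TypeInfo objects...
List<TypeInfo> typeInfos = TypeInfoUtils.getTypeInfosFromTypeString("int,array<string>,map<string,bigint>");
// ...then recover one type string per column, as MapWork does before the vectorizer check.
List<String> typeStrings = TypeInfoUtils.getTypeStringsFromTypeInfo(typeInfos);
// Expected: ["int", "array<string>", "map<string,bigint>"]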

Example 5 with TypeInfoUtils.getTypeInfosFromTypeString

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfosFromTypeString in project hive by apache.

From the class RegexSerDe, method initialize:

@Override
public void initialize(Configuration conf, Properties tbl) throws SerDeException {
    // We can get the table definition from tbl.
    // Read the configuration parameters
    inputRegex = tbl.getProperty(INPUT_REGEX);
    String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS);
    String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES);
    boolean inputRegexIgnoreCase = "true".equalsIgnoreCase(tbl.getProperty(INPUT_REGEX_CASE_SENSITIVE));
    // output format string is not supported anymore, warn user of deprecation
    if (null != tbl.getProperty("output.format.string")) {
        LOG.warn("output.format.string has been deprecated");
    }
    // Parse the configuration parameters
    if (inputRegex != null) {
        inputPattern = Pattern.compile(inputRegex, Pattern.DOTALL + (inputRegexIgnoreCase ? Pattern.CASE_INSENSITIVE : 0));
    } else {
        inputPattern = null;
        throw new SerDeException("This table does not have serde property \"input.regex\"!");
    }
    final String columnNameDelimiter = tbl.containsKey(serdeConstants.COLUMN_NAME_DELIMITER) ? tbl.getProperty(serdeConstants.COLUMN_NAME_DELIMITER) : String.valueOf(SerDeUtils.COMMA);
    List<String> columnNames = Arrays.asList(columnNameProperty.split(columnNameDelimiter));
    columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    assert columnNames.size() == columnTypes.size();
    numColumns = columnNames.size();
    /* Constructing the row ObjectInspector:
     * The row consists of some set of primitive columns, each column will
     * be a java object of primitive type.
     */
    List<ObjectInspector> columnOIs = new ArrayList<ObjectInspector>(columnNames.size());
    for (int c = 0; c < numColumns; c++) {
        TypeInfo typeInfo = columnTypes.get(c);
        if (typeInfo instanceof PrimitiveTypeInfo) {
            PrimitiveTypeInfo pti = (PrimitiveTypeInfo) columnTypes.get(c);
            AbstractPrimitiveJavaObjectInspector oi = PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(pti);
            columnOIs.add(oi);
        } else {
            throw new SerDeException(getClass().getName() + " doesn't allow column [" + c + "] named " + columnNames.get(c) + " with type " + columnTypes.get(c));
        }
    }
    // StandardStruct uses ArrayList to store the row.
    rowOI = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, columnOIs, Lists.newArrayList(Splitter.on('\0').split(tbl.getProperty("columns.comments"))));
    row = new ArrayList<Object>(numColumns);
    // Constructing the row object, etc, which will be reused for all rows.
    for (int c = 0; c < numColumns; c++) {
        row.add(null);
    }
    outputFields = new Object[numColumns];
    outputRowText = new Text();
}
Also used: ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector), StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector), AbstractPrimitiveJavaObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.AbstractPrimitiveJavaObjectInspector), ArrayList (java.util.ArrayList), Text (org.apache.hadoop.io.Text), PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo), TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo), VarcharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo), CharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo)
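
A minimal sketch of driving this initialize method, assuming the org.apache.hadoop.hive.serde2.RegexSerDe class and made-up column names and regex:

import java.util.Properties;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.RegexSerDe;
import org.apache.hadoop.hive.serde2.SerDeException;

public class RegexSerDeSketch {
    public static void main(String[] args) throws SerDeException {
        Properties tbl = new Properties();
        tbl.setProperty(serdeConstants.LIST_COLUMNS, "host,status");     // hypothetical columns
        tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, "string,int"); // must be primitive types
        tbl.setProperty("input.regex", "(\\S+) (\\d+)");                 // one capture group per column
        tbl.setProperty("columns.comments", "\0");                       // one '\0'-delimited (empty) comment per column
        RegexSerDe serde = new RegexSerDe();
        serde.initialize(new Configuration(), tbl); // throws SerDeException if input.regex is absent
    }
}

Note that initialize reads columns.comments unconditionally, so the sketch supplies an empty comment per column to avoid a NullPointerException in the Splitter call.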

Aggregations

TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo): 23 usages
ArrayList (java.util.ArrayList): 18 usages
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 17 usages
Text (org.apache.hadoop.io.Text): 14 usages
LazyMapObjectInspector (org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector): 11 usages
SerDeException (org.apache.hadoop.hive.serde2.SerDeException): 9 usages
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 9 usages
Cell (org.apache.hadoop.hbase.Cell): 7 usages
KeyValue (org.apache.hadoop.hbase.KeyValue): 7 usages
Result (org.apache.hadoop.hbase.client.Result): 7 usages
StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo): 7 usages
IntWritable (org.apache.hadoop.io.IntWritable): 7 usages
PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo): 6 usages
LazySimpleStructObjectInspector (org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector): 5 usages
Test (org.junit.Test): 5 usages
HiveAccumuloMapColumnMapping (org.apache.hadoop.hive.accumulo.columns.HiveAccumuloMapColumnMapping): 4 usages
IOException (java.io.IOException): 3 usages
ColumnMapping (org.apache.hadoop.hive.hbase.ColumnMappings.ColumnMapping): 2 usages
ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable): 2 usages
DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable): 2 usages