
Example 21 with TypeInfoUtils.getTypeInfosFromTypeString

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfosFromTypeString in project hive by apache.

The class RegexSerDe, method initialize.

@Override
public void initialize(Configuration conf, Properties tbl) throws SerDeException {
    // We can get the table definition from tbl.
    // Read the configuration parameters
    inputRegex = tbl.getProperty(INPUT_REGEX);
    outputFormatString = tbl.getProperty(OUTPUT_FORMAT_STRING);
    String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS);
    String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES);
    boolean inputRegexIgnoreCase = "true".equalsIgnoreCase(tbl.getProperty(INPUT_REGEX_CASE_SENSITIVE));
    // Parse the configuration parameters
    if (inputRegex != null) {
        inputPattern = Pattern.compile(inputRegex, Pattern.DOTALL + (inputRegexIgnoreCase ? Pattern.CASE_INSENSITIVE : 0));
    } else {
        inputPattern = null;
    }
    final String columnNameDelimiter = tbl.containsKey(serdeConstants.COLUMN_NAME_DELIMITER) ? tbl.getProperty(serdeConstants.COLUMN_NAME_DELIMITER) : String.valueOf(SerDeUtils.COMMA);
    List<String> columnNames = Arrays.asList(columnNameProperty.split(columnNameDelimiter));
    List<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    assert columnNames.size() == columnTypes.size();
    numColumns = columnNames.size();
    // All columns have to be of type STRING.
    for (int c = 0; c < numColumns; c++) {
        if (!columnTypes.get(c).equals(TypeInfoFactory.stringTypeInfo)) {
            throw new SerDeException(getClass().getName() + " only accepts string columns, but column[" + c + "] named " + columnNames.get(c) + " has type " + columnTypes.get(c));
        }
    }
    // Constructing the row ObjectInspector:
    // The row consists of some string columns, each column will be a java
    // String object.
    List<ObjectInspector> columnOIs = new ArrayList<ObjectInspector>(columnNames.size());
    for (int c = 0; c < numColumns; c++) {
        columnOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    }
    // StandardStruct uses ArrayList to store the row.
    rowOI = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, columnOIs);
    // Constructing the row object, etc, which will be reused for all rows.
    row = new ArrayList<String>(numColumns);
    for (int c = 0; c < numColumns; c++) {
        row.add(null);
    }
    outputFields = new Object[numColumns];
    outputRowText = new Text();
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) ArrayList(java.util.ArrayList) Text(org.apache.hadoop.io.Text) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) SerDeException(org.apache.hadoop.hive.serde2.SerDeException)
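
For orientation, a minimal sketch (not part of the Hive sources; the type string and class name are invented) of what TypeInfoUtils.getTypeInfosFromTypeString returns for the comma-separated columns.types value that RegexSerDe reads above:

import java.util.List;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class TypeStringSketch {
    public static void main(String[] args) {
        // A columns.types-style value: three string columns (invented for this sketch)
        String columnTypeProperty = "string,string,string";
        List<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
        // RegexSerDe.initialize would reject any entry that is not stringTypeInfo
        for (TypeInfo t : columnTypes) {
            System.out.println(t.getTypeName() + " is string? " + t.equals(TypeInfoFactory.stringTypeInfo));
        }
    }
}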

Example 22 with TypeInfoUtils.getTypeInfosFromTypeString

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfosFromTypeString in project hive by apache.

The class TypedBytesSerDe, method initialize.

@Override
public void initialize(Configuration conf, Properties tbl) throws SerDeException {
    // We can get the table definition from tbl.
    serializeBytesWritable = new BytesWritable();
    barrStr = new NonSyncDataOutputBuffer();
    tbOut = new TypedBytesWritableOutput(barrStr);
    inBarrStr = new NonSyncDataInputBuffer();
    tbIn = new TypedBytesWritableInput(inBarrStr);
    // Read the configuration parameters
    String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS);
    String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES);
    final String columnNameDelimiter = tbl.containsKey(serdeConstants.COLUMN_NAME_DELIMITER) ? tbl.getProperty(serdeConstants.COLUMN_NAME_DELIMITER) : String.valueOf(SerDeUtils.COMMA);
    columnNames = Arrays.asList(columnNameProperty.split(columnNameDelimiter));
    columnTypes = null;
    if (columnTypeProperty.length() == 0) {
        columnTypes = new ArrayList<TypeInfo>();
    } else {
        columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    }
    assert columnNames.size() == columnTypes.size();
    numColumns = columnNames.size();
    // All columns have to be primitive.
    for (int c = 0; c < numColumns; c++) {
        if (columnTypes.get(c).getCategory() != Category.PRIMITIVE) {
            throw new SerDeException(getClass().getName() + " only accepts primitive columns, but column[" + c + "] named " + columnNames.get(c) + " has category " + columnTypes.get(c).getCategory());
        }
    }
    // Constructing the row ObjectInspector:
    // The row consists of primitive columns; each column uses its
    // standard writable ObjectInspector.
    List<ObjectInspector> columnOIs = new ArrayList<ObjectInspector>(columnNames.size());
    for (int c = 0; c < numColumns; c++) {
        columnOIs.add(TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(columnTypes.get(c)));
    }
    // StandardStruct uses ArrayList to store the row.
    rowOI = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, columnOIs);
    // Constructing the row object, etc, which will be reused for all rows.
    row = new ArrayList<Object>(numColumns);
    for (int c = 0; c < numColumns; c++) {
        row.add(null);
    }
}
Also used : TypedBytesWritableInput(org.apache.hadoop.hive.contrib.util.typedbytes.TypedBytesWritableInput) IntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector) BooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) LongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) FloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) ByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector) DoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector) TypedBytesWritableOutput(org.apache.hadoop.hive.contrib.util.typedbytes.TypedBytesWritableOutput) ArrayList(java.util.ArrayList) BytesWritable(org.apache.hadoop.io.BytesWritable) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) NonSyncDataOutputBuffer(org.apache.hadoop.hive.ql.io.NonSyncDataOutputBuffer) NonSyncDataInputBuffer(org.apache.hadoop.hive.ql.io.NonSyncDataInputBuffer) SerDeException(org.apache.hadoop.hive.serde2.SerDeException)
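
A small sketch of the primitive-only check that TypedBytesSerDe.initialize performs; the type string and the class name PrimitiveCheckSketch are invented for illustration:

import java.util.List;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class PrimitiveCheckSketch {
    public static void main(String[] args) {
        // Invented type string: the last column is a complex type and would be rejected
        List<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString("int,string,array<int>");
        for (int c = 0; c < columnTypes.size(); c++) {
            boolean primitive = columnTypes.get(c).getCategory() == Category.PRIMITIVE;
            System.out.println("column[" + c + "] " + columnTypes.get(c).getTypeName() + " primitive? " + primitive);
        }
    }
}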

Example 23 with TypeInfoUtils.getTypeInfosFromTypeString

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfosFromTypeString in project hive by apache.

The class LazyHBaseCellMapTest, method testInitColumnPrefix.

public void testInitColumnPrefix() throws Exception {
    Text nullSequence = new Text("\\N");
    ObjectInspector oi = LazyFactory.createLazyObjectInspector(TypeInfoUtils.getTypeInfosFromTypeString("map<string,string>").get(0), new byte[] { (byte) 1, (byte) 2 }, 0, nullSequence, false, (byte) 0);
    LazyHBaseCellMap b = new LazyHBaseCellMap((LazyMapObjectInspector) oi);
    // Initialize a result
    Cell[] cells = new KeyValue[2];
    final String col1 = "1";
    final String col2 = "2";
    cells[0] = new KeyValue(TEST_ROW, COLUMN_FAMILY, Bytes.toBytes(QUAL_PREFIX + col1), Bytes.toBytes("cfacol1"));
    cells[1] = new KeyValue(TEST_ROW, COLUMN_FAMILY, Bytes.toBytes(QUAL_PREFIX + col2), Bytes.toBytes("cfacol2"));
    Result r = Result.create(cells);
    List<Boolean> mapBinaryStorage = new ArrayList<Boolean>();
    mapBinaryStorage.add(false);
    mapBinaryStorage.add(false);
    b.init(r, COLUMN_FAMILY, mapBinaryStorage, Bytes.toBytes(QUAL_PREFIX), true);
    assertNotNull(b.getMapValueElement(new Text(col1)));
    assertNotNull(b.getMapValueElement(new Text(col2)));
}
Also used : LazyMapObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) KeyValue(org.apache.hadoop.hbase.KeyValue) ArrayList(java.util.ArrayList) Text(org.apache.hadoop.io.Text) Cell(org.apache.hadoop.hbase.Cell) Result(org.apache.hadoop.hbase.client.Result)
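
The test above relies on getTypeInfosFromTypeString("map<string,string>") producing a map TypeInfo. A minimal sketch (class name invented) of inspecting that parsed type:

import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class MapTypeInfoSketch {
    public static void main(String[] args) {
        TypeInfo parsed = TypeInfoUtils.getTypeInfosFromTypeString("map<string,string>").get(0);
        MapTypeInfo mapType = (MapTypeInfo) parsed;
        // The key and value type infos drive which lazy ObjectInspector gets created
        System.out.println(mapType.getMapKeyTypeInfo().getTypeName());   // string
        System.out.println(mapType.getMapValueTypeInfo().getTypeName()); // string
    }
}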

Example 24 with TypeInfoUtils.getTypeInfosFromTypeString

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfosFromTypeString in project hive by apache.

The class ColumnarStorageBench, method createRandomRow.

private Object createRandomRow(final String columnTypes) throws SerDeException {
    Writable recordWritable = createRecord(TypeInfoUtils.getTypeInfosFromTypeString(columnTypes));
    Writable simpleWritable = lazySimpleSerDe.serialize(recordWritable, getArrayWritableObjectInspector(columnTypes));
    return lazySimpleSerDe.deserialize(simpleWritable);
}
Also used : Writable(org.apache.hadoop.io.Writable) ArrayWritable(org.apache.hadoop.io.ArrayWritable) IntWritable(org.apache.hadoop.io.IntWritable) BooleanWritable(org.apache.hadoop.io.BooleanWritable) BytesWritable(org.apache.hadoop.io.BytesWritable) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable)
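
The benchmark passes an arbitrary type string to getTypeInfosFromTypeString, whose parser handles nested generics, so a naive comma split would miscount columns. A short sketch (the type string and class name are invented for illustration):

import java.util.List;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class NestedTypeStringSketch {
    public static void main(String[] args) {
        // Invented type string with nested generic types
        String types = "int,array<string>,map<string,struct<a:int,b:double>>";
        List<TypeInfo> infos = TypeInfoUtils.getTypeInfosFromTypeString(types);
        // Three columns, even though the raw string contains more commas
        System.out.println(infos.size());
        for (TypeInfo info : infos) {
            System.out.println(info.getTypeName());
        }
    }
}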

Example 25 with TypeInfoUtils.getTypeInfosFromTypeString

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfosFromTypeString in project hive by apache.

The class TestLazyAccumuloMap, method testBinaryIntMap.

@Test
public void testBinaryIntMap() throws SerDeException, IOException {
    AccumuloHiveRow row = new AccumuloHiveRow("row");
    row.add(new Text("cf1"), new Text(toBytes(1)), toBytes(2));
    row.add(new Text("cf1"), new Text(toBytes(2)), toBytes(4));
    row.add(new Text("cf1"), new Text(toBytes(3)), toBytes(6));
    HiveAccumuloMapColumnMapping mapping = new HiveAccumuloMapColumnMapping("cf1", null, ColumnEncoding.BINARY, ColumnEncoding.BINARY, "column", TypeInfoFactory.getMapTypeInfo(TypeInfoFactory.intTypeInfo, TypeInfoFactory.intTypeInfo).toString());
    // Map of Integer to Integer
    Text nullSequence = new Text("\\N");
    ObjectInspector oi = LazyFactory.createLazyObjectInspector(TypeInfoUtils.getTypeInfosFromTypeString("map<int,int>").get(0), new byte[] { (byte) 1, (byte) 2 }, 0, nullSequence, false, (byte) 0);
    LazyAccumuloMap map = new LazyAccumuloMap((LazyMapObjectInspector) oi);
    map.init(row, mapping);
    Assert.assertEquals(3, map.getMapSize());
    Object o = map.getMapValueElement(new IntWritable(1));
    Assert.assertNotNull(o);
    Assert.assertEquals(new IntWritable(2), ((LazyInteger) o).getWritableObject());
    o = map.getMapValueElement(new IntWritable(2));
    Assert.assertNotNull(o);
    Assert.assertEquals(new IntWritable(4), ((LazyInteger) o).getWritableObject());
    o = map.getMapValueElement(new IntWritable(3));
    Assert.assertNotNull(o);
    Assert.assertEquals(new IntWritable(6), ((LazyInteger) o).getWritableObject());
}
Also used : LazyMapObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) HiveAccumuloMapColumnMapping(org.apache.hadoop.hive.accumulo.columns.HiveAccumuloMapColumnMapping) Text(org.apache.hadoop.io.Text) IntWritable(org.apache.hadoop.io.IntWritable) Test(org.junit.Test)
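
The column mapping string in this test comes from TypeInfoFactory.getMapTypeInfo(...).toString(). A brief sketch (class name invented) of that round trip through getTypeInfosFromTypeString:

import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class MapTypeRoundTripSketch {
    public static void main(String[] args) {
        TypeInfo mapType = TypeInfoFactory.getMapTypeInfo(TypeInfoFactory.intTypeInfo, TypeInfoFactory.intTypeInfo);
        // toString() yields the type string ("map<int,int>") used in the mapping above
        String typeString = mapType.toString();
        TypeInfo parsed = TypeInfoUtils.getTypeInfosFromTypeString(typeString).get(0);
        System.out.println(parsed.equals(mapType)); // true: parsing recovers the same TypeInfo
    }
}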

Aggregations

TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo): 23
ArrayList (java.util.ArrayList): 18
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 17
Text (org.apache.hadoop.io.Text): 14
LazyMapObjectInspector (org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector): 11
SerDeException (org.apache.hadoop.hive.serde2.SerDeException): 9
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 9
Cell (org.apache.hadoop.hbase.Cell): 7
KeyValue (org.apache.hadoop.hbase.KeyValue): 7
Result (org.apache.hadoop.hbase.client.Result): 7
StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo): 7
IntWritable (org.apache.hadoop.io.IntWritable): 7
PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo): 6
LazySimpleStructObjectInspector (org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector): 5
Test (org.junit.Test): 5
HiveAccumuloMapColumnMapping (org.apache.hadoop.hive.accumulo.columns.HiveAccumuloMapColumnMapping): 4
IOException (java.io.IOException): 3
ColumnMapping (org.apache.hadoop.hive.hbase.ColumnMappings.ColumnMapping): 2
ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable): 2
DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable): 2