Search in sources :

Example 96 with TypeInfoFactory.stringTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.stringTypeInfo in project hive by apache.

From the class RegexSerDe, the method initialize:

@Override
public void initialize(Configuration configuration, Properties tableProperties, Properties partitionProperties) throws SerDeException {
    super.initialize(configuration, tableProperties, partitionProperties);
    numColumns = this.getColumnNames().size();
    // Read the configuration parameters. The 'properties' field is the merged
    // table/partition property set populated by super.initialize() above.
    inputRegex = properties.getProperty(INPUT_REGEX);
    outputFormatString = properties.getProperty(OUTPUT_FORMAT_STRING);
    // NOTE(review): despite the constant's name (INPUT_REGEX_CASE_SENSITIVE),
    // a "true" value here enables case-INSENSITIVE matching — confirm this
    // inversion is intentional before renaming anything user-visible.
    boolean inputRegexIgnoreCase = "true".equalsIgnoreCase(properties.getProperty(INPUT_REGEX_CASE_SENSITIVE));
    // Compile the input pattern if one was supplied. Regex flags are bit
    // masks, so combine them with bitwise OR — the previous '+' only worked
    // because DOTALL and CASE_INSENSITIVE happen not to share bits.
    if (inputRegex != null) {
        inputPattern = Pattern.compile(inputRegex, Pattern.DOTALL | (inputRegexIgnoreCase ? Pattern.CASE_INSENSITIVE : 0));
    } else {
        inputPattern = null;
    }
    // All columns have to be of type STRING; fail fast with the offending
    // column's index, name, and actual type.
    int i = 0;
    for (TypeInfo type : getColumnTypes()) {
        if (!type.equals(TypeInfoFactory.stringTypeInfo)) {
            throw new SerDeException(getClass().getName() + " only accepts string columns, but column[" + i + "] named " + getColumnNames().get(i) + " has type " + type);
        }
        i++;
    }
    // Constructing the row ObjectInspector:
    // The row consists of some string columns, each column will be a java
    // String object.
    List<ObjectInspector> columnOIs = Collections.nCopies(numColumns, PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    // StandardStruct uses ArrayList to store the row.
    rowOI = ObjectInspectorFactory.getStandardStructObjectInspector(getColumnNames(), columnOIs);
    // Constructing the row object, etc, which will be reused for all rows.
    row = new ArrayList<>(Collections.nCopies(numColumns, null));
    outputFields = new Object[numColumns];
    outputRowText = new Text();
}
Also used : StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) Text(org.apache.hadoop.io.Text) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) SerDeException(org.apache.hadoop.hive.serde2.SerDeException)

Example 97 with TypeInfoFactory.stringTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.stringTypeInfo in project hive by apache.

From the class MapJoinOneLongKeyBenchBase, the method doSetup:

public void doSetup(VectorMapJoinVariation vectorMapJoinVariation, MapJoinTestImplementation mapJoinImplementation) throws Exception {
    // Benchmark fixture: a 10-million-row big table with a single LONG key
    // column joined against a small table carrying a DATE and a STRING value.
    HiveConf conf = new HiveConf();
    long randomSeed = 2543;
    int bigTableRowCount = 10_000_000;

    // Big-table schema: one column, "number1", of type long; it is both the
    // join key and the only retained column.
    String[] bigTableColumnNames = { "number1" };
    TypeInfo[] bigTableTypeInfos = { TypeInfoFactory.longTypeInfo };
    int[] bigTableKeyColumnNums = { 0 };
    int[] bigTableRetainColumnNums = { 0 };

    // Small-table schema: two value columns ("sv1" date, "sv2" string), both
    // retained; no small-table key columns are kept in the output.
    String[] smallTableValueColumnNames = { "sv1", "sv2" };
    TypeInfo[] smallTableValueTypeInfos = { TypeInfoFactory.dateTypeInfo, TypeInfoFactory.stringTypeInfo };
    int[] smallTableRetainKeyColumnNums = {};
    int[] smallTableRetainValueColumnNums = { 0, 1 };

    // Each small-table key maps to exactly one value row.
    SmallTableGenerationParameters smallTableGenParams = new SmallTableGenerationParameters();
    smallTableGenParams.setValueOption(ValueOption.ONLY_ONE);

    setupMapJoin(conf, randomSeed, bigTableRowCount, vectorMapJoinVariation, mapJoinImplementation, bigTableColumnNames, bigTableTypeInfos, bigTableKeyColumnNums, smallTableValueColumnNames, smallTableValueTypeInfos, bigTableRetainColumnNums, smallTableRetainKeyColumnNums, smallTableRetainValueColumnNums, smallTableGenParams);
}
Also used : SmallTableGenerationParameters(org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription.SmallTableGenerationParameters) HiveConf(org.apache.hadoop.hive.conf.HiveConf) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)

Example 98 with TypeInfoFactory.stringTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.stringTypeInfo in project hive by apache.

From the class MapJoinOneStringKeyBenchBase, the method doSetup:

public void doSetup(VectorMapJoinVariation vectorMapJoinVariation, MapJoinTestImplementation mapJoinImplementation) throws Exception {
    // Benchmark fixture: a 100,000-row big table with a single STRING key
    // column joined against a small table carrying DATE and TIMESTAMP values.
    HiveConf conf = new HiveConf();
    long randomSeed = 2543;
    int bigTableRowCount = 100_000;

    // Big-table schema: one column, "b1", of type string; it is both the
    // join key and the only retained column.
    String[] bigTableColumnNames = { "b1" };
    TypeInfo[] bigTableTypeInfos = { TypeInfoFactory.stringTypeInfo };
    int[] bigTableKeyColumnNums = { 0 };
    int[] bigTableRetainColumnNums = { 0 };

    // Small-table schema: two value columns ("sv1" date, "sv2" timestamp),
    // both retained; no small-table key columns are kept in the output.
    String[] smallTableValueColumnNames = { "sv1", "sv2" };
    TypeInfo[] smallTableValueTypeInfos = { TypeInfoFactory.dateTypeInfo, TypeInfoFactory.timestampTypeInfo };
    int[] smallTableRetainKeyColumnNums = {};
    int[] smallTableRetainValueColumnNums = { 0, 1 };

    // Each small-table key maps to exactly one value row.
    SmallTableGenerationParameters smallTableGenParams = new SmallTableGenerationParameters();
    smallTableGenParams.setValueOption(ValueOption.ONLY_ONE);

    setupMapJoin(conf, randomSeed, bigTableRowCount, vectorMapJoinVariation, mapJoinImplementation, bigTableColumnNames, bigTableTypeInfos, bigTableKeyColumnNums, smallTableValueColumnNames, smallTableValueTypeInfos, bigTableRetainColumnNums, smallTableRetainKeyColumnNums, smallTableRetainValueColumnNums, smallTableGenParams);
}
Also used : SmallTableGenerationParameters(org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription.SmallTableGenerationParameters) HiveConf(org.apache.hadoop.hive.conf.HiveConf) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)

Example 99 with TypeInfoFactory.stringTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.stringTypeInfo in project hive by apache.

From the class JsonReadBench, the method buildArguments:

private ObjectInspector[] buildArguments(String typeStr) {
    // Argument 0: inspector for the JSON text value.
    // Argument 1: a constant writable-string inspector carrying the type
    //             specification string for the UDF under benchmark.
    ObjectInspector typeSpecOI = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.stringTypeInfo, new Text(typeStr));
    return new ObjectInspector[] { PrimitiveObjectInspectorFactory.writableStringObjectInspector, typeSpecOI };
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) Text(org.apache.hadoop.io.Text)

Example 100 with TypeInfoFactory.stringTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.stringTypeInfo in project hive by apache.

From the class TestLazySimpleFast, the method testLazySimpleDeserializeRowEmptyArray:

@Test
public void testLazySimpleDeserializeRowEmptyArray() throws Throwable {
    // Verifies that an input consisting of a lone field delimiter (",")
    // deserializes to two empty lists for array<int> and
    // array<array<string>> columns, and that end-of-input is reported.
    HiveConf hconf = new HiveConf();
    // Set the escaping-related serde properties: fields separated by ','.
    Properties props = new Properties();
    props.setProperty(serdeConstants.FIELD_DELIM, ",");
    LazySerDeParameters lazyParams = new LazySerDeParameters(hconf, props, LazySimpleSerDe.class.getName());
    TypeInfo[] typeInfos = new TypeInfo[] { TypeInfoFactory.getListTypeInfo(TypeInfoFactory.intTypeInfo), TypeInfoFactory.getListTypeInfo(TypeInfoFactory.getListTypeInfo(TypeInfoFactory.stringTypeInfo)) };
    LazySimpleDeserializeRead deserializeRead = new LazySimpleDeserializeRead(typeInfos, null, true, lazyParams);
    // Use an explicit charset: the no-arg String.getBytes() depends on the
    // JVM's platform-default encoding and can vary between test environments.
    // (UnsupportedEncodingException is covered by 'throws Throwable'.)
    byte[] bytes = ",".getBytes("UTF-8");
    deserializeRead.set(bytes, 0, bytes.length);
    verifyRead(deserializeRead, typeInfos[0], Collections.emptyList());
    verifyRead(deserializeRead, typeInfos[1], Collections.emptyList());
    assertTrue(deserializeRead.isEndOfInputReached());
}
Also used : LazySimpleDeserializeRead(org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleDeserializeRead) HiveConf(org.apache.hadoop.hive.conf.HiveConf) Properties(java.util.Properties) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) Test(org.junit.Test)

Aggregations

Test (org.junit.Test)65 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)44 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)36 Text (org.apache.hadoop.io.Text)34 ArrayList (java.util.ArrayList)19 HiveConf (org.apache.hadoop.hive.conf.HiveConf)19 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)17 ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc)16 ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc)16 VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch)14 ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc)14 SmallTableGenerationParameters (org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription.SmallTableGenerationParameters)13 DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo)13 PrimitiveCategory (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory)12 PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)12 Properties (java.util.Properties)11 BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector)11 LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)11 TestVectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch)11 Configuration (org.apache.hadoop.conf.Configuration)10