Search in sources :

Example 46 with LazySerDeParameters

Use of org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters in the Apache Hive project.

From the class TestLazySimpleFast, method testLazySimpleFastCase.

/**
 * Generates 100 random rows from a {@link SerdeRandomRowSource}, builds the SerDe and
 * SerDe-parameter objects for the full row (and, when the random draw selects a strictly
 * smaller column count, for a partial row as well), and then invokes
 * {@code testLazySimpleFast} once per combination of the {@code useIncludeColumns} and
 * {@code doWriteFewerColumns} switches.
 *
 * @param caseNum not read by this method
 * @param doNonRandomFill when true, the generated rows are passed through
 *        {@code MyTestClass.nonRandomRowFill} before use
 * @param r random source; handed to the row source, used to decide whether fewer columns
 *        are written, and forwarded to {@code testLazySimpleFast}
 * @param supportedTypes forwarded to {@link SerdeRandomRowSource#init}
 * @param depth forwarded to {@link SerdeRandomRowSource#init}
 * @throws Throwable whatever the helpers or {@code testLazySimpleFast} throw
 */
public void testLazySimpleFastCase(int caseNum, boolean doNonRandomFill, Random r, SerdeRandomRowSource.SupportedTypes supportedTypes, int depth) throws Throwable {
    final int rowCount = 100;
    SerdeRandomRowSource source = new SerdeRandomRowSource();
    source.init(r, supportedTypes, depth);
    Object[][] rows = source.randomRows(rowCount);
    if (doNonRandomFill) {
        MyTestClass.nonRandomRowFill(rows, source.primitiveCategories());
    }

    StructObjectInspector fullInspector = source.rowStructObjectInspector();
    TypeInfo[] typeInfos = source.typeInfos();
    int columnCount = typeInfos.length;

    // Randomly decide whether to write only a leading subset of the columns. A draw that
    // lands on the full column count is treated as "write all columns".
    StructObjectInspector writeInspector = fullInspector;
    boolean writeFewerColumns = false;
    if (r.nextBoolean()) {
        int writeColumnCount = 1 + r.nextInt(columnCount);
        writeFewerColumns = writeColumnCount != columnCount;
        if (writeFewerColumns) {
            writeInspector = source.partialRowStructObjectInspector(writeColumnCount);
        }
    }

    String fieldNames = ObjectInspectorUtils.getFieldNames(fullInspector);
    String fieldTypes = ObjectInspectorUtils.getFieldTypes(fullInspector);

    // Use different separator values.
    byte[] separators = { 9, 2, 3, 4, 5, 6, 7, 8 };

    LazySimpleSerDe serde = getSerDe(fieldNames, fieldTypes);
    LazySerDeParameters serdeParams = getSerDeParams(fieldNames, fieldTypes, separators);

    LazySimpleSerDe fewerSerde = null;
    LazySerDeParameters fewerSerdeParams = null;
    if (writeFewerColumns) {
        String partialFieldNames = ObjectInspectorUtils.getFieldNames(writeInspector);
        String partialFieldTypes = ObjectInspectorUtils.getFieldTypes(writeInspector);
        // NOTE(review): the SerDe here is built from the FULL field names/types while the
        // parameters use the partial ones -- confirm this asymmetry is intentional.
        fewerSerde = getSerDe(fieldNames, fieldTypes);
        fewerSerdeParams = getSerDeParams(partialFieldNames, partialFieldTypes, separators);
    }

    final boolean[] includeColumnChoices = { false, true };

    // Full-width writes: first without, then with, include-columns.
    for (boolean useIncludeColumns : includeColumnChoices) {
        testLazySimpleFast(source, rows, serde, fullInspector, fewerSerde, writeInspector, serdeParams, fewerSerdeParams, typeInfos,
                useIncludeColumns, /* doWriteFewerColumns */ false, r);
    }

    // Reduced-width writes, only when a strictly smaller column count was drawn.
    if (writeFewerColumns) {
        for (boolean useIncludeColumns : includeColumnChoices) {
            testLazySimpleFast(source, rows, serde, fullInspector, fewerSerde, writeInspector, serdeParams, fewerSerdeParams, typeInfos,
                    useIncludeColumns, /* doWriteFewerColumns */ true, r);
        }
    }
}
Also used : SerdeRandomRowSource(org.apache.hadoop.hive.serde2.SerdeRandomRowSource) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)

Example 47 with LazySerDeParameters

Use of org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters in the Apache Hive project.

From the class LazySimpleSerDe, method initialize.

/**
 * Initializes the SerDe from the supplied configuration and properties.
 *
 * <p>Properties consulted:
 * <ul>
 *   <li>{@code serialization.format}: separator char or byte code (only supports
 *       byte-value up to 127)</li>
 *   <li>{@code columns}: ","-separated column names</li>
 *   <li>{@code columns.types}: ",", ":", or ";"-separated column types</li>
 * </ul>
 *
 * <p>After delegating to the superclass, this builds the {@link LazySerDeParameters},
 * the cached struct object inspector and lazy struct, and resets the serialized-size,
 * stats and last-operation bookkeeping fields.
 *
 * @param configuration passed to the superclass and to {@link LazySerDeParameters}
 * @param tableProperties passed to the superclass
 * @param partitionProperties passed to the superclass
 * @throws SerDeException declared by this method; raised by the calls it makes
 */
@Override
public void initialize(Configuration configuration, Properties tableProperties, Properties partitionProperties) throws SerDeException {
    super.initialize(configuration, tableProperties, partitionProperties);

    this.serdeParams = new LazySerDeParameters(configuration, this.properties, getClass().getName());

    // Create the ObjectInspectors for the fields
    this.cachedObjectInspector = LazyFactory.createLazyStructInspector(
            this.serdeParams.getColumnNames(),
            this.serdeParams.getColumnTypes(),
            new LazyObjectInspectorParametersImpl(this.serdeParams));
    this.cachedLazyStruct = (LazyStruct) LazyFactory.createLazyObject(this.cachedObjectInspector);

    // Reset per-instance bookkeeping.
    this.serializedSize = 0;
    this.stats = new SerDeStats();
    this.lastOperationSerialize = false;
    this.lastOperationDeserialize = false;
}
Also used : SerDeStats(org.apache.hadoop.hive.serde2.SerDeStats) LazyObjectInspectorParametersImpl(org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyObjectInspectorParametersImpl)

Aggregations

LazySerDeParameters (org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters): 29; Properties (java.util.Properties): 21; Text (org.apache.hadoop.io.Text): 21; Configuration (org.apache.hadoop.conf.Configuration): 20; TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo): 19; Test (org.junit.Test): 18; LazySimpleStructObjectInspector (org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector): 14; StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 11; ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 10; Mutation (org.apache.accumulo.core.data.Mutation): 8; ByteArrayRef (org.apache.hadoop.hive.serde2.lazy.ByteArrayRef): 8; LazySimpleSerDe (org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe): 8; LazyStruct (org.apache.hadoop.hive.serde2.lazy.LazyStruct): 8; LazySimpleDeserializeRead (org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleDeserializeRead): 8; ArrayList (java.util.ArrayList): 7; ByteStream (org.apache.hadoop.hive.serde2.ByteStream): 7; LazySimpleSerializeWrite (org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleSerializeWrite): 7; Entry (java.util.Map.Entry): 5; Output (org.apache.hadoop.hive.serde2.ByteStream.Output): 5; BinarySortableDeserializeRead (org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead): 5