Example 16 with TypeDescription

Use of org.apache.orc.TypeDescription in project hive by apache.

The class TestOrcFile, method testUnionAndTimestamp.

/**
     * We test union, timestamp, and decimal separately since we need to make the
     * object inspector manually. (The Hive reflection-based ObjectInspector doesn't
     * handle them properly.)
     */
@Test
public void testUnionAndTimestamp() throws Exception {
    List<OrcProto.Type> types = new ArrayList<OrcProto.Type>();
    types.add(OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.STRUCT)
            .addFieldNames("time").addFieldNames("union").addFieldNames("decimal")
            .addSubtypes(1).addSubtypes(2).addSubtypes(5).build());
    types.add(OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.TIMESTAMP).build());
    types.add(OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.UNION).addSubtypes(3).addSubtypes(4).build());
    types.add(OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.INT).build());
    types.add(OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.STRING).build());
    types.add(OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.DECIMAL).build());
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
        inspector = OrcStruct.createObjectInspector(0, types);
    }
    HiveDecimal maxValue = HiveDecimal.create("10000000000000000000");
    Writer writer = OrcFile.createWriter(testFilePath,
            OrcFile.writerOptions(conf)
                    .inspector(inspector)
                    .stripeSize(1000)
                    .compress(CompressionKind.NONE)
                    .batchSize(1000)
                    .bufferSize(100)
                    .blockPadding(false));
    OrcStruct row = new OrcStruct(3);
    OrcUnion union = new OrcUnion();
    row.setFieldValue(1, union);
    row.setFieldValue(0, new TimestampWritable(Timestamp.valueOf("2000-03-12 15:00:00")));
    HiveDecimal value = HiveDecimal.create("12345678.6547456");
    row.setFieldValue(2, new HiveDecimalWritable(value));
    union.set((byte) 0, new IntWritable(42));
    writer.addRow(row);
    row.setFieldValue(0, new TimestampWritable(Timestamp.valueOf("2000-03-20 12:00:00.123456789")));
    union.set((byte) 1, new Text("hello"));
    value = HiveDecimal.create("-5643.234");
    row.setFieldValue(2, new HiveDecimalWritable(value));
    writer.addRow(row);
    row.setFieldValue(0, null);
    row.setFieldValue(1, null);
    row.setFieldValue(2, null);
    writer.addRow(row);
    row.setFieldValue(1, union);
    union.set((byte) 0, null);
    writer.addRow(row);
    union.set((byte) 1, null);
    writer.addRow(row);
    union.set((byte) 0, new IntWritable(200000));
    row.setFieldValue(0, new TimestampWritable(Timestamp.valueOf("1970-01-01 00:00:00")));
    value = HiveDecimal.create("10000000000000000000");
    row.setFieldValue(2, new HiveDecimalWritable(value));
    writer.addRow(row);
    Random rand = new Random(42);
    for (int i = 1970; i < 2038; ++i) {
        row.setFieldValue(0, new TimestampWritable(Timestamp.valueOf(i + "-05-05 12:34:56." + i)));
        if ((i & 1) == 0) {
            union.set((byte) 0, new IntWritable(i * i));
        } else {
            union.set((byte) 1, new Text(Integer.toString(i * i)));
        }
        value = HiveDecimal.create(new BigInteger(64, rand), rand.nextInt(18));
        row.setFieldValue(2, new HiveDecimalWritable(value));
        if (maxValue.compareTo(value) < 0) {
            maxValue = value;
        }
        writer.addRow(row);
    }
    // let's add a lot of constant rows to test the rle
    row.setFieldValue(0, null);
    union.set((byte) 0, new IntWritable(1732050807));
    row.setFieldValue(2, null);
    for (int i = 0; i < 5000; ++i) {
        writer.addRow(row);
    }
    union.set((byte) 0, new IntWritable(0));
    writer.addRow(row);
    union.set((byte) 0, new IntWritable(10));
    writer.addRow(row);
    union.set((byte) 0, new IntWritable(138));
    writer.addRow(row);
    writer.close();
    Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs));
    TypeDescription schema = writer.getSchema();
    assertEquals(5, schema.getMaximumId());
    boolean[] expected = new boolean[] { false, false, false, false, false, false };
    boolean[] included = OrcUtils.includeColumns("", schema);
    assertEquals(true, Arrays.equals(expected, included));
    expected = new boolean[] { false, true, false, false, false, true };
    included = OrcUtils.includeColumns("time,decimal", schema);
    assertEquals(true, Arrays.equals(expected, included));
    expected = new boolean[] { false, false, true, true, true, false };
    included = OrcUtils.includeColumns("union", schema);
    assertEquals(true, Arrays.equals(expected, included));
    assertEquals(false, reader.getMetadataKeys().iterator().hasNext());
    assertEquals(5077, reader.getNumberOfRows());
    DecimalColumnStatistics stats = (DecimalColumnStatistics) reader.getStatistics()[5];
    assertEquals(71, stats.getNumberOfValues());
    assertEquals(HiveDecimal.create("-5643.234"), stats.getMinimum());
    assertEquals(maxValue, stats.getMaximum());
    // TODO: fix this
    //    assertEquals(null,stats.getSum());
    int stripeCount = 0;
    int rowCount = 0;
    long currentOffset = -1;
    for (StripeInformation stripe : reader.getStripes()) {
        stripeCount += 1;
        rowCount += stripe.getNumberOfRows();
        if (currentOffset < 0) {
            currentOffset = stripe.getOffset() + stripe.getLength();
        } else {
            assertEquals(currentOffset, stripe.getOffset());
            currentOffset += stripe.getLength();
        }
    }
    assertEquals(reader.getNumberOfRows(), rowCount);
    assertEquals(2, stripeCount);
    assertEquals(reader.getContentLength(), currentOffset);
    RecordReader rows = reader.rows();
    assertEquals(0, rows.getRowNumber());
    assertEquals(0.0, rows.getProgress(), 0.000001);
    assertEquals(true, rows.hasNext());
    row = (OrcStruct) rows.next(null);
    assertEquals(1, rows.getRowNumber());
    inspector = reader.getObjectInspector();
    assertEquals("struct<time:timestamp,union:uniontype<int,string>,decimal:decimal(38,18)>", inspector.getTypeName());
    assertEquals(new TimestampWritable(Timestamp.valueOf("2000-03-12 15:00:00")), row.getFieldValue(0));
    union = (OrcUnion) row.getFieldValue(1);
    assertEquals(0, union.getTag());
    assertEquals(new IntWritable(42), union.getObject());
    assertEquals(new HiveDecimalWritable(HiveDecimal.create("12345678.6547456")), row.getFieldValue(2));
    row = (OrcStruct) rows.next(row);
    assertEquals(2, rows.getRowNumber());
    assertEquals(new TimestampWritable(Timestamp.valueOf("2000-03-20 12:00:00.123456789")), row.getFieldValue(0));
    assertEquals(1, union.getTag());
    assertEquals(new Text("hello"), union.getObject());
    assertEquals(new HiveDecimalWritable(HiveDecimal.create("-5643.234")), row.getFieldValue(2));
    row = (OrcStruct) rows.next(row);
    assertEquals(null, row.getFieldValue(0));
    assertEquals(null, row.getFieldValue(1));
    assertEquals(null, row.getFieldValue(2));
    row = (OrcStruct) rows.next(row);
    assertEquals(null, row.getFieldValue(0));
    union = (OrcUnion) row.getFieldValue(1);
    assertEquals(0, union.getTag());
    assertEquals(null, union.getObject());
    assertEquals(null, row.getFieldValue(2));
    row = (OrcStruct) rows.next(row);
    assertEquals(null, row.getFieldValue(0));
    assertEquals(1, union.getTag());
    assertEquals(null, union.getObject());
    assertEquals(null, row.getFieldValue(2));
    row = (OrcStruct) rows.next(row);
    assertEquals(new TimestampWritable(Timestamp.valueOf("1970-01-01 00:00:00")), row.getFieldValue(0));
    assertEquals(new IntWritable(200000), union.getObject());
    assertEquals(new HiveDecimalWritable(HiveDecimal.create("10000000000000000000")), row.getFieldValue(2));
    rand = new Random(42);
    for (int i = 1970; i < 2038; ++i) {
        row = (OrcStruct) rows.next(row);
        assertEquals(new TimestampWritable(Timestamp.valueOf(i + "-05-05 12:34:56." + i)), row.getFieldValue(0));
        if ((i & 1) == 0) {
            assertEquals(0, union.getTag());
            assertEquals(new IntWritable(i * i), union.getObject());
        } else {
            assertEquals(1, union.getTag());
            assertEquals(new Text(Integer.toString(i * i)), union.getObject());
        }
        assertEquals(new HiveDecimalWritable(HiveDecimal.create(new BigInteger(64, rand), rand.nextInt(18))), row.getFieldValue(2));
    }
    for (int i = 0; i < 5000; ++i) {
        row = (OrcStruct) rows.next(row);
        assertEquals(new IntWritable(1732050807), union.getObject());
    }
    row = (OrcStruct) rows.next(row);
    assertEquals(new IntWritable(0), union.getObject());
    row = (OrcStruct) rows.next(row);
    assertEquals(new IntWritable(10), union.getObject());
    row = (OrcStruct) rows.next(row);
    assertEquals(new IntWritable(138), union.getObject());
    assertEquals(false, rows.hasNext());
    assertEquals(1.0, rows.getProgress(), 0.00001);
    assertEquals(reader.getNumberOfRows(), rows.getRowNumber());
    rows.seekToRow(1);
    row = (OrcStruct) rows.next(row);
    assertEquals(new TimestampWritable(Timestamp.valueOf("2000-03-20 12:00:00.123456789")), row.getFieldValue(0));
    assertEquals(1, union.getTag());
    assertEquals(new Text("hello"), union.getObject());
    assertEquals(new HiveDecimalWritable(HiveDecimal.create("-5643.234")), row.getFieldValue(2));
    rows.close();
}
Also used : HiveDecimalObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector) BooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector) ShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) FloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) IntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector) LongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector) BinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector) ByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector) DoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector) TimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) OrcProto(org.apache.orc.OrcProto) DecimalColumnStatistics(org.apache.orc.DecimalColumnStatistics) ArrayList(java.util.ArrayList) TimestampWritable(org.apache.hadoop.hive.serde2.io.TimestampWritable) Text(org.apache.hadoop.io.Text) Random(java.util.Random) HiveDecimal(org.apache.hadoop.hive.common.type.HiveDecimal) BigInteger(java.math.BigInteger) TypeDescription(org.apache.orc.TypeDescription) IntWritable(org.apache.hadoop.io.IntWritable) StripeInformation(org.apache.orc.StripeInformation) Test(org.junit.Test)
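
For comparison, the same schema can be expressed directly with the org.apache.orc.TypeDescription API instead of the OrcProto builders used above. The following is a minimal illustrative sketch, not part of the test; the expected values in the comments mirror the assertions made on writer.getSchema() in testUnionAndTimestamp.

import org.apache.orc.TypeDescription;

public class TypeDescriptionSketch {
    public static void main(String[] args) {
        TypeDescription schema = TypeDescription.createStruct()
                .addField("time", TypeDescription.createTimestamp())
                .addField("union", TypeDescription.createUnion()
                        .addUnionChild(TypeDescription.createInt())
                        .addUnionChild(TypeDescription.createString()))
                .addField("decimal", TypeDescription.createDecimal()
                        .withPrecision(38).withScale(18));
        // Column IDs are assigned in pre-order: struct=0, time=1, union=2,
        // int=3, string=4, decimal=5, matching the addSubtypes wiring above.
        System.out.println(schema.getMaximumId()); // expected: 5
        // expected: struct<time:timestamp,union:uniontype<int,string>,decimal:decimal(38,18)>
        System.out.println(schema);
    }
}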

Example 17 with TypeDescription

Use of org.apache.orc.TypeDescription in project hive by apache.

The class EncodedTreeReaderFactory, method createEncodedTreeReader.

private static TreeReader createEncodedTreeReader(TypeDescription schema, List<OrcProto.ColumnEncoding> encodings, OrcEncodedColumnBatch batch, CompressionCodec codec, TreeReaderFactory.Context context) throws IOException {
    int columnIndex = schema.getId();
    ColumnStreamData[] streamBuffers = null;
    List<ColumnVector> vectors = null;
    if (batch.hasData(columnIndex)) {
        streamBuffers = batch.getColumnData(columnIndex);
    } else if (batch.hasVectors(columnIndex)) {
        vectors = batch.getColumnVectors(columnIndex);
    } else {
        throw new AssertionError("Batch has no data for " + columnIndex + ": " + batch);
    }
    // EncodedColumnBatch is already decompressed, we don't really need to pass codec.
    // But we need to know if the original data is compressed or not. This is used to skip
    // positions in row index properly. If the file is originally compressed,
    // then 1st position (compressed offset) in row index should be skipped to get
    // uncompressed offset, else 1st position should not be skipped.
    // TODO: there should be a better way to do this, code just needs to be modified
    OrcProto.ColumnEncoding columnEncoding = encodings.get(columnIndex);
    // stream buffers are arranged in enum order of stream kind
    ColumnStreamData present = null, data = null, dictionary = null, lengths = null, secondary = null;
    if (streamBuffers != null) {
        present = streamBuffers[OrcProto.Stream.Kind.PRESENT_VALUE];
        data = streamBuffers[OrcProto.Stream.Kind.DATA_VALUE];
        dictionary = streamBuffers[OrcProto.Stream.Kind.DICTIONARY_DATA_VALUE];
        lengths = streamBuffers[OrcProto.Stream.Kind.LENGTH_VALUE];
        secondary = streamBuffers[OrcProto.Stream.Kind.SECONDARY_VALUE];
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("columnIndex: {} columnType: {} streamBuffers.length: {} vectors: {} columnEncoding: {}" + " present: {} data: {} dictionary: {} lengths: {} secondary: {} tz: {}", columnIndex, schema, streamBuffers == null ? 0 : streamBuffers.length, vectors == null ? 0 : vectors.size(), columnEncoding, present != null, data, dictionary != null, lengths != null, secondary != null, context.getWriterTimezone());
    }
    // TODO: get rid of the builders - they serve no purpose... just call ctors directly.
    switch(schema.getCategory()) {
        case BINARY:
        case BOOLEAN:
        case BYTE:
        case SHORT:
        case INT:
        case LONG:
        case FLOAT:
        case DOUBLE:
        case CHAR:
        case VARCHAR:
        case STRING:
        case DECIMAL:
        case TIMESTAMP:
        case DATE:
            return getPrimitiveTreeReader(columnIndex, schema, codec, columnEncoding, present, data, dictionary, lengths, secondary, context, vectors);
        case LIST:
            // Not currently supported.
            assert vectors == null;
            TypeDescription elementType = schema.getChildren().get(0);
            TreeReader elementReader = createEncodedTreeReader(elementType, encodings, batch, codec, context);
            return ListStreamReader.builder()
                    .setColumnIndex(columnIndex)
                    .setColumnEncoding(columnEncoding)
                    .setCompressionCodec(codec)
                    .setPresentStream(present)
                    .setLengthStream(lengths)
                    .setElementReader(elementReader)
                    .setContext(context)
                    .build();
        case MAP:
            // Not currently supported.
            assert vectors == null;
            TypeDescription keyType = schema.getChildren().get(0);
            TypeDescription valueType = schema.getChildren().get(1);
            TreeReader keyReader = createEncodedTreeReader(keyType, encodings, batch, codec, context);
            TreeReader valueReader = createEncodedTreeReader(valueType, encodings, batch, codec, context);
            return MapStreamReader.builder()
                    .setColumnIndex(columnIndex)
                    .setColumnEncoding(columnEncoding)
                    .setCompressionCodec(codec)
                    .setPresentStream(present)
                    .setLengthStream(lengths)
                    .setKeyReader(keyReader)
                    .setValueReader(valueReader)
                    .setContext(context)
                    .build();
        case STRUCT:
            {
                // Not currently supported.
                assert vectors == null;
                int childCount = schema.getChildren().size();
                TreeReader[] childReaders = new TreeReader[childCount];
                for (int i = 0; i < childCount; i++) {
                    TypeDescription childType = schema.getChildren().get(i);
                    childReaders[i] = createEncodedTreeReader(childType, encodings, batch, codec, context);
                }
                return StructStreamReader.builder()
                        .setColumnIndex(columnIndex)
                        .setCompressionCodec(codec)
                        .setColumnEncoding(columnEncoding)
                        .setPresentStream(present)
                        .setChildReaders(childReaders)
                        .setContext(context)
                        .build();
            }
        case UNION:
            {
                // Not currently supported.
                assert vectors == null;
                int childCount = schema.getChildren().size();
                TreeReader[] childReaders = new TreeReader[childCount];
                for (int i = 0; i < childCount; i++) {
                    TypeDescription childType = schema.getChildren().get(i);
                    childReaders[i] = createEncodedTreeReader(childType, encodings, batch, codec, context);
                }
                return UnionStreamReader.builder()
                        .setColumnIndex(columnIndex)
                        .setCompressionCodec(codec)
                        .setColumnEncoding(columnEncoding)
                        .setPresentStream(present)
                        .setDataStream(data)
                        .setChildReaders(childReaders)
                        .setContext(context)
                        .build();
            }
        default:
            throw new UnsupportedOperationException("Data type not supported: " + schema);
    }
}
Also used : OrcProto(org.apache.orc.OrcProto) TypeDescription(org.apache.orc.TypeDescription) ColumnStreamData(org.apache.hadoop.hive.common.io.encoded.EncodedColumnBatch.ColumnStreamData) ColumnVector(org.apache.hadoop.hive.ql.exec.vector.ColumnVector)
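
The streamBuffers lookup above relies on the *_VALUE constants being the protobuf enum numbers of OrcProto.Stream.Kind, so each stream kind indexes its own slot in the per-column ColumnStreamData array. A small standalone sketch (illustrative only) that prints those index values:

import org.apache.orc.OrcProto;

public class StreamKindIndexes {
    public static void main(String[] args) {
        // Protobuf enum numbers from the ORC definition; these are the indexes
        // used into the per-column ColumnStreamData[] array in createEncodedTreeReader.
        System.out.println(OrcProto.Stream.Kind.PRESENT_VALUE);         // 0
        System.out.println(OrcProto.Stream.Kind.DATA_VALUE);            // 1
        System.out.println(OrcProto.Stream.Kind.LENGTH_VALUE);          // 2
        System.out.println(OrcProto.Stream.Kind.DICTIONARY_DATA_VALUE); // 3
        System.out.println(OrcProto.Stream.Kind.SECONDARY_VALUE);       // 5
    }
}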

Example 18 with TypeDescription

Use of org.apache.orc.TypeDescription in project hive by apache.

The class OrcInputFormat, method createOptionsForReader.

static Reader.Options createOptionsForReader(Configuration conf) {
    // Do we have schema on read in the configuration variables?
    TypeDescription schema = OrcInputFormat.getDesiredRowTypeDescr(conf, true, Integer.MAX_VALUE);
    Reader.Options readerOptions = new Reader.Options().schema(schema);
    // TODO: Convert genIncludedColumns and setSearchArgument to use TypeDescription.
    final List<OrcProto.Type> schemaTypes = OrcUtils.getOrcTypes(schema);
    readerOptions.include(OrcInputFormat.genIncludedColumns(schema, conf));
    OrcInputFormat.setSearchArgument(readerOptions, schemaTypes, conf, true);
    return readerOptions;
}
Also used : TypeDescription(org.apache.orc.TypeDescription) StatsProvidingRecordReader(org.apache.hadoop.hive.ql.io.StatsProvidingRecordReader) BatchToRowReader(org.apache.hadoop.hive.ql.io.BatchToRowReader)
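
Below is a hypothetical end-to-end sketch of the same pattern using the core ORC reader API directly: build a read schema and an include mask, attach them to Reader.Options, then open a RecordReader. The file path, column names, and schema are invented for illustration; in createOptionsForReader the schema and includes come from getDesiredRowTypeDescr and genIncludedColumns instead.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.orc.OrcFile;
import org.apache.orc.OrcUtils;
import org.apache.orc.Reader;
import org.apache.orc.RecordReader;
import org.apache.orc.TypeDescription;

public class ReaderOptionsSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Assumed read schema and projection for the sketch.
        TypeDescription schema = TypeDescription.fromString("struct<id:int,name:string>");
        boolean[] include = OrcUtils.includeColumns("name", schema);
        Reader.Options options = new Reader.Options().schema(schema).include(include);
        Reader reader = OrcFile.createReader(new Path("/tmp/example.orc"),
                OrcFile.readerOptions(conf));
        RecordReader rows = reader.rows(options);
        rows.close();
    }
}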

Example 19 with TypeDescription

Use of org.apache.orc.TypeDescription in project hive by apache.

The class OrcInputFormat, method genIncludedColumnsReverse.

/**
   * Reverses genIncludedColumns; produces the table columns indexes from ORC included columns.
   * @param readerSchema The ORC reader schema for the table.
   * @param included The included ORC columns.
   * @param isFullColumnMatch Whether full column match should be enforced (i.e. whether to expect
   *          that all the sub-columns of a complex type column are included or excluded
   *          together in the included array). If false, any sub-column being included for a complex
   *          type is sufficient for the entire complex column to be included in the result.
   * @return The list of table column indexes.
   */
public static List<Integer> genIncludedColumnsReverse(TypeDescription readerSchema, boolean[] included, boolean isFullColumnMatch) {
    assert included != null;
    List<Integer> result = new ArrayList<>();
    List<TypeDescription> children = readerSchema.getChildren();
    for (int columnNumber = 0; columnNumber < children.size(); ++columnNumber) {
        TypeDescription child = children.get(columnNumber);
        int id = child.getId();
        int maxId = child.getMaximumId();
        if (id >= included.length || maxId >= included.length) {
            throw new AssertionError("Inconsistent includes: " + included.length + " elements; found column ID " + id);
        }
        boolean isIncluded = included[id];
        for (int col = id + 1; col <= maxId; ++col) {
            if (isFullColumnMatch && included[col] != isIncluded) {
                throw new AssertionError("Inconsistent includes: root column IDs are [" + id + ", " + maxId + "]; included[" + col + "] = " + included[col] + ", which is different " + " from the previous IDs of the same root column.");
            }
            isIncluded = isIncluded || included[col];
        }
        if (isIncluded) {
            result.add(columnNumber);
        }
    }
    return result;
}
Also used : AtomicInteger(java.util.concurrent.atomic.AtomicInteger) ArrayList(java.util.ArrayList) TypeDescription(org.apache.orc.TypeDescription)
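
A worked illustration (the schema and include mask are invented for this example) of how the reverse mapping behaves: column IDs are assigned in pre-order, so selecting only the nested field b.d maps back to top-level column index 1 (column b) when isFullColumnMatch is false.

import java.util.List;

import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
import org.apache.orc.TypeDescription;

public class IncludedColumnsReverseSketch {
    public static void main(String[] args) {
        // IDs in pre-order: 0=struct, 1=a, 2=b, 3=b.c, 4=b.d
        TypeDescription readerSchema =
                TypeDescription.fromString("struct<a:int,b:struct<c:int,d:string>>");
        boolean[] included = new boolean[] { false, false, false, false, true }; // only b.d
        List<Integer> columns =
                OrcInputFormat.genIncludedColumnsReverse(readerSchema, included, false);
        System.out.println(columns); // expected: [1]
    }
}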

Example 20 with TypeDescription

Use of org.apache.orc.TypeDescription in project hive by apache.

The class OrcInputFormat, method getDesiredRowTypeDescr.

/**
   * Generate the desired schema for reading the file.
   * @param conf the configuration
   * @param isAcidRead is this an acid format?
   * @param dataColumns the desired number of data columns for vectorized read
   * @return the desired schema or null if schema evolution isn't enabled
   * @throws IllegalArgumentException
   */
public static TypeDescription getDesiredRowTypeDescr(Configuration conf, boolean isAcidRead, int dataColumns) {
    String columnNameProperty = null;
    String columnTypeProperty = null;
    ArrayList<String> schemaEvolutionColumnNames = null;
    ArrayList<TypeDescription> schemaEvolutionTypeDescrs = null;
    boolean haveSchemaEvolutionProperties = false;
    if (isAcidRead || HiveConf.getBoolVar(conf, ConfVars.HIVE_SCHEMA_EVOLUTION)) {
        columnNameProperty = conf.get(IOConstants.SCHEMA_EVOLUTION_COLUMNS);
        columnTypeProperty = conf.get(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES);
        haveSchemaEvolutionProperties = (columnNameProperty != null && columnTypeProperty != null);
        if (haveSchemaEvolutionProperties) {
            schemaEvolutionColumnNames = Lists.newArrayList(columnNameProperty.split(","));
            if (schemaEvolutionColumnNames.size() == 0) {
                haveSchemaEvolutionProperties = false;
            } else {
                schemaEvolutionTypeDescrs = typeDescriptionsFromHiveTypeProperty(columnTypeProperty, dataColumns);
                if (schemaEvolutionTypeDescrs.size() != Math.min(dataColumns, schemaEvolutionColumnNames.size())) {
                    haveSchemaEvolutionProperties = false;
                }
            }
        } else if (isAcidRead) {
            throw new IllegalArgumentException(ErrorMsg.SCHEMA_REQUIRED_TO_READ_ACID_TABLES.getErrorCodedMsg());
        }
    }
    if (haveSchemaEvolutionProperties) {
        if (LOG.isInfoEnabled()) {
            LOG.info("Using schema evolution configuration variables schema.evolution.columns " + schemaEvolutionColumnNames.toString() + " / schema.evolution.columns.types " + schemaEvolutionTypeDescrs.toString() + " (isAcidRead " + isAcidRead + ")");
        }
    } else {
        // Try regular properties;
        columnNameProperty = conf.get(serdeConstants.LIST_COLUMNS);
        columnTypeProperty = conf.get(serdeConstants.LIST_COLUMN_TYPES);
        if (columnTypeProperty == null || columnNameProperty == null) {
            return null;
        }
        schemaEvolutionColumnNames = Lists.newArrayList(columnNameProperty.split(","));
        if (schemaEvolutionColumnNames.size() == 0) {
            return null;
        }
        schemaEvolutionTypeDescrs = typeDescriptionsFromHiveTypeProperty(columnTypeProperty, dataColumns);
        if (schemaEvolutionTypeDescrs.size() != Math.min(dataColumns, schemaEvolutionColumnNames.size())) {
            return null;
        }
        // Find the first virtual column and clip it and everything after it off.
        int virtualColumnClipNum = -1;
        int columnNum = 0;
        for (String columnName : schemaEvolutionColumnNames) {
            if (VirtualColumn.VIRTUAL_COLUMN_NAMES.contains(columnName)) {
                virtualColumnClipNum = columnNum;
                break;
            }
            columnNum++;
        }
        if (virtualColumnClipNum != -1 && virtualColumnClipNum < dataColumns) {
            schemaEvolutionColumnNames = Lists.newArrayList(schemaEvolutionColumnNames.subList(0, virtualColumnClipNum));
            schemaEvolutionTypeDescrs = Lists.newArrayList(schemaEvolutionTypeDescrs.subList(0, virtualColumnClipNum));
        }
        if (LOG.isInfoEnabled()) {
            LOG.info("Using column configuration variables columns " + schemaEvolutionColumnNames.toString() + " / columns.types " + schemaEvolutionTypeDescrs.toString() + " (isAcidRead " + isAcidRead + ")");
        }
    }
    // Desired schema does not include virtual columns or partition columns.
    TypeDescription result = TypeDescription.createStruct();
    for (int i = 0; i < schemaEvolutionTypeDescrs.size(); i++) {
        result.addField(schemaEvolutionColumnNames.get(i), schemaEvolutionTypeDescrs.get(i));
    }
    return result;
}
Also used : TypeDescription(org.apache.orc.TypeDescription)
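
A hypothetical usage sketch of the non-ACID branch above, driving the method through the regular columns / columns.types table properties. The column names and types here are invented for illustration; the expected output in the comment follows from the struct built at the end of the method.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.orc.TypeDescription;

public class DesiredRowTypeSketch {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set(serdeConstants.LIST_COLUMNS, "id,name,price");
        conf.set(serdeConstants.LIST_COLUMN_TYPES, "int:string:decimal(10,2)");
        TypeDescription desired =
                OrcInputFormat.getDesiredRowTypeDescr(conf, false, Integer.MAX_VALUE);
        // expected: struct<id:int,name:string,price:decimal(10,2)>
        System.out.println(desired);
    }
}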

Aggregations

TypeDescription (org.apache.orc.TypeDescription) 24
ArrayList (java.util.ArrayList) 6
VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) 5
Test (org.junit.Test) 5
Path (org.apache.hadoop.fs.Path) 4
BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) 4
LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) 4
ListObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) 4
MapObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) 4
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) 4
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) 4
BinaryObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector) 4
BooleanObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector) 4
ByteObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector) 4
DoubleObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector) 4
FloatObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector) 4
HiveDecimalObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector) 4
StructColumnVector (org.apache.hadoop.hive.ql.exec.vector.StructColumnVector) 3
IntObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector) 3
LongObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector) 3