
Example 1 with READ_ALL_COLUMNS

Use of org.apache.hadoop.hive.serde2.ColumnProjectionUtils.READ_ALL_COLUMNS in the presto project by prestodb.

From the class RcFileTester, the method assertFileContentsOld:

private static <K extends LongWritable, V extends BytesRefArrayWritable> void assertFileContentsOld(Type type, TempFile tempFile, Format format, Iterable<?> expectedValues) throws Exception {
    JobConf configuration = new JobConf(new Configuration(false));
    // Project only column 0: list the read-column ids and turn off the
    // read-all-columns default so the list takes effect.
    configuration.set(READ_COLUMN_IDS_CONF_STR, "0");
    configuration.setBoolean(READ_ALL_COLUMNS, false);
    Properties schema = new Properties();
    schema.setProperty(META_TABLE_COLUMNS, "test");
    schema.setProperty(META_TABLE_COLUMN_TYPES, getJavaObjectInspector(type).getTypeName());
    Deserializer deserializer;
    // Pick the columnar SerDe that matches how the file was written.
    if (format == Format.BINARY) {
        deserializer = new LazyBinaryColumnarSerDe();
    } else {
        deserializer = new ColumnarSerDe();
    }
    deserializer.initialize(configuration, schema);
    configuration.set(SERIALIZATION_LIB, deserializer.getClass().getName());
    InputFormat<K, V> inputFormat = new RCFileInputFormat<>();
    RecordReader<K, V> recordReader = inputFormat.getRecordReader(
            new FileSplit(new Path(tempFile.getFile().getAbsolutePath()), 0, tempFile.getFile().length(), (String[]) null),
            configuration,
            NULL);
    K key = recordReader.createKey();
    V value = recordReader.createValue();
    StructObjectInspector rowInspector = (StructObjectInspector) deserializer.getObjectInspector();
    StructField field = rowInspector.getStructFieldRef("test");
    Iterator<?> iterator = expectedValues.iterator();
    // Read each row back and compare it against the corresponding expected value.
    while (recordReader.next(key, value)) {
        Object expectedValue = iterator.next();
        Object rowData = deserializer.deserialize(value);
        Object actualValue = rowInspector.getStructFieldData(rowData, field);
        actualValue = decodeRecordReaderValue(type, actualValue);
        assertColumnValueEquals(type, actualValue, expectedValue);
    }
    assertFalse(iterator.hasNext());
}
Also used:
Path (org.apache.hadoop.fs.Path)
Configuration (org.apache.hadoop.conf.Configuration)
Properties (java.util.Properties)
FileSplit (org.apache.hadoop.mapred.FileSplit)
LazyBinaryColumnarSerDe (org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe)
ColumnarSerDe (org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe)
StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField)
RCFileInputFormat (org.apache.hadoop.hive.ql.io.RCFileInputFormat)
Deserializer (org.apache.hadoop.hive.serde2.Deserializer)
StructObject (org.apache.hadoop.hive.serde2.StructObject)
JobConf (org.apache.hadoop.mapred.JobConf)
ObjectInspectorFactory.getStandardStructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector)
SettableStructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector)
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)
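
A note on the two configuration keys: READ_COLUMN_IDS_CONF_STR lists the zero-based ids of the columns a reader should materialize, and READ_ALL_COLUMNS (default true) must be switched off for that list to take effect. Hive's ColumnProjectionUtils also exposes helpers for this setup; below is a minimal sketch of the equivalent calls, assuming Hive 1.x-era signatures (on the versions I have checked, appendReadColumns also flips READ_ALL_COLUMNS to false). The class name ProjectionSetupSketch is hypothetical.

import java.util.Collections;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
import org.apache.hadoop.mapred.JobConf;

public final class ProjectionSetupSketch {
    public static void main(String[] args) {
        JobConf configuration = new JobConf(new Configuration(false));
        // Equivalent to configuration.set(READ_COLUMN_IDS_CONF_STR, "0") plus
        // configuration.setBoolean(READ_ALL_COLUMNS, false) in the example above.
        ColumnProjectionUtils.appendReadColumns(configuration, Collections.singletonList(0));
        // Round-trip the setting to confirm what downstream readers will see.
        List<Integer> ids = ColumnProjectionUtils.getReadColumnIDs(configuration);
        System.out.println("projected column ids: " + ids); // expected: [0]
    }
}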

Example 2 with READ_ALL_COLUMNS

Use of org.apache.hadoop.hive.serde2.ColumnProjectionUtils.READ_ALL_COLUMNS in the presto project by prestodb.

From the class OrcTester, the method assertFileContentsDwrfHive:

private static void assertFileContentsDwrfHive(List<Type> types, TempFile tempFile, List<List<?>> expectedValues) throws Exception {
    JobConf configuration = new JobConf(new Configuration(false));
    // Same projection keys as in example 1; here the reader is driven by the
    // explicit include mask built below.
    configuration.set(READ_COLUMN_IDS_CONF_STR, "0");
    configuration.setBoolean(READ_ALL_COLUMNS, false);
    Path path = new Path(tempFile.getFile().getAbsolutePath());
    com.facebook.hive.orc.Reader reader = com.facebook.hive.orc.OrcFile.createReader(path.getFileSystem(configuration), path, configuration);
    // Include mask over the file's type ids, oversized with generous headroom;
    // every flag is true, so all columns are materialized.
    boolean[] include = new boolean[reader.getTypes().size() + 100000];
    Arrays.fill(include, true);
    com.facebook.hive.orc.RecordReader recordReader = reader.rows(include);
    StructObjectInspector rowInspector = (StructObjectInspector) reader.getObjectInspector();
    List<StructField> fields = makeColumnNames(types.size()).stream().map(rowInspector::getStructFieldRef).collect(toList());
    Object rowData = null;
    int rowCount = 0;
    // Read every row and compare each column against the expected values.
    while (recordReader.hasNext()) {
        rowData = recordReader.next(rowData);
        for (int i = 0; i < fields.size(); i++) {
            Object actualValue = rowInspector.getStructFieldData(rowData, fields.get(i));
            actualValue = decodeRecordReaderValue(types.get(i), actualValue);
            assertColumnValueEquals(types.get(i), actualValue, expectedValues.get(i).get(rowCount));
        }
        rowCount++;
    }
    assertEquals(rowCount, expectedValues.get(0).size());
}
Also used:
Path (org.apache.hadoop.fs.Path)
Configuration (org.apache.hadoop.conf.Configuration)
StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField)
OrcLazyObject (com.facebook.hive.orc.lazy.OrcLazyObject)
JobConf (org.apache.hadoop.mapred.JobConf)
ObjectInspectorFactory.getStandardStructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector)
SettableStructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector)
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)
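
The include array passed to reader.rows selects columns by ORC type id: entry 0 is the root struct and, for a flat schema like these tests use, entries 1..n are its fields. The test above enables every flag, so nothing is actually projected out. Here is a hedged sketch of reading only the first field instead, assuming the com.facebook.hive.orc API shown above; the helper class is hypothetical.

import java.io.IOException;
import com.facebook.hive.orc.Reader;
import com.facebook.hive.orc.RecordReader;

final class DwrfProjectionSketch {
    // Open a DWRF record reader that materializes only the root struct
    // and its first field; all other type ids stay false (skipped).
    static RecordReader firstFieldOnly(Reader reader) throws IOException {
        boolean[] include = new boolean[reader.getTypes().size()];
        include[0] = true; // root struct
        include[1] = true; // first field of the root struct
        return reader.rows(include);
    }
}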

Example 3 with READ_ALL_COLUMNS

Use of org.apache.hadoop.hive.serde2.ColumnProjectionUtils.READ_ALL_COLUMNS in the presto project by prestodb.

From the class OrcTester, the method assertFileContentsOrcHive:

private static void assertFileContentsOrcHive(List<Type> types, TempFile tempFile, List<List<?>> expectedValues) throws Exception {
    JobConf configuration = new JobConf(new Configuration(false));
    // Same projection keys again; the no-argument rows() call below takes no
    // include mask and reads every column.
    configuration.set(READ_COLUMN_IDS_CONF_STR, "0");
    configuration.setBoolean(READ_ALL_COLUMNS, false);
    Reader reader = OrcFile.createReader(new Path(tempFile.getFile().getAbsolutePath()), new ReaderOptions(configuration));
    org.apache.hadoop.hive.ql.io.orc.RecordReader recordReader = reader.rows();
    StructObjectInspector rowInspector = (StructObjectInspector) reader.getObjectInspector();
    List<StructField> fields = makeColumnNames(types.size()).stream().map(rowInspector::getStructFieldRef).collect(toList());
    Object rowData = null;
    int rowCount = 0;
    while (recordReader.hasNext()) {
        rowData = recordReader.next(rowData);
        for (int i = 0; i < fields.size(); i++) {
            Object actualValue = rowInspector.getStructFieldData(rowData, fields.get(i));
            actualValue = decodeRecordReaderValue(types.get(i), actualValue);
            assertColumnValueEquals(types.get(i), actualValue, expectedValues.get(i).get(rowCount));
        }
        rowCount++;
    }
    assertEquals(rowCount, expectedValues.get(0).size());
}
Also used:
Path (org.apache.hadoop.fs.Path)
Configuration (org.apache.hadoop.conf.Configuration)
Reader (org.apache.hadoop.hive.ql.io.orc.Reader)
ReaderOptions (org.apache.hadoop.hive.ql.io.orc.OrcFile.ReaderOptions)
StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField)
OrcLazyObject (com.facebook.hive.orc.lazy.OrcLazyObject)
JobConf (org.apache.hadoop.mapred.JobConf)
ObjectInspectorFactory.getStandardStructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector)
SettableStructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector)
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)
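
The Apache ORC reader used in this example supports the same idea as example 2. A minimal sketch of restricting it with an include mask over type ids, assuming the Hive 1.x-era rows(boolean[]) overload; the helper class is hypothetical and untested.

import java.io.IOException;
import org.apache.hadoop.hive.ql.io.orc.Reader;
import org.apache.hadoop.hive.ql.io.orc.RecordReader;

final class OrcProjectionSketch {
    // Restrict the ORC reader to the root struct and its first field,
    // mirroring the DWRF include-mask sketch in example 2.
    static RecordReader firstFieldOnly(Reader reader) throws IOException {
        boolean[] include = new boolean[reader.getTypes().size()];
        include[0] = true; // root struct
        include[1] = true; // first field of the root struct
        return reader.rows(include);
    }
}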

Aggregations

Type (fully qualified name): usage count across the three examples
Configuration (org.apache.hadoop.conf.Configuration): 3
Path (org.apache.hadoop.fs.Path): 3
ObjectInspectorFactory.getStandardStructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector): 3
SettableStructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector): 3
StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField): 3
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 3
JobConf (org.apache.hadoop.mapred.JobConf): 3
OrcLazyObject (com.facebook.hive.orc.lazy.OrcLazyObject): 2
Properties (java.util.Properties): 1
RCFileInputFormat (org.apache.hadoop.hive.ql.io.RCFileInputFormat): 1
ReaderOptions (org.apache.hadoop.hive.ql.io.orc.OrcFile.ReaderOptions): 1
Reader (org.apache.hadoop.hive.ql.io.orc.Reader): 1
Deserializer (org.apache.hadoop.hive.serde2.Deserializer): 1
StructObject (org.apache.hadoop.hive.serde2.StructObject): 1
ColumnarSerDe (org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe): 1
LazyBinaryColumnarSerDe (org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe): 1
FileSplit (org.apache.hadoop.mapred.FileSplit): 1