Search in sources :

Example 6 with Output

use of org.apache.hadoop.hive.serde2.ByteStream.Output in project hive by apache.

the class TestLazyBinaryFast method testLazyBinaryFast.

private void testLazyBinaryFast(SerdeRandomRowSource source, Object[][] rows, AbstractSerDe serde, StructObjectInspector rowOI, AbstractSerDe serde_fewer, StructObjectInspector writeRowOI, PrimitiveTypeInfo[] primitiveTypeInfos, boolean useIncludeColumns, boolean doWriteFewerColumns, Random r) throws Throwable {
    int rowCount = rows.length;
    int columnCount = primitiveTypeInfos.length;
    boolean[] columnsToInclude = null;
    if (useIncludeColumns) {
        columnsToInclude = new boolean[columnCount];
        for (int i = 0; i < columnCount; i++) {
            columnsToInclude[i] = r.nextBoolean();
        }
    }
    int writeColumnCount = columnCount;
    PrimitiveTypeInfo[] writePrimitiveTypeInfos = primitiveTypeInfos;
    if (doWriteFewerColumns) {
        writeColumnCount = writeRowOI.getAllStructFieldRefs().size();
        writePrimitiveTypeInfos = Arrays.copyOf(primitiveTypeInfos, writeColumnCount);
    }
    LazyBinarySerializeWrite lazyBinarySerializeWrite = new LazyBinarySerializeWrite(writeColumnCount);
    // Try to serialize
    BytesWritable[] serializeWriteBytes = new BytesWritable[rowCount];
    for (int i = 0; i < rowCount; i++) {
        Object[] row = rows[i];
        Output output = new Output();
        lazyBinarySerializeWrite.set(output);
        for (int index = 0; index < writeColumnCount; index++) {
            Writable writable = (Writable) row[index];
            VerifyFast.serializeWrite(lazyBinarySerializeWrite, primitiveTypeInfos[index], writable);
        }
        BytesWritable bytesWritable = new BytesWritable();
        bytesWritable.set(output.getData(), 0, output.getLength());
        serializeWriteBytes[i] = bytesWritable;
    }
    // Try to deserialize
    for (int i = 0; i < rowCount; i++) {
        Object[] row = rows[i];
        // Specifying the right type info length tells LazyBinaryDeserializeRead which is the last
        // column.
        LazyBinaryDeserializeRead lazyBinaryDeserializeRead = new LazyBinaryDeserializeRead(writePrimitiveTypeInfos, /* useExternalBuffer */
        false);
        BytesWritable bytesWritable = serializeWriteBytes[i];
        lazyBinaryDeserializeRead.set(bytesWritable.getBytes(), 0, bytesWritable.getLength());
        for (int index = 0; index < columnCount; index++) {
            if (useIncludeColumns && !columnsToInclude[index]) {
                lazyBinaryDeserializeRead.skipNextField();
            } else if (index >= writeColumnCount) {
                // Should come back a null.
                VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, primitiveTypeInfos[index], null);
            } else {
                Writable writable = (Writable) row[index];
                VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, primitiveTypeInfos[index], writable);
            }
        }
        if (writeColumnCount == columnCount) {
            TestCase.assertTrue(lazyBinaryDeserializeRead.isEndOfInputReached());
        }
    }
    // Try to deserialize using SerDe class our Writable row objects created by SerializeWrite.
    for (int i = 0; i < rowCount; i++) {
        BytesWritable bytesWritable = serializeWriteBytes[i];
        LazyBinaryStruct lazyBinaryStruct;
        if (doWriteFewerColumns) {
            lazyBinaryStruct = (LazyBinaryStruct) serde_fewer.deserialize(bytesWritable);
        } else {
            lazyBinaryStruct = (LazyBinaryStruct) serde.deserialize(bytesWritable);
        }
        Object[] row = rows[i];
        for (int index = 0; index < writeColumnCount; index++) {
            PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[index];
            Writable writable = (Writable) row[index];
            Object object = lazyBinaryStruct.getField(index);
            if (writable == null || object == null) {
                if (writable != null || object != null) {
                    fail("SerDe deserialized NULL column mismatch");
                }
            } else {
                if (!object.equals(writable)) {
                    fail("SerDe deserialized value does not match");
                }
            }
        }
    }
    // One Writable per row.
    BytesWritable[] serdeBytes = new BytesWritable[rowCount];
    // Serialize using the SerDe, then below deserialize using DeserializeRead.
    Object[] serdeRow = new Object[writeColumnCount];
    for (int i = 0; i < rowCount; i++) {
        Object[] row = rows[i];
        // LazyBinary seems to work better with an row object array instead of a Java object...
        for (int index = 0; index < writeColumnCount; index++) {
            serdeRow[index] = row[index];
        }
        BytesWritable serialized;
        if (doWriteFewerColumns) {
            serialized = (BytesWritable) serde_fewer.serialize(serdeRow, writeRowOI);
        } else {
            serialized = (BytesWritable) serde.serialize(serdeRow, rowOI);
        }
        BytesWritable bytesWritable = new BytesWritable(Arrays.copyOfRange(serialized.getBytes(), 0, serialized.getLength()));
        byte[] bytes1 = bytesWritable.getBytes();
        BytesWritable lazySerializedWriteBytes = serializeWriteBytes[i];
        byte[] bytes2 = Arrays.copyOfRange(lazySerializedWriteBytes.getBytes(), 0, lazySerializedWriteBytes.getLength());
        if (bytes1.length != bytes2.length) {
            fail("SerializeWrite length " + bytes2.length + " and " + "SerDe serialization length " + bytes1.length + " do not match (" + Arrays.toString(primitiveTypeInfos) + ")");
        }
        if (!Arrays.equals(bytes1, bytes2)) {
            fail("SerializeWrite and SerDe serialization does not match (" + Arrays.toString(primitiveTypeInfos) + ")");
        }
        serdeBytes[i] = bytesWritable;
    }
    // Try to deserialize using DeserializeRead our Writable row objects created by SerDe.
    for (int i = 0; i < rowCount; i++) {
        Object[] row = rows[i];
        // When doWriteFewerColumns, try to read more fields than exist in buffer.
        LazyBinaryDeserializeRead lazyBinaryDeserializeRead = new LazyBinaryDeserializeRead(primitiveTypeInfos, /* useExternalBuffer */
        false);
        BytesWritable bytesWritable = serdeBytes[i];
        lazyBinaryDeserializeRead.set(bytesWritable.getBytes(), 0, bytesWritable.getLength());
        for (int index = 0; index < columnCount; index++) {
            if (useIncludeColumns && !columnsToInclude[index]) {
                lazyBinaryDeserializeRead.skipNextField();
            } else if (index >= writeColumnCount) {
                // Should come back a null.
                VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, primitiveTypeInfos[index], null);
            } else {
                Writable writable = (Writable) row[index];
                VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, primitiveTypeInfos[index], writable);
            }
        }
        if (writeColumnCount == columnCount) {
            TestCase.assertTrue(lazyBinaryDeserializeRead.isEndOfInputReached());
        }
    }
}
Also used : LazyBinarySerializeWrite(org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite) Writable(org.apache.hadoop.io.Writable) BytesWritable(org.apache.hadoop.io.BytesWritable) BytesWritable(org.apache.hadoop.io.BytesWritable) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) Output(org.apache.hadoop.hive.serde2.ByteStream.Output) LazyBinaryDeserializeRead(org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead)

Example 7 with Output

use of org.apache.hadoop.hive.serde2.ByteStream.Output in project hive by apache.

the class TestLazySimpleFast method testLazySimpleFast.

private void testLazySimpleFast(SerdeRandomRowSource source, Object[][] rows, LazySimpleSerDe serde, StructObjectInspector rowOI, LazySimpleSerDe serde_fewer, StructObjectInspector writeRowOI, byte separator, LazySerDeParameters serdeParams, LazySerDeParameters serdeParams_fewer, PrimitiveTypeInfo[] primitiveTypeInfos, boolean useIncludeColumns, boolean doWriteFewerColumns, Random r) throws Throwable {
    int rowCount = rows.length;
    int columnCount = primitiveTypeInfos.length;
    boolean[] columnsToInclude = null;
    if (useIncludeColumns) {
        columnsToInclude = new boolean[columnCount];
        for (int i = 0; i < columnCount; i++) {
            columnsToInclude[i] = r.nextBoolean();
        }
    }
    int writeColumnCount = columnCount;
    PrimitiveTypeInfo[] writePrimitiveTypeInfos = primitiveTypeInfos;
    if (doWriteFewerColumns) {
        writeColumnCount = writeRowOI.getAllStructFieldRefs().size();
        writePrimitiveTypeInfos = Arrays.copyOf(primitiveTypeInfos, writeColumnCount);
    }
    // Try to serialize
    BytesWritable[] serializeWriteBytes = new BytesWritable[rowCount];
    for (int i = 0; i < rowCount; i++) {
        Object[] row = rows[i];
        Output output = new Output();
        LazySimpleSerializeWrite lazySimpleSerializeWrite = new LazySimpleSerializeWrite(columnCount, separator, serdeParams);
        lazySimpleSerializeWrite.set(output);
        for (int index = 0; index < columnCount; index++) {
            Writable writable = (Writable) row[index];
            VerifyFast.serializeWrite(lazySimpleSerializeWrite, primitiveTypeInfos[index], writable);
        }
        BytesWritable bytesWritable = new BytesWritable();
        bytesWritable.set(output.getData(), 0, output.getLength());
        serializeWriteBytes[i] = bytesWritable;
    }
    // Try to deserialize
    for (int i = 0; i < rowCount; i++) {
        Object[] row = rows[i];
        LazySimpleDeserializeRead lazySimpleDeserializeRead = new LazySimpleDeserializeRead(writePrimitiveTypeInfos, /* useExternalBuffer */
        false, separator, serdeParams);
        BytesWritable bytesWritable = serializeWriteBytes[i];
        byte[] bytes = bytesWritable.getBytes();
        int length = bytesWritable.getLength();
        lazySimpleDeserializeRead.set(bytes, 0, length);
        char[] chars = new char[length];
        for (int c = 0; c < chars.length; c++) {
            chars[c] = (char) (bytes[c] & 0xFF);
        }
        for (int index = 0; index < columnCount; index++) {
            if (useIncludeColumns && !columnsToInclude[index]) {
                lazySimpleDeserializeRead.skipNextField();
            } else if (index >= writeColumnCount) {
                // Should come back a null.
                VerifyFast.verifyDeserializeRead(lazySimpleDeserializeRead, primitiveTypeInfos[index], null);
            } else {
                Writable writable = (Writable) row[index];
                VerifyFast.verifyDeserializeRead(lazySimpleDeserializeRead, primitiveTypeInfos[index], writable);
            }
        }
        if (writeColumnCount == columnCount) {
            TestCase.assertTrue(lazySimpleDeserializeRead.isEndOfInputReached());
        }
    }
    // Try to deserialize using SerDe class our Writable row objects created by SerializeWrite.
    for (int i = 0; i < rowCount; i++) {
        BytesWritable bytesWritable = serializeWriteBytes[i];
        LazyStruct lazySimpleStruct = (LazyStruct) serde.deserialize(bytesWritable);
        Object[] row = rows[i];
        for (int index = 0; index < columnCount; index++) {
            PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[index];
            Writable writable = (Writable) row[index];
            LazyPrimitive lazyPrimitive = (LazyPrimitive) lazySimpleStruct.getField(index);
            Object object;
            if (lazyPrimitive != null) {
                object = lazyPrimitive.getWritableObject();
            } else {
                object = null;
            }
            if (writable == null || object == null) {
                if (writable != null || object != null) {
                    fail("SerDe deserialized NULL column mismatch");
                }
            } else {
                if (!object.equals(writable)) {
                    fail("SerDe deserialized value does not match");
                }
            }
        }
    }
    // One Writable per row.
    byte[][] serdeBytes = new byte[rowCount][];
    // Serialize using the SerDe, then below deserialize using DeserializeRead.
    Object[] serdeRow = new Object[columnCount];
    for (int i = 0; i < rowCount; i++) {
        Object[] row = rows[i];
        // LazySimple seems to work better with an row object array instead of a Java object...
        for (int index = 0; index < columnCount; index++) {
            serdeRow[index] = row[index];
        }
        Text serialized = (Text) serde.serialize(serdeRow, rowOI);
        byte[] bytes1 = Arrays.copyOfRange(serialized.getBytes(), 0, serialized.getLength());
        byte[] bytes2 = Arrays.copyOfRange(serializeWriteBytes[i].getBytes(), 0, serializeWriteBytes[i].getLength());
        if (!Arrays.equals(bytes1, bytes2)) {
            fail("SerializeWrite and SerDe serialization does not match");
        }
        serdeBytes[i] = copyBytes(serialized);
    }
    // Try to deserialize using DeserializeRead our Writable row objects created by SerDe.
    for (int i = 0; i < rowCount; i++) {
        Object[] row = rows[i];
        LazySimpleDeserializeRead lazySimpleDeserializeRead = new LazySimpleDeserializeRead(writePrimitiveTypeInfos, /* useExternalBuffer */
        false, separator, serdeParams);
        byte[] bytes = serdeBytes[i];
        lazySimpleDeserializeRead.set(bytes, 0, bytes.length);
        for (int index = 0; index < columnCount; index++) {
            if (useIncludeColumns && !columnsToInclude[index]) {
                lazySimpleDeserializeRead.skipNextField();
            } else if (index >= writeColumnCount) {
                // Should come back a null.
                VerifyFast.verifyDeserializeRead(lazySimpleDeserializeRead, primitiveTypeInfos[index], null);
            } else {
                Writable writable = (Writable) row[index];
                VerifyFast.verifyDeserializeRead(lazySimpleDeserializeRead, primitiveTypeInfos[index], writable);
            }
        }
        if (writeColumnCount == columnCount) {
            TestCase.assertTrue(lazySimpleDeserializeRead.isEndOfInputReached());
        }
    }
}
Also used : Writable(org.apache.hadoop.io.Writable) BytesWritable(org.apache.hadoop.io.BytesWritable) LazySimpleDeserializeRead(org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleDeserializeRead) BytesWritable(org.apache.hadoop.io.BytesWritable) Text(org.apache.hadoop.io.Text) LazySimpleSerializeWrite(org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleSerializeWrite) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) Output(org.apache.hadoop.hive.serde2.ByteStream.Output)

Example 8 with Output

use of org.apache.hadoop.hive.serde2.ByteStream.Output in project hive by apache.

the class PlanUtils method getTableDesc.

public static TableDesc getTableDesc(Class<? extends Deserializer> serdeClass, String separatorCode, String columns, String columnTypes, boolean lastColumnTakesRestOfTheLine, boolean useDelimitedJSON, String fileFormat) {
    Properties properties = Utilities.makeProperties(serdeConstants.SERIALIZATION_FORMAT, separatorCode, serdeConstants.LIST_COLUMNS, columns);
    if (!separatorCode.equals(Integer.toString(Utilities.ctrlaCode))) {
        properties.setProperty(serdeConstants.FIELD_DELIM, separatorCode);
    }
    if (columnTypes != null) {
        properties.setProperty(serdeConstants.LIST_COLUMN_TYPES, columnTypes);
    }
    if (lastColumnTakesRestOfTheLine) {
        properties.setProperty(serdeConstants.SERIALIZATION_LAST_COLUMN_TAKES_REST, "true");
    }
    // Right now, it is hard-coded in the code
    if (useDelimitedJSON) {
        serdeClass = DelimitedJSONSerDe.class;
    }
    Class inputFormat, outputFormat;
    // get the input & output file formats
    if ("SequenceFile".equalsIgnoreCase(fileFormat)) {
        inputFormat = SequenceFileInputFormat.class;
        outputFormat = SequenceFileOutputFormat.class;
    } else if ("RCFile".equalsIgnoreCase(fileFormat)) {
        inputFormat = RCFileInputFormat.class;
        outputFormat = RCFileOutputFormat.class;
        assert serdeClass == ColumnarSerDe.class;
    } else if (LLAP_OUTPUT_FORMAT_KEY.equalsIgnoreCase(fileFormat)) {
        inputFormat = TextInputFormat.class;
        outputFormat = LlapOutputFormat.class;
        properties.setProperty(hive_metastoreConstants.META_TABLE_STORAGE, LLAP_OF_SH_CLASS);
    } else {
        // use TextFile by default
        inputFormat = TextInputFormat.class;
        outputFormat = IgnoreKeyTextOutputFormat.class;
    }
    properties.setProperty(serdeConstants.SERIALIZATION_LIB, serdeClass.getName());
    return new TableDesc(inputFormat, outputFormat, properties);
}
Also used : RCFileOutputFormat(org.apache.hadoop.hive.ql.io.RCFileOutputFormat) ColumnarSerDe(org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe) RCFileInputFormat(org.apache.hadoop.hive.ql.io.RCFileInputFormat) Properties(java.util.Properties)

Example 9 with Output

use of org.apache.hadoop.hive.serde2.ByteStream.Output in project hive by apache.

the class DDLTask method describeTable.

/**
 * Write the description of a table to a file.
 *
 * @param db
 *          The database in question.
 * @param descTbl
 *          This is the table we're interested in.
 * @return Returns 0 when execution succeeds and above 0 if it fails.
 * @throws HiveException
 *           Throws this exception if an unexpected error occurs.
 * @throws MetaException
 */
private int describeTable(Hive db, DescTableDesc descTbl) throws HiveException, MetaException {
    String colPath = descTbl.getColumnPath();
    String tableName = descTbl.getTableName();
    // describe the table - populate the output stream
    Table tbl = db.getTable(tableName, false);
    if (tbl == null) {
        throw new HiveException(ErrorMsg.INVALID_TABLE, tableName);
    }
    Partition part = null;
    if (descTbl.getPartSpec() != null) {
        part = db.getPartition(tbl, descTbl.getPartSpec(), false);
        if (part == null) {
            throw new HiveException(ErrorMsg.INVALID_PARTITION, StringUtils.join(descTbl.getPartSpec().keySet(), ','), tableName);
        }
        tbl = part.getTable();
    }
    DataOutputStream outStream = getOutputStream(descTbl.getResFile());
    try {
        LOG.debug("DDLTask: got data for {}", tableName);
        List<FieldSchema> cols = null;
        List<ColumnStatisticsObj> colStats = null;
        Deserializer deserializer = tbl.getDeserializer(true);
        if (deserializer instanceof AbstractSerDe) {
            String errorMsgs = ((AbstractSerDe) deserializer).getConfigurationErrors();
            if (errorMsgs != null && !errorMsgs.isEmpty()) {
                throw new SQLException(errorMsgs);
            }
        }
        if (colPath.equals(tableName)) {
            cols = (part == null || tbl.getTableType() == TableType.VIRTUAL_VIEW) ? tbl.getCols() : part.getCols();
            if (!descTbl.isFormatted()) {
                cols.addAll(tbl.getPartCols());
            }
            if (tbl.isPartitioned() && part == null) {
                // No partitioned specified for partitioned table, lets fetch all.
                Map<String, String> tblProps = tbl.getParameters() == null ? new HashMap<String, String>() : tbl.getParameters();
                Map<String, Long> valueMap = new HashMap<>();
                Map<String, Boolean> stateMap = new HashMap<>();
                for (String stat : StatsSetupConst.supportedStats) {
                    valueMap.put(stat, 0L);
                    stateMap.put(stat, true);
                }
                PartitionIterable parts = new PartitionIterable(db, tbl, null, conf.getIntVar(HiveConf.ConfVars.METASTORE_BATCH_RETRIEVE_MAX));
                int numParts = 0;
                for (Partition partition : parts) {
                    Map<String, String> props = partition.getParameters();
                    Boolean state = StatsSetupConst.areBasicStatsUptoDate(props);
                    for (String stat : StatsSetupConst.supportedStats) {
                        stateMap.put(stat, stateMap.get(stat) && state);
                        if (props != null && props.get(stat) != null) {
                            valueMap.put(stat, valueMap.get(stat) + Long.parseLong(props.get(stat)));
                        }
                    }
                    numParts++;
                }
                for (String stat : StatsSetupConst.supportedStats) {
                    StatsSetupConst.setBasicStatsState(tblProps, Boolean.toString(stateMap.get(stat)));
                    tblProps.put(stat, valueMap.get(stat).toString());
                }
                tblProps.put(StatsSetupConst.NUM_PARTITIONS, Integer.toString(numParts));
                tbl.setParameters(tblProps);
            }
        } else {
            if (descTbl.isFormatted()) {
                // when column name is specified in describe table DDL, colPath will
                // will be table_name.column_name
                String colName = colPath.split("\\.")[1];
                String[] dbTab = Utilities.getDbTableName(tableName);
                List<String> colNames = new ArrayList<String>();
                colNames.add(colName.toLowerCase());
                if (null == part) {
                    if (tbl.isPartitioned()) {
                        Map<String, String> tblProps = tbl.getParameters() == null ? new HashMap<String, String>() : tbl.getParameters();
                        if (tbl.isPartitionKey(colNames.get(0))) {
                            FieldSchema partCol = tbl.getPartColByName(colNames.get(0));
                            cols = Collections.singletonList(partCol);
                            PartitionIterable parts = new PartitionIterable(db, tbl, null, conf.getIntVar(HiveConf.ConfVars.METASTORE_BATCH_RETRIEVE_MAX));
                            ColumnInfo ci = new ColumnInfo(partCol.getName(), TypeInfoUtils.getTypeInfoFromTypeString(partCol.getType()), null, false);
                            ColStatistics cs = StatsUtils.getColStatsForPartCol(ci, parts, conf);
                            ColumnStatisticsData data = new ColumnStatisticsData();
                            ColStatistics.Range r = cs.getRange();
                            StatObjectConverter.fillColumnStatisticsData(partCol.getType(), data, r == null ? null : r.minValue, r == null ? null : r.maxValue, r == null ? null : r.minValue, r == null ? null : r.maxValue, r == null ? null : r.minValue.toString(), r == null ? null : r.maxValue.toString(), cs.getNumNulls(), cs.getCountDistint(), null, cs.getAvgColLen(), cs.getAvgColLen(), cs.getNumTrues(), cs.getNumFalses());
                            ColumnStatisticsObj cso = new ColumnStatisticsObj(partCol.getName(), partCol.getType(), data);
                            colStats = Collections.singletonList(cso);
                            StatsSetupConst.setColumnStatsState(tblProps, colNames);
                        } else {
                            cols = Hive.getFieldsFromDeserializer(colPath, deserializer);
                            List<String> parts = db.getPartitionNames(dbTab[0].toLowerCase(), dbTab[1].toLowerCase(), (short) -1);
                            AggrStats aggrStats = db.getAggrColStatsFor(dbTab[0].toLowerCase(), dbTab[1].toLowerCase(), colNames, parts);
                            colStats = aggrStats.getColStats();
                            if (parts.size() == aggrStats.getPartsFound()) {
                                StatsSetupConst.setColumnStatsState(tblProps, colNames);
                            } else {
                                StatsSetupConst.removeColumnStatsState(tblProps, colNames);
                            }
                        }
                        tbl.setParameters(tblProps);
                    } else {
                        cols = Hive.getFieldsFromDeserializer(colPath, deserializer);
                        colStats = db.getTableColumnStatistics(dbTab[0].toLowerCase(), dbTab[1].toLowerCase(), colNames);
                    }
                } else {
                    List<String> partitions = new ArrayList<String>();
                    partitions.add(part.getName());
                    cols = Hive.getFieldsFromDeserializer(colPath, deserializer);
                    colStats = db.getPartitionColumnStatistics(dbTab[0].toLowerCase(), dbTab[1].toLowerCase(), partitions, colNames).get(part.getName());
                }
            } else {
                cols = Hive.getFieldsFromDeserializer(colPath, deserializer);
            }
        }
        PrimaryKeyInfo pkInfo = null;
        ForeignKeyInfo fkInfo = null;
        UniqueConstraint ukInfo = null;
        NotNullConstraint nnInfo = null;
        DefaultConstraint dInfo = null;
        CheckConstraint cInfo = null;
        if (descTbl.isExt() || descTbl.isFormatted()) {
            pkInfo = db.getPrimaryKeys(tbl.getDbName(), tbl.getTableName());
            fkInfo = db.getForeignKeys(tbl.getDbName(), tbl.getTableName());
            ukInfo = db.getUniqueConstraints(tbl.getDbName(), tbl.getTableName());
            nnInfo = db.getNotNullConstraints(tbl.getDbName(), tbl.getTableName());
            dInfo = db.getDefaultConstraints(tbl.getDbName(), tbl.getTableName());
            cInfo = db.getCheckConstraints(tbl.getDbName(), tbl.getTableName());
        }
        fixDecimalColumnTypeName(cols);
        // In case the query is served by HiveServer2, don't pad it with spaces,
        // as HiveServer2 output is consumed by JDBC/ODBC clients.
        boolean isOutputPadded = !SessionState.get().isHiveServerQuery();
        formatter.describeTable(outStream, colPath, tableName, tbl, part, cols, descTbl.isFormatted(), descTbl.isExt(), isOutputPadded, colStats, pkInfo, fkInfo, ukInfo, nnInfo, dInfo, cInfo);
        LOG.debug("DDLTask: written data for {}", tableName);
    } catch (SQLException e) {
        throw new HiveException(e, ErrorMsg.GENERIC_ERROR, tableName);
    } finally {
        IOUtils.closeStream(outStream);
    }
    return 0;
}
Also used : HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) SQLException(java.sql.SQLException) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) AggrStats(org.apache.hadoop.hive.metastore.api.AggrStats) DataOutputStream(java.io.DataOutputStream) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) ArrayList(java.util.ArrayList) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) UniqueConstraint(org.apache.hadoop.hive.ql.metadata.UniqueConstraint) AbstractSerDe(org.apache.hadoop.hive.serde2.AbstractSerDe) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) DefaultConstraint(org.apache.hadoop.hive.ql.metadata.DefaultConstraint) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) PrimaryKeyInfo(org.apache.hadoop.hive.ql.metadata.PrimaryKeyInfo) ForeignKeyInfo(org.apache.hadoop.hive.ql.metadata.ForeignKeyInfo) ColStatistics(org.apache.hadoop.hive.ql.plan.ColStatistics) NotNullConstraint(org.apache.hadoop.hive.ql.metadata.NotNullConstraint) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) CheckConstraint(org.apache.hadoop.hive.ql.metadata.CheckConstraint) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) Partition(org.apache.hadoop.hive.ql.metadata.Partition) AlterTableExchangePartition(org.apache.hadoop.hive.ql.plan.AlterTableExchangePartition) TextMetaDataTable(org.apache.hadoop.hive.ql.metadata.formatting.TextMetaDataTable) Table(org.apache.hadoop.hive.ql.metadata.Table) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) CheckConstraint(org.apache.hadoop.hive.ql.metadata.CheckConstraint) NotNullConstraint(org.apache.hadoop.hive.ql.metadata.NotNullConstraint) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) DefaultConstraint(org.apache.hadoop.hive.ql.metadata.DefaultConstraint) UniqueConstraint(org.apache.hadoop.hive.ql.metadata.UniqueConstraint) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) PartitionIterable(org.apache.hadoop.hive.ql.metadata.PartitionIterable) Deserializer(org.apache.hadoop.hive.serde2.Deserializer) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData)

Example 10 with Output

use of org.apache.hadoop.hive.serde2.ByteStream.Output in project hive by apache.

the class DDLTask method alterTableAlterPart.

/**
 * Alter partition column type in a table
 *
 * @param db
 *          Database to rename the partition.
 * @param alterPartitionDesc
 *          change partition column type.
 * @return Returns 0 when execution succeeds and above 0 if it fails.
 * @throws HiveException
 */
private int alterTableAlterPart(Hive db, AlterTableAlterPartDesc alterPartitionDesc) throws HiveException {
    Table tbl = db.getTable(alterPartitionDesc.getTableName(), true);
    // This is checked by DDLSemanticAnalyzer
    assert (tbl.isPartitioned());
    List<FieldSchema> newPartitionKeys = new ArrayList<FieldSchema>();
    // with a non null value before trying to alter the partition column type.
    try {
        Set<Partition> partitions = db.getAllPartitionsOf(tbl);
        int colIndex = -1;
        for (FieldSchema col : tbl.getTTable().getPartitionKeys()) {
            colIndex++;
            if (col.getName().compareTo(alterPartitionDesc.getPartKeySpec().getName()) == 0) {
                break;
            }
        }
        if (colIndex == -1 || colIndex == tbl.getTTable().getPartitionKeys().size()) {
            throw new HiveException("Cannot find partition column " + alterPartitionDesc.getPartKeySpec().getName());
        }
        TypeInfo expectedType = TypeInfoUtils.getTypeInfoFromTypeString(alterPartitionDesc.getPartKeySpec().getType());
        ObjectInspector outputOI = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(expectedType);
        Converter converter = ObjectInspectorConverters.getConverter(PrimitiveObjectInspectorFactory.javaStringObjectInspector, outputOI);
        // For all the existing partitions, check if the value can be type casted to a non-null object
        for (Partition part : partitions) {
            if (part.getName().equals(conf.getVar(HiveConf.ConfVars.DEFAULTPARTITIONNAME))) {
                continue;
            }
            try {
                String value = part.getValues().get(colIndex);
                Object convertedValue = converter.convert(value);
                if (convertedValue == null) {
                    throw new HiveException(" Converting from " + TypeInfoFactory.stringTypeInfo + " to " + expectedType + " for value : " + value + " resulted in NULL object");
                }
            } catch (Exception e) {
                throw new HiveException("Exception while converting " + TypeInfoFactory.stringTypeInfo + " to " + expectedType + " for value : " + part.getValues().get(colIndex));
            }
        }
    } catch (Exception e) {
        throw new HiveException("Exception while checking type conversion of existing partition values to " + alterPartitionDesc.getPartKeySpec() + " : " + e.getMessage());
    }
    for (FieldSchema col : tbl.getTTable().getPartitionKeys()) {
        if (col.getName().compareTo(alterPartitionDesc.getPartKeySpec().getName()) == 0) {
            newPartitionKeys.add(alterPartitionDesc.getPartKeySpec());
        } else {
            newPartitionKeys.add(col);
        }
    }
    tbl.getTTable().setPartitionKeys(newPartitionKeys);
    db.alterTable(tbl, null);
    work.getInputs().add(new ReadEntity(tbl));
    // We've already locked the table as the input, don't relock it as the output.
    addIfAbsentByName(new WriteEntity(tbl, WriteEntity.WriteType.DDL_NO_LOCK));
    return 0;
}
Also used : Partition(org.apache.hadoop.hive.ql.metadata.Partition) AlterTableExchangePartition(org.apache.hadoop.hive.ql.plan.AlterTableExchangePartition) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) TextMetaDataTable(org.apache.hadoop.hive.ql.metadata.formatting.TextMetaDataTable) Table(org.apache.hadoop.hive.ql.metadata.Table) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) ArrayList(java.util.ArrayList) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) CheckConstraint(org.apache.hadoop.hive.ql.metadata.CheckConstraint) NotNullConstraint(org.apache.hadoop.hive.ql.metadata.NotNullConstraint) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) DefaultConstraint(org.apache.hadoop.hive.ql.metadata.DefaultConstraint) UniqueConstraint(org.apache.hadoop.hive.ql.metadata.UniqueConstraint) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) AlreadyExistsException(org.apache.hadoop.hive.metastore.api.AlreadyExistsException) InvalidOperationException(org.apache.hadoop.hive.metastore.api.InvalidOperationException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) NoSuchObjectException(org.apache.hadoop.hive.metastore.api.NoSuchObjectException) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) URISyntaxException(java.net.URISyntaxException) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) InvalidObjectException(org.apache.hadoop.hive.metastore.api.InvalidObjectException) SQLException(java.sql.SQLException) FileNotFoundException(java.io.FileNotFoundException) HiveAuthzPluginException(org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzPluginException) InvalidTableException(org.apache.hadoop.hive.ql.metadata.InvalidTableException) ReadEntity(org.apache.hadoop.hive.ql.hooks.ReadEntity) StatObjectConverter(org.apache.hadoop.hive.metastore.StatObjectConverter) Converter(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter) HivePrivilegeObject(org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject) HiveLockObject(org.apache.hadoop.hive.ql.lockmgr.HiveLockObject) WriteEntity(org.apache.hadoop.hive.ql.hooks.WriteEntity)

Aggregations

ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)77 ArrayList (java.util.ArrayList)72 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)48 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)48 Test (org.junit.Test)44 DeferredJavaObject (org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject)43 Output (org.apache.hadoop.hive.serde2.ByteStream.Output)42 DeferredObject (org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject)41 StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)41 PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)37 Text (org.apache.hadoop.io.Text)35 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)32 IOException (java.io.IOException)29 DateWritableV2 (org.apache.hadoop.hive.serde2.io.DateWritableV2)24 BytesWritable (org.apache.hadoop.io.BytesWritable)23 VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)22 HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable)22 SerDeException (org.apache.hadoop.hive.serde2.SerDeException)21 TimestampWritableV2 (org.apache.hadoop.hive.serde2.io.TimestampWritableV2)21 List (java.util.List)18