Example 21 with Deserializer

use of org.apache.hadoop.hive.serde2.Deserializer in project hive by apache.

the class MapOperator method initEmptyInputChildren.

/*
   * This is the same as the setChildren method below, but for empty tables.
   * It takes care of the following:
   * 1. Creating the right object inspector.
   * 2. Setting up childrenOpToOI with that object inspector,
   *    so that initialization happens correctly.
   */
public void initEmptyInputChildren(List<Operator<?>> children, Configuration hconf) throws SerDeException, Exception {
    setChildOperators(children);
    Map<String, Configuration> tableNameToConf = cloneConfsForNestedColPruning(hconf);
    for (Operator<?> child : children) {
        TableScanOperator tsOp = (TableScanOperator) child;
        PartitionDesc partDesc = conf.getAliasToPartnInfo().get(tsOp.getConf().getAlias());
        Configuration newConf = tableNameToConf.get(partDesc.getTableDesc().getTableName());
        Deserializer serde = partDesc.getTableDesc().getDeserializer();
        partDesc.setProperties(partDesc.getProperties());
        MapOpCtx opCtx = new MapOpCtx(tsOp.getConf().getAlias(), child, partDesc);
        StructObjectInspector tableRowOI = (StructObjectInspector) serde.getObjectInspector();
        initObjectInspector(newConf, opCtx, tableRowOI);
        StructObjectInspector soi = opCtx.rowObjectInspector;
        child.getParentOperators().add(this);
        childrenOpToOI.put(child, soi);
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) Deserializer(org.apache.hadoop.hive.serde2.Deserializer) PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)
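
The core pattern in this method is worth isolating: initialize a Deserializer from table properties, then cast its ObjectInspector to a StructObjectInspector describing the row shape. Here is a minimal, self-contained sketch of that pattern using LazySimpleSerDe; the two-column schema in the properties is hypothetical, chosen purely for illustration.

import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde2.Deserializer;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;

public class RowShapeSketch {
    // Build a Deserializer from (hypothetical) table properties and read back
    // the row shape, much as initEmptyInputChildren does via the TableDesc.
    public static StructObjectInspector rowShape(Configuration conf) throws SerDeException {
        Properties props = new Properties();
        // Hypothetical two-column schema, using the standard serde property keys.
        props.setProperty("columns", "id,name");
        props.setProperty("columns.types", "int:string");
        Deserializer serde = new LazySimpleSerDe();
        serde.initialize(conf, props);
        // Row-oriented SerDes report a struct-shaped inspector.
        return (StructObjectInspector) serde.getObjectInspector();
    }
}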

Example 22 with Deserializer

use of org.apache.hadoop.hive.serde2.Deserializer in project hive by apache.

the class MapOperator method getConvertedOI.

// Returns the mapping from each table descriptor to the expected table OI.
/**
   * Traverse all the partitions of a table and get the OI for the table.
   * Note that a conversion is required if any partition OI differs from the
   * table OI. For example, if the query references table T (partitions P1, P2),
   * and P1's schema is the same as T's while P2's schema differs from T's,
   * conversion might be needed for both P1 and P2, since a SettableOI might be
   * needed for T.
   */
private Map<TableDesc, StructObjectInspector> getConvertedOI(Map<String, Configuration> tableToConf) throws HiveException {
    Map<TableDesc, StructObjectInspector> tableDescOI = new HashMap<TableDesc, StructObjectInspector>();
    Set<TableDesc> identityConverterTableDesc = new HashSet<TableDesc>();
    try {
        Map<ObjectInspector, Boolean> oiSettableProperties = new HashMap<ObjectInspector, Boolean>();
        for (Path onefile : conf.getPathToAliases().keySet()) {
            PartitionDesc pd = conf.getPathToPartitionInfo().get(onefile);
            TableDesc tableDesc = pd.getTableDesc();
            Configuration hconf = tableToConf.get(tableDesc.getTableName());
            Deserializer partDeserializer = pd.getDeserializer(hconf);
            StructObjectInspector partRawRowObjectInspector;
            boolean isAcid = AcidUtils.isTablePropertyTransactional(tableDesc.getProperties());
            if (Utilities.isSchemaEvolutionEnabled(hconf, isAcid) && Utilities.isInputFileFormatSelfDescribing(pd)) {
                Deserializer tblDeserializer = tableDesc.getDeserializer(hconf);
                partRawRowObjectInspector = (StructObjectInspector) tblDeserializer.getObjectInspector();
            } else {
                partRawRowObjectInspector = (StructObjectInspector) partDeserializer.getObjectInspector();
            }
            StructObjectInspector tblRawRowObjectInspector = tableDescOI.get(tableDesc);
            if ((tblRawRowObjectInspector == null) || (identityConverterTableDesc.contains(tableDesc))) {
                Deserializer tblDeserializer = tableDesc.getDeserializer(hconf);
                tblRawRowObjectInspector = (StructObjectInspector) ObjectInspectorConverters.getConvertedOI(partRawRowObjectInspector, tblDeserializer.getObjectInspector(), oiSettableProperties);
                if (identityConverterTableDesc.contains(tableDesc)) {
                    if (!partRawRowObjectInspector.equals(tblRawRowObjectInspector)) {
                        identityConverterTableDesc.remove(tableDesc);
                    }
                } else if (partRawRowObjectInspector.equals(tblRawRowObjectInspector)) {
                    identityConverterTableDesc.add(tableDesc);
                }
                tableDescOI.put(tableDesc, tblRawRowObjectInspector);
            }
        }
    } catch (Exception e) {
        throw new HiveException(e);
    }
    return tableDescOI;
}
Also used : Path(org.apache.hadoop.fs.Path) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) Configuration(org.apache.hadoop.conf.Configuration) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) Deserializer(org.apache.hadoop.hive.serde2.Deserializer) PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) HashSet(java.util.HashSet)
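
The pivotal call in this method is ObjectInspectorConverters.getConvertedOI, which produces the inspector that partition rows must be converted to. Below is a minimal sketch of the identity case with a made-up one-column struct: when the partition OI equals the table OI, no real conversion is needed, which is exactly what the identityConverterTableDesc bookkeeping above tracks.

import java.util.Arrays;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class ConvertedOiSketch {
    public static void main(String[] args) {
        // A one-column struct OI standing in for both the partition and table OI.
        StructObjectInspector partOI = ObjectInspectorFactory.getStandardStructObjectInspector(
                Arrays.asList("id"),
                Arrays.<ObjectInspector>asList(PrimitiveObjectInspectorFactory.javaIntObjectInspector));
        StructObjectInspector tableOI = partOI; // identical schemas
        ObjectInspector converted = ObjectInspectorConverters.getConvertedOI(partOI, tableOI);
        // With matching, settable OIs the converted OI should equal the partition
        // OI; this is the case the method records in identityConverterTableDesc.
        System.out.println(partOI.equals(converted));
    }
}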

Example 23 with Deserializer

use of org.apache.hadoop.hive.serde2.Deserializer in project hive by apache.

the class JdbcSerDe method initialize.

/*
   * This method gets called multiple times by Hive, and on some invocations the
   * properties will be empty. We need to detect when the properties are non-empty
   * in order to initialize the class variables.
   *
   * @see org.apache.hadoop.hive.serde2.Deserializer#initialize(org.apache.hadoop.conf.Configuration, java.util.Properties)
   */
@Override
public void initialize(Configuration conf, Properties tbl) throws SerDeException {
    try {
        LOGGER.debug("Initializing the SerDe");
        // Hive cdh-4.3 does not provide the properties object on all calls
        if (tbl.containsKey(JdbcStorageConfig.DATABASE_TYPE.getPropertyName())) {
            Configuration tableConfig = JdbcStorageConfigManager.convertPropertiesToConfiguration(tbl);
            DatabaseAccessor dbAccessor = DatabaseAccessorFactory.getAccessor(tableConfig);
            columnNames = dbAccessor.getColumnNames(tableConfig);
            numColumns = columnNames.size();
            String[] hiveColumnNameArray = parseProperty(tbl.getProperty(serdeConstants.LIST_COLUMNS), ",");
            if (numColumns != hiveColumnNameArray.length) {
                throw new SerDeException("Expected " + numColumns + " columns. Table definition has " + hiveColumnNameArray.length + " columns");
            }
            List<String> hiveColumnNames = Arrays.asList(hiveColumnNameArray);
            hiveColumnTypeArray = parseProperty(tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES), ":");
            if (hiveColumnTypeArray.length == 0) {
                throw new SerDeException("Received an empty Hive column type definition");
            }
            List<ObjectInspector> fieldInspectors = new ArrayList<ObjectInspector>(numColumns);
            for (int i = 0; i < numColumns; i++) {
                fieldInspectors.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
            }
            objectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(hiveColumnNames, fieldInspectors);
            row = new ArrayList<String>(numColumns);
        }
    } catch (Exception e) {
        LOGGER.error("Caught exception while initializing the SqlSerDe", e);
        throw new SerDeException(e);
    }
}
Also used : StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) Configuration(org.apache.hadoop.conf.Configuration) ArrayList(java.util.ArrayList) DatabaseAccessor(org.apache.hive.storage.jdbc.dao.DatabaseAccessor) SerDeException(org.apache.hadoop.hive.serde2.SerDeException)
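
The parsing above hinges on two standard serde table properties: serdeConstants.LIST_COLUMNS is comma-separated while serdeConstants.LIST_COLUMN_TYPES is colon-separated, and the two lists must agree in length. A minimal sketch of just that step, assuming the same property keys (the class and method names here are hypothetical):

import java.util.Arrays;
import java.util.List;
import java.util.Properties;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.SerDeException;

public class ColumnPropsSketch {
    // Mirrors the parse-and-validate step in JdbcSerDe.initialize: names are
    // comma-separated, types colon-separated, and the counts must match.
    public static List<String> parseColumns(Properties tbl) throws SerDeException {
        String[] names = tbl.getProperty(serdeConstants.LIST_COLUMNS, "").split(",");
        String[] types = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES, "").split(":");
        if (names.length != types.length) {
            throw new SerDeException("Expected " + names.length + " types, got " + types.length);
        }
        return Arrays.asList(names);
    }
}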

Example 24 with Deserializer

use of org.apache.hadoop.hive.serde2.Deserializer in project hive by apache.

the class HiveInputFormat method wrapForLlap.

public static InputFormat<WritableComparable, Writable> wrapForLlap(InputFormat<WritableComparable, Writable> inputFormat, Configuration conf, PartitionDesc part) throws HiveException {
    if (!HiveConf.getBoolVar(conf, ConfVars.LLAP_IO_ENABLED, LlapProxy.isDaemon())) {
        // LLAP not enabled, no-op.
        return inputFormat;
    }
    String ifName = inputFormat.getClass().getCanonicalName();
    boolean isSupported = inputFormat instanceof LlapWrappableInputFormatInterface;
    boolean isVectorized = Utilities.getUseVectorizedInputFileFormat(conf);
    if (!isVectorized) {
        // Pretend it's vectorized if the non-vector wrapper is enabled.
        isVectorized = HiveConf.getBoolVar(conf, ConfVars.LLAP_IO_NONVECTOR_WRAPPER_ENABLED) && (Utilities.getPlanPath(conf) != null);
    }
    boolean isSerdeBased = false;
    if (isVectorized && !isSupported && HiveConf.getBoolVar(conf, ConfVars.LLAP_IO_ENCODE_ENABLED)) {
        // See if we can use re-encoding to read the format through the IO elevator.
        String formatList = HiveConf.getVar(conf, ConfVars.LLAP_IO_ENCODE_FORMATS);
        if (LOG.isDebugEnabled()) {
            LOG.debug("Checking " + ifName + " against " + formatList);
        }
        String[] formats = StringUtils.getStrings(formatList);
        if (formats != null) {
            for (String format : formats) {
                // TODO: should we check isAssignableFrom?
                if (ifName.equals(format)) {
                    if (LOG.isInfoEnabled()) {
                        LOG.info("Using SerDe-based LLAP reader for " + ifName);
                    }
                    isSupported = isSerdeBased = true;
                    break;
                }
            }
        }
    }
    if (!isSupported || !isVectorized) {
        if (LOG.isInfoEnabled()) {
            LOG.info("Not using llap for " + ifName + ": supported = " + isSupported + ", vectorized = " + isVectorized);
        }
        return inputFormat;
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("Wrapping " + ifName);
    }
    @SuppressWarnings("unchecked") LlapIo<VectorizedRowBatch> llapIo = LlapProxy.getIo();
    if (llapIo == null) {
        if (LOG.isInfoEnabled()) {
            LOG.info("Not using LLAP IO because it is not initialized");
        }
        return inputFormat;
    }
    Deserializer serde = null;
    if (isSerdeBased) {
        if (part == null) {
            if (LOG.isInfoEnabled()) {
                LOG.info("Not using LLAP IO because there's no partition spec for SerDe-based IF");
            }
            return inputFormat;
        }
        VectorPartitionDesc vpart = part.getVectorPartitionDesc();
        if (vpart != null) {
            VectorMapOperatorReadType old = vpart.getVectorMapOperatorReadType();
            if (old != VectorMapOperatorReadType.VECTORIZED_INPUT_FILE_FORMAT) {
                if (LOG.isInfoEnabled()) {
                    LOG.info("Resetting VectorMapOperatorReadType from " + old + " for partition " + part.getTableName() + " " + part.getPartSpec());
                }
                vpart.setVectorMapOperatorReadType(VectorMapOperatorReadType.VECTORIZED_INPUT_FILE_FORMAT);
            }
        }
        try {
            serde = part.getDeserializer(conf);
        } catch (Exception e) {
            throw new HiveException("Error creating SerDe for LLAP IO", e);
        }
    }
    InputFormat<?, ?> wrappedIf = llapIo.getInputFormat(inputFormat, serde);
    if (wrappedIf == null) {
        // We cannot wrap; the cause is logged inside.
        return inputFormat;
    }
    return castInputFormat(wrappedIf);
}
Also used : HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) VectorPartitionDesc(org.apache.hadoop.hive.ql.plan.VectorPartitionDesc) IOException(java.io.IOException) VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) Deserializer(org.apache.hadoop.hive.serde2.Deserializer) VectorMapOperatorReadType(org.apache.hadoop.hive.ql.plan.VectorPartitionDesc.VectorMapOperatorReadType)
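
Stripped of logging and the SerDe plumbing, the wrapping decision reduces to a few configuration gates checked up front. A condensed sketch of that gating, using the same ConfVars keys; the shouldWrap helper is hypothetical, and the false default for LLAP_IO_ENABLED stands in for the LlapProxy.isDaemon() default used above.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;

public class LlapGateSketch {
    // Condenses the early-exit checks of wrapForLlap: if LLAP IO is off, or the
    // input format is unsupported or non-vectorized, no wrapping happens.
    public static boolean shouldWrap(Configuration conf, boolean isSupported, boolean isVectorized) {
        if (!HiveConf.getBoolVar(conf, ConfVars.LLAP_IO_ENABLED, false)) {
            return false;
        }
        return isSupported && isVectorized;
    }
}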

Example 25 with Deserializer

use of org.apache.hadoop.hive.serde2.Deserializer in project hive by apache.

the class TestSerdeWithFieldComments method testFieldComments.

public void testFieldComments() throws MetaException, SerDeException {
    StructObjectInspector mockSOI = mock(StructObjectInspector.class);
    when(mockSOI.getCategory()).thenReturn(ObjectInspector.Category.STRUCT);
    List<StructField> fieldRefs = new ArrayList<>();
    // Add field with a comment...
    fieldRefs.add(mockedStructField("first", "type name 1", "this is a comment"));
    // ... and one without
    fieldRefs.add(mockedStructField("second", "type name 2", null));
    when(mockSOI.getAllStructFieldRefs()).thenReturn(fieldRefs);
    Deserializer mockDe = mock(Deserializer.class);
    when(mockDe.getObjectInspector()).thenReturn(mockSOI);
    List<FieldSchema> result = MetaStoreUtils.getFieldsFromDeserializer("testTable", mockDe);
    assertEquals(2, result.size());
    assertEquals("first", result.get(0).getName());
    assertEquals("this is a comment", result.get(0).getComment());
    assertEquals("second", result.get(1).getName());
    assertEquals("from deserializer", result.get(1).getComment());
}
Also used : FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) ArrayList(java.util.ArrayList) List(java.util.List) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)
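
The test's setup is a standard two-step Mockito wiring: stub a StructObjectInspector, then stub a Deserializer to return it. A minimal sketch of that setup, extracted into a hypothetical helper class:

import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import org.apache.hadoop.hive.serde2.Deserializer;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;

public class MockDeserializerSketch {
    // Stub the inspector first, then hand it to a stubbed Deserializer,
    // the same wiring testFieldComments uses before calling the metastore helper.
    public static Deserializer mockedDeserializer() throws SerDeException {
        StructObjectInspector soi = mock(StructObjectInspector.class);
        when(soi.getCategory()).thenReturn(ObjectInspector.Category.STRUCT);
        Deserializer de = mock(Deserializer.class);
        when(de.getObjectInspector()).thenReturn(soi);
        return de;
    }
}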

Aggregations

Deserializer (org.apache.hadoop.hive.serde2.Deserializer): 19
ArrayList (java.util.ArrayList): 14
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 14
SerDeException (org.apache.hadoop.hive.serde2.SerDeException): 13
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 10
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 10
IOException (java.io.IOException): 7
Properties (java.util.Properties): 7
Path (org.apache.hadoop.fs.Path): 6
MetaException (org.apache.hadoop.hive.metastore.api.MetaException): 6
Configuration (org.apache.hadoop.conf.Configuration): 5
HashMap (java.util.HashMap): 4
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 4
PartitionDesc (org.apache.hadoop.hive.ql.plan.PartitionDesc): 4
LazySimpleSerDe (org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe): 4
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo): 4
List (java.util.List): 3
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 3
TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc): 3
StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField): 3