
Example 16 with LazySimpleSerDe

Use of org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe in project hive by apache.

From the class TestRegexSerDe, the method testRegexSerDe:

/**
 * Test the RegexSerDe class.
 */
public void testRegexSerDe() throws Throwable {
    try {
        // Create the SerDe
        AbstractSerDe serDe = createSerDe("host,identity,user,time,request,status,size,referer,agent", "string,string,string,string,string,string,string,string,string", "([^ ]*) ([^ ]*) ([^ ]*) (-|\\[[^\\]]*\\]) ([^ \"]*|\"[^\"]*\") " + "([0-9]*) ([0-9]*) ([^ \"]*|\"[^\"]*\") ([^ \"]*|\"[^\"]*\")", "%1$s %2$s %3$s %4$s %5$s %6$s %7$s %8$s %9$s");
        // Data
        Text t = new Text("127.0.0.1 - - [26/May/2009:00:00:00 +0000] " + "\"GET /someurl/?track=Blabla(Main) HTTP/1.1\" 200 5864 - " + "\"Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) " + "AppleWebKit/525.19 (KHTML, like Gecko) Chrome/1.0.154.65 Safari/525.19\"");
        // Deserialize
        Object row = serDe.deserialize(t);
        ObjectInspector rowOI = serDe.getObjectInspector();
        System.out.println("Deserialized row: " + row);
        // Serialize
        Text serialized = (Text) serDe.serialize(row, rowOI);
        assertEquals(t, serialized);
        // Copy the row to a standard writable-backed object (optional) and serialize again
        ObjectInspector standardWritableRowOI = ObjectInspectorUtils.getStandardObjectInspector(rowOI, ObjectInspectorCopyOption.WRITABLE);
        Object standardWritableRow = ObjectInspectorUtils.copyToStandardObject(row, rowOI, ObjectInspectorCopyOption.WRITABLE);
        // Serialize
        serialized = (Text) serDe.serialize(standardWritableRow, standardWritableRowOI);
        assertEquals(t, serialized);
    } catch (Throwable e) {
        e.printStackTrace();
        throw e;
    }
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) Text(org.apache.hadoop.io.Text) AbstractSerDe(org.apache.hadoop.hive.serde2.AbstractSerDe)
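
The createSerDe helper is not shown in this excerpt. Below is a minimal sketch of what such a helper plausibly does, assuming the contrib RegexSerDe (which honors the input.regex and output.format.string properties implied by the arguments above); the actual helper in TestRegexSerDe may differ:

import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.AbstractSerDe;
import org.apache.hadoop.hive.serde2.SerDeUtils;
import org.apache.hadoop.hive.contrib.serde2.RegexSerDe;

// Hypothetical reconstruction of the createSerDe helper called in the test above.
static AbstractSerDe createSerDe(String columns, String columnTypes,
        String inputRegex, String outputFormatString) throws Exception {
    Properties schema = new Properties();
    schema.setProperty(serdeConstants.LIST_COLUMNS, columns);
    schema.setProperty(serdeConstants.LIST_COLUMN_TYPES, columnTypes);
    schema.setProperty("input.regex", inputRegex);                   // regex groups map to columns
    schema.setProperty("output.format.string", outputFormatString); // java.util.Formatter template
    AbstractSerDe serDe = new RegexSerDe();
    SerDeUtils.initializeSerDe(serDe, new Configuration(), schema, null);
    return serDe;
}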

Example 17 with LazySimpleSerDe

Use of org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe in project hive by apache.

From the class DDLTask, the method alterTableOrSinglePartition:

private List<Task<?>> alterTableOrSinglePartition(AlterTableDesc alterTbl, Table tbl, Partition part) throws HiveException {
    EnvironmentContext environmentContext = alterTbl.getEnvironmentContext();
    if (environmentContext == null) {
        environmentContext = new EnvironmentContext();
        alterTbl.setEnvironmentContext(environmentContext);
    }
    // stats do not need to be updated in alter table/partition operations
    if (environmentContext.getProperties() == null || environmentContext.getProperties().get(StatsSetupConst.DO_NOT_UPDATE_STATS) == null) {
        environmentContext.putToProperties(StatsSetupConst.DO_NOT_UPDATE_STATS, StatsSetupConst.TRUE);
    }
    if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.RENAME) {
        tbl.setDbName(Utilities.getDatabaseName(alterTbl.getNewName()));
        tbl.setTableName(Utilities.getTableName(alterTbl.getNewName()));
    } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDCOLS) {
        StorageDescriptor sd = retrieveStorageDescriptor(tbl, part);
        String serializationLib = sd.getSerdeInfo().getSerializationLib();
        AvroSerdeUtils.handleAlterTableForAvro(conf, serializationLib, tbl.getTTable().getParameters());
        List<FieldSchema> oldCols = (part == null ? tbl.getColsForMetastore() : part.getColsForMetastore());
        List<FieldSchema> newCols = alterTbl.getNewCols();
        if (serializationLib.equals("org.apache.hadoop.hive.serde.thrift.columnsetSerDe")) {
            console.printInfo("Replacing columns for columnsetSerDe and changing to LazySimpleSerDe");
            sd.getSerdeInfo().setSerializationLib(LazySimpleSerDe.class.getName());
            sd.setCols(newCols);
        } else {
            // make sure the new columns do not already exist
            Iterator<FieldSchema> iterNewCols = newCols.iterator();
            while (iterNewCols.hasNext()) {
                FieldSchema newCol = iterNewCols.next();
                String newColName = newCol.getName();
                Iterator<FieldSchema> iterOldCols = oldCols.iterator();
                while (iterOldCols.hasNext()) {
                    String oldColName = iterOldCols.next().getName();
                    if (oldColName.equalsIgnoreCase(newColName)) {
                        throw new HiveException(ErrorMsg.DUPLICATE_COLUMN_NAMES, newColName);
                    }
                }
                oldCols.add(newCol);
            }
            sd.setCols(oldCols);
        }
    } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.RENAMECOLUMN) {
        StorageDescriptor sd = retrieveStorageDescriptor(tbl, part);
        String serializationLib = sd.getSerdeInfo().getSerializationLib();
        AvroSerdeUtils.handleAlterTableForAvro(conf, serializationLib, tbl.getTTable().getParameters());
        List<FieldSchema> oldCols = (part == null ? tbl.getColsForMetastore() : part.getColsForMetastore());
        List<FieldSchema> newCols = new ArrayList<FieldSchema>();
        Iterator<FieldSchema> iterOldCols = oldCols.iterator();
        String oldName = alterTbl.getOldColName();
        String newName = alterTbl.getNewColName();
        String type = alterTbl.getNewColType();
        String comment = alterTbl.getNewColComment();
        boolean first = alterTbl.getFirst();
        String afterCol = alterTbl.getAfterCol();
        // if orc table, restrict reordering columns as it will break schema evolution
        boolean isOrcSchemaEvolution = sd.getInputFormat().equals(OrcInputFormat.class.getName()) && isSchemaEvolutionEnabled(tbl);
        if (isOrcSchemaEvolution && (first || (afterCol != null && !afterCol.trim().isEmpty()))) {
            throw new HiveException(ErrorMsg.CANNOT_REORDER_COLUMNS, alterTbl.getOldName());
        }
        FieldSchema column = null;
        boolean found = false;
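        // insertion index for the renamed column within newCols; -1 means keep its current position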
        int position = -1;
        if (first) {
            position = 0;
        }
        int i = 1;
        while (iterOldCols.hasNext()) {
            FieldSchema col = iterOldCols.next();
            String oldColName = col.getName();
            if (oldColName.equalsIgnoreCase(newName) && !oldColName.equalsIgnoreCase(oldName)) {
                throw new HiveException(ErrorMsg.DUPLICATE_COLUMN_NAMES, newName);
            } else if (oldColName.equalsIgnoreCase(oldName)) {
                col.setName(newName);
                if (type != null && !type.trim().equals("")) {
                    col.setType(type);
                }
                if (comment != null) {
                    col.setComment(comment);
                }
                found = true;
                if (first || (afterCol != null && !afterCol.trim().equals(""))) {
                    column = col;
                    continue;
                }
            }
            if (afterCol != null && !afterCol.trim().equals("") && oldColName.equalsIgnoreCase(afterCol)) {
                position = i;
            }
            i++;
            newCols.add(col);
        }
        // did not find the column
        if (!found) {
            throw new HiveException(ErrorMsg.INVALID_COLUMN, oldName);
        }
        // an AFTER column was specified, but it was not found among the existing columns
        if ((afterCol != null && !afterCol.trim().equals("")) && position < 0) {
            throw new HiveException(ErrorMsg.INVALID_COLUMN, afterCol);
        }
        if (position >= 0) {
            newCols.add(position, column);
        }
        sd.setCols(newCols);
    } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.REPLACECOLS) {
        StorageDescriptor sd = retrieveStorageDescriptor(tbl, part);
        // change SerDe to LazySimpleSerDe if it is columnsetSerDe
        String serializationLib = sd.getSerdeInfo().getSerializationLib();
        if (serializationLib.equals("org.apache.hadoop.hive.serde.thrift.columnsetSerDe")) {
            console.printInfo("Replacing columns for columnsetSerDe and changing to LazySimpleSerDe");
            sd.getSerdeInfo().setSerializationLib(LazySimpleSerDe.class.getName());
        } else if (!serializationLib.equals(MetadataTypedColumnsetSerDe.class.getName()) && !serializationLib.equals(LazySimpleSerDe.class.getName()) && !serializationLib.equals(ColumnarSerDe.class.getName()) && !serializationLib.equals(DynamicSerDe.class.getName()) && !serializationLib.equals(ParquetHiveSerDe.class.getName()) && !serializationLib.equals(OrcSerde.class.getName())) {
            throw new HiveException(ErrorMsg.CANNOT_REPLACE_COLUMNS, alterTbl.getOldName());
        }
        final boolean isOrcSchemaEvolution = serializationLib.equals(OrcSerde.class.getName()) && isSchemaEvolutionEnabled(tbl);
        // adding columns and limited integer type promotion is supported for ORC schema evolution
        if (isOrcSchemaEvolution) {
            final List<FieldSchema> existingCols = sd.getCols();
            final List<FieldSchema> replaceCols = alterTbl.getNewCols();
            if (replaceCols.size() < existingCols.size()) {
                throw new HiveException(ErrorMsg.REPLACE_CANNOT_DROP_COLUMNS, alterTbl.getOldName());
            }
        }
        boolean partitioned = tbl.isPartitioned();
        boolean droppingColumns = alterTbl.getNewCols().size() < sd.getCols().size();
        if (ParquetHiveSerDe.isParquetTable(tbl) && isSchemaEvolutionEnabled(tbl) && !alterTbl.getIsCascade() && droppingColumns && partitioned) {
            LOG.warn("Cannot drop columns from a partitioned parquet table without the CASCADE option");
            throw new HiveException(ErrorMsg.REPLACE_CANNOT_DROP_COLUMNS, alterTbl.getOldName());
        }
        sd.setCols(alterTbl.getNewCols());
    } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDPROPS) {
        return alterTableAddProps(alterTbl, tbl, part, environmentContext);
    } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.DROPPROPS) {
        return alterTableDropProps(alterTbl, tbl, part, environmentContext);
    } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDSERDEPROPS) {
        StorageDescriptor sd = retrieveStorageDescriptor(tbl, part);
        sd.getSerdeInfo().getParameters().putAll(alterTbl.getProps());
    } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDSERDE) {
        StorageDescriptor sd = retrieveStorageDescriptor(tbl, part);
        String serdeName = alterTbl.getSerdeName();
        String oldSerdeName = sd.getSerdeInfo().getSerializationLib();
        // if orc table, restrict changing the serde as it can break schema evolution
        if (isSchemaEvolutionEnabled(tbl) && oldSerdeName.equalsIgnoreCase(OrcSerde.class.getName()) && !serdeName.equalsIgnoreCase(OrcSerde.class.getName())) {
            throw new HiveException(ErrorMsg.CANNOT_CHANGE_SERDE, OrcSerde.class.getSimpleName(), alterTbl.getOldName());
        }
        sd.getSerdeInfo().setSerializationLib(serdeName);
        if ((alterTbl.getProps() != null) && (alterTbl.getProps().size() > 0)) {
            sd.getSerdeInfo().getParameters().putAll(alterTbl.getProps());
        }
        if (part != null) {
            // TODO: this is a no-op; it re-sets the partition's columns to their current value.
            part.getTPartition().getSd().setCols(part.getTPartition().getSd().getCols());
        } else {
            if (Table.shouldStoreFieldsInMetastore(conf, serdeName, tbl.getParameters()) && !Table.hasMetastoreBasedSchema(conf, oldSerdeName)) {
                // If the new SerDe needs to store fields in the metastore but the old one did not,
                // save the fields so the new SerDe can operate. Note that this may fail if some fields
                // from the old SerDe are too long to be stored in the metastore, but there's nothing we can do.
                try {
                    Deserializer oldSerde = HiveMetaStoreUtils.getDeserializer(conf, tbl.getTTable(), false, oldSerdeName);
                    tbl.setFields(Hive.getFieldsFromDeserializer(tbl.getTableName(), oldSerde));
                } catch (MetaException ex) {
                    throw new HiveException(ex);
                }
            }
        }
    } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDFILEFORMAT) {
        StorageDescriptor sd = retrieveStorageDescriptor(tbl, part);
        // if orc table, restrict changing the file format as it can break schema evolution
        if (isSchemaEvolutionEnabled(tbl) && sd.getInputFormat().equals(OrcInputFormat.class.getName()) && !alterTbl.getInputFormat().equals(OrcInputFormat.class.getName())) {
            throw new HiveException(ErrorMsg.CANNOT_CHANGE_FILEFORMAT, "ORC", alterTbl.getOldName());
        }
        sd.setInputFormat(alterTbl.getInputFormat());
        sd.setOutputFormat(alterTbl.getOutputFormat());
        if (alterTbl.getSerdeName() != null) {
            sd.getSerdeInfo().setSerializationLib(alterTbl.getSerdeName());
        }
    } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDCLUSTERSORTCOLUMN) {
        StorageDescriptor sd = retrieveStorageDescriptor(tbl, part);
        // validate sort columns and bucket columns
        List<String> columns = Utilities.getColumnNamesFromFieldSchema(tbl.getCols());
        if (!alterTbl.isTurnOffSorting()) {
            Utilities.validateColumnNames(columns, alterTbl.getBucketColumns());
        }
        if (alterTbl.getSortColumns() != null) {
            Utilities.validateColumnNames(columns, Utilities.getColumnNamesFromSortCols(alterTbl.getSortColumns()));
        }
        if (alterTbl.isTurnOffSorting()) {
            sd.setSortCols(new ArrayList<Order>());
        } else if (alterTbl.getNumberBuckets() == -1) {
            // -1 buckets means to turn off bucketing
            sd.setBucketCols(new ArrayList<String>());
            sd.setNumBuckets(-1);
            sd.setSortCols(new ArrayList<Order>());
        } else {
            sd.setBucketCols(alterTbl.getBucketColumns());
            sd.setNumBuckets(alterTbl.getNumberBuckets());
            sd.setSortCols(alterTbl.getSortColumns());
        }
    } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ALTERLOCATION) {
        StorageDescriptor sd = retrieveStorageDescriptor(tbl, part);
        String newLocation = alterTbl.getNewLocation();
        try {
            URI locUri = new URI(newLocation);
            if (!new Path(locUri).isAbsolute()) {
                throw new HiveException(ErrorMsg.BAD_LOCATION_VALUE, newLocation);
            }
            sd.setLocation(newLocation);
        } catch (URISyntaxException e) {
            throw new HiveException(e);
        }
        environmentContext.getProperties().remove(StatsSetupConst.DO_NOT_UPDATE_STATS);
    } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDSKEWEDBY) {
        // Validation was done at compile time; no validation is needed here.
        List<String> skewedColNames = null;
        List<List<String>> skewedValues = null;
        if (alterTbl.isTurnOffSkewed()) {
            // Convert skewed table to non-skewed table.
            skewedColNames = new ArrayList<String>();
            skewedValues = new ArrayList<List<String>>();
        } else {
            skewedColNames = alterTbl.getSkewedColNames();
            skewedValues = alterTbl.getSkewedColValues();
        }
        if (null == tbl.getSkewedInfo()) {
            // Convert non-skewed table to skewed table.
            SkewedInfo skewedInfo = new SkewedInfo();
            skewedInfo.setSkewedColNames(skewedColNames);
            skewedInfo.setSkewedColValues(skewedValues);
            tbl.setSkewedInfo(skewedInfo);
        } else {
            tbl.setSkewedColNames(skewedColNames);
            tbl.setSkewedColValues(skewedValues);
        }
        tbl.setStoredAsSubDirectories(alterTbl.isStoredAsSubDirectories());
    } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ALTERSKEWEDLOCATION) {
        // process location one-by-one
        Map<List<String>, String> locMaps = alterTbl.getSkewedLocations();
        Set<List<String>> keys = locMaps.keySet();
        for (List<String> key : keys) {
            String newLocation = locMaps.get(key);
            try {
                URI locUri = new URI(newLocation);
                if (part != null) {
                    List<String> slk = new ArrayList<String>(key);
                    part.setSkewedValueLocationMap(slk, locUri.toString());
                } else {
                    List<String> slk = new ArrayList<String>(key);
                    tbl.setSkewedValueLocationMap(slk, locUri.toString());
                }
            } catch (URISyntaxException e) {
                throw new HiveException(e);
            }
        }
        environmentContext.getProperties().remove(StatsSetupConst.DO_NOT_UPDATE_STATS);
    } else if (alterTbl.getOp() == AlterTableTypes.ALTERBUCKETNUM) {
        if (part != null) {
            if (part.getBucketCount() == alterTbl.getNumberBuckets()) {
                return null;
            }
            part.setBucketCount(alterTbl.getNumberBuckets());
        } else {
            if (tbl.getNumBuckets() == alterTbl.getNumberBuckets()) {
                return null;
            }
            tbl.setNumBuckets(alterTbl.getNumberBuckets());
        }
    } else {
        throw new HiveException(ErrorMsg.UNSUPPORTED_ALTER_TBL_OP, alterTbl.getOp().toString());
    }
    return null;
}
Also used : HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) LazySimpleSerDe(org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor) ArrayList(java.util.ArrayList) URISyntaxException(java.net.URISyntaxException) URI(java.net.URI) EnvironmentContext(org.apache.hadoop.hive.metastore.api.EnvironmentContext) SkewedInfo(org.apache.hadoop.hive.metastore.api.SkewedInfo) Iterator(java.util.Iterator) ValidWriteIdList(org.apache.hadoop.hive.common.ValidWriteIdList) ArrayList(java.util.ArrayList) ValidTxnList(org.apache.hadoop.hive.common.ValidTxnList) AbstractList(java.util.AbstractList) List(java.util.List) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) Order(org.apache.hadoop.hive.metastore.api.Order) Path(org.apache.hadoop.fs.Path) DynamicSerDe(org.apache.hadoop.hive.serde2.dynamic_type.DynamicSerDe) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) CheckConstraint(org.apache.hadoop.hive.ql.metadata.CheckConstraint) NotNullConstraint(org.apache.hadoop.hive.ql.metadata.NotNullConstraint) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) DefaultConstraint(org.apache.hadoop.hive.ql.metadata.DefaultConstraint) UniqueConstraint(org.apache.hadoop.hive.ql.metadata.UniqueConstraint) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) MetadataTypedColumnsetSerDe(org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe) OrcSerde(org.apache.hadoop.hive.ql.io.orc.OrcSerde) OrcInputFormat(org.apache.hadoop.hive.ql.io.orc.OrcInputFormat) Deserializer(org.apache.hadoop.hive.serde2.Deserializer)
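
The RENAMECOLUMN branch above mixes renaming, duplicate detection, and repositioning via FIRST/AFTER in a single pass. Below is a stripped-down sketch of just the reorder bookkeeping, using plain strings instead of FieldSchema; the class and method names are illustrative, not Hive API:

import java.util.ArrayList;
import java.util.List;

public class RenameReorderDemo {

    // Mirrors the loop in alterTableOrSinglePartition: rename oldName to newName and,
    // when FIRST or AFTER is requested, hold the column aside and re-insert it at the
    // computed position once the pass is complete.
    static List<String> renameAndReorder(List<String> cols, String oldName, String newName,
            boolean first, String afterCol) {
        List<String> result = new ArrayList<>();
        String held = null;                 // the renamed column, held for repositioning
        int position = first ? 0 : -1;      // -1 means leave the column where it was
        int i = 1;                          // candidate insertion index, as in the original
        for (String col : cols) {
            if (col.equalsIgnoreCase(oldName)) {
                col = newName;
                if (first || afterCol != null) {
                    held = col;             // do not emit it here; re-insert after the loop
                    continue;
                }
            }
            if (afterCol != null && col.equalsIgnoreCase(afterCol)) {
                position = i;               // insert immediately after this column
            }
            i++;
            result.add(col);
        }
        if (held != null && position >= 0) {
            result.add(position, held);
        }
        return result;
    }

    public static void main(String[] args) {
        // Rename "status" to "code" and move it AFTER "host": prints [host, code, user]
        System.out.println(renameAndReorder(List.of("host", "user", "status"),
                "status", "code", false, "host"));
    }
}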

Example 18 with LazySimpleSerDe

Use of org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe in project hive by apache.

From the class VectorDeserializeOrcWriter, the method create:

// TODO: if more writers are added, separate out an EncodingWriterFactory
public static EncodingWriter create(InputFormat<?, ?> sourceIf, Deserializer serDe, Map<Path, PartitionDesc> parts, Configuration daemonConf, Configuration jobConf, Path splitPath, StructObjectInspector sourceOi, List<Integer> sourceIncludes, boolean[] cacheIncludes, int allocSize) throws IOException {
    // Vector SerDe can be disabled both on client and server side.
    if (!HiveConf.getBoolVar(daemonConf, ConfVars.LLAP_IO_ENCODE_VECTOR_SERDE_ENABLED) || !HiveConf.getBoolVar(jobConf, ConfVars.LLAP_IO_ENCODE_VECTOR_SERDE_ENABLED) || !(sourceIf instanceof TextInputFormat) || !(serDe instanceof LazySimpleSerDe)) {
        return new DeserializerOrcWriter(serDe, sourceOi, allocSize);
    }
    Path path = splitPath.getFileSystem(daemonConf).makeQualified(splitPath);
    PartitionDesc partDesc = HiveFileFormatUtils.getFromPathRecursively(parts, path, null);
    if (partDesc == null) {
        LlapIoImpl.LOG.info("Not using VertorDeserializeOrcWriter: no partition desc for " + path);
        return new DeserializerOrcWriter(serDe, sourceOi, allocSize);
    }
    Properties tblProps = partDesc.getTableDesc().getProperties();
    if ("true".equalsIgnoreCase(tblProps.getProperty(serdeConstants.SERIALIZATION_LAST_COLUMN_TAKES_REST))) {
        LlapIoImpl.LOG.info("Not using VertorDeserializeOrcWriter due to " + serdeConstants.SERIALIZATION_LAST_COLUMN_TAKES_REST);
        return new DeserializerOrcWriter(serDe, sourceOi, allocSize);
    }
    for (StructField sf : sourceOi.getAllStructFieldRefs()) {
        Category c = sf.getFieldObjectInspector().getCategory();
        if (c != Category.PRIMITIVE) {
            LlapIoImpl.LOG.info("Not using VertorDeserializeOrcWriter: " + c + " is not supported");
            return new DeserializerOrcWriter(serDe, sourceOi, allocSize);
        }
    }
    LlapIoImpl.LOG.info("Creating VertorDeserializeOrcWriter for " + path);
    return new VectorDeserializeOrcWriter(daemonConf, tblProps, sourceOi, sourceIncludes, cacheIncludes, allocSize);
}
Also used : DeserializerOrcWriter(org.apache.hadoop.hive.llap.io.encoded.SerDeEncodedDataReader.DeserializerOrcWriter) Path(org.apache.hadoop.fs.Path) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) Category(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category) TextInputFormat(org.apache.hadoop.mapred.TextInputFormat) LazySimpleSerDe(org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe) PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc) Properties(java.util.Properties)
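
Note that the gate at the top of create() requires both the daemon-side and the job-side configuration to enable the vectorized SerDe path; either one being false forces the row-by-row DeserializerOrcWriter fallback. A small sketch of a job-side override, reusing the static HiveConf accessors already used above (the method name is illustrative):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;

// Sketch: disable the vectorized encode path for one job; create() above will then
// return a DeserializerOrcWriter no matter what the daemon config says.
static Configuration disableVectorSerDePath(Configuration jobConf) {
    HiveConf.setBoolVar(jobConf, HiveConf.ConfVars.LLAP_IO_ENCODE_VECTOR_SERDE_ENABLED, false);
    return jobConf;
}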

Example 19 with LazySimpleSerDe

Use of org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe in project hive by apache.

From the class TestSimpleMapEqualComparer, the method testIncompatibleType:

public void testIncompatibleType() throws SerDeException, IOException {
    // empty maps
    StringTextMapHolder o1 = new StringTextMapHolder();
    StructObjectInspector oi1 = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(StringTextMapHolder.class, ObjectInspectorOptions.JAVA);
    LazySimpleSerDe serde = new LazySimpleSerDe();
    Configuration conf = new Configuration();
    Properties tbl = new Properties();
    tbl.setProperty(serdeConstants.LIST_COLUMNS, ObjectInspectorUtils.getFieldNames(oi1));
    tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, ObjectInspectorUtils.getFieldTypes(oi1));
    LazySerDeParameters serdeParams = new LazySerDeParameters(conf, tbl, LazySimpleSerDe.class.getName());
    SerDeUtils.initializeSerDe(serde, conf, tbl, null);
    ObjectInspector oi2 = serde.getObjectInspector();
    Object o2 = serializeAndDeserialize(o1, oi1, serde, serdeParams);
    int rc = ObjectInspectorUtils.compare(o1, oi1, o2, oi2, new SimpleMapEqualComparer());
    assertEquals(0, rc);
    // equal maps
    o1.mMap.put("42", new Text("The answer to Life, Universe And Everything"));
    o1.mMap.put("1729", new Text("A taxi cab number"));
    o2 = serializeAndDeserialize(o1, oi1, serde, serdeParams);
    rc = ObjectInspectorUtils.compare(o1, oi1, o2, oi2, new SimpleMapEqualComparer());
    assertFalse(0 == rc);
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) LazySerDeParameters(org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters) LazySimpleSerDe(org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe) Text(org.apache.hadoop.io.Text) Properties(java.util.Properties)
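
The serializeAndDeserialize helper (also used by Example 20 below) is not part of the excerpt. A plausible minimal round trip through the public SerDe API is sketched here; the real test helper also receives serdeParams, which suggests it serializes through LazySimpleSerDe's lower-level routines rather than serialize():

import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.io.Text;

// Hypothetical round-trip helper: serialize a Java object to the SerDe's text form,
// then deserialize it back into the SerDe's lazy representation.
static Object serializeAndDeserialize(Object o, ObjectInspector oi, LazySimpleSerDe serde)
        throws SerDeException {
    Text serialized = (Text) serde.serialize(o, oi);   // LazySimpleSerDe serializes to Text
    return serde.deserialize(serialized);
}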

Example 20 with LazySimpleSerDe

Use of org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe in project hive by apache.

From the class TestCrossMapEqualComparer, the method testCompatibleType:

public void testCompatibleType() throws SerDeException, IOException {
    // empty maps
    TextStringMapHolder o1 = new TextStringMapHolder();
    StructObjectInspector oi1 = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(TextStringMapHolder.class, ObjectInspectorOptions.JAVA);
    LazySimpleSerDe serde = new LazySimpleSerDe();
    Configuration conf = new Configuration();
    Properties tbl = new Properties();
    tbl.setProperty(serdeConstants.LIST_COLUMNS, ObjectInspectorUtils.getFieldNames(oi1));
    tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, ObjectInspectorUtils.getFieldTypes(oi1));
    LazySerDeParameters serdeParams = new LazySerDeParameters(conf, tbl, LazySimpleSerDe.class.getName());
    SerDeUtils.initializeSerDe(serde, conf, tbl, null);
    ObjectInspector oi2 = serde.getObjectInspector();
    Object o2 = serializeAndDeserialize(o1, oi1, serde, serdeParams);
    int rc = ObjectInspectorUtils.compare(o1, oi1, o2, oi2, new CrossMapEqualComparer());
    assertEquals(0, rc);
    // equal maps
    o1.mMap.put(new Text("42"), "The answer to Life, Universe And Everything");
    o1.mMap.put(new Text("1729"), "A taxi cab number");
    o2 = serializeAndDeserialize(o1, oi1, serde, serdeParams);
    rc = ObjectInspectorUtils.compare(o1, oi1, o2, oi2, new CrossMapEqualComparer());
    assertEquals(0, rc);
    // unequal maps
    o1.mMap.put(new Text("1729"), "Hardy-Ramanujan Number");
    rc = ObjectInspectorUtils.compare(o1, oi1, o2, oi2, new CrossMapEqualComparer());
    assertFalse(0 == rc);
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) LazySerDeParameters(org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters) LazySimpleSerDe(org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe) Text(org.apache.hadoop.io.Text) Properties(java.util.Properties)

Aggregations

Usage counts across the examples in this set:

Text (org.apache.hadoop.io.Text): 24
Properties (java.util.Properties): 17
Configuration (org.apache.hadoop.conf.Configuration): 14
LazySimpleSerDe (org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe): 14
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 8
ByteStream (org.apache.hadoop.hive.serde2.ByteStream): 7
LazySerDeParameters (org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters): 6
ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable): 5
IntWritable (org.apache.hadoop.io.IntWritable): 5
DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable): 4
ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable): 4
LongWritable (org.apache.hadoop.io.LongWritable): 4
Path (org.apache.hadoop.fs.Path): 3
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 3
Test (org.junit.Test): 3
ArrayList (java.util.ArrayList): 2
Map (java.util.Map): 2
Entry (java.util.Map.Entry): 2
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 2
SerDeException (org.apache.hadoop.hive.serde2.SerDeException): 2