Example 26 with LazySimpleSerDe

Use of org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe in project hive by apache.

The class TestCrossMapEqualComparer, method serializeAndDeserialize.

Object serializeAndDeserialize(StringTextMapHolder o1, StructObjectInspector oi1, LazySimpleSerDe serde, LazySerDeParameters serdeParams) throws IOException, SerDeException {
    ByteStream.Output serializeStream = new ByteStream.Output();
    LazySimpleSerDe.serialize(serializeStream, o1, oi1,
        serdeParams.getSeparators(), 0, serdeParams.getNullSequence(),
        serdeParams.isEscaped(), serdeParams.getEscapeChar(),
        serdeParams.getNeedsEscape());
    Text t = new Text(serializeStream.toByteArray());
    return serde.deserialize(t);
}
Also used: ByteStream (org.apache.hadoop.hive.serde2.ByteStream), Text (org.apache.hadoop.io.Text)
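
The holder classes referenced by this helper and the two tests below are not included in this excerpt. A minimal reconstruction, consistent with how the tests use the mMap field, might look like the following; the TreeMap choice is an assumption:

import java.util.Map;
import java.util.TreeMap;
import org.apache.hadoop.io.Text;

// Hypothetical reconstructions of the test's holder classes; the real
// definitions live inside TestCrossMapEqualComparer and may differ.
class TextStringMapHolder {
    Map<Text, String> mMap = new TreeMap<Text, String>(); // Text keys, String values
}

class StringTextMapHolder {
    Map<String, Text> mMap = new TreeMap<String, Text>(); // String keys, Text values
}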

Example 27 with LazySimpleSerDe

Use of org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe in project hive by apache.

The class TestCrossMapEqualComparer, method testCompatibleType.

public void testCompatibleType() throws SerDeException, IOException {
    // empty maps
    TextStringMapHolder o1 = new TextStringMapHolder();
    StructObjectInspector oi1 = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(TextStringMapHolder.class, ObjectInspectorOptions.JAVA);
    LazySimpleSerDe serde = new LazySimpleSerDe();
    Configuration conf = new Configuration();
    Properties tbl = new Properties();
    tbl.setProperty(serdeConstants.LIST_COLUMNS, ObjectInspectorUtils.getFieldNames(oi1));
    tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, ObjectInspectorUtils.getFieldTypes(oi1));
    LazySerDeParameters serdeParams = new LazySerDeParameters(conf, tbl, LazySimpleSerDe.class.getName());
    SerDeUtils.initializeSerDe(serde, conf, tbl, null);
    ObjectInspector oi2 = serde.getObjectInspector();
    Object o2 = serializeAndDeserialize(o1, oi1, serde, serdeParams);
    int rc = ObjectInspectorUtils.compare(o1, oi1, o2, oi2, new CrossMapEqualComparer());
    assertEquals(0, rc);
    // equal maps
    o1.mMap.put(new Text("42"), "The answer to Life, Universe And Everything");
    o1.mMap.put(new Text("1729"), "A taxi cab number");
    o2 = serializeAndDeserialize(o1, oi1, serde, serdeParams);
    rc = ObjectInspectorUtils.compare(o1, oi1, o2, oi2, new CrossMapEqualComparer());
    assertEquals(0, rc);
    // unequal maps
    o1.mMap.put(new Text("1729"), "Hardy-Ramanujan Number");
    rc = ObjectInspectorUtils.compare(o1, oi1, o2, oi2, new CrossMapEqualComparer());
    assertFalse(0 == rc);
}
Also used: Configuration (org.apache.hadoop.conf.Configuration), LazySerDeParameters (org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters), LazySimpleSerDe (org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe), Text (org.apache.hadoop.io.Text), Properties (java.util.Properties)
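
What CrossMapEqualComparer buys the test: the original map holds Text keys, while the lazily deserialized map holds lazy-string keys under a different ObjectInspector, so entries can only be matched by converting through both inspectors. A standalone sketch of the same idea, using the standard inspector factories (not code from the test):

import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.hive.serde2.objectinspector.CrossMapEqualComparer;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.Text;

public class CrossMapCompareSketch {
    public static void main(String[] args) {
        // One map seen through writable-string inspectors (Text objects)...
        ObjectInspector writableMapOI = ObjectInspectorFactory.getStandardMapObjectInspector(
                PrimitiveObjectInspectorFactory.writableStringObjectInspector,
                PrimitiveObjectInspectorFactory.writableStringObjectInspector);
        // ...and another seen through java-string inspectors (String objects).
        ObjectInspector javaMapOI = ObjectInspectorFactory.getStandardMapObjectInspector(
                PrimitiveObjectInspectorFactory.javaStringObjectInspector,
                PrimitiveObjectInspectorFactory.javaStringObjectInspector);

        Map<Text, Text> m1 = new HashMap<Text, Text>();
        m1.put(new Text("k"), new Text("v"));
        Map<String, String> m2 = new HashMap<String, String>();
        m2.put("k", "v");

        // 0 means equal, even though the key/value representations differ.
        int rc = ObjectInspectorUtils.compare(m1, writableMapOI, m2, javaMapOI,
                new CrossMapEqualComparer());
        System.out.println(rc);
    }
}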

Example 28 with LazySimpleSerDe

Use of org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe in project hive by apache.

The class TestCrossMapEqualComparer, method testIncompatibleType.

public void testIncompatibleType() throws SerDeException, IOException {
    // empty maps
    StringTextMapHolder o1 = new StringTextMapHolder();
    StructObjectInspector oi1 = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(StringTextMapHolder.class, ObjectInspectorOptions.JAVA);
    LazySimpleSerDe serde = new LazySimpleSerDe();
    Configuration conf = new Configuration();
    Properties tbl = new Properties();
    tbl.setProperty(serdeConstants.LIST_COLUMNS, ObjectInspectorUtils.getFieldNames(oi1));
    tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, ObjectInspectorUtils.getFieldTypes(oi1));
    LazySerDeParameters serdeParams = new LazySerDeParameters(conf, tbl, LazySimpleSerDe.class.getName());
    SerDeUtils.initializeSerDe(serde, conf, tbl, null);
    ObjectInspector oi2 = serde.getObjectInspector();
    Object o2 = serializeAndDeserialize(o1, oi1, serde, serdeParams);
    int rc = ObjectInspectorUtils.compare(o1, oi1, o2, oi2, new CrossMapEqualComparer());
    assertEquals(0, rc);
    // equal maps
    o1.mMap.put("42", new Text("The answer to Life, Universe And Everything"));
    o1.mMap.put("1729", new Text("A taxi cab number"));
    o2 = serializeAndDeserialize(o1, oi1, serde, serdeParams);
    rc = ObjectInspectorUtils.compare(o1, oi1, o2, oi2, new CrossMapEqualComparer());
    assertEquals(0, rc);
    // unequal maps
    o1.mMap.put("1729", new Text("Hardy-Ramanujan Number"));
    rc = ObjectInspectorUtils.compare(o1, oi1, o2, oi2, new CrossMapEqualComparer());
    assertFalse(0 == rc);
}
Also used: Configuration (org.apache.hadoop.conf.Configuration), LazySerDeParameters (org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters), LazySimpleSerDe (org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe), Text (org.apache.hadoop.io.Text), Properties (java.util.Properties)
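
For intuition about what serializeAndDeserialize actually round-trips: LazySimpleSerDe emits plain delimited text. Assuming the default delimiters (\001 between struct fields, \002 between map entries, \003 between a key and its value), the two-entry map from the "equal maps" step would serialize roughly as sketched below; the entry order depends on the holder's map ordering and is an assumption here:

public class SerializedFormSketch {
    public static void main(String[] args) {
        // Hypothetical wire form of the single-field struct (just the map).
        // With only one struct field there is no \u0001 field separator;
        // \u0002 separates entries and \u0003 separates key from value.
        String sketch = "1729\u0003A taxi cab number\u0002"
                + "42\u0003The answer to Life, Universe And Everything";
        // Render the control characters visibly.
        System.out.println(sketch.replace('\u0002', ';').replace('\u0003', '='));
    }
}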

Example 29 with LazySimpleSerDe

Use of org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe in project hive by apache.

The class TestHCatRecordSerDe, method testRW.

public void testRW() throws Exception {
    Configuration conf = new Configuration();
    for (Entry<Properties, HCatRecord> e : getData().entrySet()) {
        Properties tblProps = e.getKey();
        HCatRecord r = e.getValue();
        HCatRecordSerDe hrsd = new HCatRecordSerDe();
        SerDeUtils.initializeSerDe(hrsd, conf, tblProps, null);
        LOG.info("ORIG: {}", r);
        Writable s = hrsd.serialize(r, hrsd.getObjectInspector());
        LOG.info("ONE: {}", s);
        HCatRecord r2 = (HCatRecord) hrsd.deserialize(s);
        Assert.assertTrue(HCatDataCheckUtil.recordsEqual(r, r2));
        // If serialization went through correctly, then s is itself an HCatRecord,
        // equal to the original and a deep copy of it, and this property holds
        // through further levels of serialization as well.
        Writable s2 = hrsd.serialize(s, hrsd.getObjectInspector());
        LOG.info("TWO: {}", s2);
        Assert.assertTrue(HCatDataCheckUtil.recordsEqual(r, (HCatRecord) s));
        Assert.assertTrue(HCatDataCheckUtil.recordsEqual(r, (HCatRecord) s2));
        // Serialize using another serde, and read back that object representation.
        LazySimpleSerDe testSD = new LazySimpleSerDe();
        SerDeUtils.initializeSerDe(testSD, conf, tblProps, null);
        Writable s3 = testSD.serialize(s, hrsd.getObjectInspector());
        LOG.info("THREE: {}", s3);
        Object o3 = testSD.deserialize(s3);
        Assert.assertFalse(r.getClass().equals(o3.getClass()));
        // then serialize again using hrsd, and compare results
        HCatRecord s4 = (HCatRecord) hrsd.serialize(o3, testSD.getObjectInspector());
        LOG.info("FOUR: {}", s4);
        // Test LazyHCatRecord init and read
        LazyHCatRecord s5 = new LazyHCatRecord(o3, testSD.getObjectInspector());
        LOG.info("FIVE: {}", s5);
        LazyHCatRecord s6 = new LazyHCatRecord(s4, hrsd.getObjectInspector());
        LOG.info("SIX: {}", s6);
    }
}
Also used: Configuration (org.apache.hadoop.conf.Configuration), LazySimpleSerDe (org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe), Writable (org.apache.hadoop.io.Writable), Properties (java.util.Properties)
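
The per-record Properties come from getData(), which is not shown here. For a simple two-column record, the table properties that both HCatRecordSerDe and LazySimpleSerDe read would look roughly like this sketch; the column names and types are hypothetical:

import java.util.Properties;
import org.apache.hadoop.hive.serde.serdeConstants;

public class TblPropsSketch {
    // Hypothetical minimal schema; the real getData() builds richer schemas,
    // including nested struct, map, and list columns.
    static Properties minimalProps() {
        Properties tblProps = new Properties();
        tblProps.setProperty(serdeConstants.LIST_COLUMNS, "id,name");
        tblProps.setProperty(serdeConstants.LIST_COLUMN_TYPES, "int,string");
        return tblProps;
    }
}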

Example 30 with LazySimpleSerDe

Use of org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe in project hive by apache.

The class CreateTableDesc, method toTable.

public Table toTable(HiveConf conf) throws HiveException {
    String databaseName = getDatabaseName();
    String tableName = getTableName();
    if (databaseName == null || tableName.contains(".")) {
        String[] names = Utilities.getDbTableName(tableName);
        databaseName = names[0];
        tableName = names[1];
    }
    Table tbl = new Table(databaseName, tableName);
    if (getTblProps() != null) {
        tbl.getTTable().getParameters().putAll(getTblProps());
    }
    if (getPartCols() != null) {
        tbl.setPartCols(getPartCols());
    }
    if (getNumBuckets() != -1) {
        tbl.setNumBuckets(getNumBuckets());
    }
    if (getStorageHandler() != null) {
        tbl.setProperty(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE, getStorageHandler());
    }
    HiveStorageHandler storageHandler = tbl.getStorageHandler();
    /*
     * We use LazySimpleSerDe by default.
     *
     * If the user didn't specify a SerDe, and any of the columns are not simple
     * types, we will have to use DynamicSerDe instead.
     */
    if (getSerName() == null) {
        if (storageHandler == null) {
            LOG.info("Default to LazySimpleSerDe for table " + tableName);
            tbl.setSerializationLib(LazySimpleSerDe.class.getName());
        } else {
            String serDeClassName = storageHandler.getSerDeClass().getName();
            LOG.info("Use StorageHandler-supplied " + serDeClassName + " for table " + tableName);
            tbl.setSerializationLib(serDeClassName);
        }
    } else {
        // let's validate that the serde exists
        DDLTask.validateSerDe(getSerName(), conf);
        tbl.setSerializationLib(getSerName());
    }
    if (getFieldDelim() != null) {
        tbl.setSerdeParam(serdeConstants.FIELD_DELIM, getFieldDelim());
        tbl.setSerdeParam(serdeConstants.SERIALIZATION_FORMAT, getFieldDelim());
    }
    if (getFieldEscape() != null) {
        tbl.setSerdeParam(serdeConstants.ESCAPE_CHAR, getFieldEscape());
    }
    if (getCollItemDelim() != null) {
        tbl.setSerdeParam(serdeConstants.COLLECTION_DELIM, getCollItemDelim());
    }
    if (getMapKeyDelim() != null) {
        tbl.setSerdeParam(serdeConstants.MAPKEY_DELIM, getMapKeyDelim());
    }
    if (getLineDelim() != null) {
        tbl.setSerdeParam(serdeConstants.LINE_DELIM, getLineDelim());
    }
    if (getNullFormat() != null) {
        tbl.setSerdeParam(serdeConstants.SERIALIZATION_NULL_FORMAT, getNullFormat());
    }
    if (getSerdeProps() != null) {
        Iterator<Map.Entry<String, String>> iter = getSerdeProps().entrySet().iterator();
        while (iter.hasNext()) {
            Map.Entry<String, String> m = iter.next();
            tbl.setSerdeParam(m.getKey(), m.getValue());
        }
    }
    if (getCols() != null) {
        tbl.setFields(getCols());
    }
    if (getBucketCols() != null) {
        tbl.setBucketCols(getBucketCols());
    }
    if (getSortCols() != null) {
        tbl.setSortCols(getSortCols());
    }
    if (getComment() != null) {
        tbl.setProperty("comment", getComment());
    }
    if (getLocation() != null) {
        tbl.setDataLocation(new Path(getLocation()));
    }
    if (getSkewedColNames() != null) {
        tbl.setSkewedColNames(getSkewedColNames());
    }
    if (getSkewedColValues() != null) {
        tbl.setSkewedColValues(getSkewedColValues());
    }
    tbl.getTTable().setTemporary(isTemporary());
    tbl.setStoredAsSubDirectories(isStoredAsSubDirectories());
    tbl.setInputFormatClass(getInputFormat());
    tbl.setOutputFormatClass(getOutputFormat());
    // Persist input/output formats in the metadata only when explicitly
    // specified. Otherwise, load lazily via StorageHandler at query time.
    if (getInputFormat() != null && !getInputFormat().isEmpty()) {
        tbl.getTTable().getSd().setInputFormat(tbl.getInputFormatClass().getName());
    }
    if (getOutputFormat() != null && !getOutputFormat().isEmpty()) {
        tbl.getTTable().getSd().setOutputFormat(tbl.getOutputFormatClass().getName());
    }
    if (!Utilities.isDefaultNameNode(conf) && DDLTask.doesTableNeedLocation(tbl)) {
        // If location is specified - ensure that it is a full qualified name
        DDLTask.makeLocationQualified(tbl.getDbName(), tbl.getTTable().getSd(), tableName, conf);
    }
    if (isExternal()) {
        tbl.setProperty("EXTERNAL", "TRUE");
        tbl.setTableType(TableType.EXTERNAL_TABLE);
    }
    // Mark the table with SORTBUCKETCOLSPREFIX when every bucketed column
    // appears among the first 'n' sort columns, where 'n' is the length
    // of the bucketed columns.
    if ((tbl.getBucketCols() != null) && (tbl.getSortCols() != null)) {
        List<String> bucketCols = tbl.getBucketCols();
        List<Order> sortCols = tbl.getSortCols();
        if ((sortCols.size() > 0) && (sortCols.size() >= bucketCols.size())) {
            boolean found = true;
            Iterator<String> iterBucketCols = bucketCols.iterator();
            while (iterBucketCols.hasNext()) {
                String bucketCol = iterBucketCols.next();
                boolean colFound = false;
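                // Only the first bucketCols.size() sort columns are examined;
                // the size check above guarantees the index stays in range.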
                for (int i = 0; i < bucketCols.size(); i++) {
                    if (bucketCol.equals(sortCols.get(i).getCol())) {
                        colFound = true;
                        break;
                    }
                }
                if (!colFound) {
                    found = false;
                    break;
                }
            }
            if (found) {
                tbl.setProperty("SORTBUCKETCOLSPREFIX", "TRUE");
            }
        }
    }
    if (getLocation() == null && !this.isCTAS) {
        if (!tbl.isPartitioned() && conf.getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
            StatsSetupConst.setBasicStatsStateForCreateTable(tbl.getTTable().getParameters(), StatsSetupConst.TRUE);
        }
    } else {
        StatsSetupConst.setBasicStatsStateForCreateTable(tbl.getTTable().getParameters(), StatsSetupConst.FALSE);
    }
    return tbl;
}
Also used: Path (org.apache.hadoop.fs.Path), Order (org.apache.hadoop.hive.metastore.api.Order), HiveStorageHandler (org.apache.hadoop.hive.ql.metadata.HiveStorageHandler), Table (org.apache.hadoop.hive.ql.metadata.Table), LazySimpleSerDe (org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe), Map (java.util.Map)
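
The SORTBUCKETCOLSPREFIX block near the end of toTable() is a prefix-membership test: every bucketed column must appear among the first n sort columns, where n is the number of bucketed columns. Restated as a standalone helper (a sketch of the same logic, not Hive API):

import java.util.Arrays;
import java.util.List;

public class SortBucketPrefixSketch {
    // Equivalent restatement of the check in toTable(): all bucketed columns
    // must appear among the first bucketCols.size() sort column names.
    static boolean bucketColsArePrefixOfSortCols(List<String> bucketCols,
            List<String> sortColNames) {
        return sortColNames.size() >= bucketCols.size()
                && sortColNames.subList(0, bucketCols.size()).containsAll(bucketCols);
    }

    public static void main(String[] args) {
        System.out.println(bucketColsArePrefixOfSortCols(
                Arrays.asList("a", "b"), Arrays.asList("b", "a", "c"))); // true
        System.out.println(bucketColsArePrefixOfSortCols(
                Arrays.asList("a", "d"), Arrays.asList("b", "a", "c"))); // false
    }
}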

Aggregations

Text (org.apache.hadoop.io.Text)23 Properties (java.util.Properties)18 LazySimpleSerDe (org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe)16 Configuration (org.apache.hadoop.conf.Configuration)14 ByteStream (org.apache.hadoop.hive.serde2.ByteStream)7 StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)7 LazySerDeParameters (org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters)6 ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable)5 IntWritable (org.apache.hadoop.io.IntWritable)5 Path (org.apache.hadoop.fs.Path)4 DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable)4 ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable)4 LongWritable (org.apache.hadoop.io.LongWritable)4 Map (java.util.Map)3 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)3 Test (org.junit.Test)3 ArrayList (java.util.ArrayList)2 Entry (java.util.Map.Entry)2 FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema)2 Table (org.apache.hadoop.hive.ql.metadata.Table)2