Search in sources :

Example 6 with SkewedInfo

use of org.apache.hadoop.hive.metastore.api.SkewedInfo in project hive by apache.

the class HBaseUtils method deserializeStorageDescriptor.

/**
 * Rebuilds a {@link StorageDescriptor} from its protobuf-serialized form.
 *
 * <p>Optional protobuf fields (input/output format, serde info, skewed info, stored-as-sub-directories)
 * are only copied when the message reports them as present.
 *
 * @param serialized bytes to parse as an {@code HbaseMetastoreProto.StorageDescriptor} message
 * @return a newly created storage descriptor populated from the parsed message
 * @throws InvalidProtocolBufferException if the protobuf parser rejects the bytes
 */
static StorageDescriptor deserializeStorageDescriptor(byte[] serialized) throws InvalidProtocolBufferException {
    HbaseMetastoreProto.StorageDescriptor message = HbaseMetastoreProto.StorageDescriptor.parseFrom(serialized);
    StorageDescriptor result = new StorageDescriptor();

    result.setCols(convertFieldSchemaListFromProto(message.getColsList()));
    if (message.hasInputFormat()) {
        result.setInputFormat(message.getInputFormat());
    }
    if (message.hasOutputFormat()) {
        result.setOutputFormat(message.getOutputFormat());
    }
    result.setCompressed(message.getIsCompressed());
    result.setNumBuckets(message.getNumBuckets());

    if (message.hasSerdeInfo()) {
        SerDeInfo serdeInfo = new SerDeInfo();

        // Absent name / serialization lib are passed to the setters as explicit nulls.
        String serdeName = null;
        if (message.getSerdeInfo().hasName()) {
            serdeName = message.getSerdeInfo().getName();
        }
        serdeInfo.setName(serdeName);

        String serializationLib = null;
        if (message.getSerdeInfo().hasSerializationLib()) {
            serializationLib = message.getSerdeInfo().getSerializationLib();
        }
        serdeInfo.setSerializationLib(serializationLib);

        serdeInfo.setParameters(buildParameters(message.getSerdeInfo().getParameters()));
        result.setSerdeInfo(serdeInfo);
    }

    // Copy the repeated field into a fresh ArrayList before handing it to the descriptor.
    result.setBucketCols(new ArrayList<>(message.getBucketColsList()));

    List<Order> orders = new ArrayList<>();
    for (HbaseMetastoreProto.StorageDescriptor.Order orderProto : message.getSortColsList()) {
        Order order = new Order(orderProto.getColumnName(), orderProto.getOrder());
        orders.add(order);
    }
    result.setSortCols(orders);

    if (message.hasSkewedInfo()) {
        HbaseMetastoreProto.StorageDescriptor.SkewedInfo skewedProto = message.getSkewedInfo();
        SkewedInfo skewedInfo = new SkewedInfo();
        skewedInfo.setSkewedColNames(new ArrayList<>(skewedProto.getSkewedColNamesList()));

        for (HbaseMetastoreProto.StorageDescriptor.SkewedInfo.SkewedColValueList valueList : skewedProto.getSkewedColValuesList()) {
            skewedInfo.addToSkewedColValues(new ArrayList<>(valueList.getSkewedColValueList()));
        }

        Map<List<String>, String> locationsByValues = new HashMap<>();
        for (HbaseMetastoreProto.StorageDescriptor.SkewedInfo.SkewedColValueLocationMap entry : skewedProto.getSkewedColValueLocationMapsList()) {
            List<String> key = new ArrayList<>(entry.getKeyList());
            locationsByValues.put(key, entry.getValue());
        }
        skewedInfo.setSkewedColValueLocationMaps(locationsByValues);

        result.setSkewedInfo(skewedInfo);
    }

    if (message.hasStoredAsSubDirectories()) {
        result.setStoredAsSubDirectories(message.getStoredAsSubDirectories());
    }
    return result;
}
Also used : Order(org.apache.hadoop.hive.metastore.api.Order) HashMap(java.util.HashMap) SerDeInfo(org.apache.hadoop.hive.metastore.api.SerDeInfo) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor) ArrayList(java.util.ArrayList) ByteString(com.google.protobuf.ByteString) SkewedInfo(org.apache.hadoop.hive.metastore.api.SkewedInfo) List(java.util.List) ArrayList(java.util.ArrayList)

Example 7 with SkewedInfo

use of org.apache.hadoop.hive.metastore.api.SkewedInfo in project hive by apache.

the class HBaseUtils method hashStorageDescriptor.

/**
 * Produce a hash for the storage descriptor.
 *
 * <p>The digest is fed, in a fixed order: the columns (in list order), input format, output
 * format, compressed flag, bucket count, serde info, bucket columns, sort columns and skewed
 * info. Null optional values are skipped. Apart from the column list, every list or map is
 * copied into a sorted set/map before being hashed.
 *
 * @param sd storage descriptor to hash
 * @param md message digest used to compute the hash; it is reset before any data is added
 * @return the hash as a byte array
 */
static byte[] hashStorageDescriptor(StorageDescriptor sd, MessageDigest md) {
    // Note all maps and lists have to be absolutely sorted.  Otherwise we'll produce different
    // results for hashes based on the OS or JVM being used.
    md.reset();

    for (FieldSchema column : sd.getCols()) {
        md.update(column.getName().getBytes(ENCODING));
        md.update(column.getType().getBytes(ENCODING));
        String comment = column.getComment();
        if (comment != null) {
            md.update(comment.getBytes(ENCODING));
        }
    }

    String inputFormat = sd.getInputFormat();
    if (inputFormat != null) {
        md.update(inputFormat.getBytes(ENCODING));
    }
    String outputFormat = sd.getOutputFormat();
    if (outputFormat != null) {
        md.update(outputFormat.getBytes(ENCODING));
    }

    String compressedFlag = sd.isCompressed() ? "true" : "false";
    md.update(compressedFlag.getBytes(ENCODING));
    md.update(String.valueOf(sd.getNumBuckets()).getBytes(ENCODING));

    SerDeInfo serdeInfo = sd.getSerdeInfo();
    if (serdeInfo != null) {
        String serdeName = serdeInfo.getName();
        if (serdeName != null) {
            md.update(serdeName.getBytes(ENCODING));
        }
        String serializationLib = serdeInfo.getSerializationLib();
        if (serializationLib != null) {
            md.update(serializationLib.getBytes(ENCODING));
        }
        if (serdeInfo.getParameters() != null) {
            // Hash parameters in key order.
            SortedMap<String, String> sortedParams = new TreeMap<>(serdeInfo.getParameters());
            for (Map.Entry<String, String> entry : sortedParams.entrySet()) {
                md.update(entry.getKey().getBytes(ENCODING));
                md.update(entry.getValue().getBytes(ENCODING));
            }
        }
    }

    if (sd.getBucketCols() != null) {
        SortedSet<String> sortedBucketCols = new TreeSet<>(sd.getBucketCols());
        for (String bucketCol : sortedBucketCols) {
            md.update(bucketCol.getBytes(ENCODING));
        }
    }

    if (sd.getSortCols() != null) {
        SortedSet<Order> sortedOrders = new TreeSet<>(sd.getSortCols());
        for (Order sortOrder : sortedOrders) {
            md.update(sortOrder.getCol().getBytes(ENCODING));
            md.update(String.valueOf(sortOrder.getOrder()).getBytes(ENCODING));
        }
    }

    SkewedInfo skewedInfo = sd.getSkewedInfo();
    if (skewedInfo != null) {
        if (skewedInfo.getSkewedColNames() != null) {
            SortedSet<String> sortedColNames = new TreeSet<>(skewedInfo.getSkewedColNames());
            for (String colName : sortedColNames) {
                md.update(colName.getBytes(ENCODING));
            }
        }

        if (skewedInfo.getSkewedColValues() != null) {
            // Each inner list is sorted and joined with "." so the outer collection can be
            // sorted as plain strings.
            SortedSet<String> joinedValues = new TreeSet<>();
            for (List<String> values : skewedInfo.getSkewedColValues()) {
                SortedSet<String> sortedValues = new TreeSet<>(values);
                joinedValues.add(StringUtils.join(sortedValues, "."));
            }
            for (String joined : joinedValues) {
                md.update(joined.getBytes(ENCODING));
            }
        }

        if (skewedInfo.getSkewedColValueLocationMaps() != null) {
            // Keys get the same sort-and-join treatment as the skewed column values above.
            SortedMap<String, String> locationsByJoinedKey = new TreeMap<>();
            for (Map.Entry<List<String>, String> location : skewedInfo.getSkewedColValueLocationMaps().entrySet()) {
                SortedSet<String> sortedKeyParts = new TreeSet<>(location.getKey());
                locationsByJoinedKey.put(StringUtils.join(sortedKeyParts, "."), location.getValue());
            }
            for (Map.Entry<String, String> entry : locationsByJoinedKey.entrySet()) {
                md.update(entry.getKey().getBytes(ENCODING));
                md.update(entry.getValue().getBytes(ENCODING));
            }
        }
    }

    return md.digest();
}
Also used : Order(org.apache.hadoop.hive.metastore.api.Order) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) SerDeInfo(org.apache.hadoop.hive.metastore.api.SerDeInfo) ByteString(com.google.protobuf.ByteString) TreeMap(java.util.TreeMap) SkewedInfo(org.apache.hadoop.hive.metastore.api.SkewedInfo) TreeSet(java.util.TreeSet) List(java.util.List) ArrayList(java.util.ArrayList) Map(java.util.Map) SortedMap(java.util.SortedMap) HashMap(java.util.HashMap) TreeMap(java.util.TreeMap)

Example 8 with SkewedInfo

use of org.apache.hadoop.hive.metastore.api.SkewedInfo in project hive by apache.

the class TestSharedStorageDescriptor method changeOnUnset.

/**
 * After {@code unsetSkewedInfo()} is called on a SharedStorageDescriptor, its skewed info must
 * no longer be the same object reference as the one held by the descriptor passed to
 * {@code setShared}.
 */
@Test
public void changeOnUnset() {
    StorageDescriptor sd = new StorageDescriptor();
    SkewedInfo skew = new SkewedInfo();
    sd.setSkewedInfo(skew);
    SharedStorageDescriptor ssd = new SharedStorageDescriptor();
    ssd.setShared(sd);
    ssd.unsetSkewedInfo();
    // assertNotSame checks reference inequality and reports the objects on failure,
    // unlike assertFalse(a == b) which fails without any detail.
    Assert.assertNotSame(sd.getSkewedInfo(), ssd.getSkewedInfo());
}
Also used : SkewedInfo(org.apache.hadoop.hive.metastore.api.SkewedInfo) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor) Test(org.junit.Test)

Example 9 with SkewedInfo

use of org.apache.hadoop.hive.metastore.api.SkewedInfo in project hive by apache.

the class TestSharedStorageDescriptor method changeOnSkewed.

/**
 * After a new SkewedInfo is set on a SharedStorageDescriptor, its skewed info must no longer be
 * the same object reference as the one held by the descriptor passed to {@code setShared}.
 */
@Test
public void changeOnSkewed() {
    StorageDescriptor sd = new StorageDescriptor();
    SkewedInfo skew = new SkewedInfo();
    sd.setSkewedInfo(skew);
    SharedStorageDescriptor ssd = new SharedStorageDescriptor();
    ssd.setShared(sd);
    ssd.setSkewedInfo(new SkewedInfo());
    // assertNotSame checks reference inequality and reports the objects on failure,
    // unlike assertFalse(a == b) which fails without any detail.
    Assert.assertNotSame(sd.getSkewedInfo(), ssd.getSkewedInfo());
}
Also used : SkewedInfo(org.apache.hadoop.hive.metastore.api.SkewedInfo) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor) Test(org.junit.Test)

Example 10 with SkewedInfo

use of org.apache.hadoop.hive.metastore.api.SkewedInfo in project hive by apache.

the class ObjectStore method convertToStorageDescriptor.

// Builds a StorageDescriptor from the given MStorageDescriptor; a null input yields null.
// When noFS is true the field schemas are not converted and null is passed for the columns.
// MSD and SD should be the same objects; it is not clear how to make them the same right now.
// MSerdeInfo and SerdeInfo should be the same as well.
private StorageDescriptor convertToStorageDescriptor(MStorageDescriptor msd, boolean noFS) throws MetaException {
    if (msd == null) {
        return null;
    }

    // The column descriptor may be missing, in which case there are no model columns to convert.
    List<MFieldSchema> modelCols = null;
    if (msd.getCD() != null) {
        modelCols = msd.getCD().getCols();
    }

    StorageDescriptor converted = new StorageDescriptor(
        noFS ? null : convertToFieldSchemas(modelCols),
        msd.getLocation(),
        msd.getInputFormat(),
        msd.getOutputFormat(),
        msd.isCompressed(),
        msd.getNumBuckets(),
        convertToSerDeInfo(msd.getSerDeInfo()),
        convertList(msd.getBucketCols()),
        convertToOrders(msd.getSortCols()),
        convertMap(msd.getParameters()));

    converted.setSkewedInfo(new SkewedInfo(
        convertList(msd.getSkewedColNames()),
        convertToSkewedValues(msd.getSkewedColValues()),
        covertToSkewedMap(msd.getSkewedColValueLocationMaps())));
    converted.setStoredAsSubDirectories(msd.isStoredAsSubDirectories());
    return converted;
}
Also used : SkewedInfo(org.apache.hadoop.hive.metastore.api.SkewedInfo) MFieldSchema(org.apache.hadoop.hive.metastore.model.MFieldSchema) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor) MStorageDescriptor(org.apache.hadoop.hive.metastore.model.MStorageDescriptor)

Aggregations

SkewedInfo (org.apache.hadoop.hive.metastore.api.SkewedInfo)16 ArrayList (java.util.ArrayList)12 StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor)12 List (java.util.List)10 SerDeInfo (org.apache.hadoop.hive.metastore.api.SerDeInfo)9 HashMap (java.util.HashMap)8 FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema)7 Order (org.apache.hadoop.hive.metastore.api.Order)7 Path (org.apache.hadoop.fs.Path)4 MetaException (org.apache.hadoop.hive.metastore.api.MetaException)4 Test (org.junit.Test)4 ByteString (com.google.protobuf.ByteString)3 IOException (java.io.IOException)3 Map (java.util.Map)3 TreeMap (java.util.TreeMap)3 FileSystem (org.apache.hadoop.fs.FileSystem)3 InvalidOperationException (org.apache.hadoop.hive.metastore.api.InvalidOperationException)3 LinkedHashMap (java.util.LinkedHashMap)2 LinkedList (java.util.LinkedList)2 SortedMap (java.util.SortedMap)2