Use of org.apache.hadoop.hive.metastore.api.SkewedInfo in project hive by apache.
In the class HBaseUtils, the method deserializeStorageDescriptor:
static StorageDescriptor deserializeStorageDescriptor(byte[] serialized) throws InvalidProtocolBufferException {
  HbaseMetastoreProto.StorageDescriptor proto = HbaseMetastoreProto.StorageDescriptor.parseFrom(serialized);
  StorageDescriptor sd = new StorageDescriptor();
  sd.setCols(convertFieldSchemaListFromProto(proto.getColsList()));
  // Optional scalar fields are copied only when present in the protobuf message.
  if (proto.hasInputFormat()) {
    sd.setInputFormat(proto.getInputFormat());
  }
  if (proto.hasOutputFormat()) {
    sd.setOutputFormat(proto.getOutputFormat());
  }
  sd.setCompressed(proto.getIsCompressed());
  sd.setNumBuckets(proto.getNumBuckets());
  if (proto.hasSerdeInfo()) {
    SerDeInfo serde = new SerDeInfo();
    serde.setName(proto.getSerdeInfo().hasName() ? proto.getSerdeInfo().getName() : null);
    serde.setSerializationLib(proto.getSerdeInfo().hasSerializationLib()
        ? proto.getSerdeInfo().getSerializationLib() : null);
    serde.setParameters(buildParameters(proto.getSerdeInfo().getParameters()));
    sd.setSerdeInfo(serde);
  }
  sd.setBucketCols(new ArrayList<>(proto.getBucketColsList()));
  List<Order> sortCols = new ArrayList<>();
  for (HbaseMetastoreProto.StorageDescriptor.Order protoOrder : proto.getSortColsList()) {
    sortCols.add(new Order(protoOrder.getColumnName(), protoOrder.getOrder()));
  }
  sd.setSortCols(sortCols);
  if (proto.hasSkewedInfo()) {
    SkewedInfo skewed = new SkewedInfo();
    skewed.setSkewedColNames(new ArrayList<>(proto.getSkewedInfo().getSkewedColNamesList()));
    // Skewed column values are a list of lists; each inner list is one skewed value tuple.
    for (HbaseMetastoreProto.StorageDescriptor.SkewedInfo.SkewedColValueList innerList : proto.getSkewedInfo().getSkewedColValuesList()) {
      skewed.addToSkewedColValues(new ArrayList<>(innerList.getSkewedColValueList()));
    }
    Map<List<String>, String> colMaps = new HashMap<>();
    for (HbaseMetastoreProto.StorageDescriptor.SkewedInfo.SkewedColValueLocationMap map : proto.getSkewedInfo().getSkewedColValueLocationMapsList()) {
      colMaps.put(new ArrayList<>(map.getKeyList()), map.getValue());
    }
    skewed.setSkewedColValueLocationMaps(colMaps);
    sd.setSkewedInfo(skewed);
  }
  if (proto.hasStoredAsSubDirectories()) {
    sd.setStoredAsSubDirectories(proto.getStoredAsSubDirectories());
  }
  return sd;
}
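A minimal sketch (not from the Hive sources) of how this deserializer could be exercised against a hand-built message. The builder setters setIsCompressed and setNumBuckets are inferred from the generated getters used above via standard protobuf codegen conventions, and the sketch assumes the message's scalar fields are optional or defaulted:

@Test
public void deserializeMinimalProto() throws Exception {
  // Build a message with only two scalar fields set; repeated fields default to empty.
  HbaseMetastoreProto.StorageDescriptor proto = HbaseMetastoreProto.StorageDescriptor.newBuilder()
      .setIsCompressed(true)  // inferred setter, mirrors proto.getIsCompressed()
      .setNumBuckets(4)       // inferred setter, mirrors proto.getNumBuckets()
      .build();
  StorageDescriptor sd = HBaseUtils.deserializeStorageDescriptor(proto.toByteArray());
  Assert.assertEquals(4, sd.getNumBuckets());
  Assert.assertTrue(sd.isCompressed());
  Assert.assertNull(sd.getInputFormat()); // optional field was absent, so it is never set
}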
Use of org.apache.hadoop.hive.metastore.api.SkewedInfo in project hive by apache.
In the class HBaseUtils, the method hashStorageDescriptor:
/**
 * Produce a hash for the storage descriptor.
 * @param sd storage descriptor to hash
 * @param md message digest to use when generating the hash
 * @return the hash as a byte array
 */
static byte[] hashStorageDescriptor(StorageDescriptor sd, MessageDigest md) {
  // Note all maps and lists have to be absolutely sorted. Otherwise we'll produce different
  // results for hashes based on the OS or JVM being used.
  md.reset();
  for (FieldSchema fs : sd.getCols()) {
    md.update(fs.getName().getBytes(ENCODING));
    md.update(fs.getType().getBytes(ENCODING));
    if (fs.getComment() != null) {
      md.update(fs.getComment().getBytes(ENCODING));
    }
  }
  if (sd.getInputFormat() != null) {
    md.update(sd.getInputFormat().getBytes(ENCODING));
  }
  if (sd.getOutputFormat() != null) {
    md.update(sd.getOutputFormat().getBytes(ENCODING));
  }
  md.update(sd.isCompressed() ? "true".getBytes(ENCODING) : "false".getBytes(ENCODING));
  md.update(Integer.toString(sd.getNumBuckets()).getBytes(ENCODING));
  if (sd.getSerdeInfo() != null) {
    SerDeInfo serde = sd.getSerdeInfo();
    if (serde.getName() != null) {
      md.update(serde.getName().getBytes(ENCODING));
    }
    if (serde.getSerializationLib() != null) {
      md.update(serde.getSerializationLib().getBytes(ENCODING));
    }
    if (serde.getParameters() != null) {
      SortedMap<String, String> params = new TreeMap<>(serde.getParameters());
      for (Map.Entry<String, String> param : params.entrySet()) {
        md.update(param.getKey().getBytes(ENCODING));
        md.update(param.getValue().getBytes(ENCODING));
      }
    }
  }
  if (sd.getBucketCols() != null) {
    SortedSet<String> bucketCols = new TreeSet<>(sd.getBucketCols());
    for (String bucket : bucketCols) md.update(bucket.getBytes(ENCODING));
  }
  if (sd.getSortCols() != null) {
    SortedSet<Order> orders = new TreeSet<>(sd.getSortCols());
    for (Order order : orders) {
      md.update(order.getCol().getBytes(ENCODING));
      md.update(Integer.toString(order.getOrder()).getBytes(ENCODING));
    }
  }
  if (sd.getSkewedInfo() != null) {
    SkewedInfo skewed = sd.getSkewedInfo();
    if (skewed.getSkewedColNames() != null) {
      SortedSet<String> colnames = new TreeSet<>(skewed.getSkewedColNames());
      for (String colname : colnames) md.update(colname.getBytes(ENCODING));
    }
    if (skewed.getSkewedColValues() != null) {
      SortedSet<String> sortedOuterList = new TreeSet<>();
      for (List<String> innerList : skewed.getSkewedColValues()) {
        SortedSet<String> sortedInnerList = new TreeSet<>(innerList);
        sortedOuterList.add(StringUtils.join(sortedInnerList, "."));
      }
      for (String colval : sortedOuterList) md.update(colval.getBytes(ENCODING));
    }
    if (skewed.getSkewedColValueLocationMaps() != null) {
      SortedMap<String, String> sortedMap = new TreeMap<>();
      for (Map.Entry<List<String>, String> smap : skewed.getSkewedColValueLocationMaps().entrySet()) {
        SortedSet<String> sortedKey = new TreeSet<>(smap.getKey());
        sortedMap.put(StringUtils.join(sortedKey, "."), smap.getValue());
      }
      for (Map.Entry<String, String> e : sortedMap.entrySet()) {
        md.update(e.getKey().getBytes(ENCODING));
        md.update(e.getValue().getBytes(ENCODING));
      }
    }
  }
  return md.digest();
}
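Because every map and list is copied into a sorted collection before being fed to the digest, two descriptors that differ only in insertion order hash identically. A small illustrative test of that property (not from the Hive sources), assuming an MD5 digest; any MessageDigest works, since the method resets it first:

@Test
public void hashIgnoresParameterOrder() throws Exception {
  MessageDigest md = MessageDigest.getInstance("MD5");

  StorageDescriptor sd1 = new StorageDescriptor();
  sd1.setCols(Collections.singletonList(new FieldSchema("id", "int", null)));
  SerDeInfo serde1 = new SerDeInfo();
  Map<String, String> params1 = new LinkedHashMap<>();
  params1.put("a", "1");
  params1.put("b", "2");
  serde1.setParameters(params1);
  sd1.setSerdeInfo(serde1);

  StorageDescriptor sd2 = new StorageDescriptor();
  sd2.setCols(Collections.singletonList(new FieldSchema("id", "int", null)));
  SerDeInfo serde2 = new SerDeInfo();
  Map<String, String> params2 = new LinkedHashMap<>();
  params2.put("b", "2"); // same entries, reversed insertion order
  params2.put("a", "1");
  serde2.setParameters(params2);
  sd2.setSerdeInfo(serde2);

  // The TreeMap copy inside hashStorageDescriptor makes insertion order irrelevant.
  Assert.assertArrayEquals(HBaseUtils.hashStorageDescriptor(sd1, md),
      HBaseUtils.hashStorageDescriptor(sd2, md));
}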
Use of org.apache.hadoop.hive.metastore.api.SkewedInfo in project hive by apache.
In the class TestSharedStorageDescriptor, the method changeOnUnset:
@Test
public void changeOnUnset() {
  StorageDescriptor sd = new StorageDescriptor();
  SkewedInfo skew = new SkewedInfo();
  sd.setSkewedInfo(skew);
  SharedStorageDescriptor ssd = new SharedStorageDescriptor();
  ssd.setShared(sd);
  ssd.unsetSkewedInfo();
  Assert.assertFalse(sd.getSkewedInfo() == ssd.getSkewedInfo());
}
Use of org.apache.hadoop.hive.metastore.api.SkewedInfo in project hive by apache.
In the class TestSharedStorageDescriptor, the method changeOnSkewed:
@Test
public void changeOnSkewed() {
  StorageDescriptor sd = new StorageDescriptor();
  SkewedInfo skew = new SkewedInfo();
  sd.setSkewedInfo(skew);
  SharedStorageDescriptor ssd = new SharedStorageDescriptor();
  ssd.setShared(sd);
  ssd.setSkewedInfo(new SkewedInfo());
  Assert.assertFalse(sd.getSkewedInfo() == ssd.getSkewedInfo());
}
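Both tests pin down the same copy-on-write contract: any mutation through SharedStorageDescriptor, including an unset, must first detach its state from the shared backing StorageDescriptor. A generic sketch of that pattern, using the Thrift-generated deep-copy constructor; this is illustrative only, not Hive's actual SharedStorageDescriptor implementation:

public class CopyOnWriteDescriptor {
  private StorageDescriptor shared;  // may still be referenced by other readers
  private boolean copied = false;

  public void setShared(StorageDescriptor sd) {
    shared = sd;
    copied = false;
  }

  private void copyOnWrite() {
    if (!copied) {
      shared = new StorageDescriptor(shared); // Thrift generates this deep-copy constructor
      copied = true;
    }
  }

  public void setSkewedInfo(SkewedInfo skew) {
    copyOnWrite(); // detach before mutating
    shared.setSkewedInfo(skew);
  }

  public void unsetSkewedInfo() {
    copyOnWrite(); // unsetting is a mutation too, as changeOnUnset asserts
    shared.unsetSkewedInfo();
  }

  public SkewedInfo getSkewedInfo() {
    return shared.getSkewedInfo();
  }
}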
Use of org.apache.hadoop.hive.metastore.api.SkewedInfo in project hive by apache.
In the class ObjectStore, the method convertToStorageDescriptor:
// MSD and SD should be the same objects; not sure how to make them the same right now.
// MSerDeInfo and SerDeInfo should be the same as well.
private StorageDescriptor convertToStorageDescriptor(MStorageDescriptor msd, boolean noFS) throws MetaException {
  if (msd == null) {
    return null;
  }
  List<MFieldSchema> mFieldSchemas = msd.getCD() == null ? null : msd.getCD().getCols();
  StorageDescriptor sd = new StorageDescriptor(
      noFS ? null : convertToFieldSchemas(mFieldSchemas),
      msd.getLocation(), msd.getInputFormat(), msd.getOutputFormat(),
      msd.isCompressed(), msd.getNumBuckets(),
      convertToSerDeInfo(msd.getSerDeInfo()),
      convertList(msd.getBucketCols()), convertToOrders(msd.getSortCols()),
      convertMap(msd.getParameters()));
  SkewedInfo skewedInfo = new SkewedInfo(convertList(msd.getSkewedColNames()),
      convertToSkewedValues(msd.getSkewedColValues()),
      covertToSkewedMap(msd.getSkewedColValueLocationMaps()));
  sd.setSkewedInfo(skewedInfo);
  sd.setStoredAsSubDirectories(msd.isStoredAsSubDirectories());
  return sd;
}
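The three-argument SkewedInfo constructor used above takes the skewed column names, the skewed value tuples, and the tuple-to-location map, in that order. A stand-alone sketch with hypothetical literal values in place of the M* conversions:

StorageDescriptor sd = new StorageDescriptor();
List<String> names = Arrays.asList("region");
List<List<String>> values = Collections.singletonList(Arrays.asList("EU"));
Map<List<String>, String> locations = new HashMap<>();
locations.put(Arrays.asList("EU"), "hdfs://warehouse/t/region=EU"); // hypothetical location
SkewedInfo skewedInfo = new SkewedInfo(names, values, locations);
sd.setSkewedInfo(skewedInfo);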