Search in sources:

Example 21 with PartitionSpec

Use of org.apache.hadoop.hive.metastore.api.PartitionSpec in the Apache Hive project.

From the class MetadataJSONSerializer, method deserializePartitionSpec:

@Override
public HCatPartitionSpec deserializePartitionSpec(List<String> hcatPartitionSpecStrings) throws HCatException {
    // Rehydrate each JSON string back into a thrift PartitionSpec, then hand the
    // whole list to the proxy factory and wrap it in an HCatPartitionSpec.
    List<PartitionSpec> partitionSpecs = new ArrayList<>();
    try {
        TDeserializer jsonDeserializer = new TDeserializer(new TJSONProtocol.Factory());
        for (String serializedSpec : hcatPartitionSpecStrings) {
            PartitionSpec spec = new PartitionSpec();
            jsonDeserializer.deserialize(spec, serializedSpec, "UTF-8");
            partitionSpecs.add(spec);
        }
        return new HCatPartitionSpec(null, PartitionSpecProxy.Factory.get(partitionSpecs));
    } catch (TException deserializationException) {
        // Wrap the thrift failure in the HCat-level exception callers expect.
        throw new HCatException("Failed to deserialize!", deserializationException);
    }
}
Also used : TException(org.apache.thrift.TException) TJSONProtocol(org.apache.thrift.protocol.TJSONProtocol) TDeserializer(org.apache.thrift.TDeserializer) ArrayList(java.util.ArrayList) HCatException(org.apache.hive.hcatalog.common.HCatException) PartitionSpec(org.apache.hadoop.hive.metastore.api.PartitionSpec)

Example 22 with PartitionSpec

Use of org.apache.hadoop.hive.metastore.api.PartitionSpec in the Apache Hive project.

From the class TestAddPartitionsFromPartSpec, method buildPartitionSpec:

private PartitionSpecProxy buildPartitionSpec(List<Partition> partitions, List<PartitionWithoutSD> partitionsWithoutSD) throws MetaException {
    // First spec: full partitions carried as a plain composing list.
    PartitionSpec composedSpec = new PartitionSpec();
    composedSpec.setDbName(DB_NAME);
    composedSpec.setTableName(TABLE_NAME);
    PartitionListComposingSpec composingList = new PartitionListComposingSpec();
    composingList.setPartitions(partitions);
    composedSpec.setPartitionList(composingList);
    // Second spec: SD-less partitions that all share one storage descriptor.
    PartitionSpec sharedSdSpec = new PartitionSpec();
    sharedSdSpec.setDbName(DB_NAME);
    sharedSdSpec.setTableName(TABLE_NAME);
    PartitionSpecWithSharedSD sharedSd = new PartitionSpecWithSharedSD();
    sharedSd.setPartitions(partitionsWithoutSD);
    sharedSd.setSd(buildSD(metaStore.getWarehouseRoot() + "/" + TABLE_NAME + "/sharedSDTest/"));
    sharedSdSpec.setSharedSDPartitionSpec(sharedSd);
    // Proxy over both specs, composing list first to match caller expectations.
    List<PartitionSpec> specs = new ArrayList<>();
    specs.add(composedSpec);
    specs.add(sharedSdSpec);
    return PartitionSpecProxy.Factory.get(specs);
}
Also used : ArrayList(java.util.ArrayList) PartitionListComposingSpec(org.apache.hadoop.hive.metastore.api.PartitionListComposingSpec) PartitionSpec(org.apache.hadoop.hive.metastore.api.PartitionSpec) PartitionSpecWithSharedSD(org.apache.hadoop.hive.metastore.api.PartitionSpecWithSharedSD)

Example 23 with PartitionSpec

Use of org.apache.hadoop.hive.metastore.api.PartitionSpec in the Apache Hive project.

From the class TestMetaStoreServerUtils, method testGetPartitionspecsGroupedBySDOnePartitionInTable:

/**
 * Test getPartitionspecsGroupedByStorageDescriptor() for partitions with a single
 * partition which is located under table location.
 */
@Test
public void testGetPartitionspecsGroupedBySDOnePartitionInTable() throws MetaException {
    // Create database and table
    Table tbl = new TableBuilder().setDbName(DB_NAME).setTableName(TABLE_NAME).addCol("id", "int").setLocation("/foo").build(null);
    // Use the DB_NAME constant (not the literal string "DB_NAME") so the partition
    // belongs to the same database as the table built above.
    Partition p1 = new PartitionBuilder().setDbName(DB_NAME).setTableName(TABLE_NAME).setLocation("/foo/bar").addCol("a", "int").addValue("val1").setInputFormat("foo").build(null);
    List<PartitionSpec> result = MetaStoreServerUtils.getPartitionspecsGroupedByStorageDescriptor(tbl, Collections.singleton(p1));
    // A single partition under the table root must collapse into one shared-SD spec.
    assertThat(result.size(), is(1));
    PartitionSpec ps = result.get(0);
    assertThat(ps.getRootPath(), is(tbl.getSd().getLocation()));
    List<PartitionWithoutSD> partitions = ps.getSharedSDPartitionSpec().getPartitions();
    assertThat(partitions.size(), is(1));
    PartitionWithoutSD partition = partitions.get(0);
    // Relative path is the partition location with the table prefix stripped.
    assertThat(partition.getRelativePath(), is("/bar"));
    assertThat(partition.getValues(), is(Collections.singletonList("val1")));
}
Also used : Partition(org.apache.hadoop.hive.metastore.api.Partition) Table(org.apache.hadoop.hive.metastore.api.Table) PartitionBuilder(org.apache.hadoop.hive.metastore.client.builder.PartitionBuilder) PartitionWithoutSD(org.apache.hadoop.hive.metastore.api.PartitionWithoutSD) TableBuilder(org.apache.hadoop.hive.metastore.client.builder.TableBuilder) PartitionSpec(org.apache.hadoop.hive.metastore.api.PartitionSpec) MetastoreUnitTest(org.apache.hadoop.hive.metastore.annotation.MetastoreUnitTest) Test(org.junit.Test)

Example 24 with PartitionSpec

Use of org.apache.hadoop.hive.metastore.api.PartitionSpec in the Apache Hive project.

From the class TestMetaStoreServerUtils, method testGetPartitionspecsGroupedBySDNullSD:

/**
 * Test getPartitionspecsGroupedByStorageDescriptor() for partitions with null SDs.
 */
@Test
public void testGetPartitionspecsGroupedBySDNullSD() throws MetaException {
    // Create database and table
    Table tbl = new TableBuilder().setDbName(DB_NAME).setTableName(TABLE_NAME).addCol("id", "int").setLocation("/foo").build(null);
    // Use the DB_NAME constant (not the literal string "DB_NAME") so the partition
    // belongs to the same database as the table built above.
    Partition p1 = new PartitionBuilder().setDbName(DB_NAME).setTableName(TABLE_NAME).addCol("a", "int").addValue("val1").setInputFormat("foo").build(null);
    // Set SD to null
    p1.unsetSd();
    assertThat(p1.getSd(), is((StorageDescriptor) null));
    List<PartitionSpec> result = MetaStoreServerUtils.getPartitionspecsGroupedByStorageDescriptor(tbl, Collections.singleton(p1));
    // An SD-less partition still yields one shared-SD spec, but with no root path.
    assertThat(result.size(), is(1));
    PartitionSpec ps = result.get(0);
    assertThat(ps.getRootPath(), is((String) null));
    List<PartitionWithoutSD> partitions = ps.getSharedSDPartitionSpec().getPartitions();
    assertThat(partitions.size(), is(1));
    PartitionWithoutSD partition = partitions.get(0);
    // Without an SD there is no location, hence no relative path either.
    assertThat(partition.getRelativePath(), is((String) null));
    assertThat(partition.getValues(), is(Collections.singletonList("val1")));
}
Also used : Partition(org.apache.hadoop.hive.metastore.api.Partition) Table(org.apache.hadoop.hive.metastore.api.Table) PartitionBuilder(org.apache.hadoop.hive.metastore.client.builder.PartitionBuilder) PartitionWithoutSD(org.apache.hadoop.hive.metastore.api.PartitionWithoutSD) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor) TableBuilder(org.apache.hadoop.hive.metastore.client.builder.TableBuilder) PartitionSpec(org.apache.hadoop.hive.metastore.api.PartitionSpec) MetastoreUnitTest(org.apache.hadoop.hive.metastore.annotation.MetastoreUnitTest) Test(org.junit.Test)

Example 25 with PartitionSpec

Use of org.apache.hadoop.hive.metastore.api.PartitionSpec in the Apache Hive project.

From the class MetaStoreServerUtils, method getPartitionspecsGroupedByStorageDescriptor:

/**
 * Coalesce list of partitions belonging to a table into a more compact PartitionSpec
 * representation.
 *
 * @param table Table thrift object
 * @param partitions List of partition objects
 * @return collection PartitionSpec objects which is a compressed representation of original
 * partition list.
 */
public static List<PartitionSpec> getPartitionspecsGroupedByStorageDescriptor(Table table, Collection<Partition> partitions) {
    final String tablePath = table.getSd().getLocation();
    // Index every partition under an SD-derived key; partitions that map to the same key
    // can later share a single StorageDescriptor inside one PartitionSpec.
    ImmutableListMultimap<StorageDescriptorKey, Partition> partitionsWithinTableDirectory = Multimaps.index(partitions, input -> {
        // partitions with no SD carry nothing worth comparing and
        // can be just grouped in PartitionSpec object
        if (input.getSd() == null) {
            return StorageDescriptorKey.UNSET_KEY;
        }
        // skewed partitions get their own key because they
        // could have different skewed info like skewed location
        if (input.getSd().getSkewedInfo() != null && input.getSd().getSkewedInfo().getSkewedColNames() != null && !input.getSd().getSkewedInfo().getSkewedColNames().isEmpty()) {
            return new StorageDescriptorKey(input.getSd());
        }
        // keep partitions with a different bucket count apart; merging them
        // this could lead to incorrect number of buckets
        // NOTE: partitions.iterator().next() must stay lazy here — evaluating it eagerly
        // would NPE when the first partition has a null SD even if no comparison is needed.
        if (input.getSd().getNumBuckets() != partitions.iterator().next().getSd().getNumBuckets()) {
            return new StorageDescriptorKey(input.getSd());
        }
        // group by table path when the partition location is set and falls
        // within the table directory
        if (input.getSd().getLocation() != null && input.getSd().getLocation().startsWith(tablePath)) {
            return new StorageDescriptorKey(tablePath, input.getSd());
        }
        // if the location is not set partitions are grouped according to the rest of the SD fields
        return new StorageDescriptorKey(input.getSd());
    });
    List<PartitionSpec> partSpecs = new ArrayList<>();
    // Classify partitions based on shared SD properties.
    Map<StorageDescriptorKey, List<PartitionWithoutSD>> sdToPartList = new HashMap<>();
    // we don't expect partitions to exist outside directory in most cases
    List<Partition> partitionsOutsideTableDir = new ArrayList<>(0);
    for (StorageDescriptorKey key : partitionsWithinTableDirectory.keySet()) {
        boolean isUnsetKey = key.equals(StorageDescriptorKey.UNSET_KEY);
        // case III : when sd.location is set and it is located within table directory
        if (isUnsetKey || key.baseLocation == null || key.baseLocation.equals(tablePath)) {
            for (Partition partition : partitionsWithinTableDirectory.get(key)) {
                PartitionWithoutSD partitionWithoutSD = new PartitionWithoutSD();
                partitionWithoutSD.setValues(partition.getValues());
                partitionWithoutSD.setCreateTime(partition.getCreateTime());
                partitionWithoutSD.setLastAccessTime(partition.getLastAccessTime());
                // relative path = partition location minus the table prefix; null when the
                // partition has no SD or no explicit location
                partitionWithoutSD.setRelativePath((isUnsetKey || !partition.getSd().isSetLocation()) ? null : partition.getSd().getLocation().substring(tablePath.length()));
                partitionWithoutSD.setParameters(partition.getParameters());
                // computeIfAbsent replaces the containsKey/put/get triple map lookup
                sdToPartList.computeIfAbsent(key, k -> new ArrayList<>()).add(partitionWithoutSD);
            }
        } else {
            // Lump all partitions outside the tablePath into one PartSpec.
            // if non-standard partitions need not be deDuped create PartitionListComposingSpec
            // this will be used mostly for keeping backwards compatibility with  some HMS APIs which use
            // PartitionListComposingSpec for non-standard partitions located outside table
            partitionsOutsideTableDir.addAll(partitionsWithinTableDirectory.get(key));
        }
    }
    // create sharedSDPartSpec for all the groupings
    for (Map.Entry<StorageDescriptorKey, List<PartitionWithoutSD>> entry : sdToPartList.entrySet()) {
        partSpecs.add(getSharedSDPartSpec(table, entry.getKey(), entry.getValue()));
    }
    // all out-of-directory partitions go into a single list-composing spec
    if (!partitionsOutsideTableDir.isEmpty()) {
        PartitionSpec partListSpec = new PartitionSpec();
        partListSpec.setCatName(table.getCatName());
        partListSpec.setDbName(table.getDbName());
        partListSpec.setTableName(table.getTableName());
        partListSpec.setPartitionList(new PartitionListComposingSpec(partitionsOutsideTableDir));
        partSpecs.add(partListSpec);
    }
    return partSpecs;
}
Also used : Partition(org.apache.hadoop.hive.metastore.api.Partition) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) PartitionListComposingSpec(org.apache.hadoop.hive.metastore.api.PartitionListComposingSpec) PartitionSpec(org.apache.hadoop.hive.metastore.api.PartitionSpec) PartitionWithoutSD(org.apache.hadoop.hive.metastore.api.PartitionWithoutSD) MachineList(org.apache.hadoop.util.MachineList) List(java.util.List) ArrayList(java.util.ArrayList) Map(java.util.Map) SortedMap(java.util.SortedMap) HashMap(java.util.HashMap) TreeMap(java.util.TreeMap)

Aggregations

PartitionSpec (org.apache.hadoop.hive.metastore.api.PartitionSpec)27 Test (org.junit.Test)14 Partition (org.apache.hadoop.hive.metastore.api.Partition)13 PartitionWithoutSD (org.apache.hadoop.hive.metastore.api.PartitionWithoutSD)12 ArrayList (java.util.ArrayList)11 MetastoreCheckinTest (org.apache.hadoop.hive.metastore.annotation.MetastoreCheckinTest)10 PartitionSpecWithSharedSD (org.apache.hadoop.hive.metastore.api.PartitionSpecWithSharedSD)10 Table (org.apache.hadoop.hive.metastore.api.Table)9 PartitionListComposingSpec (org.apache.hadoop.hive.metastore.api.PartitionListComposingSpec)6 StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor)6 PartitionsByExprRequest (org.apache.hadoop.hive.metastore.api.PartitionsByExprRequest)5 List (java.util.List)4 MetastoreUnitTest (org.apache.hadoop.hive.metastore.annotation.MetastoreUnitTest)4 PartitionBuilder (org.apache.hadoop.hive.metastore.client.builder.PartitionBuilder)4 TableBuilder (org.apache.hadoop.hive.metastore.client.builder.TableBuilder)4 PartitionSpecProxy (org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy)4 HashMap (java.util.HashMap)3 GetPartitionsRequest (org.apache.hadoop.hive.metastore.api.GetPartitionsRequest)3 GetPartitionsResponse (org.apache.hadoop.hive.metastore.api.GetPartitionsResponse)3 GetProjectionsSpec (org.apache.hadoop.hive.metastore.api.GetProjectionsSpec)3