Example 76 with Partition

use of org.apache.hadoop.hive.metastore.api.Partition in project incubator-gobblin by apache.

the class LocalHiveMetastoreTestUtils method createDummyPartition.

public org.apache.hadoop.hive.ql.metadata.Partition createDummyPartition(long createTime) {
    org.apache.hadoop.hive.ql.metadata.Partition partition = new org.apache.hadoop.hive.ql.metadata.Partition();
    Partition tPartition = new Partition();
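    // The metastore stores createTime as an int number of seconds; convert from the caller's millis.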
    tPartition.setCreateTime((int) TimeUnit.SECONDS.convert(createTime, TimeUnit.MILLISECONDS));
    partition.setTPartition(tPartition);
    return partition;
}
Also used : Partition(org.apache.hadoop.hive.metastore.api.Partition)
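
The helper's seconds conversion is easy to get wrong, so here is a minimal usage sketch, assuming the test class sits in the same package as LocalHiveMetastoreTestUtils and that a getInstance() accessor exists (both assumptions for illustration):

import java.util.concurrent.TimeUnit;

import org.junit.Assert;
import org.junit.Test;

public class CreateDummyPartitionSketch {

    // Assumed accessor; how the utils instance is obtained may differ in the real test code.
    private final LocalHiveMetastoreTestUtils utils = LocalHiveMetastoreTestUtils.getInstance();

    @Test
    public void createTimeIsStoredInSeconds() {
        long nowMillis = System.currentTimeMillis();
        org.apache.hadoop.hive.ql.metadata.Partition partition = utils.createDummyPartition(nowMillis);
        // TimeUnit.SECONDS.convert(x, MILLISECONDS) is equivalent to TimeUnit.MILLISECONDS.toSeconds(x).
        int expectedSeconds = (int) TimeUnit.MILLISECONDS.toSeconds(nowMillis);
        Assert.assertEquals(expectedSeconds, partition.getTPartition().getCreateTime());
    }
}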

Example 77 with Partition

use of org.apache.hadoop.hive.metastore.api.Partition in project incubator-gobblin by apache.

the class HiveRetentionTest method testTimeBasedHiveRetention.

private void testTimeBasedHiveRetention(String purgedDbName, String purgedTableName, String configFileName, boolean isReplacementTest) throws Exception {
    try {
        DateTimeUtils.setCurrentMillisProvider(new FixedThreadLocalMillisProvider(FORMATTER.parseDateTime("2016-01-10-00").getMillis()));
        // Set up db and table to purge, creating 4 partitions: 2 will be deleted and 2 will be retained
        String purgedTableSdLoc = new Path(testTempPath, purgedDbName + purgedTableName).toString();
        this.hiveMetastoreTestUtils.dropDatabaseIfExists(purgedDbName);
        final Table purgedTbl = this.hiveMetastoreTestUtils.createTestAvroTable(purgedDbName, purgedTableName, purgedTableSdLoc, ImmutableList.of("datepartition"), false);
        // Set up db, table, and partitions to act as the replacement partitions source
        String replacementSourceTableSdLoc = new Path(testTempPath, purgedDbName + purgedTableName + "_source").toString();
        String replacementDbName = purgedDbName + "_source";
        String replacementTableName = purgedTableName + "_source";
        this.hiveMetastoreTestUtils.dropDatabaseIfExists(replacementDbName);
        final Table replacementTbl = this.hiveMetastoreTestUtils.createTestAvroTable(replacementDbName, replacementTableName, replacementSourceTableSdLoc, ImmutableList.of("datepartition"), false);
        String deleted1 = "2016-01-01-00";
        String deleted2 = "2016-01-02-02";
        String retained1 = "2016-01-03-04";
        String retained2 = "2016-01-07-06";
        // Create partitions in table being purged
        Partition pDeleted1 = this.hiveMetastoreTestUtils.addTestPartition(purgedTbl, ImmutableList.of(deleted1), (int) System.currentTimeMillis());
        Partition pDeleted2 = this.hiveMetastoreTestUtils.addTestPartition(purgedTbl, ImmutableList.of(deleted2), (int) System.currentTimeMillis());
        Partition pRetained1 = this.hiveMetastoreTestUtils.addTestPartition(purgedTbl, ImmutableList.of(retained1), (int) System.currentTimeMillis());
        Partition pRetained2 = this.hiveMetastoreTestUtils.addTestPartition(purgedTbl, ImmutableList.of(retained2), (int) System.currentTimeMillis());
        this.fs.mkdirs(new Path(pDeleted1.getSd().getLocation()));
        this.fs.mkdirs(new Path(pDeleted2.getSd().getLocation()));
        this.fs.mkdirs(new Path(pRetained1.getSd().getLocation()));
        this.fs.mkdirs(new Path(pRetained2.getSd().getLocation()));
        // Create partitions in table that is replacement source
        Partition rReplaced1 = this.hiveMetastoreTestUtils.addTestPartition(replacementTbl, ImmutableList.of(deleted1), (int) System.currentTimeMillis());
        Partition rReplaced2 = this.hiveMetastoreTestUtils.addTestPartition(replacementTbl, ImmutableList.of(deleted2), (int) System.currentTimeMillis());
        Partition rUntouched1 = this.hiveMetastoreTestUtils.addTestPartition(replacementTbl, ImmutableList.of(retained1), (int) System.currentTimeMillis());
        Partition rUntouched2 = this.hiveMetastoreTestUtils.addTestPartition(replacementTbl, ImmutableList.of(retained2), (int) System.currentTimeMillis());
        this.fs.mkdirs(new Path(rReplaced1.getSd().getLocation()));
        this.fs.mkdirs(new Path(rReplaced2.getSd().getLocation()));
        this.fs.mkdirs(new Path(rUntouched1.getSd().getLocation()));
        this.fs.mkdirs(new Path(rUntouched2.getSd().getLocation()));
        List<Partition> pPartitions = this.hiveMetastoreTestUtils.getLocalMetastoreClient().listPartitions(purgedDbName, purgedTableName, (short) 10);
        Assert.assertEquals(pPartitions.size(), 4);
        List<Partition> rPartitions = this.hiveMetastoreTestUtils.getLocalMetastoreClient().listPartitions(replacementDbName, replacementTableName, (short) 10);
        Assert.assertEquals(rPartitions.size(), 4);
        // Run retention
        RetentionTestHelper.clean(fs, PathUtils.combinePaths(RetentionIntegrationTest.TEST_PACKAGE_RESOURCE_NAME, "testHiveTimeBasedRetention", configFileName), Optional.of(PathUtils.combinePaths(RetentionIntegrationTest.TEST_PACKAGE_RESOURCE_NAME, "testHiveTimeBasedRetention", "jobProps.properties")), testTempPath);
        pPartitions = this.hiveMetastoreTestUtils.getLocalMetastoreClient().listPartitions(purgedDbName, purgedTableName, (short) 10);
        String[] expectedRetainedPartitions;
        if (isReplacementTest) {
            // If this is a replacement test, 2 partitions must be replaced, hence the total count must be 4
            Assert.assertEquals(pPartitions.size(), 4);
            expectedRetainedPartitions = new String[] { getQlPartition(purgedTbl, pRetained1).getName(), getQlPartition(purgedTbl, pRetained2).getName(), getQlPartition(purgedTbl, pDeleted1).getName(), getQlPartition(purgedTbl, pDeleted2).getName() };
        } else {
            // If not a replacement test, 2 partitions must be purged
            Assert.assertEquals(pPartitions.size(), 2);
            expectedRetainedPartitions = new String[] { getQlPartition(purgedTbl, pRetained1).getName(), getQlPartition(purgedTbl, pRetained2).getName() };
        }
        // Check that all remaining partitions are exactly those expected
        assertThat(FluentIterable.from(pPartitions).transform(new Function<Partition, String>() {

            @Override
            public String apply(Partition input) {
                return getQlPartition(purgedTbl, input).getName();
            }
        }).toList(), containsInAnyOrder(expectedRetainedPartitions));
        // Check that replaced partitions point to the correct physical location
        if (isReplacementTest) {
            for (Partition partition : pPartitions) {
                if (getQlPartition(purgedTbl, partition).getName().equalsIgnoreCase(getQlPartition(purgedTbl, pDeleted1).getName())) {
                    Assert.assertEquals(partition.getSd().getLocation(), rReplaced1.getSd().getLocation(), "Replaced partition location not updated.");
                }
                if (getQlPartition(purgedTbl, partition).getName().equalsIgnoreCase(getQlPartition(purgedTbl, pDeleted2).getName())) {
                    Assert.assertEquals(partition.getSd().getLocation(), rReplaced2.getSd().getLocation(), "Replaced partition location not updated.");
                }
            }
        }
        // Irrespective of whether it is a replacement test, purged partition directories must be deleted
        Assert.assertTrue(this.fs.exists(new Path(pRetained1.getSd().getLocation())));
        Assert.assertTrue(this.fs.exists(new Path(pRetained2.getSd().getLocation())));
        Assert.assertFalse(this.fs.exists(new Path(pDeleted1.getSd().getLocation())));
        Assert.assertFalse(this.fs.exists(new Path(pDeleted2.getSd().getLocation())));
        // Replacement source partition directories must be left untouched
        Assert.assertTrue(this.fs.exists(new Path(rReplaced1.getSd().getLocation())));
        Assert.assertTrue(this.fs.exists(new Path(rReplaced2.getSd().getLocation())));
        Assert.assertTrue(this.fs.exists(new Path(rUntouched1.getSd().getLocation())));
        Assert.assertTrue(this.fs.exists(new Path(rUntouched2.getSd().getLocation())));
    } finally {
        DateTimeUtils.setCurrentMillisSystem();
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Partition(org.apache.hadoop.hive.metastore.api.Partition) Table(org.apache.hadoop.hive.metastore.api.Table) FixedThreadLocalMillisProvider(org.apache.gobblin.util.test.RetentionTestDataGenerator.FixedThreadLocalMillisProvider)
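
The assertions above lean on a getQlPartition helper that turns a thrift-level Partition into a ql-level one whose getName() yields the partition spec string (e.g. datepartition=2016-01-01-00). A plausible sketch of such a helper is below; the actual implementation in the test base class may differ, but both ql-level constructors used here exist in Hive:

import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.ql.metadata.HiveException;

// Wrap the thrift-level Table and Partition in their ql-level counterparts so getName() is available.
private static org.apache.hadoop.hive.ql.metadata.Partition getQlPartition(Table table, Partition partition) {
    try {
        return new org.apache.hadoop.hive.ql.metadata.Partition(
                new org.apache.hadoop.hive.ql.metadata.Table(table), partition);
    } catch (HiveException e) {
        throw new RuntimeException(e);
    }
}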

Example 78 with Partition

use of org.apache.hadoop.hive.metastore.api.Partition in project drill by axbaretto.

the class HiveScan method getSpecificScan.

@Override
public SubScan getSpecificScan(final int minorFragmentId) throws ExecutionSetupException {
    try {
        final List<LogicalInputSplit> splits = mappings.get(minorFragmentId);
        List<HivePartitionWrapper> parts = Lists.newArrayList();
        final List<List<String>> encodedInputSplits = Lists.newArrayList();
        final List<String> splitTypes = Lists.newArrayList();
        for (final LogicalInputSplit split : splits) {
            final Partition splitPartition = split.getPartition();
            if (splitPartition != null) {
                HiveTableWithColumnCache table = hiveReadEntry.getTable();
                parts.add(createPartitionWithSpecColumns(new HiveTableWithColumnCache(table, new ColumnListsCache(table)), splitPartition));
            }
            encodedInputSplits.add(split.serialize());
            splitTypes.add(split.getType());
        }
        if (parts.size() <= 0) {
            parts = null;
        }
        final HiveReadEntry subEntry = new HiveReadEntry(hiveReadEntry.getTableWrapper(), parts);
        return new HiveSubScan(getUserName(), encodedInputSplits, subEntry, splitTypes, columns, hiveStoragePlugin);
    } catch (IOException | ReflectiveOperationException e) {
        throw new ExecutionSetupException(e);
    }
}
Also used : ExecutionSetupException(org.apache.drill.common.exceptions.ExecutionSetupException) Partition(org.apache.hadoop.hive.metastore.api.Partition) LogicalInputSplit(org.apache.drill.exec.store.hive.HiveMetadataProvider.LogicalInputSplit) IOException(java.io.IOException) HivePartitionWrapper(org.apache.drill.exec.store.hive.HiveTableWrapper.HivePartitionWrapper) ArrayList(java.util.ArrayList) List(java.util.List)
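
getSpecificScan consumes a mappings structure with one list of splits per minor fragment, built earlier in the scan's lifecycle (in Drill, by applyAssignments). Purely as an illustration of that shape, not Drill's actual assignment logic, a round-robin distribution could look like:

import java.util.ArrayList;
import java.util.List;

import org.apache.drill.exec.store.hive.HiveMetadataProvider.LogicalInputSplit;

// Distribute splits across minor fragments round-robin; mappings.get(i) holds fragment i's splits.
private static List<List<LogicalInputSplit>> assignSplits(List<LogicalInputSplit> inputSplits, int fragmentCount) {
    List<List<LogicalInputSplit>> mappings = new ArrayList<>(fragmentCount);
    for (int i = 0; i < fragmentCount; i++) {
        mappings.add(new ArrayList<LogicalInputSplit>());
    }
    for (int i = 0; i < inputSplits.size(); i++) {
        mappings.get(i % fragmentCount).add(inputSplits.get(i));
    }
    return mappings;
}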

Example 79 with Partition

use of org.apache.hadoop.hive.metastore.api.Partition in project drill by apache.

the class HiveUtilities method getPartitionMetadata.

/**
 * Wrapper around {@code MetaStoreUtils#getPartitionMetadata(org.apache.hadoop.hive.metastore.api.Partition, Table)}
 * which also adds parameters from table to properties returned by that method.
 *
 * @param partition the source of partition level parameters
 * @param table     the source of table level parameters
 * @return properties
 */
public static Properties getPartitionMetadata(final HivePartition partition, final HiveTableWithColumnCache table) {
    restoreColumns(table, partition);
    try {
        Properties properties = new org.apache.hadoop.hive.ql.metadata.Partition(new org.apache.hadoop.hive.ql.metadata.Table(table), partition).getMetadataFromPartitionSchema();
        // SerDe expects properties from the Table, but the above call doesn't add Table properties.
        // Include Table properties in the final list in order not to break SerDes that depend on
        // Table properties. For example, AvroSerDe gets the schema from properties (passed as the second argument).
        table.getParameters().entrySet().stream().filter(e -> e.getKey() != null && e.getValue() != null).forEach(e -> properties.put(e.getKey(), e.getValue()));
        return properties;
    } catch (HiveException e) {
        throw new DrillRuntimeException(e);
    }
}
Also used : DateTimeZone(org.joda.time.DateTimeZone) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) ExecErrorConstants(org.apache.drill.exec.work.ExecErrorConstants) UserException(org.apache.drill.common.exceptions.UserException) NullableTimeStampVector(org.apache.drill.exec.vector.NullableTimeStampVector) LoggerFactory(org.slf4j.LoggerFactory) Types(org.apache.drill.common.types.Types) NullableVarCharVector(org.apache.drill.exec.vector.NullableVarCharVector) MapColumnMetadata(org.apache.drill.exec.record.metadata.MapColumnMetadata) HiveToRelDataTypeConverter(org.apache.drill.exec.planner.types.HiveToRelDataTypeConverter) NullableVarBinaryVector(org.apache.drill.exec.vector.NullableVarBinaryVector) BigDecimal(java.math.BigDecimal) MajorType(org.apache.drill.common.types.TypeProtos.MajorType) Decimal9Holder(org.apache.drill.exec.expr.holders.Decimal9Holder) HiveDecimalUtils(org.apache.hadoop.hive.serde2.typeinfo.HiveDecimalUtils) Map(java.util.Map) InputFormat(org.apache.hadoop.mapred.InputFormat) ColumnMetadata(org.apache.drill.exec.record.metadata.ColumnMetadata) PrimitiveColumnMetadata(org.apache.drill.exec.record.metadata.PrimitiveColumnMetadata) DrillBuf(io.netty.buffer.DrillBuf) NullableBigIntVector(org.apache.drill.exec.vector.NullableBigIntVector) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) TupleSchema(org.apache.drill.exec.record.metadata.TupleSchema) HiveStorageHandler(org.apache.hadoop.hive.ql.metadata.HiveStorageHandler) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor) RoundingMode(java.math.RoundingMode) NullableFloat4Vector(org.apache.drill.exec.vector.NullableFloat4Vector) Decimal18Holder(org.apache.drill.exec.expr.holders.Decimal18Holder) ValueVector(org.apache.drill.exec.vector.ValueVector) Decimal28SparseHolder(org.apache.drill.exec.expr.holders.Decimal28SparseHolder) Timestamp(java.sql.Timestamp) OptionSet(org.apache.drill.exec.server.options.OptionSet) org.apache.hadoop.hive.serde.serdeConstants(org.apache.hadoop.hive.serde.serdeConstants) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) Collectors(java.util.stream.Collectors) TypeProtos(org.apache.drill.common.types.TypeProtos) List(java.util.List) RelDataTypeField(org.apache.calcite.rel.type.RelDataTypeField) MinorType(org.apache.drill.common.types.TypeProtos.MinorType) Decimal38SparseHolder(org.apache.drill.exec.expr.holders.Decimal38SparseHolder) Preconditions(org.apache.drill.shaded.guava.com.google.common.base.Preconditions) NullableDecimal9Vector(org.apache.drill.exec.vector.NullableDecimal9Vector) AcidUtils(org.apache.hadoop.hive.ql.io.AcidUtils) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) TypeInfoUtils(org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) MetadataUtils(org.apache.drill.exec.record.metadata.MetadataUtils) NullableIntVector(org.apache.drill.exec.vector.NullableIntVector) DrillScanRel(org.apache.drill.exec.planner.logical.DrillScanRel) DrillRuntimeException(org.apache.drill.common.exceptions.DrillRuntimeException) IOConstants(org.apache.hadoop.hive.ql.io.IOConstants) MaterializedField(org.apache.drill.exec.record.MaterializedField) Partition(org.apache.hadoop.hive.metastore.api.Partition) Utilities(org.apache.hadoop.hive.ql.exec.Utilities) Category(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category) DataMode(org.apache.drill.common.types.TypeProtos.DataMode) 
NullableDateVector(org.apache.drill.exec.vector.NullableDateVector) NullableDecimal18Vector(org.apache.drill.exec.vector.NullableDecimal18Vector) ExecutionSetupException(org.apache.drill.common.exceptions.ExecutionSetupException) NullableFloat8Vector(org.apache.drill.exec.vector.NullableFloat8Vector) NullableBitVector(org.apache.drill.exec.vector.NullableBitVector) RelDataType(org.apache.calcite.rel.type.RelDataType) Logger(org.slf4j.Logger) Properties(java.util.Properties) TypeInferenceUtils(org.apache.drill.exec.planner.sql.TypeInferenceUtils) Strings(org.apache.drill.shaded.guava.com.google.common.base.Strings) HiveUtils(org.apache.hadoop.hive.ql.metadata.HiveUtils) NullableVarDecimalVector(org.apache.drill.exec.vector.NullableVarDecimalVector) HiveConf(org.apache.hadoop.hive.conf.HiveConf) DateTime(org.joda.time.DateTime) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) RelOptRuleCall(org.apache.calcite.plan.RelOptRuleCall) Table(org.apache.hadoop.hive.metastore.api.Table) Date(java.sql.Date) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) JobConf(org.apache.hadoop.mapred.JobConf) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) META_TABLE_STORAGE(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE) PlannerSettings(org.apache.drill.exec.planner.physical.PlannerSettings) DecimalUtility(org.apache.drill.exec.util.DecimalUtility) HiveDecimal(org.apache.hadoop.hive.common.type.HiveDecimal) NullableDecimal28SparseVector(org.apache.drill.exec.vector.NullableDecimal28SparseVector) NullableDecimal38SparseVector(org.apache.drill.exec.vector.NullableDecimal38SparseVector) BaseCharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo)
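
The property-merging step is the interesting part of this example: partition-level properties are built first, then every non-null table-level parameter is copied in, which, as written, means a table-level value overwrites a same-keyed partition-level one. The overlay in isolation, as a plain-JDK sketch:

import java.util.Map;
import java.util.Properties;

// Copy non-null table parameters over partition properties so SerDes that read
// table-level keys (e.g. an Avro schema literal) still find them.
static Properties overlayTableParameters(Properties partitionProperties, Map<String, String> tableParameters) {
    tableParameters.entrySet().stream()
            .filter(e -> e.getKey() != null && e.getValue() != null)
            .forEach(e -> partitionProperties.put(e.getKey(), e.getValue()));
    return partitionProperties;
}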

Example 80 with Partition

use of org.apache.hadoop.hive.metastore.api.Partition in project drill by apache.

the class HiveScan method getSpecificScan.

@Override
public SubScan getSpecificScan(final int minorFragmentId) throws ExecutionSetupException {
    try {
        final List<LogicalInputSplit> splits = mappings.get(minorFragmentId);
        List<HivePartitionWrapper> parts = new ArrayList<>();
        final List<List<String>> encodedInputSplits = new ArrayList<>();
        final List<String> splitTypes = new ArrayList<>();
        for (final LogicalInputSplit split : splits) {
            final Partition splitPartition = split.getPartition();
            if (splitPartition != null) {
                HiveTableWithColumnCache table = hiveReadEntry.getTable();
                parts.add(createPartitionWithSpecColumns(new HiveTableWithColumnCache(table, new ColumnListsCache(table)), splitPartition));
            }
            encodedInputSplits.add(split.serialize());
            splitTypes.add(split.getType());
        }
        if (parts.size() <= 0) {
            parts = null;
        }
        final HiveReadEntry subEntry = new HiveReadEntry(hiveReadEntry.getTableWrapper(), parts);
        return new HiveSubScan(getUserName(), encodedInputSplits, subEntry, splitTypes, columns, hiveStoragePlugin, confProperties);
    } catch (IOException | ReflectiveOperationException e) {
        throw new ExecutionSetupException(e);
    }
}
Also used : ExecutionSetupException(org.apache.drill.common.exceptions.ExecutionSetupException) Partition(org.apache.hadoop.hive.metastore.api.Partition) LogicalInputSplit(org.apache.drill.exec.store.hive.HiveMetadataProvider.LogicalInputSplit) ArrayList(java.util.ArrayList) IOException(java.io.IOException) HivePartitionWrapper(org.apache.drill.exec.store.hive.HiveTableWrapper.HivePartitionWrapper) ArrayList(java.util.ArrayList) List(java.util.List)
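
One idiom shared by this variant and Example 78: an empty partition list is collapsed to null before the HiveReadEntry is built, presumably so downstream code sees a missing list and an empty one the same way. Extracted as a tiny hypothetical helper (the scan inlines the check):

import java.util.List;

// Return null for a null or empty list, otherwise the list unchanged.
static <T> List<T> nullIfEmpty(List<T> list) {
    return (list == null || list.isEmpty()) ? null : list;
}

With it, the guard in getSpecificScan becomes parts = nullIfEmpty(parts);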

Aggregations

Partition (org.apache.hadoop.hive.metastore.api.Partition): 730
Test (org.junit.Test): 430
Table (org.apache.hadoop.hive.metastore.api.Table): 312
ArrayList (java.util.ArrayList): 303
MetastoreCheckinTest (org.apache.hadoop.hive.metastore.annotation.MetastoreCheckinTest): 254
MetaException (org.apache.hadoop.hive.metastore.api.MetaException): 131
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 115
List (java.util.List): 109
Path (org.apache.hadoop.fs.Path): 109
StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor): 107
NoSuchObjectException (org.apache.hadoop.hive.metastore.api.NoSuchObjectException): 87
SerDeInfo (org.apache.hadoop.hive.metastore.api.SerDeInfo): 71
HashMap (java.util.HashMap): 64
PartitionBuilder (org.apache.hadoop.hive.metastore.client.builder.PartitionBuilder): 63
TException (org.apache.thrift.TException): 62
IOException (java.io.IOException): 61
Database (org.apache.hadoop.hive.metastore.api.Database): 55
PartitionSpecProxy (org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy): 52
FileSystem (org.apache.hadoop.fs.FileSystem): 40
ColumnStatisticsObj (org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj): 40