Example 86 with Partition

Use of org.apache.hadoop.hive.metastore.api.Partition in project flink by apache.

From class HiveTableUtil, method createHivePartition:

// --------------------------------------------------------------------------------------------
// Helper methods
// --------------------------------------------------------------------------------------------
/**
 * Creates a Hive partition instance.
 */
public static Partition createHivePartition(String dbName, String tableName, List<String> values, StorageDescriptor sd, Map<String, String> parameters) {
    Partition partition = new Partition();
    partition.setDbName(dbName);
    partition.setTableName(tableName);
    partition.setValues(values);
    partition.setParameters(parameters);
    partition.setSd(sd);
    int currentTime = (int) (System.currentTimeMillis() / 1000);
    partition.setCreateTime(currentTime);
    partition.setLastAccessTime(currentTime);
    return partition;
}
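A minimal usage sketch of the helper above (not taken from the Flink sources): it builds a StorageDescriptor, creates the Partition, and registers it through a metastore client. The warehouse location, partition value, and the client variable are assumptions for illustration; IMetaStoreClient comes from org.apache.hadoop.hive.metastore.

// Sketch only: the location, the partition value and the metastore client are illustrative.
StorageDescriptor sd = new StorageDescriptor();
sd.setLocation("/warehouse/mydb.db/mytable/dt=2020-05-01");

Partition partition =
        HiveTableUtil.createHivePartition(
                "mydb", "mytable", Collections.singletonList("2020-05-01"), sd, new HashMap<>());

// 'client' is assumed to be an org.apache.hadoop.hive.metastore.IMetaStoreClient obtained elsewhere.
client.add_partition(partition);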
Also used : Partition(org.apache.hadoop.hive.metastore.api.Partition) UniqueConstraint(org.apache.flink.table.api.constraints.UniqueConstraint)

Example 87 with Partition

Use of org.apache.hadoop.hive.metastore.api.Partition in project flink by apache.

From class PartitionMonitorTest, method commitPartitionWithGivenCreateTime:

private void commitPartitionWithGivenCreateTime(List<String> partitionValues, Integer createTime) {
    StorageDescriptor sd = new StorageDescriptor();
    sd.setLocation("/tmp/test");
    Partition partition = new Partition(partitionValues, "testDb", "testTable", createTime, createTime, sd, null);
    partition.setValues(partitionValues);
    testPartitionWithOffset.add(partition);
}
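A hedged sketch of how a test might call this helper to simulate partitions appearing over time; the partition values and create times below are illustrative only, and the values deliberately follow the key=value convention expected by the fetcher context in the next example.

// Sketch only: register two partitions with increasing create times so that
// create-time based discovery can pick them up in order.
commitPartitionWithGivenCreateTime(Arrays.asList("pt_year=2020", "pt_mth=4"), 1000);
commitPartitionWithGivenCreateTime(Arrays.asList("pt_year=2020", "pt_mth=5"), 2000);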
Also used : Partition(org.apache.hadoop.hive.metastore.api.Partition) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor)

Example 88 with Partition

Use of org.apache.hadoop.hive.metastore.api.Partition in project flink by apache.

From class PartitionMonitorTest, method preparePartitionMonitor:

private void preparePartitionMonitor() {
    List<List<String>> seenPartitionsSinceOffset = new ArrayList<>();
    JobConf jobConf = new JobConf();
    Configuration configuration = new Configuration();
    ObjectPath tablePath = new ObjectPath("testDb", "testTable");
    configuration.setString("streaming-source.consume-order", "create-time");
    HiveContinuousPartitionContext<Partition, Long> fetcherContext = new HiveContinuousPartitionContext<Partition, Long>() {

        @Override
        public HiveTablePartition toHiveTablePartition(Partition partition) {
            StorageDescriptor sd = partition.getSd();
            Map<String, String> partitionColValues = new HashMap<>();
            for (String partCol : partition.getValues()) {
                String[] arr = partCol.split("=");
                Asserts.check(arr.length == 2, "partition string should be key=value format");
                partitionColValues.put(arr[0], arr[1]);
            }
            return new HiveTablePartition(sd, partitionColValues, new Properties());
        }

        @Override
        public ObjectPath getTablePath() {
            return null;
        }

        @Override
        public TypeSerializer<Long> getTypeSerializer() {
            return null;
        }

        @Override
        public Long getConsumeStartOffset() {
            return null;
        }

        @Override
        public void open() throws Exception {
        }

        @Override
        public Optional<Partition> getPartition(List<String> partValues) throws Exception {
            return Optional.empty();
        }

        @Override
        public List<ComparablePartitionValue> getComparablePartitionValueList() throws Exception {
            return null;
        }

        @Override
        public void close() throws Exception {
        }
    };
    ContinuousPartitionFetcher<Partition, Long> continuousPartitionFetcher = new ContinuousPartitionFetcher<Partition, Long>() {

        private static final long serialVersionUID = 1L;

        @Override
        public List<Tuple2<Partition, Long>> fetchPartitions(Context<Partition, Long> context, Long previousOffset) throws Exception {
            return testPartitionWithOffset.stream().filter(p -> (long) p.getCreateTime() >= previousOffset).map(p -> Tuple2.of(p, (long) p.getCreateTime())).collect(Collectors.toList());
        }

        @Override
        public List<Partition> fetch(PartitionFetcher.Context<Partition> context) throws Exception {
            return null;
        }
    };
    partitionMonitor = new ContinuousHiveSplitEnumerator.PartitionMonitor<>(0L, seenPartitionsSinceOffset, tablePath, configuration, jobConf, continuousPartitionFetcher, fetcherContext);
}
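Together, the two callbacks above define the discovery path: fetchPartitions treats a partition's create time as its offset, and toHiveTablePartition converts each discovered Partition into a HiveTablePartition. Below is a standalone sketch of that flow under the same key=value convention for partition values; it mirrors the logic above rather than the enumerator's actual invocation, and all variables are local to the snippet.

// Sketch only: filter previously committed partitions by create time and convert the
// new ones, mirroring fetchPartitions and toHiveTablePartition above.
List<Partition> known = new ArrayList<>(testPartitionWithOffset);
long previousOffset = 1500L;

for (Partition p : known) {
    if ((long) p.getCreateTime() < previousOffset) {
        // Already seen: its create time lies before the current offset.
        continue;
    }
    // Same key=value parsing as toHiveTablePartition above.
    Map<String, String> partitionColValues = new HashMap<>();
    for (String partCol : p.getValues()) {
        String[] arr = partCol.split("=");
        partitionColValues.put(arr[0], arr[1]);
    }
    HiveTablePartition hiveTablePartition =
            new HiveTablePartition(p.getSd(), partitionColValues, new Properties());
    // ... hand hiveTablePartition to the enumerator / downstream split reader ...
}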
Also used : Arrays(java.util.Arrays) TypeSerializer(org.apache.flink.api.common.typeutils.TypeSerializer) Properties(java.util.Properties) PartitionFetcher(org.apache.flink.connector.file.table.PartitionFetcher) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Collection(java.util.Collection) Configuration(org.apache.flink.configuration.Configuration) Assert.assertTrue(org.junit.Assert.assertTrue) Asserts(org.apache.http.util.Asserts) Test(org.junit.Test) HashMap(java.util.HashMap) ObjectPath(org.apache.flink.table.catalog.ObjectPath) Partition(org.apache.hadoop.hive.metastore.api.Partition) Collectors(java.util.stream.Collectors) ArrayList(java.util.ArrayList) JobConf(org.apache.hadoop.mapred.JobConf) HiveContinuousPartitionContext(org.apache.flink.connectors.hive.read.HiveContinuousPartitionContext) List(java.util.List) ContinuousPartitionFetcher(org.apache.flink.connector.file.table.ContinuousPartitionFetcher) Map(java.util.Map) Assert.assertArrayEquals(org.junit.Assert.assertArrayEquals) Optional(java.util.Optional) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor) Collections(java.util.Collections)

Example 89 with Partition

Use of org.apache.hadoop.hive.metastore.api.Partition in project flink by apache.

From class HiveDialectITCase, method testAddDropPartitions:

@Test
public void testAddDropPartitions() throws Exception {
    tableEnv.executeSql("create table tbl (x int,y binary) partitioned by (dt date,country string)");
    tableEnv.executeSql("alter table tbl add partition (dt='2020-04-30',country='china') partition (dt='2020-04-30',country='us')");
    ObjectPath tablePath = new ObjectPath("default", "tbl");
    assertEquals(2, hiveCatalog.listPartitions(tablePath).size());
    String partLocation = warehouse + "/part3_location";
    tableEnv.executeSql(String.format("alter table tbl add partition (dt='2020-05-01',country='belgium') location '%s'", partLocation));
    Table hiveTable = hiveCatalog.getHiveTable(tablePath);
    CatalogPartitionSpec spec = new CatalogPartitionSpec(new LinkedHashMap<String, String>() {

        {
            put("dt", "2020-05-01");
            put("country", "belgium");
        }
    });
    Partition hivePartition = hiveCatalog.getHivePartition(hiveTable, spec);
    assertEquals(partLocation, locationPath(hivePartition.getSd().getLocation()));
    tableEnv.executeSql("alter table tbl drop partition (dt='2020-04-30',country='china'),partition (dt='2020-05-01',country='belgium')");
    assertEquals(1, hiveCatalog.listPartitions(tablePath).size());
}
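The anonymous LinkedHashMap subclass used above to build the CatalogPartitionSpec also appears in the next example; a small hypothetical helper (not part of the Flink tests) can make such specs easier to construct:

// Hypothetical convenience for tests: build a CatalogPartitionSpec for a (dt, country) partition.
private static CatalogPartitionSpec partitionSpec(String dt, String country) {
    LinkedHashMap<String, String> spec = new LinkedHashMap<>();
    spec.put("dt", dt);
    spec.put("country", country);
    return new CatalogPartitionSpec(spec);
}

// Usage: hiveCatalog.getHivePartition(hiveTable, partitionSpec("2020-05-01", "belgium"));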
Also used : Partition(org.apache.hadoop.hive.metastore.api.Partition) ObjectPath(org.apache.flink.table.catalog.ObjectPath) CatalogTable(org.apache.flink.table.catalog.CatalogTable) SqlCreateHiveTable(org.apache.flink.sql.parser.hive.ddl.SqlCreateHiveTable) CatalogBaseTable(org.apache.flink.table.catalog.CatalogBaseTable) Table(org.apache.hadoop.hive.metastore.api.Table) CatalogPartitionSpec(org.apache.flink.table.catalog.CatalogPartitionSpec) Test(org.junit.Test)

Example 90 with Partition

Use of org.apache.hadoop.hive.metastore.api.Partition in project flink by apache.

From class HiveDialectITCase, method testAlterTable:

@Test
public void testAlterTable() throws Exception {
    tableEnv.executeSql("create table tbl (x int) tblproperties('k1'='v1')");
    tableEnv.executeSql("alter table tbl rename to tbl1");
    ObjectPath tablePath = new ObjectPath("default", "tbl1");
    // change properties
    tableEnv.executeSql("alter table `default`.tbl1 set tblproperties ('k2'='v2')");
    Table hiveTable = hiveCatalog.getHiveTable(tablePath);
    assertEquals("v1", hiveTable.getParameters().get("k1"));
    assertEquals("v2", hiveTable.getParameters().get("k2"));
    // change location
    String newLocation = warehouse + "/tbl1_new_location";
    tableEnv.executeSql(String.format("alter table default.tbl1 set location '%s'", newLocation));
    hiveTable = hiveCatalog.getHiveTable(tablePath);
    assertEquals(newLocation, locationPath(hiveTable.getSd().getLocation()));
    // change file format
    tableEnv.executeSql("alter table tbl1 set fileformat orc");
    hiveTable = hiveCatalog.getHiveTable(tablePath);
    assertEquals(OrcSerde.class.getName(), hiveTable.getSd().getSerdeInfo().getSerializationLib());
    assertEquals(OrcInputFormat.class.getName(), hiveTable.getSd().getInputFormat());
    assertEquals(OrcOutputFormat.class.getName(), hiveTable.getSd().getOutputFormat());
    // change serde
    tableEnv.executeSql(String.format("alter table tbl1 set serde '%s' with serdeproperties('%s'='%s')", LazyBinarySerDe.class.getName(), serdeConstants.FIELD_DELIM, "\u0001"));
    hiveTable = hiveCatalog.getHiveTable(tablePath);
    assertEquals(LazyBinarySerDe.class.getName(), hiveTable.getSd().getSerdeInfo().getSerializationLib());
    assertEquals("\u0001", hiveTable.getSd().getSerdeInfo().getParameters().get(serdeConstants.FIELD_DELIM));
    // replace columns
    tableEnv.executeSql("alter table tbl1 replace columns (t tinyint,s smallint,i int,b bigint,f float,d double,num decimal," + "ts timestamp,dt date,str string,var varchar(10),ch char(123),bool boolean,bin binary)");
    hiveTable = hiveCatalog.getHiveTable(tablePath);
    assertEquals(14, hiveTable.getSd().getColsSize());
    assertEquals("varchar(10)", hiveTable.getSd().getCols().get(10).getType());
    assertEquals("char(123)", hiveTable.getSd().getCols().get(11).getType());
    tableEnv.executeSql("alter table tbl1 replace columns (a array<array<int>>,s struct<f1:struct<f11:int,f12:binary>, f2:map<double,date>>," + "m map<char(5),map<timestamp,decimal(20,10)>>)");
    hiveTable = hiveCatalog.getHiveTable(tablePath);
    assertEquals("array<array<int>>", hiveTable.getSd().getCols().get(0).getType());
    assertEquals("struct<f1:struct<f11:int,f12:binary>,f2:map<double,date>>", hiveTable.getSd().getCols().get(1).getType());
    assertEquals("map<char(5),map<timestamp,decimal(20,10)>>", hiveTable.getSd().getCols().get(2).getType());
    // add columns
    tableEnv.executeSql("alter table tbl1 add columns (x int,y int)");
    hiveTable = hiveCatalog.getHiveTable(tablePath);
    assertEquals(5, hiveTable.getSd().getColsSize());
    // change column
    tableEnv.executeSql("alter table tbl1 change column x x1 string comment 'new x col'");
    hiveTable = hiveCatalog.getHiveTable(tablePath);
    assertEquals(5, hiveTable.getSd().getColsSize());
    FieldSchema newField = hiveTable.getSd().getCols().get(3);
    assertEquals("x1", newField.getName());
    assertEquals("string", newField.getType());
    tableEnv.executeSql("alter table tbl1 change column y y int first");
    hiveTable = hiveCatalog.getHiveTable(tablePath);
    newField = hiveTable.getSd().getCols().get(0);
    assertEquals("y", newField.getName());
    assertEquals("int", newField.getType());
    tableEnv.executeSql("alter table tbl1 change column x1 x2 timestamp after y");
    hiveTable = hiveCatalog.getHiveTable(tablePath);
    newField = hiveTable.getSd().getCols().get(1);
    assertEquals("x2", newField.getName());
    assertEquals("timestamp", newField.getType());
    // add/replace columns cascade
    tableEnv.executeSql("create table tbl2 (x int) partitioned by (dt date,id bigint)");
    tableEnv.executeSql("alter table tbl2 add partition (dt='2020-01-23',id=1) partition (dt='2020-04-24',id=2)");
    CatalogPartitionSpec partitionSpec1 = new CatalogPartitionSpec(new LinkedHashMap<String, String>() {

        {
            put("dt", "2020-01-23");
            put("id", "1");
        }
    });
    CatalogPartitionSpec partitionSpec2 = new CatalogPartitionSpec(new LinkedHashMap<String, String>() {

        {
            put("dt", "2020-04-24");
            put("id", "2");
        }
    });
    tableEnv.executeSql("alter table tbl2 replace columns (ti tinyint,d decimal) cascade");
    ObjectPath tablePath2 = new ObjectPath("default", "tbl2");
    hiveTable = hiveCatalog.getHiveTable(tablePath2);
    Partition hivePartition = hiveCatalog.getHivePartition(hiveTable, partitionSpec1);
    assertEquals(2, hivePartition.getSd().getColsSize());
    hivePartition = hiveCatalog.getHivePartition(hiveTable, partitionSpec2);
    assertEquals(2, hivePartition.getSd().getColsSize());
    tableEnv.executeSql("alter table tbl2 add columns (ch char(5),vch varchar(9)) cascade");
    hivePartition = hiveCatalog.getHivePartition(hiveTable, partitionSpec1);
    assertEquals(4, hivePartition.getSd().getColsSize());
    hivePartition = hiveCatalog.getHivePartition(hiveTable, partitionSpec2);
    assertEquals(4, hivePartition.getSd().getColsSize());
    // change column cascade
    tableEnv.executeSql("alter table tbl2 change column ch ch char(10) cascade");
    hivePartition = hiveCatalog.getHivePartition(hiveTable, partitionSpec1);
    assertEquals("char(10)", hivePartition.getSd().getCols().get(2).getType());
    hivePartition = hiveCatalog.getHivePartition(hiveTable, partitionSpec2);
    assertEquals("char(10)", hivePartition.getSd().getCols().get(2).getType());
    tableEnv.executeSql("alter table tbl2 change column vch str string first cascade");
    hivePartition = hiveCatalog.getHivePartition(hiveTable, partitionSpec1);
    assertEquals("str", hivePartition.getSd().getCols().get(0).getName());
    hivePartition = hiveCatalog.getHivePartition(hiveTable, partitionSpec2);
    assertEquals("str", hivePartition.getSd().getCols().get(0).getName());
}
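The cascade assertions above repeat the same column lookup on the partition's storage descriptor; a hypothetical helper (not in the original test) that centralizes that check:

// Hypothetical convenience: assert the type of the index-th column in a partition's storage descriptor.
private static void assertPartitionColType(Partition hivePartition, int index, String expectedType) {
    assertEquals(expectedType, hivePartition.getSd().getCols().get(index).getType());
}

// Usage: assertPartitionColType(hivePartition, 2, "char(10)");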
Also used : Partition(org.apache.hadoop.hive.metastore.api.Partition) ObjectPath(org.apache.flink.table.catalog.ObjectPath) CatalogTable(org.apache.flink.table.catalog.CatalogTable) SqlCreateHiveTable(org.apache.flink.sql.parser.hive.ddl.SqlCreateHiveTable) CatalogBaseTable(org.apache.flink.table.catalog.CatalogBaseTable) Table(org.apache.hadoop.hive.metastore.api.Table) OrcSerde(org.apache.hadoop.hive.ql.io.orc.OrcSerde) OrcInputFormat(org.apache.hadoop.hive.ql.io.orc.OrcInputFormat) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) LazyBinarySerDe(org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe) OrcOutputFormat(org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat) CatalogPartitionSpec(org.apache.flink.table.catalog.CatalogPartitionSpec) Test(org.junit.Test)

Aggregations

Partition (org.apache.hadoop.hive.metastore.api.Partition): 730
Test (org.junit.Test): 430
Table (org.apache.hadoop.hive.metastore.api.Table): 312
ArrayList (java.util.ArrayList): 303
MetastoreCheckinTest (org.apache.hadoop.hive.metastore.annotation.MetastoreCheckinTest): 254
MetaException (org.apache.hadoop.hive.metastore.api.MetaException): 131
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 115
List (java.util.List): 109
Path (org.apache.hadoop.fs.Path): 109
StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor): 107
NoSuchObjectException (org.apache.hadoop.hive.metastore.api.NoSuchObjectException): 87
SerDeInfo (org.apache.hadoop.hive.metastore.api.SerDeInfo): 71
HashMap (java.util.HashMap): 64
PartitionBuilder (org.apache.hadoop.hive.metastore.client.builder.PartitionBuilder): 63
TException (org.apache.thrift.TException): 62
IOException (java.io.IOException): 61
Database (org.apache.hadoop.hive.metastore.api.Database): 55
PartitionSpecProxy (org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy): 52
FileSystem (org.apache.hadoop.fs.FileSystem): 40
ColumnStatisticsObj (org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj): 40