
Example 46 with HCatException

Use of org.apache.hive.hcatalog.common.HCatException in project hive by apache.

The class HCatBaseOutputFormat, method configureOutputStorageHandler.

/**
 * Configures the output storage handler, allowing specification of missing dynamic partition values.
 * @param jobContext the job context
 * @param dynamicPartVals the dynamic partition values, in the same order as the dynamic partition keys
 * @throws IOException if the storage handler cannot be configured
 */
@SuppressWarnings("unchecked")
static void configureOutputStorageHandler(JobContext jobContext, List<String> dynamicPartVals) throws IOException {
    Configuration conf = jobContext.getConfiguration();
    try {
        OutputJobInfo jobInfo = (OutputJobInfo) HCatUtil.deserialize(conf.get(HCatConstants.HCAT_KEY_OUTPUT_INFO));
        HiveStorageHandler storageHandler = HCatUtil.getStorageHandler(jobContext.getConfiguration(), jobInfo.getTableInfo().getStorerInfo());
        Map<String, String> partitionValues = jobInfo.getPartitionValues();
        if (dynamicPartVals != null) {
            // dynamic part vals specified
            List<String> dynamicPartKeys = jobInfo.getDynamicPartitioningKeys();
            if (dynamicPartVals.size() != dynamicPartKeys.size()) {
                throw new HCatException(ErrorType.ERROR_INVALID_PARTITION_VALUES, "Unable to configure dynamic partitioning for storage handler, mismatch between" + " number of partition values obtained[" + dynamicPartVals.size() + "] and number of partition values required[" + dynamicPartKeys.size() + "]");
            }
            for (int i = 0; i < dynamicPartKeys.size(); i++) {
                partitionValues.put(dynamicPartKeys.get(i), dynamicPartVals.get(i));
            }
            jobInfo.setPartitionValues(partitionValues);
        }
        HCatUtil.configureOutputStorageHandler(storageHandler, conf, jobInfo);
    } catch (Exception e) {
        if (e instanceof HCatException) {
            throw (HCatException) e;
        } else {
            throw new HCatException(ErrorType.ERROR_INIT_STORAGE_HANDLER, e);
        }
    }
}
Also used : HiveStorageHandler(org.apache.hadoop.hive.ql.metadata.HiveStorageHandler) Configuration(org.apache.hadoop.conf.Configuration) HCatException(org.apache.hive.hcatalog.common.HCatException) IOException(java.io.IOException)
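
The catch block above is a wrap-or-rethrow idiom that recurs across HCatalog: an HCatException passes through unchanged so its original ErrorType survives, while any other failure is wrapped with a context-specific ErrorType. A minimal sketch of the idiom, with doStorageHandlerSetup() as a hypothetical stand-in for the real configuration work:

import org.apache.hive.hcatalog.common.ErrorType;
import org.apache.hive.hcatalog.common.HCatException;

public class WrapOrRethrowSketch {

    // Hypothetical stand-in for the storage-handler configuration done above.
    static void doStorageHandlerSetup() throws Exception {
        throw new IllegalStateException("simulated setup failure");
    }

    static void configure() throws HCatException {
        try {
            doStorageHandlerSetup();
        } catch (Exception e) {
            if (e instanceof HCatException) {
                // Rethrow as-is so the original ErrorType is preserved.
                throw (HCatException) e;
            }
            // Wrap everything else with a context-specific ErrorType.
            throw new HCatException(ErrorType.ERROR_INIT_STORAGE_HANDLER, e);
        }
    }

    public static void main(String[] args) {
        try {
            configure();
        } catch (HCatException e) {
            System.out.println("error type: " + e.getErrorType());
        }
    }
}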

Example 47 with HCatException

Use of org.apache.hive.hcatalog.common.HCatException in project hive by apache.

The class InitializeInput, method getInputJobInfo.

/**
 * Returns the given InputJobInfo after populating it with data queried from the metadata service.
 */
private static InputJobInfo getInputJobInfo(Configuration conf, InputJobInfo inputJobInfo, String locationFilter) throws Exception {
    IMetaStoreClient client = null;
    HiveConf hiveConf = null;
    try {
        if (conf != null) {
            hiveConf = HCatUtil.getHiveConf(conf);
        } else {
            hiveConf = new HiveConf(HCatInputFormat.class);
        }
        client = HCatUtil.getHiveMetastoreClient(hiveConf);
        Table table = HCatUtil.getTable(client, inputJobInfo.getDatabaseName(), inputJobInfo.getTableName());
        List<PartInfo> partInfoList = new ArrayList<PartInfo>();
        inputJobInfo.setTableInfo(HCatTableInfo.valueOf(table.getTTable()));
        if (table.getPartitionKeys().size() != 0) {
            // Partitioned table
            List<Partition> parts = client.listPartitionsByFilter(inputJobInfo.getDatabaseName(), inputJobInfo.getTableName(), inputJobInfo.getFilter(), (short) -1);
            // Default to 100,000 partitions if hcat.metastore.maxpartitions is not defined
            int maxPart = hiveConf.getInt("hcat.metastore.maxpartitions", 100000);
            if (parts != null && parts.size() > maxPart) {
                throw new HCatException(ErrorType.ERROR_EXCEED_MAXPART, "total number of partitions is " + parts.size());
            }
            // populate partition info
            for (Partition ptn : parts) {
                HCatSchema schema = HCatUtil.extractSchema(new org.apache.hadoop.hive.ql.metadata.Partition(table, ptn));
                PartInfo partInfo = extractPartInfo(schema, ptn.getSd(), ptn.getParameters(), conf, inputJobInfo);
                partInfo.setPartitionValues(InternalUtil.createPtnKeyValueMap(table, ptn));
                partInfoList.add(partInfo);
            }
        } else {
            // Non partitioned table
            HCatSchema schema = HCatUtil.extractSchema(table);
            PartInfo partInfo = extractPartInfo(schema, table.getTTable().getSd(), table.getParameters(), conf, inputJobInfo);
            partInfo.setPartitionValues(new HashMap<String, String>());
            partInfoList.add(partInfo);
        }
        inputJobInfo.setPartitions(partInfoList);
        return inputJobInfo;
    } finally {
        HCatUtil.closeHiveClientQuietly(client);
    }
}
Also used : Partition(org.apache.hadoop.hive.metastore.api.Partition) Table(org.apache.hadoop.hive.ql.metadata.Table) ArrayList(java.util.ArrayList) HCatException(org.apache.hive.hcatalog.common.HCatException) IMetaStoreClient(org.apache.hadoop.hive.metastore.IMetaStoreClient) HCatSchema(org.apache.hive.hcatalog.data.schema.HCatSchema) HiveConf(org.apache.hadoop.hive.conf.HiveConf)
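
The partition-count guard above is what callers typically hit through HCatInputFormat. A hedged usage sketch, assuming the setInput(Configuration, String, String) entry point; the database and table names are illustrative:

import org.apache.hadoop.conf.Configuration;
import org.apache.hive.hcatalog.common.ErrorType;
import org.apache.hive.hcatalog.common.HCatException;
import org.apache.hive.hcatalog.mapreduce.HCatInputFormat;

public class MaxPartitionsGuardSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Lower the cap from its 100,000 default; the guard above reads this key.
        conf.setInt("hcat.metastore.maxpartitions", 1000);
        try {
            // setInput eventually reaches getInputJobInfo, which applies the guard.
            HCatInputFormat.setInput(conf, "default", "wide_partitioned_table");
        } catch (HCatException e) {
            if (e.getErrorType() == ErrorType.ERROR_EXCEED_MAXPART) {
                System.err.println("Filter matched too many partitions: " + e.getMessage());
            }
        }
    }
}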

Example 48 with HCatException

Use of org.apache.hive.hcatalog.common.HCatException in project hive by apache.

The class TestHCatDynamicPartitioned, method runHCatDynamicPartitionedTable.

protected void runHCatDynamicPartitionedTable(boolean asSingleMapTask, String customDynamicPathPattern) throws Exception {
    generateWriteRecords(NUM_RECORDS, NUM_PARTITIONS, 0);
    runMRCreate(null, dataColumns, writeRecords, NUM_RECORDS, true, asSingleMapTask, customDynamicPathPattern);
    runMRRead(NUM_RECORDS);
    // Read with partition filter
    runMRRead(4, "p1 = \"0\"");
    runMRRead(8, "p1 = \"1\" or p1 = \"3\"");
    runMRRead(4, "p1 = \"4\"");
    // Read back from Hive to cross-check the written data
    String query = "select * from " + tableName;
    int retCode = driver.run(query).getResponseCode();
    if (retCode != 0) {
        throw new Exception("Error " + retCode + " running query " + query);
    }
    ArrayList<String> res = new ArrayList<String>();
    driver.getResults(res);
    assertEquals(NUM_RECORDS, res.size());
    // Test for duplicate publish
    IOException exc = null;
    try {
        generateWriteRecords(NUM_RECORDS, NUM_PARTITIONS, 0);
        Job job = runMRCreate(null, dataColumns, writeRecords, NUM_RECORDS, false, true, customDynamicPathPattern);
        if (HCatUtil.isHadoop23()) {
            Assert.assertFalse(job.isSuccessful());
        }
    } catch (IOException e) {
        exc = e;
    }
    if (!HCatUtil.isHadoop23()) {
        assertNotNull(exc);
        assertTrue(exc instanceof HCatException);
        ErrorType errorType = ((HCatException) exc).getErrorType();
        assertTrue("Got exception of type [" + errorType + "] Expected ERROR_PUBLISHING_PARTITION or ERROR_MOVE_FAILED or ERROR_DUPLICATE_PARTITION",
            ErrorType.ERROR_PUBLISHING_PARTITION == errorType
                || ErrorType.ERROR_MOVE_FAILED == errorType
                || ErrorType.ERROR_DUPLICATE_PARTITION == errorType);
    }
    query = "show partitions " + tableName;
    retCode = driver.run(query).getResponseCode();
    if (retCode != 0) {
        throw new Exception("Error " + retCode + " running query " + query);
    }
    res = new ArrayList<String>();
    driver.getResults(res);
    assertEquals(NUM_PARTITIONS, res.size());
    query = "select * from " + tableName;
    retCode = driver.run(query).getResponseCode();
    if (retCode != 0) {
        throw new Exception("Error " + retCode + " running query " + query);
    }
    res = new ArrayList<String>();
    driver.getResults(res);
    assertEquals(NUM_RECORDS, res.size());
    query = "select count(*) from " + tableName;
    retCode = driver.run(query).getResponseCode();
    if (retCode != 0) {
        throw new Exception("Error " + retCode + " running query " + query);
    }
    res = new ArrayList<String>();
    driver.getResults(res);
    assertEquals(1, res.size());
    assertEquals("20", res.get(0));
    query = "select count(*) from " + tableName + " where p1=1";
    retCode = driver.run(query).getResponseCode();
    if (retCode != 0) {
        throw new Exception("Error " + retCode + " running query " + query);
    }
    res = new ArrayList<String>();
    driver.getResults(res);
    assertEquals(1, res.size());
    assertEquals("4", res.get(0));
}
Also used : ArrayList(java.util.ArrayList) HCatException(org.apache.hive.hcatalog.common.HCatException) IOException(java.io.IOException) Job(org.apache.hadoop.mapreduce.Job)
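
The three accepted ErrorTypes in the duplicate-publish assertion can be expressed compactly with an EnumSet, since ErrorType is an enum. A minimal sketch; the helper name isExpectedDuplicatePublishFailure is illustrative, not part of the test:

import java.util.EnumSet;

import org.apache.hive.hcatalog.common.ErrorType;
import org.apache.hive.hcatalog.common.HCatException;

public class DuplicatePublishCheckSketch {

    // The three ErrorTypes the test above accepts for a failed duplicate publish.
    private static final EnumSet<ErrorType> EXPECTED = EnumSet.of(
        ErrorType.ERROR_PUBLISHING_PARTITION,
        ErrorType.ERROR_MOVE_FAILED,
        ErrorType.ERROR_DUPLICATE_PARTITION);

    // Hypothetical helper: true when the exception carries one of the expected types.
    static boolean isExpectedDuplicatePublishFailure(Exception exc) {
        return exc instanceof HCatException
            && EXPECTED.contains(((HCatException) exc).getErrorType());
    }

    public static void main(String[] args) {
        Exception simulated = new HCatException(
            ErrorType.ERROR_DUPLICATE_PARTITION, new Exception("simulated"));
        System.out.println(isExpectedDuplicatePublishFailure(simulated));
    }
}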

Example 49 with HCatException

Use of org.apache.hive.hcatalog.common.HCatException in project hive by apache.

The class TestHCatPartitioned, method columnOrderChangeTest.

// Check behavior when the order of columns changes
private void columnOrderChangeTest() throws Exception {
    HCatSchema tableSchema = getTableSchema();
    assertEquals(5, tableSchema.getFields().size());
    partitionColumns = new ArrayList<HCatFieldSchema>();
    partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")));
    partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c3", serdeConstants.STRING_TYPE_NAME, "")));
    partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, "")));
    writeRecords = new ArrayList<HCatRecord>();
    for (int i = 0; i < 10; i++) {
        List<Object> objList = new ArrayList<Object>();
        objList.add(i);
        objList.add("co strvalue" + i);
        objList.add("co str2value" + i);
        writeRecords.add(new DefaultHCatRecord(objList));
    }
    Map<String, String> partitionMap = new HashMap<String, String>();
    partitionMap.put("part1", "p1value8");
    partitionMap.put("part0", "508");
    Exception exc = null;
    try {
        runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true);
    } catch (IOException e) {
        exc = e;
    }
    assertNotNull(exc);
    assertTrue(exc instanceof HCatException);
    assertEquals(ErrorType.ERROR_SCHEMA_COLUMN_MISMATCH, ((HCatException) exc).getErrorType());
    partitionColumns = new ArrayList<HCatFieldSchema>();
    partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")));
    partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, "")));
    writeRecords = new ArrayList<HCatRecord>();
    for (int i = 0; i < 10; i++) {
        List<Object> objList = new ArrayList<Object>();
        objList.add(i);
        objList.add("co strvalue" + i);
        writeRecords.add(new DefaultHCatRecord(objList));
    }
    runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true);
    if (isTableImmutable()) {
        // Read should get 10 + 20 + 10 + 10 + 20 rows
        runMRRead(70);
    } else {
        // +20 from the duplicate publish
        runMRRead(90);
    }
}
Also used : HashMap(java.util.HashMap) HCatFieldSchema(org.apache.hive.hcatalog.data.schema.HCatFieldSchema) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) ArrayList(java.util.ArrayList) HCatException(org.apache.hive.hcatalog.common.HCatException) IOException(java.io.IOException) HCatSchema(org.apache.hive.hcatalog.data.schema.HCatSchema) DefaultHCatRecord(org.apache.hive.hcatalog.data.DefaultHCatRecord) HCatRecord(org.apache.hive.hcatalog.data.HCatRecord)
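
The mismatch the test provokes comes from writing columns in the order (c1, c3, c2) while the table expects (c1, c2, c3). A minimal sketch of the two schemas, built with the same HCatSchemaUtils calls the test uses; the getFieldNames() comparison only illustrates the positional mismatch and is not HCatalog's actual validation code:

import java.util.Arrays;

import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hive.hcatalog.common.HCatException;
import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
import org.apache.hive.hcatalog.data.schema.HCatSchema;
import org.apache.hive.hcatalog.data.schema.HCatSchemaUtils;

public class ColumnOrderSketch {
    public static void main(String[] args) throws HCatException {
        HCatFieldSchema c1 = HCatSchemaUtils.getHCatFieldSchema(
            new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, ""));
        HCatFieldSchema c2 = HCatSchemaUtils.getHCatFieldSchema(
            new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, ""));
        HCatFieldSchema c3 = HCatSchemaUtils.getHCatFieldSchema(
            new FieldSchema("c3", serdeConstants.STRING_TYPE_NAME, ""));
        // Writer-side order (c1, c3, c2) vs. table order (c1, c2, c3): this
        // positional mismatch is what ERROR_SCHEMA_COLUMN_MISMATCH reports above.
        HCatSchema writerOrder = new HCatSchema(Arrays.asList(c1, c3, c2));
        HCatSchema tableOrder = new HCatSchema(Arrays.asList(c1, c2, c3));
        System.out.println("same order: "
            + writerOrder.getFieldNames().equals(tableOrder.getFieldNames()));
    }
}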

Example 50 with HCatException

Use of org.apache.hive.hcatalog.common.HCatException in project hive by apache.

The class TestHCatPartitioned, method testHCatPartitionedTable.

@Test
public void testHCatPartitionedTable() throws Exception {
    Map<String, String> partitionMap = new HashMap<String, String>();
    partitionMap.put("part1", "p1value1");
    partitionMap.put("part0", "501");
    runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true);
    partitionMap.clear();
    partitionMap.put("PART1", "p1value2");
    partitionMap.put("PART0", "502");
    runMRCreate(partitionMap, partitionColumns, writeRecords, 20, true);
    // Test for duplicate publish -- this will either fail on job creation time
    // and throw an exception, or will fail at runtime, and fail the job.
    IOException exc = null;
    try {
        Job j = runMRCreate(partitionMap, partitionColumns, writeRecords, 20, true);
        assertEquals(!isTableImmutable(), j.isSuccessful());
    } catch (IOException e) {
        exc = e;
        assertTrue(exc instanceof HCatException);
        assertEquals(ErrorType.ERROR_DUPLICATE_PARTITION, ((HCatException) exc).getErrorType());
    }
    if (!isTableImmutable()) {
        assertNull(exc);
    }
    // Test for publish with invalid partition key name
    exc = null;
    partitionMap.clear();
    partitionMap.put("px1", "p1value2");
    partitionMap.put("px0", "502");
    try {
        Job j = runMRCreate(partitionMap, partitionColumns, writeRecords, 20, true);
        assertFalse(j.isSuccessful());
    } catch (IOException e) {
        exc = e;
        assertNotNull(exc);
        assertTrue(exc instanceof HCatException);
        assertEquals(ErrorType.ERROR_MISSING_PARTITION_KEY, ((HCatException) exc).getErrorType());
    }
    // Test for publish with missing partition key values
    exc = null;
    partitionMap.clear();
    partitionMap.put("px", "512");
    try {
        runMRCreate(partitionMap, partitionColumns, writeRecords, 20, true);
    } catch (IOException e) {
        exc = e;
    }
    assertNotNull(exc);
    assertTrue(exc instanceof HCatException);
    assertEquals(ErrorType.ERROR_INVALID_PARTITION_VALUES, ((HCatException) exc).getErrorType());
    // Test for null partition value map
    exc = null;
    try {
        runMRCreate(null, partitionColumns, writeRecords, 20, false);
    } catch (IOException e) {
        exc = e;
    }
    assertNull(exc);
    // Read should get 10 + 20 rows if immutable, 50 (10+20+20) if mutable
    if (isTableImmutable()) {
        runMRRead(30);
    } else {
        runMRRead(50);
    }
    // Read with partition filter
    runMRRead(10, "part1 = \"p1value1\"");
    runMRRead(10, "part0 = \"501\"");
    if (isTableImmutable()) {
        runMRRead(20, "part1 = \"p1value2\"");
        runMRRead(30, "part1 = \"p1value1\" or part1 = \"p1value2\"");
        runMRRead(20, "part0 = \"502\"");
        runMRRead(30, "part0 = \"501\" or part0 = \"502\"");
    } else {
        runMRRead(40, "part1 = \"p1value2\"");
        runMRRead(50, "part1 = \"p1value1\" or part1 = \"p1value2\"");
        runMRRead(40, "part0 = \"502\"");
        runMRRead(50, "part0 = \"501\" or part0 = \"502\"");
    }
    tableSchemaTest();
    columnOrderChangeTest();
    hiveReadTest();
}
Also used : HashMap(java.util.HashMap) HCatException(org.apache.hive.hcatalog.common.HCatException) IOException(java.io.IOException) Job(org.apache.hadoop.mapreduce.Job) Test(org.junit.Test)
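
The runMRCreate helper in these tests wraps HCatalog's write-side entry point. A hedged sketch of the underlying call, assuming the HCatOutputFormat.setOutput/OutputJobInfo.create API; the database and table names are illustrative:

import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.mapreduce.Job;
import org.apache.hive.hcatalog.common.ErrorType;
import org.apache.hive.hcatalog.common.HCatException;
import org.apache.hive.hcatalog.mapreduce.HCatOutputFormat;
import org.apache.hive.hcatalog.mapreduce.OutputJobInfo;

public class PartitionPublishSketch {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance();
        Map<String, String> partitionMap = new HashMap<String, String>();
        partitionMap.put("part1", "p1value1");
        partitionMap.put("part0", "501");
        try {
            // setOutput validates the partition keys up front; a bad key name or a
            // duplicate partition surfaces as an HCatException either here or later
            // at commit time, which is what the test above exercises.
            HCatOutputFormat.setOutput(job,
                OutputJobInfo.create("default", "partitioned_table", partitionMap));
        } catch (HCatException e) {
            if (e.getErrorType() == ErrorType.ERROR_DUPLICATE_PARTITION
                || e.getErrorType() == ErrorType.ERROR_MISSING_PARTITION_KEY) {
                System.err.println("Publish rejected: " + e.getMessage());
            }
        }
    }
}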

Aggregations

HCatException (org.apache.hive.hcatalog.common.HCatException): 52
IOException (java.io.IOException): 23
ArrayList (java.util.ArrayList): 20
MetaException (org.apache.hadoop.hive.metastore.api.MetaException): 19
TException (org.apache.thrift.TException): 14
HCatFieldSchema (org.apache.hive.hcatalog.data.schema.HCatFieldSchema): 13
HashMap (java.util.HashMap): 11
Test (org.junit.Test): 11
NoSuchObjectException (org.apache.hadoop.hive.metastore.api.NoSuchObjectException): 10
Configuration (org.apache.hadoop.conf.Configuration): 9
Path (org.apache.hadoop.fs.Path): 9
Partition (org.apache.hadoop.hive.metastore.api.Partition): 8
Table (org.apache.hadoop.hive.metastore.api.Table): 8
HCatSchema (org.apache.hive.hcatalog.data.schema.HCatSchema): 7
Job (org.apache.hadoop.mapreduce.Job): 6
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 5
FileSystem (org.apache.hadoop.fs.FileSystem): 4
HiveConf (org.apache.hadoop.hive.conf.HiveConf): 4
HCatRecord (org.apache.hive.hcatalog.data.HCatRecord): 4
Map (java.util.Map): 3