Use of org.apache.hive.hcatalog.common.HCatException in project hive by apache.
The class HCatBaseOutputFormat, method configureOutputStorageHandler.
/**
 * Configure the output storage handler, allowing the missing dynamic partition values to be supplied.
 * @param jobContext the job context
 * @param dynamicPartVals the dynamic partition values, in the same order as the dynamic partition keys
 * @throws IOException
 */
@SuppressWarnings("unchecked")
static void configureOutputStorageHandler(JobContext jobContext, List<String> dynamicPartVals) throws IOException {
  Configuration conf = jobContext.getConfiguration();
  try {
    OutputJobInfo jobInfo = (OutputJobInfo) HCatUtil.deserialize(conf.get(HCatConstants.HCAT_KEY_OUTPUT_INFO));
    HiveStorageHandler storageHandler = HCatUtil.getStorageHandler(jobContext.getConfiguration(), jobInfo.getTableInfo().getStorerInfo());
    Map<String, String> partitionValues = jobInfo.getPartitionValues();
    String location = jobInfo.getLocation();
    if (dynamicPartVals != null) {
      // dynamic part vals specified
      List<String> dynamicPartKeys = jobInfo.getDynamicPartitioningKeys();
      if (dynamicPartVals.size() != dynamicPartKeys.size()) {
        throw new HCatException(ErrorType.ERROR_INVALID_PARTITION_VALUES,
            "Unable to configure dynamic partitioning for storage handler, mismatch between"
                + " number of partition values obtained[" + dynamicPartVals.size()
                + "] and number of partition values required[" + dynamicPartKeys.size() + "]");
      }
      for (int i = 0; i < dynamicPartKeys.size(); i++) {
        partitionValues.put(dynamicPartKeys.get(i), dynamicPartVals.get(i));
      }
      // // re-home location, now that we know the rest of the partvals
      // Table table = jobInfo.getTableInfo().getTable();
      //
      // List<String> partitionCols = new ArrayList<String>();
      // for(FieldSchema schema : table.getPartitionKeys()) {
      //   partitionCols.add(schema.getName());
      // }
      jobInfo.setPartitionValues(partitionValues);
    }
    HCatUtil.configureOutputStorageHandler(storageHandler, conf, jobInfo);
  } catch (Exception e) {
    if (e instanceof HCatException) {
      throw (HCatException) e;
    } else {
      throw new HCatException(ErrorType.ERROR_INIT_STORAGE_HANDLER, e);
    }
  }
}
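For context, configureOutputStorageHandler receives dynamic partition values only when the client set up the write job through HCatOutputFormat with some or all partition values left unspecified, which marks those keys as dynamic. The following is a minimal write-side sketch, not taken from the Hive sources; the database "mydb" and table "mytable" are placeholder names.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hive.hcatalog.data.schema.HCatSchema;
import org.apache.hive.hcatalog.mapreduce.HCatOutputFormat;
import org.apache.hive.hcatalog.mapreduce.OutputJobInfo;

Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "hcat-dynamic-partition-write");
// Passing null (or a map that omits some partition keys) marks those keys as dynamic;
// their values are filled in later, which is when configureOutputStorageHandler above
// is called with the non-null dynamicPartVals list.
OutputJobInfo outputJobInfo = OutputJobInfo.create("mydb", "mytable", null);
HCatOutputFormat.setOutput(job, outputJobInfo);
// Write records using the table's own schema.
HCatSchema schema = HCatOutputFormat.getTableSchema(job.getConfiguration());
HCatOutputFormat.setSchema(job, schema);
job.setOutputFormatClass(HCatOutputFormat.class);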
Use of org.apache.hive.hcatalog.common.HCatException in project hive by apache.
The class InitializeInput, method getInputJobInfo.
/**
* Returns the given InputJobInfo after populating with data queried from the metadata service.
*/
private static InputJobInfo getInputJobInfo(Configuration conf, InputJobInfo inputJobInfo, String locationFilter) throws Exception {
  IMetaStoreClient client = null;
  HiveConf hiveConf = null;
  try {
    if (conf != null) {
      hiveConf = HCatUtil.getHiveConf(conf);
    } else {
      hiveConf = new HiveConf(HCatInputFormat.class);
    }
    client = HCatUtil.getHiveMetastoreClient(hiveConf);
    Table table = HCatUtil.getTable(client, inputJobInfo.getDatabaseName(), inputJobInfo.getTableName());
    List<PartInfo> partInfoList = new ArrayList<PartInfo>();
    inputJobInfo.setTableInfo(HCatTableInfo.valueOf(table.getTTable()));
    if (table.getPartitionKeys().size() != 0) {
      // Partitioned table
      List<Partition> parts = client.listPartitionsByFilter(inputJobInfo.getDatabaseName(), inputJobInfo.getTableName(), inputJobInfo.getFilter(), (short) -1);
      // Default to 100,000 partitions if hcat.metastore.maxpartitions is not defined
      int maxPart = hiveConf.getInt("hcat.metastore.maxpartitions", 100000);
      if (parts != null && parts.size() > maxPart) {
        throw new HCatException(ErrorType.ERROR_EXCEED_MAXPART, "total number of partitions is " + parts.size());
      }
      // populate partition info
      for (Partition ptn : parts) {
        HCatSchema schema = HCatUtil.extractSchema(new org.apache.hadoop.hive.ql.metadata.Partition(table, ptn));
        PartInfo partInfo = extractPartInfo(schema, ptn.getSd(), ptn.getParameters(), conf, inputJobInfo);
        partInfo.setPartitionValues(InternalUtil.createPtnKeyValueMap(table, ptn));
        partInfoList.add(partInfo);
      }
    } else {
      // Non partitioned table
      HCatSchema schema = HCatUtil.extractSchema(table);
      PartInfo partInfo = extractPartInfo(schema, table.getTTable().getSd(), table.getParameters(), conf, inputJobInfo);
      partInfo.setPartitionValues(new HashMap<String, String>());
      partInfoList.add(partInfo);
    }
    inputJobInfo.setPartitions(partInfoList);
    return inputJobInfo;
  } finally {
    HCatUtil.closeHiveClientQuietly(client);
  }
}
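This metastore lookup runs when a read job is configured through HCatInputFormat, which delegates to InitializeInput and serializes the populated InputJobInfo into the job configuration. A minimal read-side sketch follows; it is not taken from the Hive sources, "mydb", "mytable", and the filter string are placeholders, and it assumes the property set on the Configuration is carried into the HiveConf built above.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hive.hcatalog.mapreduce.HCatInputFormat;

Configuration conf = new Configuration();
// Lower the partition-count guard if desired; getInputJobInfo throws
// HCatException(ERROR_EXCEED_MAXPART) when more partitions match the filter.
conf.setInt("hcat.metastore.maxpartitions", 50000);
Job job = Job.getInstance(conf, "hcat-read");
// setInput queries the metastore and stores the resulting InputJobInfo in the
// job configuration; the filter restricts which partitions are listed.
HCatInputFormat.setInput(job, "mydb", "mytable").setFilter("part0 = \"501\"");
job.setInputFormatClass(HCatInputFormat.class);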
Use of org.apache.hive.hcatalog.common.HCatException in project hive by apache.
The class TestHCatDynamicPartitioned, method runHCatDynamicPartitionedTable.
protected void runHCatDynamicPartitionedTable(boolean asSingleMapTask, String customDynamicPathPattern) throws Exception {
  generateWriteRecords(NUM_RECORDS, NUM_PARTITIONS, 0);
  runMRCreate(null, dataColumns, writeRecords, NUM_RECORDS, true, asSingleMapTask, customDynamicPathPattern);
  runMRRead(NUM_RECORDS);
  // Read with partition filter
  runMRRead(4, "p1 = \"0\"");
  runMRRead(8, "p1 = \"1\" or p1 = \"3\"");
  runMRRead(4, "p1 = \"4\"");
  // read from hive to test
  String query = "select * from " + tableName;
  int retCode = driver.run(query).getResponseCode();
  if (retCode != 0) {
    throw new Exception("Error " + retCode + " running query " + query);
  }
  ArrayList<String> res = new ArrayList<String>();
  driver.getResults(res);
  assertEquals(NUM_RECORDS, res.size());
  // Test for duplicate publish
  IOException exc = null;
  try {
    generateWriteRecords(NUM_RECORDS, NUM_PARTITIONS, 0);
    Job job = runMRCreate(null, dataColumns, writeRecords, NUM_RECORDS, false, true, customDynamicPathPattern);
    if (HCatUtil.isHadoop23()) {
      Assert.assertTrue(job.isSuccessful() == false);
    }
  } catch (IOException e) {
    exc = e;
  }
  if (!HCatUtil.isHadoop23()) {
    assertTrue(exc != null);
    assertTrue(exc instanceof HCatException);
    assertTrue("Got exception of type [" + ((HCatException) exc).getErrorType().toString()
        + "] Expected ERROR_PUBLISHING_PARTITION or ERROR_MOVE_FAILED or ERROR_DUPLICATE_PARTITION",
        (ErrorType.ERROR_PUBLISHING_PARTITION == ((HCatException) exc).getErrorType())
            || (ErrorType.ERROR_MOVE_FAILED == ((HCatException) exc).getErrorType())
            || (ErrorType.ERROR_DUPLICATE_PARTITION == ((HCatException) exc).getErrorType()));
  }
  query = "show partitions " + tableName;
  retCode = driver.run(query).getResponseCode();
  if (retCode != 0) {
    throw new Exception("Error " + retCode + " running query " + query);
  }
  res = new ArrayList<String>();
  driver.getResults(res);
  assertEquals(NUM_PARTITIONS, res.size());
  query = "select * from " + tableName;
  retCode = driver.run(query).getResponseCode();
  if (retCode != 0) {
    throw new Exception("Error " + retCode + " running query " + query);
  }
  res = new ArrayList<String>();
  driver.getResults(res);
  assertEquals(NUM_RECORDS, res.size());
  query = "select count(*) from " + tableName;
  retCode = driver.run(query).getResponseCode();
  if (retCode != 0) {
    throw new Exception("Error " + retCode + " running query " + query);
  }
  res = new ArrayList<String>();
  driver.getResults(res);
  assertEquals(1, res.size());
  assertEquals("20", res.get(0));
  query = "select count(*) from " + tableName + " where p1=1";
  retCode = driver.run(query).getResponseCode();
  if (retCode != 0) {
    throw new Exception("Error " + retCode + " running query " + query);
  }
  res = new ArrayList<String>();
  driver.getResults(res);
  assertEquals(1, res.size());
  assertEquals("4", res.get(0));
}
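The test above expects a duplicate publish to surface as an HCatException whose ErrorType names the failure. A short, hedged sketch of that client-side pattern follows; submitJob is a placeholder for whatever submits the HCatalog write job, not an API from the snippets above.

import java.io.IOException;
import org.apache.hive.hcatalog.common.ErrorType;
import org.apache.hive.hcatalog.common.HCatException;

try {
  submitJob(); // placeholder for the actual job submission
} catch (IOException e) {
  if (e instanceof HCatException) {
    switch (((HCatException) e).getErrorType()) {
      case ERROR_DUPLICATE_PARTITION:
      case ERROR_PUBLISHING_PARTITION:
      case ERROR_MOVE_FAILED:
        // The partition already exists or could not be published; on an
        // immutable table this write should not be blindly retried.
        break;
      default:
        throw e;
    }
  } else {
    throw e;
  }
}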
Use of org.apache.hive.hcatalog.common.HCatException in project hive by apache.
The class TestHCatPartitioned, method columnOrderChangeTest.
// check behavior when changing the order of columns
private void columnOrderChangeTest() throws Exception {
  HCatSchema tableSchema = getTableSchema();
  assertEquals(5, tableSchema.getFields().size());
  partitionColumns = new ArrayList<HCatFieldSchema>();
  partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")));
  partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c3", serdeConstants.STRING_TYPE_NAME, "")));
  partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, "")));
  writeRecords = new ArrayList<HCatRecord>();
  for (int i = 0; i < 10; i++) {
    List<Object> objList = new ArrayList<Object>();
    objList.add(i);
    objList.add("co strvalue" + i);
    objList.add("co str2value" + i);
    writeRecords.add(new DefaultHCatRecord(objList));
  }
  Map<String, String> partitionMap = new HashMap<String, String>();
  partitionMap.put("part1", "p1value8");
  partitionMap.put("part0", "508");
  Exception exc = null;
  try {
    runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true);
  } catch (IOException e) {
    exc = e;
  }
  assertTrue(exc != null);
  assertTrue(exc instanceof HCatException);
  assertEquals(ErrorType.ERROR_SCHEMA_COLUMN_MISMATCH, ((HCatException) exc).getErrorType());
  partitionColumns = new ArrayList<HCatFieldSchema>();
  partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")));
  partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, "")));
  writeRecords = new ArrayList<HCatRecord>();
  for (int i = 0; i < 10; i++) {
    List<Object> objList = new ArrayList<Object>();
    objList.add(i);
    objList.add("co strvalue" + i);
    writeRecords.add(new DefaultHCatRecord(objList));
  }
  runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true);
  if (isTableImmutable()) {
    // Read should get 10 + 20 + 10 + 10 + 20 rows
    runMRRead(70);
  } else {
    // +20 from the duplicate publish
    runMRRead(90);
  }
}
Use of org.apache.hive.hcatalog.common.HCatException in project hive by apache.
The class TestHCatPartitioned, method testHCatPartitionedTable.
@Test
public void testHCatPartitionedTable() throws Exception {
  Map<String, String> partitionMap = new HashMap<String, String>();
  partitionMap.put("part1", "p1value1");
  partitionMap.put("part0", "501");
  runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true);
  partitionMap.clear();
  partitionMap.put("PART1", "p1value2");
  partitionMap.put("PART0", "502");
  runMRCreate(partitionMap, partitionColumns, writeRecords, 20, true);
  // Test for duplicate publish -- this will either fail on job creation time
  // and throw an exception, or will fail at runtime, and fail the job.
  IOException exc = null;
  try {
    Job j = runMRCreate(partitionMap, partitionColumns, writeRecords, 20, true);
    assertEquals(!isTableImmutable(), j.isSuccessful());
  } catch (IOException e) {
    exc = e;
    assertTrue(exc instanceof HCatException);
    assertTrue(ErrorType.ERROR_DUPLICATE_PARTITION.equals(((HCatException) exc).getErrorType()));
  }
  if (!isTableImmutable()) {
    assertNull(exc);
  }
  // Test for publish with invalid partition key name
  exc = null;
  partitionMap.clear();
  partitionMap.put("px1", "p1value2");
  partitionMap.put("px0", "502");
  try {
    Job j = runMRCreate(partitionMap, partitionColumns, writeRecords, 20, true);
    assertFalse(j.isSuccessful());
  } catch (IOException e) {
    exc = e;
    assertNotNull(exc);
    assertTrue(exc instanceof HCatException);
    assertEquals(ErrorType.ERROR_MISSING_PARTITION_KEY, ((HCatException) exc).getErrorType());
  }
  // Test for publish with missing partition key values
  exc = null;
  partitionMap.clear();
  partitionMap.put("px", "512");
  try {
    runMRCreate(partitionMap, partitionColumns, writeRecords, 20, true);
  } catch (IOException e) {
    exc = e;
  }
  assertNotNull(exc);
  assertTrue(exc instanceof HCatException);
  assertEquals(ErrorType.ERROR_INVALID_PARTITION_VALUES, ((HCatException) exc).getErrorType());
  // Test for null partition value map
  exc = null;
  try {
    runMRCreate(null, partitionColumns, writeRecords, 20, false);
  } catch (IOException e) {
    exc = e;
  }
  assertTrue(exc == null);
  // Read should get 10 + 20 rows if immutable, 50 (10+20+20) if mutable
  if (isTableImmutable()) {
    runMRRead(30);
  } else {
    runMRRead(50);
  }
  // Read with partition filter
  runMRRead(10, "part1 = \"p1value1\"");
  runMRRead(10, "part0 = \"501\"");
  if (isTableImmutable()) {
    runMRRead(20, "part1 = \"p1value2\"");
    runMRRead(30, "part1 = \"p1value1\" or part1 = \"p1value2\"");
    runMRRead(20, "part0 = \"502\"");
    runMRRead(30, "part0 = \"501\" or part0 = \"502\"");
  } else {
    runMRRead(40, "part1 = \"p1value2\"");
    runMRRead(50, "part1 = \"p1value1\" or part1 = \"p1value2\"");
    runMRRead(40, "part0 = \"502\"");
    runMRRead(50, "part0 = \"501\" or part0 = \"502\"");
  }
  tableSchemaTest();
  columnOrderChangeTest();
  hiveReadTest();
}