Use of org.apache.hive.hcatalog.data.schema.HCatSchema in project hive by apache, from the class TestHCatInputFormatMethods, method testGetPartitionAndDataColumns:
@Test
public void testGetPartitionAndDataColumns() throws Exception {
  Configuration conf = new Configuration();
  Job myJob = new Job(conf, "hcatTest");
  HCatInputFormat.setInput(myJob, "default", "testHCIFMethods");
  // Data columns: the table's non-partition columns.
  HCatSchema cols = HCatInputFormat.getDataColumns(myJob.getConfiguration());
  Assert.assertNotNull(cols.getFields());
  Assert.assertEquals(2, cols.getFields().size());
  Assert.assertEquals("a", cols.getFields().get(0).getName());
  Assert.assertEquals("b", cols.getFields().get(1).getName());
  Assert.assertEquals(HCatFieldSchema.Type.STRING, cols.getFields().get(0).getType());
  Assert.assertEquals(HCatFieldSchema.Type.INT, cols.getFields().get(1).getType());
  // Partition columns are exposed separately.
  HCatSchema pcols = HCatInputFormat.getPartitionColumns(myJob.getConfiguration());
  Assert.assertNotNull(pcols.getFields());
  Assert.assertEquals(2, pcols.getFields().size());
  Assert.assertEquals("x", pcols.getFields().get(0).getName());
  Assert.assertEquals("y", pcols.getFields().get(1).getName());
  Assert.assertEquals(HCatFieldSchema.Type.STRING, pcols.getFields().get(0).getType());
  Assert.assertEquals(HCatFieldSchema.Type.STRING, pcols.getFields().get(1).getType());
}
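A reader usually recombines the two schemas so that partition keys appear as ordinary columns. A minimal sketch of that composition, reusing only the calls exercised above (variable names are illustrative, and the snippet assumes a surrounding method declared to throw Exception):

Configuration jobConf = myJob.getConfiguration();
List<HCatFieldSchema> all =
    new ArrayList<HCatFieldSchema>(HCatInputFormat.getDataColumns(jobConf).getFields());
all.addAll(HCatInputFormat.getPartitionColumns(jobConf).getFields());
HCatSchema fullSchema = new HCatSchema(all); // data columns first, then partition columns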
Use of org.apache.hive.hcatalog.data.schema.HCatSchema in project hive by apache, from the class TestHCatPartitioned, method columnOrderChangeTest:
// Check behavior when the order of the columns is changed.
private void columnOrderChangeTest() throws Exception {
  HCatSchema tableSchema = getTableSchema();
  assertEquals(5, tableSchema.getFields().size());
  // Write with the columns out of order (c1, c3, c2): this must fail
  // with ERROR_SCHEMA_COLUMN_MISMATCH.
  partitionColumns = new ArrayList<HCatFieldSchema>();
  partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")));
  partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c3", serdeConstants.STRING_TYPE_NAME, "")));
  partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, "")));
  writeRecords = new ArrayList<HCatRecord>();
  for (int i = 0; i < 10; i++) {
    List<Object> objList = new ArrayList<Object>();
    objList.add(i);
    objList.add("co strvalue" + i);
    objList.add("co str2value" + i);
    writeRecords.add(new DefaultHCatRecord(objList));
  }
  Map<String, String> partitionMap = new HashMap<String, String>();
  partitionMap.put("part1", "p1value8");
  partitionMap.put("part0", "508");
  Exception exc = null;
  try {
    runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true);
  } catch (IOException e) {
    exc = e;
  }
  assertTrue(exc != null);
  assertTrue(exc instanceof HCatException);
  assertEquals(ErrorType.ERROR_SCHEMA_COLUMN_MISMATCH, ((HCatException) exc).getErrorType());
  // Writing a prefix of the table columns (c1, c2) in the right order is allowed.
  partitionColumns = new ArrayList<HCatFieldSchema>();
  partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")));
  partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, "")));
  writeRecords = new ArrayList<HCatRecord>();
  for (int i = 0; i < 10; i++) {
    List<Object> objList = new ArrayList<Object>();
    objList.add(i);
    objList.add("co strvalue" + i);
    writeRecords.add(new DefaultHCatRecord(objList));
  }
  runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true);
  if (isTableImmutable()) {
    //Read should get 10 + 20 + 10 + 10 + 20 rows
    runMRRead(70);
  } else {
    // +20 from the duplicate publish
    runMRRead(90);
  }
}
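The capture-and-inspect pattern around the failing runMRCreate call recurs in these tests; a hypothetical helper (assertHCatError is not part of the test class, and HCatException extends IOException, which is why catching Exception here is safe) could express the expected failure more directly:

private static void assertHCatError(ErrorType expected, java.util.concurrent.Callable<?> action) {
  try {
    action.call();
    fail("expected HCatException of type " + expected);
  } catch (Exception e) {
    assertTrue(e instanceof HCatException);
    assertEquals(expected, ((HCatException) e).getErrorType());
  }
}

// Usage, replacing the try/catch block above:
// assertHCatError(ErrorType.ERROR_SCHEMA_COLUMN_MISMATCH,
//     () -> runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true));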
Use of org.apache.hive.hcatalog.data.schema.HCatSchema in project hive by apache, from the class TestLazyHCatRecord, method testGetWithName:
@Test
public void testGetWithName() throws Exception {
  TypeInfo ti = getTypeInfo();
  HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector(ti));
  HCatSchema schema = HCatSchemaUtils.getHCatSchema(ti).get(0).getStructSubSchema();
  Assert.assertEquals(INT_CONST, ((Integer) r.get("an_int", schema)).intValue());
  Assert.assertEquals(LONG_CONST, ((Long) r.get("a_long", schema)).longValue());
  Assert.assertEquals(DOUBLE_CONST, ((Double) r.get("a_double", schema)).doubleValue(), 0);
  Assert.assertEquals(STRING_CONST, (String) r.get("a_string", schema));
}
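getHCatRecord(), getObjectInspector(), and getTypeInfo() are helpers defined elsewhere in the test class. A plausible sketch of getTypeInfo(), assuming only the four fields the assertions name (the helper's real body may differ):

private TypeInfo getTypeInfo() {
  List<String> names = Arrays.asList("an_int", "a_long", "a_double", "a_string");
  List<TypeInfo> types = Arrays.<TypeInfo>asList(TypeInfoFactory.intTypeInfo,
      TypeInfoFactory.longTypeInfo, TypeInfoFactory.doubleTypeInfo, TypeInfoFactory.stringTypeInfo);
  // HCatSchemaUtils.getHCatSchema(ti) wraps a struct TypeInfo in a one-field schema,
  // which is why the test unwraps it with get(0).getStructSubSchema().
  return TypeInfoFactory.getStructTypeInfo(names, types);
}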
Use of org.apache.hive.hcatalog.data.schema.HCatSchema in project hive by apache, from the class TestHCatUtil, method testGetTableSchemaWithPtnColsApi:
@Test
public void testGetTableSchemaWithPtnColsApi() throws IOException {
  // Check the schema of a table with one field and no partition keys.
  StorageDescriptor sd = new StorageDescriptor(
      Lists.newArrayList(new FieldSchema("username", serdeConstants.STRING_TYPE_NAME, null)),
      "location", "org.apache.hadoop.mapred.TextInputFormat",
      "org.apache.hadoop.mapred.TextOutputFormat", false, -1, new SerDeInfo(),
      new ArrayList<String>(), new ArrayList<Order>(), new HashMap<String, String>());
  org.apache.hadoop.hive.metastore.api.Table apiTable =
      new org.apache.hadoop.hive.metastore.api.Table("test_tblname", "test_dbname", "test_owner",
          0, 0, 0, sd, new ArrayList<FieldSchema>(), new HashMap<String, String>(),
          "viewOriginalText", "viewExpandedText", TableType.EXTERNAL_TABLE.name());
  Table table = new Table(apiTable);
  List<HCatFieldSchema> expectedHCatSchema =
      Lists.newArrayList(new HCatFieldSchema("username", HCatFieldSchema.Type.STRING, null));
  Assert.assertEquals(new HCatSchema(expectedHCatSchema), HCatUtil.getTableSchemaWithPtnCols(table));
  // Add a partition key and ensure it is reflected in the schema.
  List<FieldSchema> partitionKeys =
      Lists.newArrayList(new FieldSchema("dt", serdeConstants.STRING_TYPE_NAME, null));
  table.getTTable().setPartitionKeys(partitionKeys);
  expectedHCatSchema.add(new HCatFieldSchema("dt", HCatFieldSchema.Type.STRING, null));
  Assert.assertEquals(new HCatSchema(expectedHCatSchema), HCatUtil.getTableSchemaWithPtnCols(table));
}
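As the assertions imply, getTableSchemaWithPtnCols appends the partition keys after the data columns. A hand-rolled equivalent of that composition (illustrative only, not HCatUtil's actual implementation):

// Data columns first, then partition keys: yields "username", then "dt" for the table above.
List<HCatFieldSchema> fields = new ArrayList<HCatFieldSchema>();
for (FieldSchema col : table.getCols()) {
  fields.add(HCatSchemaUtils.getHCatFieldSchema(col));
}
for (FieldSchema key : table.getPartitionKeys()) {
  fields.add(HCatSchemaUtils.getHCatFieldSchema(key));
}
HCatSchema withPtnCols = new HCatSchema(fields);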
Use of org.apache.hive.hcatalog.data.schema.HCatSchema in project hive by apache, from the class HCatMapReduceTest, method runMRCreate:
/**
 * Run a local map-reduce job to load data from in-memory records into an HCatalog table.
 * @param partitionValues static partition key/value pairs for the write
 * @param partitionColumns schema of the columns being written
 * @param records data to be written to the HCatalog table
 * @param writeCount number of records expected to be written
 * @param assertWrite whether to assert that exactly writeCount records were written
 * @param asSingleMapTask whether to use a single input path so only one map task runs
 * @param customDynamicPathPattern custom dynamic-partitioning path pattern, or null for the default
 * @return the completed Job
 * @throws Exception
 */
Job runMRCreate(Map<String, String> partitionValues, List<HCatFieldSchema> partitionColumns,
    List<HCatRecord> records, int writeCount, boolean assertWrite, boolean asSingleMapTask,
    String customDynamicPathPattern) throws Exception {
  writeRecords = records;
  MapCreate.writeCount = 0;
  Configuration conf = new Configuration();
  Job job = new Job(conf, "hcat mapreduce write test");
  job.setJarByClass(this.getClass());
  job.setMapperClass(HCatMapReduceTest.MapCreate.class);
  // input/output settings
  job.setInputFormatClass(TextInputFormat.class);
  if (asSingleMapTask) {
    // One input path would mean only one map task
    Path path = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceInput");
    createInputFile(path, writeCount);
    TextInputFormat.setInputPaths(job, path);
  } else {
    // Create two input paths so that two map tasks get triggered. There could be other ways
    // to trigger two map tasks.
    Path path = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceInput");
    createInputFile(path, writeCount / 2);
    Path path2 = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceInput2");
    createInputFile(path2, (writeCount - writeCount / 2));
    TextInputFormat.setInputPaths(job, path, path2);
  }
  job.setOutputFormatClass(HCatOutputFormat.class);
  OutputJobInfo outputJobInfo = OutputJobInfo.create(dbName, tableName, partitionValues);
  if (customDynamicPathPattern != null) {
    job.getConfiguration().set(HCatConstants.HCAT_DYNAMIC_CUSTOM_PATTERN, customDynamicPathPattern);
  }
  HCatOutputFormat.setOutput(job, outputJobInfo);
  job.setMapOutputKeyClass(BytesWritable.class);
  job.setMapOutputValueClass(DefaultHCatRecord.class);
  job.setNumReduceTasks(0);
  HCatOutputFormat.setSchema(job, new HCatSchema(partitionColumns));
  boolean success = job.waitForCompletion(true);
  // Ensure counters are set when data has actually been read.
  if (partitionValues != null) {
    assertTrue(job.getCounters().getGroup("FileSystemCounters")
        .findCounter("FILE_BYTES_READ").getValue() > 0);
  }
  if (!HCatUtil.isHadoop23()) {
    // Local mode outputcommitter hook is not invoked in Hadoop 1.x
    if (success) {
      new FileOutputCommitterContainer(job, null).commitJob(job);
    } else {
      new FileOutputCommitterContainer(job, null).abortJob(job, JobStatus.State.FAILED);
    }
  }
  if (assertWrite) {
    // we assert only if we expected to assert with this call.
    Assert.assertEquals(writeCount, MapCreate.writeCount);
  }
  if (isTableExternal()) {
    externalTableLocation = outputJobInfo.getTableInfo().getTableLocation();
  }
  return job;
}
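The five-argument calls in columnOrderChangeTest above imply convenience overloads of this method; a plausible shape for one (the default values here are assumptions):

Job runMRCreate(Map<String, String> partitionValues, List<HCatFieldSchema> partitionColumns,
    List<HCatRecord> records, int writeCount, boolean assertWrite) throws Exception {
  // Presumed defaults: run as a single map task, no custom dynamic-partition path pattern.
  return runMRCreate(partitionValues, partitionColumns, records, writeCount, assertWrite, true, null);
}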