Use of org.apache.hive.hcatalog.data.schema.HCatFieldSchema in project hive by apache.
In the class TestCommands, the method testMetadataReplEximCommands:
@Test
public void testMetadataReplEximCommands() throws IOException, CommandNeedRetryException {
  // repl metadata export, has repl.last.id and repl.scope=metadata
  // import repl metadata dump, table metadata changed, allows override, has repl.last.id
  int evid = 222;
  String exportLocation = TEST_PATH + File.separator + "testMetadataReplExim";
  Path tempPath = new Path(TEST_PATH, "testMetadataReplEximTmp");
  String tempLocation = tempPath.toUri().getPath();
  String dbName = "exim";
  String tableName = "basicSrc";
  String importedTableName = "basicDst";
  List<HCatFieldSchema> cols = HCatSchemaUtils.getHCatSchema("b:string").getFields();
  client.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE);
  client.createDatabase(HCatCreateDBDesc.create(dbName).ifNotExists(false).build());
  HCatTable table = (new HCatTable(dbName, tableName)).cols(cols).fileFormat("textfile");
  client.createTable(HCatCreateTableDesc.create(table).build());
  HCatTable t = client.getTable(dbName, tableName);
  assertNotNull(t);
  String[] data = new String[] { "eleven", "twelve" };
  HcatTestUtils.createTestDataFile(tempLocation, data);
  CommandProcessorResponse ret = driver.run("LOAD DATA LOCAL INPATH '" + tempLocation + "' OVERWRITE INTO TABLE " + dbName + "." + tableName);
  assertEquals(ret.getResponseCode() + ":" + ret.getErrorMessage(), null, ret.getException());
  CommandProcessorResponse selectRet = driver.run("SELECT * from " + dbName + "." + tableName);
  assertEquals(selectRet.getResponseCode() + ":" + selectRet.getErrorMessage(), null, selectRet.getException());
  List<String> values = new ArrayList<String>();
  driver.getResults(values);
  assertEquals(2, values.size());
  assertEquals(data[0], values.get(0));
  assertEquals(data[1], values.get(1));
  ExportCommand exportMdCmd = new ExportCommand(dbName, tableName, null, exportLocation, true, evid);
  LOG.info("About to run :" + exportMdCmd.get().get(0));
  CommandProcessorResponse ret2 = driver.run(exportMdCmd.get().get(0));
  assertEquals(ret2.getResponseCode() + ":" + ret2.getErrorMessage(), null, ret2.getException());
  List<String> exportPaths = exportMdCmd.cleanupLocationsAfterEvent();
  assertEquals(1, exportPaths.size());
  String metadata = getMetadataContents(exportPaths.get(0));
  LOG.info("Export returned the following _metadata contents:");
  LOG.info(metadata);
  assertTrue(metadata + "did not match \"repl.scope\"=\"metadata\"", metadata.matches(".*\"repl.scope\":\"metadata\".*"));
  assertTrue(metadata + "has \"repl.last.id\"", metadata.matches(".*\"repl.last.id\":.*"));
  ImportCommand importMdCmd = new ImportCommand(dbName, importedTableName, null, exportLocation, true, evid);
  LOG.info("About to run :" + importMdCmd.get().get(0));
  CommandProcessorResponse ret3 = driver.run(importMdCmd.get().get(0));
  assertEquals(ret3.getResponseCode() + ":" + ret3.getErrorMessage(), null, ret3.getException());
  CommandProcessorResponse selectRet2 = driver.run("SELECT * from " + dbName + "." + importedTableName);
  assertEquals(selectRet2.getResponseCode() + ":" + selectRet2.getErrorMessage(), null, selectRet2.getException());
  List<String> values2 = new ArrayList<String>();
  driver.getResults(values2);
  assertEquals(0, values2.size());
  HCatTable importedTable = client.getTable(dbName, importedTableName);
  assertNotNull(importedTable);
  assertTrue(importedTable.getTblProps().containsKey("repl.last.id"));
}
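The test above builds its single-column table from the column string "b:string". As a minimal standalone sketch of that parsing step (the class name SchemaStringSketch is made up for illustration), the same call turns a column spec into HCatFieldSchema objects:

import java.util.List;
import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
import org.apache.hive.hcatalog.data.schema.HCatSchemaUtils;

public class SchemaStringSketch {
  public static void main(String[] args) throws Exception {
    // "b:string" parses into a one-column schema, as in the test above.
    List<HCatFieldSchema> cols = HCatSchemaUtils.getHCatSchema("b:string").getFields();
    for (HCatFieldSchema fs : cols) {
      System.out.println(fs.getName() + " -> " + fs.getTypeString());
    }
  }
}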
Use of org.apache.hive.hcatalog.data.schema.HCatFieldSchema in project hive by apache.
In the class StoreNumbers, the method main:
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  args = new GenericOptionsParser(conf, args).getRemainingArgs();
  String[] otherArgs = new String[2];
  int j = 0;
  for (int i = 0; i < args.length; i++) {
    if (args[i].equals("-libjars")) {
      // generic options parser doesn't seem to work!
      conf.set("tmpjars", args[i + 1]);
      // skip it, the for loop will skip its value
      i = i + 1;
    } else {
      otherArgs[j++] = args[i];
    }
  }
  if (otherArgs.length != 2) {
    usage();
  }
  String serverUri = otherArgs[0];
  if (otherArgs[1] == null || (!otherArgs[1].equalsIgnoreCase("part") && !otherArgs[1].equalsIgnoreCase("nopart")) && !otherArgs[1].equalsIgnoreCase("nopart_pig")) {
    usage();
  }
  boolean writeToPartitionedTable = (otherArgs[1].equalsIgnoreCase("part"));
  boolean writeToNonPartPigTable = (otherArgs[1].equalsIgnoreCase("nopart_pig"));
  String tableName = NUMBERS_TABLE_NAME;
  String dbName = "default";
  Map<String, String> outputPartitionKvps = new HashMap<String, String>();
  String outputTableName = null;
  conf.set(IS_PIG_NON_PART_TABLE, "false");
  if (writeToPartitionedTable) {
    outputTableName = NUMBERS_PARTITIONED_TABLE_NAME;
    outputPartitionKvps.put("datestamp", "20100101");
  } else {
    if (writeToNonPartPigTable) {
      conf.set(IS_PIG_NON_PART_TABLE, "true");
      outputTableName = NUMBERS_NON_PARTITIONED_PIG_TABLE_NAME;
    } else {
      outputTableName = NUMBERS_NON_PARTITIONED_TABLE_NAME;
    }
    // test with null or empty randomly
    if (new Random().nextInt(2) == 0) {
      outputPartitionKvps = null;
    }
  }
  String principalID = System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL);
  if (principalID != null)
    conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID);
  Job job = new Job(conf, "storenumbers");
  // initialize HCatInputFormat
  HCatInputFormat.setInput(job, dbName, tableName);
  // initialize HCatOutputFormat
  HCatOutputFormat.setOutput(job, OutputJobInfo.create(dbName, outputTableName, outputPartitionKvps));
  // test with and without specifying schema randomly
  HCatSchema s = HCatInputFormat.getTableSchema(job);
  if (writeToNonPartPigTable) {
    List<HCatFieldSchema> newHfsList = new ArrayList<HCatFieldSchema>();
    // change smallint and tinyint to int
    for (HCatFieldSchema hfs : s.getFields()) {
      if (hfs.getTypeString().equals("smallint")) {
        newHfsList.add(new HCatFieldSchema(hfs.getName(), HCatFieldSchema.Type.INT, hfs.getComment()));
      } else if (hfs.getTypeString().equals("tinyint")) {
        newHfsList.add(new HCatFieldSchema(hfs.getName(), HCatFieldSchema.Type.INT, hfs.getComment()));
      } else {
        newHfsList.add(hfs);
      }
    }
    s = new HCatSchema(newHfsList);
  }
  HCatOutputFormat.setSchema(job, s);
  job.setInputFormatClass(HCatInputFormat.class);
  job.setOutputFormatClass(HCatOutputFormat.class);
  job.setJarByClass(StoreNumbers.class);
  job.setMapperClass(SumMapper.class);
  job.setOutputKeyClass(IntWritable.class);
  job.setNumReduceTasks(0);
  job.setOutputValueClass(DefaultHCatRecord.class);
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
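The smallint/tinyint handling above is the part that actually exercises HCatFieldSchema. A minimal sketch of just that step, pulled out into a hypothetical helper (the class and the method name widenToInt are not from the original code):

import java.util.ArrayList;
import java.util.List;
import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
import org.apache.hive.hcatalog.data.schema.HCatSchema;

public class WidenSchemaSketch {
  // Copies a schema, widening smallint and tinyint columns to int, mirroring the
  // adjustment StoreNumbers makes before writing to the Pig-facing table.
  public static HCatSchema widenToInt(HCatSchema in) throws Exception {
    List<HCatFieldSchema> out = new ArrayList<HCatFieldSchema>();
    for (HCatFieldSchema hfs : in.getFields()) {
      String type = hfs.getTypeString();
      if (type.equals("smallint") || type.equals("tinyint")) {
        out.add(new HCatFieldSchema(hfs.getName(), HCatFieldSchema.Type.INT, hfs.getComment()));
      } else {
        out.add(hfs);
      }
    }
    return new HCatSchema(out);
  }
}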
Use of org.apache.hive.hcatalog.data.schema.HCatFieldSchema in project hive by apache.
In the class WriteTextPartitioned, the method run:
public int run(String[] args) throws Exception {
  Configuration conf = getConf();
  args = new GenericOptionsParser(conf, args).getRemainingArgs();
  String serverUri = args[0];
  String inputTableName = args[1];
  String outputTableName = args[2];
  if (args.length > 3)
    filter = args[3];
  String dbName = null;
  String principalID = System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL);
  if (principalID != null)
    conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID);
  Job job = new Job(conf, "WriteTextPartitioned");
  // initialize HCatInputFormat
  HCatInputFormat.setInput(job, dbName, inputTableName, filter);
  job.setInputFormatClass(HCatInputFormat.class);
  job.setJarByClass(WriteTextPartitioned.class);
  job.setMapperClass(Map.class);
  job.setOutputKeyClass(WritableComparable.class);
  job.setOutputValueClass(DefaultHCatRecord.class);
  job.setNumReduceTasks(0);
  java.util.Map<String, String> partitionVals = null;
  if (filter != null) {
    String[] s = filter.split("=");
    String val = s[1].replace('"', ' ').trim();
    partitionVals = new HashMap<String, String>(1);
    partitionVals.put(s[0], val);
  }
  // initialize HCatOutputFormat
  HCatOutputFormat.setOutput(job, OutputJobInfo.create(dbName, outputTableName, partitionVals));
  HCatSchema s = HCatInputFormat.getTableSchema(job);
  // Build the schema for this table, which is slightly different than the
  // schema for the input table
  List<HCatFieldSchema> fss = new ArrayList<HCatFieldSchema>(3);
  fss.add(s.get(0));
  fss.add(s.get(1));
  fss.add(s.get(3));
  HCatOutputFormat.setSchema(job, new HCatSchema(fss));
  job.setOutputFormatClass(HCatOutputFormat.class);
  return (job.waitForCompletion(true) ? 0 : 1);
}
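The output schema above is assembled positionally, keeping columns 0, 1, and 3 of the input and skipping column 2. A name-based variant is sketched below; the class, the dropColumn method, and the partitionCol parameter are illustrative, not part of the original example:

import java.util.ArrayList;
import java.util.List;
import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
import org.apache.hive.hcatalog.data.schema.HCatSchema;

public class ProjectSchemaSketch {
  // Copies every field except the named column, instead of selecting
  // fields by position as WriteTextPartitioned does.
  public static HCatSchema dropColumn(HCatSchema in, String partitionCol) throws Exception {
    List<HCatFieldSchema> fields = new ArrayList<HCatFieldSchema>();
    for (HCatFieldSchema hfs : in.getFields()) {
      if (!hfs.getName().equalsIgnoreCase(partitionCol)) {
        fields.add(hfs);
      }
    }
    return new HCatSchema(fields);
  }
}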
Use of org.apache.hive.hcatalog.data.schema.HCatFieldSchema in project hive by apache.
In the class TestPigHCatUtil, the method testGetBagSubSchema:
@Test
public void testGetBagSubSchema() throws Exception {
  // Define the expected schema.
  ResourceFieldSchema[] bagSubFieldSchemas = new ResourceFieldSchema[1];
  bagSubFieldSchemas[0] = new ResourceFieldSchema().setName("innertuple").setDescription("The tuple in the bag").setType(DataType.TUPLE);
  ResourceFieldSchema[] innerTupleFieldSchemas = new ResourceFieldSchema[1];
  innerTupleFieldSchemas[0] = new ResourceFieldSchema().setName("innerfield").setType(DataType.CHARARRAY);
  bagSubFieldSchemas[0].setSchema(new ResourceSchema().setFields(innerTupleFieldSchemas));
  ResourceSchema expected = new ResourceSchema().setFields(bagSubFieldSchemas);
  // Get the actual converted schema.
  HCatSchema hCatSchema = new HCatSchema(Lists.newArrayList(new HCatFieldSchema("innerLlama", HCatFieldSchema.Type.STRING, null)));
  HCatFieldSchema hCatFieldSchema = new HCatFieldSchema("llama", HCatFieldSchema.Type.ARRAY, hCatSchema, null);
  ResourceSchema actual = PigHCatUtil.getBagSubSchema(hCatFieldSchema);
  Assert.assertEquals(expected.toString(), actual.toString());
}
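The HCatFieldSchema construction the test feeds into getBagSubSchema can be read on its own: an ARRAY field takes its element schema as the sub-schema argument. A minimal sketch of just that construction (the wrapper class and method name are made up for illustration):

import com.google.common.collect.Lists;
import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
import org.apache.hive.hcatalog.data.schema.HCatSchema;

public class BagFieldSketch {
  public static HCatFieldSchema stringArrayField() throws Exception {
    // Element schema: a single string column named "innerLlama".
    HCatSchema elementSchema = new HCatSchema(Lists.newArrayList(
        new HCatFieldSchema("innerLlama", HCatFieldSchema.Type.STRING, null)));
    // ARRAY fields wrap their element schema; Pig later maps this to a bag of tuples.
    return new HCatFieldSchema("llama", HCatFieldSchema.Type.ARRAY, elementSchema, null);
  }
}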
Use of org.apache.hive.hcatalog.data.schema.HCatFieldSchema in project hive by apache.
In the class PigHCatUtil, the method transformToBag:
private static DataBag transformToBag(List<?> list, HCatFieldSchema hfs) throws Exception {
  if (list == null) {
    return null;
  }
  HCatFieldSchema elementSubFieldSchema = hfs.getArrayElementSchema().getFields().get(0);
  DataBag db = new DefaultDataBag();
  for (Object o : list) {
    Tuple tuple;
    if (elementSubFieldSchema.getType() == Type.STRUCT) {
      tuple = transformToTuple((List<?>) o, elementSubFieldSchema);
    } else {
      // bags always contain tuples
      tuple = tupFac.newTuple(extractPigObject(o, elementSubFieldSchema));
    }
    db.add(tuple);
  }
  return db;
}
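On the reading side, the element schema that transformToBag inspects comes from HCatFieldSchema.getArrayElementSchema(). A small sketch of that lookup, assuming a hypothetical array<string> column named tags:

import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
import org.apache.hive.hcatalog.data.schema.HCatSchema;
import org.apache.hive.hcatalog.data.schema.HCatSchemaUtils;

public class ArrayElementSchemaSketch {
  public static void main(String[] args) throws Exception {
    // A table schema with one array<string> column.
    HCatSchema tableSchema = HCatSchemaUtils.getHCatSchema("tags:array<string>");
    HCatFieldSchema arrayField = tableSchema.get("tags");
    // The element schema has a single field; STRING here, so transformToBag would
    // wrap each element directly in a tuple rather than recursing into a struct.
    HCatFieldSchema element = arrayField.getArrayElementSchema().getFields().get(0);
    System.out.println("element type: " + element.getType());
  }
}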