Use of org.apache.hadoop.util.GenericOptionsParser in project BD2017 by achintya-kumar.
The class ImportFromFileCustomized, method main.
/**
* Main entry point.
*
* @param args The command line parameters.
* @throws Exception When running the job fails.
*/
public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    // co ImportFromFile-7-Args Give the command line arguments to the generic parser first to handle "-Dxyz" properties.
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    CommandLine cmd = parseArgs(otherArgs);
    // check debug flag and other options
    if (cmd.hasOption("d"))
        conf.set("conf.debug", "true");
    // get details
    String table = cmd.getOptionValue("t");
    String input = cmd.getOptionValue("i");
    // co ImportFromFile-8-JobDef Define the job with the required classes.
    Job job = Job.getInstance(conf, "Import from file " + input + " into table " + table);
    job.setJarByClass(ImportFromFileCustomized.class);
    job.setMapperClass(ImportMapper.class);
    job.setOutputFormatClass(TableOutputFormat.class);
    job.getConfiguration().set(TableOutputFormat.OUTPUT_TABLE, table);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(Writable.class);
    // co ImportFromFile-9-MapOnly This is a map-only job, therefore tell the framework to bypass the reduce step.
    job.setNumReduceTasks(0);
    FileInputFormat.addInputPath(job, new Path(input));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
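The parseArgs helper called above is not shown on this page. A minimal sketch of what it might look like, assuming Apache Commons CLI (org.apache.commons.cli) and the -t, -i and -d options read in main; the option descriptions are illustrative, not the project's actual text:

// Hypothetical sketch of the parseArgs helper, based on the options used in main.
private static CommandLine parseArgs(String[] args) {
    Options options = new Options();
    Option o = new Option("t", "table", true, "table to import into (must exist)");
    o.setRequired(true);
    options.addOption(o);
    o = new Option("i", "input", true, "the file to read the data from");
    o.setRequired(true);
    options.addOption(o);
    options.addOption("d", "debug", false, "switch on DEBUG log level");
    try {
        // parse only the remaining (non-generic) arguments
        return new PosixParser().parse(options, args);
    } catch (ParseException e) {
        System.err.println("ERROR: " + e.getMessage());
        new HelpFormatter().printHelp("ImportFromFileCustomized", options, true);
        System.exit(-1);
        return null;
    }
}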
Use of org.apache.hadoop.util.GenericOptionsParser in project hive by apache.
The class TypeDataCheck, method run.
public int run(String[] args) {
    try {
        args = new GenericOptionsParser(conf, args).getRemainingArgs();
        // pull -libjars out manually and keep the remaining application arguments
        String[] otherArgs = new String[5];
        int j = 0;
        for (int i = 0; i < args.length; i++) {
            if (args[i].equals("-libjars")) {
                conf.set("tmpjars", args[i + 1]);
                // skip the -libjars value as well
                i = i + 1;
            } else {
                otherArgs[j++] = args[i];
            }
        }
        // expect exactly five application arguments after stripping -libjars
        if (j != 5) {
            System.err.println("Other args:" + Arrays.asList(otherArgs));
            System.err.println("Usage: hadoop jar testudf.jar typedatacheck " +
                "<serveruri> <tablename> <hive types of cols + delimited> " +
                "<output dir> <tab|ctrla> <-libjars hive-hcat jar>\n" +
                "The <tab|ctrla> argument controls the output delimiter.\n" +
                "The hcat jar location should be specified as file://<full path to jar>\n");
            System.exit(2);
        }
        String serverUri = otherArgs[0];
        String tableName = otherArgs[1];
        String schemaStr = otherArgs[2];
        String outputDir = otherArgs[3];
        String outputdelim = otherArgs[4];
        if (!outputdelim.equals("tab") && !outputdelim.equals("ctrla")) {
            System.err.println("ERROR: Specify 'tab' or 'ctrla' for the output delimiter");
        }
        String dbName = "default";
        String principalID = System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL);
        if (principalID != null) {
            conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID);
        }
        Job job = new Job(conf, "typedatacheck");
        // initialize HCatInputFormat
        HCatInputFormat.setInput(job, dbName, tableName);
        HCatSchema s = HCatInputFormat.getTableSchema(job);
        job.getConfiguration().set(SCHEMA_KEY, schemaStr);
        job.getConfiguration().set(DELIM, outputdelim);
        job.setInputFormatClass(HCatInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setJarByClass(TypeDataCheck.class);
        job.setMapperClass(TypeDataCheckMapper.class);
        job.setNumReduceTasks(0);
        job.setOutputKeyClass(Long.class);
        job.setOutputValueClass(Text.class);
        FileOutputFormat.setOutputPath(job, new Path(outputDir));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
        return 0;
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
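The manual -libjars/tmpjars loop above works around the fact that GenericOptionsParser only consumes generic options such as -libjars and -Dkey=value when they appear before the application arguments. If TypeDataCheck implements Tool, as its run(String[]) signature and conf field suggest, the same job can instead be launched through ToolRunner, which applies GenericOptionsParser for you. A minimal, hypothetical driver (assuming org.apache.hadoop.util.ToolRunner and a no-arg constructor):

// Hypothetical driver: lets ToolRunner/GenericOptionsParser handle -libjars and
// -D options, provided they come before the application arguments on the command line.
public static void main(String[] args) throws Exception {
    int exitCode = ToolRunner.run(new Configuration(), new TypeDataCheck(), args);
    System.exit(exitCode);
}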
Use of org.apache.hadoop.util.GenericOptionsParser in project hive by apache.
The class WriteJson, method run.
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    args = new GenericOptionsParser(conf, args).getRemainingArgs();
    String serverUri = args[0];
    String inputTableName = args[1];
    String outputTableName = args[2];
    String dbName = null;
    String principalID = System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL);
    if (principalID != null)
        conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID);
    Job job = new Job(conf, "WriteJson");
    // initialize HCatInputFormat
    HCatInputFormat.setInput(job, dbName, inputTableName);
    job.setInputFormatClass(HCatInputFormat.class);
    job.setJarByClass(WriteJson.class);
    job.setMapperClass(Map.class);
    job.setOutputKeyClass(WritableComparable.class);
    job.setOutputValueClass(DefaultHCatRecord.class);
    job.setNumReduceTasks(0);
    // initialize HCatOutputFormat, reusing the input table's schema for the output table
    HCatOutputFormat.setOutput(job, OutputJobInfo.create(dbName, outputTableName, null));
    HCatSchema s = HCatInputFormat.getTableSchema(job);
    System.err.println("INFO: output schema explicitly set for writing: " + s);
    HCatOutputFormat.setSchema(job, s);
    job.setOutputFormatClass(HCatOutputFormat.class);
    return (job.waitForCompletion(true) ? 0 : 1);
}
Use of org.apache.hadoop.util.GenericOptionsParser in project hive by apache.
The class StoreDemo, method main.
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    args = new GenericOptionsParser(conf, args).getRemainingArgs();
    String[] otherArgs = new String[1];
    int j = 0;
    for (int i = 0; i < args.length; i++) {
        if (args[i].equals("-libjars")) {
            // the generic options parser doesn't seem to pick up -libjars here, so set tmpjars directly
            conf.set("tmpjars", args[i + 1]);
            // skip the -libjars value as well
            i = i + 1;
        } else {
            otherArgs[j++] = args[i];
        }
    }
    // expect exactly one application argument after stripping -libjars
    if (j != 1) {
        usage();
    }
    String serverUri = otherArgs[0];
    String tableName = NUMBERS_TABLE_NAME;
    String dbName = "default";
    Map<String, String> outputPartitionKvps = new HashMap<String, String>();
    String outputTableName = NUMBERS_PARTITIONED_TABLE_NAME;
    outputPartitionKvps.put("datestamp", "20100102");
    String principalID = System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL);
    if (principalID != null)
        conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID);
    Job job = new Job(conf, "storedemo");
    // initialize HCatInputFormat
    HCatInputFormat.setInput(job, dbName, tableName);
    // initialize HCatOutputFormat, writing into the datestamp=20100102 partition
    HCatOutputFormat.setOutput(job, OutputJobInfo.create(dbName, outputTableName, outputPartitionKvps));
    // test with and without specifying schema randomly
    HCatSchema s = HCatInputFormat.getTableSchema(job);
    System.err.println("INFO: output schema explicitly set for writing: " + s);
    HCatOutputFormat.setSchema(job, s);
    job.setInputFormatClass(HCatInputFormat.class);
    job.setOutputFormatClass(HCatOutputFormat.class);
    job.setJarByClass(StoreDemo.class);
    job.setMapperClass(SumMapper.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setNumReduceTasks(0);
    job.setOutputValueClass(DefaultHCatRecord.class);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
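SumMapper itself is not shown on this page. Purely as an illustrative sketch (not the project's implementation) of a map-only mapper that matches the key/value classes configured above and hands DefaultHCatRecord values to HCatOutputFormat for the datestamp=20100102 partition (assuming the usual org.apache.hadoop.mapreduce and org.apache.hive.hcatalog.data imports):

// Illustrative only: copies each input HCatRecord and writes it back out as a
// DefaultHCatRecord; HCatOutputFormat stores the value, so the key is a placeholder.
public static class SumMapper
        extends Mapper<WritableComparable, HCatRecord, IntWritable, DefaultHCatRecord> {
    @Override
    protected void map(WritableComparable key, HCatRecord value, Context context)
            throws IOException, InterruptedException {
        // A real implementation would compute whatever "sum" the demo expects;
        // this sketch simply passes the record's fields through unchanged.
        context.write(new IntWritable(0), new DefaultHCatRecord(value.getAll()));
    }
}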
Use of org.apache.hadoop.util.GenericOptionsParser in project hive by apache.
The class ReadJson, method run.
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    args = new GenericOptionsParser(conf, args).getRemainingArgs();
    String serverUri = args[0];
    String tableName = args[1];
    String outputDir = args[2];
    String dbName = null;
    String principalID = System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL);
    if (principalID != null)
        conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID);
    Job job = new Job(conf, "ReadJson");
    // initialize HCatInputFormat
    HCatInputFormat.setInput(job, dbName, tableName);
    job.setInputFormatClass(HCatInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setJarByClass(ReadJson.class);
    job.setMapperClass(Map.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(HCatRecord.class);
    job.setNumReduceTasks(0);
    FileOutputFormat.setOutputPath(job, new Path(outputDir));
    return (job.waitForCompletion(true) ? 0 : 1);
}
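As with the other examples, the Map class used here is not included on this page. A rough, hypothetical sketch that matches the key/value classes set above; the emitted HCatRecord values are then rendered by TextOutputFormat via toString() into files under outputDir:

// Illustrative only: forwards each HCatRecord unchanged; TextOutputFormat writes
// key and value.toString() separated by a tab.
public static class Map
        extends Mapper<WritableComparable, HCatRecord, IntWritable, HCatRecord> {
    @Override
    protected void map(WritableComparable key, HCatRecord value, Context context)
            throws IOException, InterruptedException {
        // Individual fields could be inspected positionally, e.g. value.get(0),
        // before emitting; this sketch just forwards the whole record.
        context.write(new IntWritable(0), value);
    }
}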