Use of org.apache.hadoop.mapred.JobConf in project hive by apache.
The class PartialScanTask, method main.
public static void main(String[] args) {
  String inputPathStr = null;
  String outputDir = null;
  String jobConfFileName = null;
  try {
    for (int i = 0; i < args.length; i++) {
      if (args[i].equals("-input")) {
        inputPathStr = args[++i];
      } else if (args[i].equals("-jobconffile")) {
        jobConfFileName = args[++i];
      } else if (args[i].equals("-outputDir")) {
        outputDir = args[++i];
      }
    }
  } catch (IndexOutOfBoundsException e) {
    System.err.println("Missing argument to option");
    printUsage();
  }
  if (inputPathStr == null || outputDir == null || outputDir.trim().equals("")) {
    printUsage();
  }
  List<Path> inputPaths = new ArrayList<Path>();
  String[] paths = inputPathStr.split(INPUT_SEPERATOR);
  if (paths == null || paths.length == 0) {
    printUsage();
  }
  FileSystem fs = null;
  JobConf conf = new JobConf(PartialScanTask.class);
  for (String path : paths) {
    try {
      Path pathObj = new Path(path);
      if (fs == null) {
        fs = FileSystem.get(pathObj.toUri(), conf);
      }
      FileStatus fstatus = fs.getFileStatus(pathObj);
      if (fstatus.isDir()) {
        FileStatus[] fileStatus = fs.listStatus(pathObj);
        for (FileStatus st : fileStatus) {
          inputPaths.add(st.getPath());
        }
      } else {
        inputPaths.add(fstatus.getPath());
      }
    } catch (IOException e) {
      e.printStackTrace(System.err);
    }
  }
  if (jobConfFileName != null) {
    conf.addResource(new Path(jobConfFileName));
  }
  org.slf4j.Logger LOG = LoggerFactory.getLogger(PartialScanTask.class.getName());
  boolean isSilent = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVESESSIONSILENT);
  LogHelper console = new LogHelper(LOG, isSilent);
  // print the location of the execution log for the user so that it's easy to
  // find the reason for local mode execution failures
  for (Appender appender : ((Logger) LogManager.getRootLogger()).getAppenders().values()) {
    if (appender instanceof FileAppender) {
      console.printInfo("Execution log at: " + ((FileAppender) appender).getFileName());
    } else if (appender instanceof RollingFileAppender) {
      console.printInfo("Execution log at: " + ((RollingFileAppender) appender).getFileName());
    }
  }
  QueryState queryState = new QueryState(new HiveConf(conf, PartialScanTask.class));
  PartialScanWork mergeWork = new PartialScanWork(inputPaths);
  DriverContext driverCxt = new DriverContext();
  PartialScanTask taskExec = new PartialScanTask();
  taskExec.initialize(queryState, null, driverCxt, new CompilationOpContext());
  taskExec.setWork(mergeWork);
  int ret = taskExec.execute(driverCxt);
  if (ret != 0) {
    System.exit(2);
  }
}
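For a sense of how the JobConf is assembled in this standalone entry point: it is seeded from the task class (which also records the containing jar) and then optionally overlaid with an XML resource passed via -jobconffile. Below is a minimal sketch of that pattern; the resource path and property name are illustrative, and the overlay file is assumed to exist.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;

public class JobConfOverlayExample {
  public static void main(String[] args) {
    // Seed the JobConf from a class; Hadoop records the jar containing that
    // class so it can be shipped with a submitted job.
    JobConf conf = new JobConf(JobConfOverlayExample.class);

    // Overlay an extra resource, mirroring the -jobconffile handling above.
    // Illustrative path; Configuration expects this file to exist.
    conf.addResource(new Path("/tmp/jobconf.xml"));

    // Properties from the overlay override earlier values once the conf is read.
    System.out.println(conf.get("mapreduce.job.queuename", "default"));
  }
}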
Use of org.apache.hadoop.mapred.JobConf in project hive by apache.
The class ColumnStatsTask, method initialize.
@Override
public void initialize(QueryState queryState, QueryPlan queryPlan, DriverContext ctx, CompilationOpContext opContext) {
  super.initialize(queryState, queryPlan, ctx, opContext);
  work.initializeForFetch(opContext);
  try {
    JobConf job = new JobConf(conf);
    ftOp = new FetchOperator(work.getfWork(), job);
  } catch (Exception e) {
    LOG.error(StringUtils.stringifyException(e));
    throw new RuntimeException(e);
  }
}
Use of org.apache.hadoop.mapred.JobConf in project hive by apache.
The class StatsNoJobTask, method initialize.
@Override
public void initialize(QueryState queryState, QueryPlan queryPlan, DriverContext driverContext, CompilationOpContext opContext) {
  super.initialize(queryState, queryPlan, driverContext, opContext);
  jc = new JobConf(conf);
}
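Both initialize() overrides above wrap the task's existing Configuration (conf) in a JobConf because downstream mapred-era APIs, such as FetchOperator, expect one. The copy semantics matter: JobConf(Configuration) clones the properties, so later changes to the JobConf do not leak back into the original. A small standalone sketch, with an illustrative property name:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.JobConf;

public class WrapConfExample {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    conf.set("example.fetch.threads", "4");  // illustrative property

    // JobConf(Configuration) copies every entry into the new object.
    JobConf job = new JobConf(conf);
    job.set("example.fetch.threads", "8");

    System.out.println(conf.get("example.fetch.threads")); // still 4
    System.out.println(job.get("example.fetch.threads"));  // 8
  }
}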
Use of org.apache.hadoop.mapred.JobConf in project hive by apache.
The class Utilities, method createEmptyBuckets.
/**
 * Check the existence of buckets according to the bucket specification. Create empty buckets if
 * needed.
 *
 * @param hconf the current Hadoop configuration
 * @param paths a list of bucket paths for which empty files should be created
 * @param conf the FileSinkDesc that defines the file sink
 * @param reporter the MapReduce reporter object
 * @throws HiveException
 * @throws IOException
 */
private static void createEmptyBuckets(Configuration hconf, List<Path> paths, FileSinkDesc conf,
    Reporter reporter) throws HiveException, IOException {
  JobConf jc;
  if (hconf instanceof JobConf) {
    jc = new JobConf(hconf);
  } else {
    // test code path
    jc = new JobConf(hconf);
  }
  HiveOutputFormat<?, ?> hiveOutputFormat = null;
  Class<? extends Writable> outputClass = null;
  boolean isCompressed = conf.getCompressed();
  TableDesc tableInfo = conf.getTableInfo();
  try {
    Serializer serializer = (Serializer) tableInfo.getDeserializerClass().newInstance();
    serializer.initialize(null, tableInfo.getProperties());
    outputClass = serializer.getSerializedClass();
    hiveOutputFormat = HiveFileFormatUtils.getHiveOutputFormat(hconf, conf.getTableInfo());
  } catch (SerDeException e) {
    throw new HiveException(e);
  } catch (InstantiationException e) {
    throw new HiveException(e);
  } catch (IllegalAccessException e) {
    throw new HiveException(e);
  }
  for (Path path : paths) {
    RecordWriter writer = HiveFileFormatUtils.getRecordWriter(jc, hiveOutputFormat, outputClass,
        isCompressed, tableInfo.getProperties(), path, reporter);
    writer.close(false);
    LOG.info("created empty bucket for enforcing bucketing at " + path);
  }
}
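The loop at the end obtains a writer for each missing bucket path through the table's own output format and closes it without writing any rows, so the resulting empty files carry whatever headers the format requires and stay readable alongside the populated buckets. As a rough illustration of the "create the file, write nothing" idea only, the sketch below uses the plain FileSystem API rather than Hive's HiveOutputFormat (the path is illustrative); note that it produces a zero-byte file, which is exactly what the Hive code avoids for formats that need headers.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class EmptyFileSketch {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);              // local FS for the sketch
    Path bucket = new Path("/tmp/empty_bucket_000000_0");   // illustrative bucket path

    // Create the file and close it without writing anything.
    fs.create(bucket, true).close();

    System.out.println("length = " + fs.getFileStatus(bucket).getLen()); // 0
  }
}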
Use of org.apache.hadoop.mapred.JobConf in project hive by apache.
The class MergeFileTask, method initialize.
@Override
public void initialize(QueryState queryState, QueryPlan queryPlan, DriverContext driverContext, CompilationOpContext opContext) {
  super.initialize(queryState, queryPlan, driverContext, opContext);
  job = new JobConf(conf, MergeFileTask.class);
  jobExecHelper = new HadoopJobExecHelper(job, this.console, this, this);
}
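Here the two-argument constructor both copies the existing configuration and records the jar containing MergeFileTask (via setJarByClass), so the containing jar can be located and shipped when the merge job is submitted. A small standalone sketch of those two effects; the property name and example class are illustrative:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.JobConf;

public class JobConfWithJarExample {
  public static void main(String[] args) {
    Configuration base = new Configuration();
    base.set("example.merge.level", "stripe");  // illustrative property

    // Copies 'base' and calls setJarByClass(JobConfWithJarExample.class).
    JobConf job = new JobConf(base, JobConfWithJarExample.class);

    System.out.println(job.get("example.merge.level")); // copied value: stripe
    System.out.println(job.getJar()); // jar path, or null when run from .class files
  }
}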