use of org.apache.hadoop.mapred.InputFormat in project incubator-systemml by apache.
the class DelegatingInputFormat method getRecordReader.
/**
 * Creates the record reader for the split wrapped inside a {@link TaggedInputSplit}.
 *
 * <p>The InputFormat class recorded on the tagged split is instantiated and the
 * wrapped split is handed to it. When the wrapped split is a {@link FileSplit},
 * its path, start offset and length are first written into {@code conf} under the
 * map-input keys.
 *
 * @param split    the split to read; it is cast to {@link TaggedInputSplit}
 * @param conf     the job configuration; modified in place for file splits
 * @param reporter passed through unchanged to the delegate InputFormat
 * @return the record reader produced by the delegate InputFormat
 * @throws IOException if the delegate InputFormat fails to create its reader
 */
@SuppressWarnings("unchecked")
public RecordReader<K, V> getRecordReader(InputSplit split, JobConf conf, Reporter reporter) throws IOException {
    // Find the InputFormat and then the RecordReader from the
    // TaggedInputSplit.
    TaggedInputSplit taggedInputSplit = (TaggedInputSplit) split;
    InputFormat<K, V> inputFormat = (InputFormat<K, V>) ReflectionUtils.newInstance(taggedInputSplit.getInputFormatClass(), conf);
    InputSplit inputSplit = taggedInputSplit.getInputSplit();
    if (inputSplit instanceof FileSplit) {
        // Publish the file coordinates of this split through the configuration.
        FileSplit fileSplit = (FileSplit) inputSplit;
        conf.set(MRConfigurationNames.MR_MAP_INPUT_FILE, fileSplit.getPath().toString());
        conf.setLong(MRConfigurationNames.MR_MAP_INPUT_START, fileSplit.getStart());
        conf.setLong(MRConfigurationNames.MR_MAP_INPUT_LENGTH, fileSplit.getLength());
    }
    // Delegate with the split already fetched above rather than unwrapping it again.
    return inputFormat.getRecordReader(inputSplit, conf, reporter);
}
use of org.apache.hadoop.mapred.InputFormat in project incubator-systemml by apache.
the class DelegatingInputFormat method getSplits.
/**
 * Computes the splits for all paths registered through {@code MultipleInputs}.
 *
 * <p>Paths are grouped first by InputFormat class and then by Mapper class. Each
 * group is set as the input paths of a copy of the job configuration and split by
 * an instance of its InputFormat. Every resulting split is wrapped in a
 * {@link TaggedInputSplit} carrying the InputFormat and Mapper classes.
 *
 * @param conf      the job configuration
 * @param numSplits passed through unchanged to each delegate InputFormat
 * @return all tagged splits, in the iteration order of the grouping maps
 * @throws IOException if a delegate InputFormat fails to compute its splits
 */
public InputSplit[] getSplits(JobConf conf, int numSplits) throws IOException {
    JobConf confCopy = new JobConf(conf);
    List<InputSplit> taggedSplits = new ArrayList<InputSplit>();
    Map<Path, InputFormat> formatMap = MultipleInputs.getInputFormatMap(conf);
    Map<Path, Class<? extends Mapper>> mapperMap = MultipleInputs.getMapperTypeMap(conf);
    Map<Class<? extends InputFormat>, List<Path>> pathsByFormat = new HashMap<Class<? extends InputFormat>, List<Path>>();

    // First, build a map of InputFormats to Paths
    for (Entry<Path, InputFormat> entry : formatMap.entrySet()) {
        Class<? extends InputFormat> formatType = entry.getValue().getClass();
        List<Path> bucket = pathsByFormat.get(formatType);
        if (bucket == null) {
            bucket = new LinkedList<Path>();
            pathsByFormat.put(formatType, bucket);
        }
        bucket.add(entry.getKey());
    }

    for (Entry<Class<? extends InputFormat>, List<Path>> formatEntry : pathsByFormat.entrySet()) {
        InputFormat format = (InputFormat) ReflectionUtils.newInstance(formatEntry.getKey(), conf);

        // Now, for each set of paths that have a common InputFormat, build
        // a map of Mappers to the paths they're used for
        Map<Class<? extends Mapper>, List<Path>> pathsByMapper = new HashMap<Class<? extends Mapper>, List<Path>>();
        for (Path path : formatEntry.getValue()) {
            // A path without a registered mapper is grouped under the null key.
            Class<? extends Mapper> mapperType = mapperMap.get(path);
            List<Path> bucket = pathsByMapper.get(mapperType);
            if (bucket == null) {
                bucket = new LinkedList<Path>();
                pathsByMapper.put(mapperType, bucket);
            }
            bucket.add(path);
        }

        // Now each set of paths that has a common InputFormat and Mapper can
        // be added to the same job, and split together.
        for (Entry<Class<? extends Mapper>, List<Path>> mapEntry : pathsByMapper.entrySet()) {
            List<Path> sharedPaths = mapEntry.getValue();
            Class<? extends Mapper> registeredMapper = mapEntry.getKey();
            // Fall back to the job-wide mapper when none was registered for these paths.
            Class<? extends Mapper> mapperClass = (registeredMapper != null) ? registeredMapper : conf.getMapperClass();

            FileInputFormat.setInputPaths(confCopy, sharedPaths.toArray(new Path[sharedPaths.size()]));

            // Get splits for each input path and tag with InputFormat
            // and Mapper types by wrapping in a TaggedInputSplit.
            for (InputSplit pathSplit : format.getSplits(confCopy, numSplits)) {
                taggedSplits.add(new TaggedInputSplit(pathSplit, conf, format.getClass(), mapperClass));
            }
        }
    }
    return taggedSplits.toArray(new InputSplit[taggedSplits.size()]);
}
use of org.apache.hadoop.mapred.InputFormat in project incubator-systemml by apache.
the class MultipleInputs method getInputFormatMap.
/**
 * Retrieves a map of {@link Path}s to the {@link InputFormat} instance
 * that should be used for them.
 *
 * <p>The mapping is read from the job configuration as a comma-separated list
 * of {@code path;inputFormatClassName} entries; each named class is loaded and
 * instantiated.
 *
 * @param conf The configuration of the job
 * @see #addInputPath(JobConf, Path, Class)
 * @return A map of paths to inputformats for the job
 */
static Map<Path, InputFormat> getInputFormatMap(JobConf conf) {
    Map<Path, InputFormat> formatsByPath = new HashMap<Path, InputFormat>();
    String encodedMappings = conf.get(MRConfigurationNames.MR_INPUT_MULTIPLEINPUTS_DIR_FORMATS);
    for (String mapping : encodedMappings.split(",")) {
        // parts[0] is the path, parts[1] the InputFormat class name.
        String[] parts = mapping.split(";");
        Class<?> formatClass;
        try {
            formatClass = conf.getClassByName(parts[1]);
        } catch (ClassNotFoundException e) {
            throw new RuntimeException(e);
        }
        // Instantiate before building the Path, matching the original evaluation order.
        InputFormat inputFormat = (InputFormat) ReflectionUtils.newInstance(formatClass, conf);
        formatsByPath.put(new Path(parts[0]), inputFormat);
    }
    return formatsByPath;
}
use of org.apache.hadoop.mapred.InputFormat in project ignite by apache.
the class HadoopV1MapTask method run.
/**
 * {@inheritDoc}
 *
 * <p>Runs a map task: obtains the input format from the job configuration,
 * converts the task's split into a native {@link InputSplit}, feeds every record
 * from the reader to a newly instantiated mapper, and commits the collector.
 * The record reader is closed once the mapper and the collector's writer have
 * been closed, whether or not mapping succeeded.
 *
 * @param taskCtx the task context; it is cast to {@link HadoopV2TaskContext}
 * @throws IgniteCheckedException wrapping any exception raised while creating the
 *     collector or reader, mapping, closing resources or committing; the collector
 *     is aborted first if it had been created
 */
@SuppressWarnings("unchecked")
@Override
public void run(HadoopTaskContext taskCtx) throws IgniteCheckedException {
    HadoopJobEx job = taskCtx.job();
    HadoopV2TaskContext taskCtx0 = (HadoopV2TaskContext) taskCtx;
    if (taskCtx.taskInfo().hasMapperIndex())
        HadoopMapperUtils.mapperIndex(taskCtx.taskInfo().mapperIndex());
    else
        HadoopMapperUtils.clearMapperIndex();
    try {
        JobConf jobConf = taskCtx0.jobConf();
        InputFormat inFormat = jobConf.getInputFormat();
        HadoopInputSplit split = info().inputSplit();
        InputSplit nativeSplit;
        if (split instanceof HadoopFileBlock) {
            // A file block is translated directly into a FileSplit over the same range.
            HadoopFileBlock block = (HadoopFileBlock) split;
            nativeSplit = new FileSplit(new Path(block.file().toString()), block.start(), block.length(), EMPTY_HOSTS);
        } else
            nativeSplit = (InputSplit) taskCtx0.getNativeSplit(split);
        assert nativeSplit != null;
        Reporter reporter = new HadoopV1Reporter(taskCtx);
        HadoopV1OutputCollector collector = null;
        try {
            // The boolean argument is true only when the job has neither a combiner nor a reducer.
            collector = collector(jobConf, taskCtx0, !job.info().hasCombiner() && !job.info().hasReducer(), fileName(), taskCtx0.attemptId());
            RecordReader reader = inFormat.getRecordReader(nativeSplit, jobConf, reporter);
            try {
                Mapper mapper = ReflectionUtils.newInstance(jobConf.getMapperClass(), jobConf);
                Object key = reader.createKey();
                Object val = reader.createValue();
                assert mapper != null;
                try {
                    try {
                        while (reader.next(key, val)) {
                            if (isCancelled())
                                throw new HadoopTaskCancelledException("Map task cancelled.");
                            mapper.map(key, val, collector, reporter);
                        }
                        taskCtx.onMapperFinished();
                    } finally {
                        mapper.close();
                    }
                } finally {
                    collector.closeWriter();
                }
            } finally {
                // Release the reader's underlying input; previously it was never closed.
                // A failure here propagates to the catch below, so the output is aborted.
                reader.close();
            }
            collector.commit();
        } catch (Exception e) {
            if (collector != null)
                collector.abort();
            throw new IgniteCheckedException(e);
        }
    } finally {
        HadoopMapperUtils.clearMapperIndex();
    }
}
Aggregations