Use of org.apache.tez.runtime.api.InputInitializerContext in the Apache Hive project.
Class TestDynamicPartitionPruner, method testMissingEvent.
/**
 * Exercises the pruner when source vertex "v1" is reported through
 * {@code processVertex} but no event is ever handed to the pruner.
 *
 * <p>After a three second wait the prune runnable must not have ended, must not have
 * recorded an error, and the pruner must report zero processed events and zero
 * filtered sources. The background thread is always interrupted and joined.
 */
@Test(timeout = 20000)
public void testMissingEvent() throws InterruptedException, IOException, HiveException, SerDeException {
  InputInitializerContext initContext = mock(InputInitializerContext.class);
  doReturn(1).when(initContext).getVertexNumTasks("v1");

  MapWork work = createMockMapWork(new TestSource("v1", 1));
  DynamicPartitionPrunerForEventTesting pruner = new DynamicPartitionPrunerForEventTesting();
  pruner.initialize(initContext, work, new JobConf());

  PruneRunnable runnable = new PruneRunnable(pruner);
  Thread pruneThread = new Thread(runnable);
  pruneThread.start();
  try {
    runnable.start();

    // This event is built but never passed to the pruner anywhere in this test.
    InputInitializerEvent unsentEvent =
        InputInitializerEvent.create("FakeTarget", "TargetInput", ByteBuffer.allocate(0));
    unsentEvent.setSourceVertexName("v1");

    pruner.processVertex("v1");
    Thread.sleep(3000L);

    // The pruner should not have completed.
    assertFalse(runnable.ended.get());
    assertNoError(runnable);
    assertEquals(0, pruner.eventsProceessed.intValue());
    assertEquals(0, pruner.filteredSources.intValue());
  } finally {
    // Stop the background runnable regardless of the assertion outcome.
    pruneThread.interrupt();
    pruneThread.join();
  }
}
Use of org.apache.tez.runtime.api.InputInitializerContext in the Apache Hive project.
Class HiveSplitGenerator, method initialize.
/**
 * Generates the grouped input splits for this input and wraps them into initializer events.
 *
 * <p>The method, in order:
 * <ol>
 *   <li>calls {@code prepare} when a context is available, and registers {@code work} on
 *       {@code jobConf};</li>
 *   <li>runs dynamic partition pruning if a pruner is configured;</li>
 *   <li>creates the un-grouped splits, either a single whole-file split (when
 *       {@code generateSingleSplit} is set and the configured Tez input format is
 *       {@code HiveInputFormat}) or the splits returned by the real input format;</li>
 *   <li>sorts the splits, optionally prunes buckets, groups them and derives location hints;</li>
 *   <li>publishes input counters to the context when one is available (failures here are
 *       logged and otherwise ignored).</li>
 * </ol>
 * The work registered on {@code jobConf} is always cleared before returning.
 *
 * @return the events built by {@code createEventList} from the generated split information
 * @throws RuntimeException if grouping is disabled in the user payload
 * @throws Exception any failure raised by pruning, split generation or file-system access
 */
@SuppressWarnings("unchecked")
@Override
public List<Event> initialize() throws Exception {
  if (getContext() != null) {
    // called from Tez AM.
    prepare(getContext());
  }
  // Setup the map work for this thread. Pruning modified the work instance to potentially remove
  // partitions. The same work instance must be used when generating splits.
  Utilities.setMapWork(jobConf, work);
  try {
    boolean sendSerializedEvents = conf.getBoolean("mapreduce.tez.input.initializer.serialize.event.payload", true);
    // perform dynamic partition pruning
    if (pruner != null) {
      pruner.initialize(getContext(), work, jobConf);
      pruner.prune();
    }
    boolean generateConsistentSplits = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_TEZ_GENERATE_CONSISTENT_SPLITS);
    LOG.info("GenerateConsistentSplitsInHive=" + generateConsistentSplits);
    String realInputFormatName = conf.get("mapred.input.format.class");
    boolean groupingEnabled = userPayloadProto.getGroupingEnabled();
    if (!groupingEnabled) {
      // If this is used in the future - make sure to disable grouping in the payload, if it isn't already disabled
      throw new RuntimeException("HiveInputFormat does not support non-grouped splits, InputFormatName is: " + realInputFormatName);
    }

    // Need to instantiate the realInputFormat
    InputFormat<?, ?> inputFormat = (InputFormat<?, ?>) ReflectionUtils.newInstance(JavaUtils.loadClass(realInputFormatName), jobConf);

    // FIXME. Do the right thing Luke.
    final int availableSlots;
    if (getContext() == null) {
      // for now, totalResource = taskResource for llap
      availableSlots = 1;
    } else {
      int totalResource = getContext().getTotalAvailableResource().getMemory();
      int taskResource = getContext().getVertexTaskResource().getMemory();
      availableSlots = totalResource / taskResource;
    }

    if (HiveConf.getLongVar(conf, HiveConf.ConfVars.MAPREDMINSPLITSIZE, 1) <= 1) {
      // broken configuration from mapred-default.xml
      final long blockSize = conf.getLongBytes(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, DFSConfigKeys.DFS_BLOCK_SIZE_DEFAULT);
      final long minGrouping = conf.getLong(TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_MIN_SIZE, TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_MIN_SIZE_DEFAULT);
      final long preferredSplitSize = Math.min(blockSize / 2, minGrouping);
      HiveConf.setLongVar(jobConf, HiveConf.ConfVars.MAPREDMINSPLITSIZE, preferredSplitSize);
      LOG.info("The preferred split size is " + preferredSplitSize);
    }

    final float waves;
    if (numSplits.isPresent()) {
      waves = numSplits.get().floatValue() / availableSlots;
    } else {
      waves = conf.getFloat(TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_WAVES, TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_WAVES_DEFAULT);
    }

    // Create the un-grouped splits
    InputSplit[] splits;
    // Constant-first comparison: an unset input-format key must not raise a NullPointerException;
    // it simply selects the regular split generation below.
    boolean hiveInputFormatConfigured =
        HiveInputFormat.class.getName().equals(conf.get(HiveConf.ConfVars.HIVETEZINPUTFORMAT.varname));
    if (generateSingleSplit && hiveInputFormatConfigured) {
      MapWork mapWork = Utilities.getMapWork(jobConf);
      List<Path> paths = Utilities.getInputPathsTez(jobConf, mapWork);
      FileSystem fs = paths.get(0).getFileSystem(jobConf);
      FileStatus[] fileStatuses = fs.listStatus(paths.get(0));
      if (fileStatuses.length == 0) {
        // generate single split typically happens when reading data out of order by queries.
        // if order by query returns no rows, no files will exists in input path
        splits = new InputSplit[0];
      } else {
        // if files exists in input path then it has to be 1 as this code path gets triggered only
        // of order by queries which is expected to write only one file (written by one reducer)
        Preconditions.checkState(paths.size() == 1 && fileStatuses.length == 1 && mapWork.getAliasToPartnInfo().size() == 1, "Requested to generate single split. Paths and fileStatuses are expected to be 1. " + "Got paths: " + paths.size() + " fileStatuses: " + fileStatuses.length);
        FileStatus fileStatus = fileStatuses[0];
        // Collect the distinct hosts of every block of the file as the split's locations.
        BlockLocation[] locations = fs.getFileBlockLocations(fileStatus, 0, fileStatus.getLen());
        Set<String> hostsSet = new HashSet<>();
        for (BlockLocation location : locations) {
          hostsSet.addAll(Lists.newArrayList(location.getHosts()));
        }
        String[] hosts = hostsSet.toArray(new String[0]);
        // One split covering the file from offset 0 to its full length.
        FileSplit fileSplit = new FileSplit(fileStatus.getPath(), 0, fileStatus.getLen(), hosts);
        String alias = mapWork.getAliases().get(0);
        PartitionDesc partDesc = mapWork.getAliasToPartnInfo().get(alias);
        String partIF = partDesc.getInputFileFormatClassName();
        splits = new InputSplit[] { new HiveInputFormat.HiveInputSplit(fileSplit, partIF) };
      }
    } else {
      // Raw splits. The slots * waves fallback is computed only when no explicit split count was
      // requested, so its overflow check cannot fail a request that never uses the value.
      int requestedSplits = numSplits.isPresent()
          ? numSplits.get()
          : Math.multiplyExact(availableSlots, (int) waves);
      splits = inputFormat.getSplits(jobConf, requestedSplits);
    }

    // Sort the splits, so that subsequent grouping is consistent.
    Arrays.sort(splits, new InputSplitComparator());
    LOG.info("Number of input splits: " + splits.length + ". " + availableSlots + " available slots, " + waves + " waves. Input format is: " + realInputFormatName);

    // increment/set input counters
    InputInitializerContext inputInitializerContext = getContext();
    TezCounters tezCounters = null;
    String counterName;
    String groupName = null;
    String vertexName = null;
    if (inputInitializerContext != null) {
      try {
        tezCounters = new TezCounters();
        groupName = HiveInputCounters.class.getName();
        vertexName = jobConf.get(Operator.CONTEXT_NAME_KEY, "");
        counterName = Utilities.getVertexCounterName(HiveInputCounters.RAW_INPUT_SPLITS.name(), vertexName);
        tezCounters.findCounter(groupName, counterName).increment(splits.length);
        final List<Path> paths = Utilities.getInputPathsTez(jobConf, work);
        counterName = Utilities.getVertexCounterName(HiveInputCounters.INPUT_DIRECTORIES.name(), vertexName);
        tezCounters.findCounter(groupName, counterName).increment(paths.size());
        final Set<String> files = new HashSet<>();
        for (InputSplit inputSplit : splits) {
          if (inputSplit instanceof FileSplit) {
            // The assumption here is the path is a file. Only case this is different is ACID deltas.
            // The isFile check is avoided here for performance reasons.
            // Set.add already ignores duplicates, so no contains() check is needed.
            files.add(((FileSplit) inputSplit).getPath().toString());
          }
        }
        counterName = Utilities.getVertexCounterName(HiveInputCounters.INPUT_FILES.name(), vertexName);
        tezCounters.findCounter(groupName, counterName).increment(files.size());
      } catch (Exception e) {
        // Counters are best effort: split generation continues without them.
        LOG.warn("Caught exception while trying to update Tez counters", e);
      }
    }

    if (work.getIncludedBuckets() != null) {
      splits = pruneBuckets(work, splits);
    }
    Multimap<Integer, InputSplit> groupedSplits = splitGrouper.generateGroupedSplits(jobConf, conf, splits, waves, availableSlots, splitLocationProvider);
    // And finally return them in a flat array
    InputSplit[] flatSplits = groupedSplits.values().toArray(new InputSplit[0]);
    LOG.info("Number of split groups: " + flatSplits.length);

    if (inputInitializerContext != null) {
      try {
        counterName = Utilities.getVertexCounterName(HiveInputCounters.GROUPED_INPUT_SPLITS.name(), vertexName);
        tezCounters.findCounter(groupName, counterName).setValue(flatSplits.length);
        LOG.debug("Published tez counters: {}", tezCounters);
        inputInitializerContext.addCounters(tezCounters);
      } catch (Exception e) {
        // Also covers the case where the earlier counter setup failed and left state unset.
        LOG.warn("Caught exception while trying to update Tez counters", e);
      }
    }

    List<TaskLocationHint> locationHints = splitGrouper.createTaskLocationHints(flatSplits, generateConsistentSplits);
    InputSplitInfoMem inputSplitInfo = new InputSplitInfoMem(flatSplits, locationHints, flatSplits.length, null, jobConf);
    return createEventList(sendSerializedEvents, inputSplitInfo);
  } finally {
    Utilities.clearWork(jobConf);
  }
}
Aggregations