Use of org.apache.hadoop.hive.ql.parse.SemanticException in the Apache Hive project.
Class TestAccumuloRangeGenerator, method testDateRangeConjunction.
/**
 * Checks that the conjunction {@code rid >= 2014-01-01 AND rid < 2014-07-01}, built from date
 * constants, is turned into the single half-open range {@code [2014-01-01, 2014-07-01)} by the
 * range generator.
 */
@Test
public void testDateRangeConjunction() throws Exception {
  // rowId >= '2014-01-01'
  ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
  ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.dateTypeInfo, Date.valueOf("2014-01-01"));
  List<ExprNodeDesc> children = Lists.newArrayList();
  children.add(column);
  children.add(constant);
  ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqualOrGreaterThan(), children);

  // rowId < '2014-07-01' (strictly less-than, hence the exclusive upper bound expected below)
  ExprNodeDesc column2 = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
  ExprNodeDesc constant2 = new ExprNodeConstantDesc(TypeInfoFactory.dateTypeInfo, Date.valueOf("2014-07-01"));
  List<ExprNodeDesc> children2 = Lists.newArrayList();
  children2.add(column2);
  children2.add(constant2);
  ExprNodeDesc node2 = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPLessThan(), children2);

  // And UDF
  List<ExprNodeDesc> bothFilters = Lists.newArrayList();
  bothFilters.add(node);
  bothFilters.add(node2);
  ExprNodeGenericFuncDesc both = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPAnd(), bothFilters);

  // Should generate [2014-01-01, 2014-07-01)
  List<Range> expectedRanges = Arrays.asList(new Range(new Key("2014-01-01"), true, new Key("2014-07-01"), false));

  AccumuloRangeGenerator rangeGenerator = new AccumuloRangeGenerator(conf, handler, rowIdMapping, "rid");
  Dispatcher disp = new DefaultRuleDispatcher(rangeGenerator, Collections.<Rule, NodeProcessor>emptyMap(), null);
  GraphWalker ogw = new DefaultGraphWalker(disp);
  ArrayList<Node> topNodes = new ArrayList<Node>();
  topNodes.add(both);
  HashMap<Node, Object> nodeOutput = new HashMap<Node, Object>();

  // The test method declares "throws Exception", so a SemanticException from the walk is left to
  // propagate unwrapped and fail the test with its original type and stack trace.
  ogw.startWalking(topNodes, nodeOutput);

  Object result = nodeOutput.get(both);
  Assert.assertNotNull(result);
  Assert.assertTrue("Result from graph walk was not a List", result instanceof List);
  @SuppressWarnings("unchecked") List<Range> actualRanges = (List<Range>) result;
  Assert.assertEquals(expectedRanges, actualRanges);
}
Use of org.apache.hadoop.hive.ql.parse.SemanticException in the Apache Hive project.
Class TestAccumuloRangeGenerator, method testRangeConjunction.
/**
 * Checks that the conjunction {@code rid >= 'f' AND rid <= 'm'} over string constants is turned
 * into a single range covering {@code [f, m]} by the range generator.
 */
@Test
public void testRangeConjunction() throws Exception {
  // rowId >= 'f'
  ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
  ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "f");
  List<ExprNodeDesc> children = Lists.newArrayList();
  children.add(column);
  children.add(constant);
  ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqualOrGreaterThan(), children);

  // rowId <= 'm'
  ExprNodeDesc column2 = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
  ExprNodeDesc constant2 = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "m");
  List<ExprNodeDesc> children2 = Lists.newArrayList();
  children2.add(column2);
  children2.add(constant2);
  ExprNodeDesc node2 = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqualOrLessThan(), children2);

  // And UDF
  List<ExprNodeDesc> bothFilters = Lists.newArrayList();
  bothFilters.add(node);
  bothFilters.add(node2);
  ExprNodeGenericFuncDesc both = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPAnd(), bothFilters);

  // Should generate [f,m]; the inclusive end 'm' is expressed here as an exclusive end at "m\0"
  List<Range> expectedRanges = Arrays.asList(new Range(new Key("f"), true, new Key("m\0"), false));

  AccumuloRangeGenerator rangeGenerator = new AccumuloRangeGenerator(conf, handler, rowIdMapping, "rid");
  Dispatcher disp = new DefaultRuleDispatcher(rangeGenerator, Collections.<Rule, NodeProcessor>emptyMap(), null);
  GraphWalker ogw = new DefaultGraphWalker(disp);
  ArrayList<Node> topNodes = new ArrayList<Node>();
  topNodes.add(both);
  HashMap<Node, Object> nodeOutput = new HashMap<Node, Object>();

  // The test method declares "throws Exception", so a SemanticException from the walk is left to
  // propagate unwrapped and fail the test with its original type and stack trace.
  ogw.startWalking(topNodes, nodeOutput);

  Object result = nodeOutput.get(both);
  Assert.assertNotNull(result);
  Assert.assertTrue("Result from graph walk was not a List", result instanceof List);
  @SuppressWarnings("unchecked") List<Range> actualRanges = (List<Range>) result;
  Assert.assertEquals(expectedRanges, actualRanges);
}
Use of org.apache.hadoop.hive.ql.parse.SemanticException in the Apache Hive project.
Class TableExport, method getAuthEntities.
/**
 * Collects the read and write entities involved in this export.
 *
 * <p>When the replication spec is metadata-only, the result is left empty. Otherwise, if a table
 * spec is present, a {@code ReadEntity} is added for each partition of a partitioned table or for
 * the table itself when it is not partitioned, and a write entity for the export root directory
 * is added to the outputs.
 *
 * @return the collected entities; never {@code null}
 * @throws SemanticException wrapping any exception raised while collecting the entities,
 *     including the {@code IllegalStateException} thrown when a partitioned table yields no
 *     partition iterable
 */
public AuthEntities getAuthEntities() throws SemanticException {
  AuthEntities entities = new AuthEntities();
  try {
    // A metadata-only export contributes no inputs or outputs.
    if (!replicationSpec.isMetadataOnly()) {
      PartitionIterable exportedPartitions = getPartitions();
      if (tableSpec != null) {
        if (!tableSpec.tableHandle.isPartitioned()) {
          entities.inputs.add(new ReadEntity(tableSpec.tableHandle));
        } else {
          if (exportedPartitions == null) {
            throw new IllegalStateException("partitions cannot be null for partitionTable :" + tableSpec.tableName);
          }
          for (Partition exported : exportedPartitions) {
            entities.inputs.add(new ReadEntity(exported));
          }
        }
      }
      entities.outputs.add(toWriteEntity(paths.getExportRootDir(), conf));
    }
  } catch (Exception cause) {
    throw new SemanticException(cause);
  }
  return entities;
}
Use of org.apache.hadoop.hive.ql.parse.SemanticException in the Apache Hive project.
Class TableExport, method writeMetaData.
/**
 * Delegates to {@code EximUtil.createExportDump} to write the export metadata to
 * {@code paths.metaDataExportFile()} on the export file system, then logs the location at debug
 * level.
 *
 * @param partitions the partitions passed through to the dump; handed over as-is
 * @throws SemanticException carrying the {@code IO_ERROR} message and the original cause if
 *     anything goes wrong while writing
 */
private void writeMetaData(PartitionIterable partitions) throws SemanticException {
  try {
    EximUtil.createExportDump(
        paths.exportFileSystem,
        paths.metaDataExportFile(),
        (tableSpec != null) ? tableSpec.tableHandle : null,
        partitions,
        replicationSpec,
        conf);
    logger.debug("_metadata file written into " + paths.metaDataExportFile().toString());
  } catch (Exception writeFailure) {
    // Nothing is cleaned up here; if the dump location should be kept clean, the paths written
    // so far may need to be deleted.
    throw new SemanticException(ErrorMsg.IO_ERROR.getMsg("Exception while writing out the local file"), writeFailure);
  }
}
Use of org.apache.hadoop.hive.ql.parse.SemanticException in the Apache Hive project.
Class SortMergeJoinTaskDispatcher, method processCurrentTask.
/**
 * Wraps a task containing a sort-merge (SMB) join in a conditional task that offers one map-join
 * alternative per viable big-table position, with the original task kept both as the backup of
 * every alternative and as the last entry of the conditional task's own task list.
 *
 * @param currTask the map-reduce task to examine and, if eligible, replace
 * @param conditionalTask not referenced by this implementation
 * @param context supplies the configuration and scratch directories, and is passed to the
 *     input-size computation
 * @return the conditional task that replaced {@code currTask}, or {@code null} when the task's
 *     SMB join operator is not eligible for this optimization
 * @throws SemanticException if building the candidate map-join tasks fails; the original
 *     exception is attached as the cause
 */
@Override
public Task<? extends Serializable> processCurrentTask(MapRedTask currTask, ConditionalTask conditionalTask, Context context) throws SemanticException {
  // whether it contains a sort merge join operator
  MapredWork currWork = currTask.getWork();
  SMBMapJoinOperator originalSMBJoinOp = getSMBMapJoinOp(currWork);
  if (!isEligibleForOptimization(originalSMBJoinOp)) {
    return null;
  }
  currTask.setTaskTag(Task.CONVERTED_SORTMERGEJOIN);

  // Convert the work containing to sort-merge join into a work, as if it had a regular join.
  // Note that the operator tree is not changed - is still contains the SMB join, but the
  // plan is changed (aliasToWork etc.) to contain all the paths as if it was a regular join.
  // This is used to convert the plan to a map-join, and then the original SMB join plan is used
  // as a backup task.
  MapredWork currJoinWork = convertSMBWorkToJoinWork(currWork, originalSMBJoinOp);
  SMBMapJoinOperator newSMBJoinOp = getSMBMapJoinOp(currJoinWork);
  currWork.getMapWork().setLeftInputJoin(originalSMBJoinOp.getConf().isLeftInputJoin());
  currWork.getMapWork().setBaseSrc(originalSMBJoinOp.getConf().getBaseSrc());
  currWork.getMapWork().setMapAliases(originalSMBJoinOp.getConf().getMapAliases());
  currJoinWork.getMapWork().setLeftInputJoin(originalSMBJoinOp.getConf().isLeftInputJoin());
  currJoinWork.getMapWork().setBaseSrc(originalSMBJoinOp.getConf().getBaseSrc());
  currJoinWork.getMapWork().setMapAliases(originalSMBJoinOp.getConf().getMapAliases());

  // create conditional work list and task list
  List<Serializable> listWorks = new ArrayList<Serializable>();
  List<Task<? extends Serializable>> listTasks = new ArrayList<Task<? extends Serializable>>();

  // create task to aliases mapping and alias to input file mapping for resolver
  // Must be deterministic order map for consistent q-test output across Java versions
  HashMap<Task<? extends Serializable>, Set<String>> taskToAliases = new LinkedHashMap<Task<? extends Serializable>, Set<String>>();
  // Note that pathToAlias will behave as if the original plan was a join plan
  HashMap<Path, ArrayList<String>> pathToAliases = currJoinWork.getMapWork().getPathToAliases();

  // generate a map join task for the big table
  SMBJoinDesc originalSMBJoinDesc = originalSMBJoinOp.getConf();
  Byte[] order = originalSMBJoinDesc.getTagOrder();
  int numAliases = order.length;
  Set<Integer> bigTableCandidates = MapJoinProcessor.getBigTableCandidates(originalSMBJoinDesc.getConds());

  HashMap<String, Long> aliasToSize = new HashMap<String, Long>();
  Configuration conf = context.getConf();
  try {
    long aliasTotalKnownInputSize = getTotalKnownInputSize(context, currJoinWork.getMapWork(), pathToAliases, aliasToSize);
    long smallTableSizeThreshold = HiveConf.getLongVar(conf, HiveConf.ConfVars.HIVESMALLTABLESFILESIZE);

    for (int bigTablePosition = 0; bigTablePosition < numAliases; bigTablePosition++) {
      // this table cannot be big table
      if (!bigTableCandidates.contains(bigTablePosition)) {
        continue;
      }

      // create map join task for the given big table position
      MapRedTask newTask = convertSMBTaskToMapJoinTask(currJoinWork, bigTablePosition, newSMBJoinOp);
      MapWork mapWork = newTask.getWork().getMapWork();
      Operator<?> parentOp = originalSMBJoinOp.getParentOperators().get(bigTablePosition);
      Set<String> aliases = GenMapRedUtils.findAliases(mapWork, parentOp);

      // Only when the candidate's own size is known can the remaining (small-table) total be
      // compared against the threshold; an unknown size (<= 0) keeps the candidate.
      long aliasKnownSize = Utilities.sumOf(aliasToSize, aliases);
      if (aliasKnownSize > 0) {
        long smallTblTotalKnownSize = aliasTotalKnownInputSize - aliasKnownSize;
        if (smallTblTotalKnownSize > smallTableSizeThreshold) {
          // this table is not good to be a big table.
          continue;
        }
      }

      // add into conditional task
      listWorks.add(newTask.getWork());
      listTasks.add(newTask);
      newTask.setTaskTag(Task.CONVERTED_MAPJOIN);
      newTask.setFetchSource(currTask.isFetchSource());

      // set up backup task
      newTask.setBackupTask(currTask);
      newTask.setBackupChildrenTasks(currTask.getChildTasks());

      // put the mapping task to aliases
      taskToAliases.put(newTask, aliases);
    }
  } catch (Exception e) {
    // The cause is chained into the SemanticException, so nothing is printed to stderr here.
    throw new SemanticException("Generate Map Join Task Error: ", e);
  }

  // insert current common join task to conditional task
  listWorks.add(currTask.getWork());
  listTasks.add(currTask);

  // clear JoinTree and OP Parse Context
  currWork.getMapWork().setLeftInputJoin(false);
  currWork.getMapWork().setBaseSrc(null);
  currWork.getMapWork().setMapAliases(null);

  // create conditional task and insert conditional task into task tree
  ConditionalWork cndWork = new ConditionalWork(listWorks);
  ConditionalTask cndTsk = (ConditionalTask) TaskFactory.get(cndWork);
  cndTsk.setListTasks(listTasks);

  // set resolver and resolver context
  cndTsk.setResolver(new ConditionalResolverCommonJoin());
  ConditionalResolverCommonJoinCtx resolverCtx = new ConditionalResolverCommonJoinCtx();
  resolverCtx.setPathToAliases(pathToAliases);
  resolverCtx.setAliasToKnownSize(aliasToSize);
  resolverCtx.setTaskToAliases(taskToAliases);
  resolverCtx.setCommonJoinTask(currTask);
  resolverCtx.setLocalTmpDir(context.getLocalScratchDir(false));
  resolverCtx.setHdfsTmpDir(context.getMRScratchDir());
  cndTsk.setResolverCtx(resolverCtx);

  // replace the current task with the new generated conditional task
  replaceTaskWithConditionalTask(currTask, cndTsk);
  return cndTsk;
}
Aggregations