use of org.apache.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint in project asterixdb by apache.
the class APIFrameworkTest method testGetComputationLocations.
@Test
public void testGetComputationLocations() throws AlgebricksException {
    // Constructs mocked cluster nodes.
    Map<String, NodeControllerInfo> map = new HashMap<>();
    NodeControllerInfo nc1Info = mock(NodeControllerInfo.class);
    when(nc1Info.getNumAvailableCores()).thenReturn(4);
    NodeControllerInfo nc2Info = mock(NodeControllerInfo.class);
    when(nc2Info.getNumAvailableCores()).thenReturn(4);
    String nc1 = "nc1";
    String nc2 = "nc2";
    map.put(nc1, nc1Info);
    map.put(nc2, nc2Info);
    // Creates an APIFramework.
    APIFramework apiFramework = new APIFramework(mock(ILangCompilationProvider.class));
    // Tests odd-number parallelism.
    AlgebricksAbsolutePartitionConstraint loc = (AlgebricksAbsolutePartitionConstraint) PA.invokeMethod(apiFramework,
            "getComputationLocations(java.util.Map,int)", map, 5);
    int nc1Count = 0, nc2Count = 0;
    String[] partitions = loc.getLocations();
    for (String partition : partitions) {
        if (partition.equals(nc1)) {
            ++nc1Count;
        }
        if (partition.equals(nc2)) {
            ++nc2Count;
        }
    }
    Assert.assertTrue(nc1Count > 0);
    Assert.assertTrue(nc2Count > 0);
    // Tests load balance.
    Assert.assertTrue(Math.abs(nc1Count - nc2Count) == 1);
    Assert.assertTrue(partitions.length == 5);
    // Tests even-number parallelism.
    loc = (AlgebricksAbsolutePartitionConstraint) PA.invokeMethod(apiFramework,
            "getComputationLocations(java.util.Map,int)", map, 8);
    nc1Count = 0;
    nc2Count = 0;
    partitions = loc.getLocations();
    for (String partition : partitions) {
        if (partition.equals(nc1)) {
            ++nc1Count;
        }
        if (partition.equals(nc2)) {
            ++nc2Count;
        }
    }
    Assert.assertTrue(nc1Count > 0);
    Assert.assertTrue(nc2Count > 0);
    // Tests load balance.
    Assert.assertTrue(Math.abs(nc1Count - nc2Count) == 0);
    // The maximum parallelism cannot exceed n * #cores, where n is the number of NCs and #cores is the
    // number of cores per NC.
    Assert.assertTrue(partitions.length == 8);
    // Tests the case where the parallelism is one.
    loc = (AlgebricksAbsolutePartitionConstraint) PA.invokeMethod(apiFramework,
            "getComputationLocations(java.util.Map,int)", map, 1);
    Assert.assertTrue(loc.getLocations().length == 1);
    // Tests the case where the parallelism is negative.
    // In this case, the hint gives the compiler no information, so it falls back to the default setting
    // where all available cores are used.
    loc = (AlgebricksAbsolutePartitionConstraint) PA.invokeMethod(apiFramework,
            "getComputationLocations(java.util.Map,int)", map, -100);
    Assert.assertTrue(loc.getLocations().length == 8);
    // Tests the case where the parallelism is -1, which likewise falls back to using all available cores.
    loc = (AlgebricksAbsolutePartitionConstraint) PA.invokeMethod(apiFramework,
            "getComputationLocations(java.util.Map,int)", map, -1);
    Assert.assertTrue(loc.getLocations().length == 8);
    // Tests the case where the parallelism is zero, which likewise falls back to using all available cores.
    loc = (AlgebricksAbsolutePartitionConstraint) PA.invokeMethod(apiFramework,
            "getComputationLocations(java.util.Map,int)", map, 0);
    Assert.assertTrue(loc.getLocations().length == 8);
}
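The load-balance assertions above follow from simple integer division: with numNodes nodes and p requested partitions, p % numNodes nodes receive one extra partition, so per-node counts differ by at most one. A minimal, self-contained sketch of that arithmetic (class and variable names here are illustrative, not part of the project):

// Demonstrates the partition split that the assertions above encode.
public final class PartitionSplitDemo {
    public static void main(String[] args) {
        int numNodes = 2;
        for (int parallelism : new int[] { 5, 8 }) {
            int minPerNode = parallelism / numNodes;     // partitions on the "small" nodes
            int nodesWithExtra = parallelism % numNodes; // nodes that receive minPerNode + 1
            // parallelism=5 -> one node gets 3, the other gets 2 (difference of 1).
            // parallelism=8 -> both nodes get 4 (difference of 0).
            System.out.printf("parallelism=%d: %d node(s) with %d partitions, %d node(s) with %d%n",
                    parallelism, nodesWithExtra, minPerNode + 1, numNodes - nodesWithExtra, minPerNode);
        }
    }
}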
use of org.apache.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint in project asterixdb by apache.
the class APIFramework method compileQuery.
public JobSpecification compileQuery(IClusterInfoCollector clusterInfoCollector, MetadataProvider metadataProvider,
        Query rwQ, int varCounter, String outputDatasetName, SessionOutput output, ICompiledDmlStatement statement)
        throws AlgebricksException, RemoteException, ACIDException {
    SessionConfig conf = output.config();
    if (!conf.is(SessionConfig.FORMAT_ONLY_PHYSICAL_OPS) && conf.is(SessionConfig.OOB_REWRITTEN_EXPR_TREE)) {
        output.out().println();
        printPlanPrefix(output, "Rewritten expression tree");
        if (rwQ != null) {
            rwQ.accept(astPrintVisitorFactory.createLangVisitor(output.out()), 0);
        }
        printPlanPostfix(output);
    }
    org.apache.asterix.common.transactions.JobId asterixJobId = JobIdFactory.generateJobId();
    metadataProvider.setJobId(asterixJobId);
    ILangExpressionToPlanTranslator t =
            translatorFactory.createExpressionToPlanTranslator(metadataProvider, varCounter);
    ILogicalPlan plan;
    // The statement parameter is null when the top-level statement is a query.
    if (statement == null || statement.getKind() != Statement.Kind.LOAD) {
        plan = t.translate(rwQ, outputDatasetName, statement);
    } else {
        plan = t.translateLoad(statement);
    }
    if (!conf.is(SessionConfig.FORMAT_ONLY_PHYSICAL_OPS) && conf.is(SessionConfig.OOB_LOGICAL_PLAN)) {
        output.out().println();
        printPlanPrefix(output, "Logical plan");
        if (rwQ != null || (statement != null && statement.getKind() == Statement.Kind.LOAD)) {
            LogicalOperatorPrettyPrintVisitor pvisitor = new LogicalOperatorPrettyPrintVisitor(output.out());
            PlanPrettyPrinter.printPlan(plan, pvisitor, 0);
        }
        printPlanPostfix(output);
    }
    CompilerProperties compilerProperties = metadataProvider.getApplicationContext().getCompilerProperties();
    int frameSize = compilerProperties.getFrameSize();
    Map<String, String> querySpecificConfig = metadataProvider.getConfig();
    // Validates the user-overridden query parameters.
    validateConfig(querySpecificConfig);
    int sortFrameLimit = getFrameLimit(CompilerProperties.COMPILER_SORTMEMORY_KEY,
            querySpecificConfig.get(CompilerProperties.COMPILER_SORTMEMORY_KEY),
            compilerProperties.getSortMemorySize(), frameSize, MIN_FRAME_LIMIT_FOR_SORT);
    int groupFrameLimit = getFrameLimit(CompilerProperties.COMPILER_GROUPMEMORY_KEY,
            querySpecificConfig.get(CompilerProperties.COMPILER_GROUPMEMORY_KEY),
            compilerProperties.getGroupMemorySize(), frameSize, MIN_FRAME_LIMIT_FOR_GROUP_BY);
    int joinFrameLimit = getFrameLimit(CompilerProperties.COMPILER_JOINMEMORY_KEY,
            querySpecificConfig.get(CompilerProperties.COMPILER_JOINMEMORY_KEY),
            compilerProperties.getJoinMemorySize(), frameSize, MIN_FRAME_LIMIT_FOR_JOIN);
    OptimizationConfUtil.getPhysicalOptimizationConfig().setFrameSize(frameSize);
    OptimizationConfUtil.getPhysicalOptimizationConfig().setMaxFramesExternalSort(sortFrameLimit);
    OptimizationConfUtil.getPhysicalOptimizationConfig().setMaxFramesExternalGroupBy(groupFrameLimit);
    OptimizationConfUtil.getPhysicalOptimizationConfig().setMaxFramesForJoin(joinFrameLimit);
    HeuristicCompilerFactoryBuilder builder =
            new HeuristicCompilerFactoryBuilder(OptimizationContextFactory.INSTANCE);
    builder.setPhysicalOptimizationConfig(OptimizationConfUtil.getPhysicalOptimizationConfig());
    builder.setLogicalRewrites(ruleSetFactory.getLogicalRewrites(metadataProvider.getApplicationContext()));
    builder.setPhysicalRewrites(ruleSetFactory.getPhysicalRewrites(metadataProvider.getApplicationContext()));
    IDataFormat format = metadataProvider.getFormat();
    ICompilerFactory compilerFactory = builder.create();
    builder.setExpressionEvalSizeComputer(format.getExpressionEvalSizeComputer());
    builder.setIMergeAggregationExpressionFactory(new MergeAggregationExpressionFactory());
    builder.setPartialAggregationTypeComputer(new PartialAggregationTypeComputer());
    builder.setExpressionTypeComputer(ExpressionTypeComputer.INSTANCE);
    builder.setMissableTypeComputer(MissableTypeComputer.INSTANCE);
    builder.setConflictingTypeResolver(ConflictingTypeResolver.INSTANCE);
    int parallelism = getParallelism(querySpecificConfig.get(CompilerProperties.COMPILER_PARALLELISM_KEY),
            compilerProperties.getParallelism());
    AlgebricksAbsolutePartitionConstraint computationLocations =
            chooseLocations(clusterInfoCollector, parallelism, metadataProvider.getClusterLocations());
    builder.setClusterLocations(computationLocations);
    ICompiler compiler = compilerFactory.createCompiler(plan, metadataProvider, t.getVarCounter());
    if (conf.isOptimize()) {
        compiler.optimize();
        if (conf.is(SessionConfig.OOB_OPTIMIZED_LOGICAL_PLAN)) {
            if (conf.is(SessionConfig.FORMAT_ONLY_PHYSICAL_OPS)) {
                // For Optimizer tests.
                AlgebricksAppendable buffer = new AlgebricksAppendable(output.out());
                PlanPrettyPrinter.printPhysicalOps(plan, buffer, 0);
            } else {
                printPlanPrefix(output, "Optimized logical plan");
                if (rwQ != null || (statement != null && statement.getKind() == Statement.Kind.LOAD)) {
                    LogicalOperatorPrettyPrintVisitor pvisitor =
                            new LogicalOperatorPrettyPrintVisitor(output.out());
                    PlanPrettyPrinter.printPlan(plan, pvisitor, 0);
                }
                printPlanPostfix(output);
            }
        }
    }
    if (rwQ != null && rwQ.isExplain()) {
        try {
            LogicalOperatorPrettyPrintVisitor pvisitor = new LogicalOperatorPrettyPrintVisitor();
            PlanPrettyPrinter.printPlan(plan, pvisitor, 0);
            ResultUtil.printResults(metadataProvider.getApplicationContext(), pvisitor.get().toString(), output,
                    new Stats(), null);
            return null;
        } catch (IOException e) {
            throw new AlgebricksException(e);
        }
    }
    if (!conf.isGenerateJobSpec()) {
        return null;
    }
    builder.setBinaryBooleanInspectorFactory(format.getBinaryBooleanInspectorFactory());
    builder.setBinaryIntegerInspectorFactory(format.getBinaryIntegerInspectorFactory());
    builder.setComparatorFactoryProvider(format.getBinaryComparatorFactoryProvider());
    builder.setExpressionRuntimeProvider(new ExpressionRuntimeProvider(QueryLogicalExpressionJobGen.INSTANCE));
    builder.setHashFunctionFactoryProvider(format.getBinaryHashFunctionFactoryProvider());
    builder.setHashFunctionFamilyProvider(format.getBinaryHashFunctionFamilyProvider());
    builder.setMissingWriterFactory(format.getMissingWriterFactory());
    builder.setPredicateEvaluatorFactoryProvider(format.getPredicateEvaluatorFactoryProvider());
    final SessionConfig.OutputFormat outputFormat = conf.fmt();
    switch (outputFormat) {
        case LOSSLESS_JSON:
            builder.setPrinterProvider(format.getLosslessJSONPrinterFactoryProvider());
            break;
        case CSV:
            builder.setPrinterProvider(format.getCSVPrinterFactoryProvider());
            break;
        case ADM:
            builder.setPrinterProvider(format.getADMPrinterFactoryProvider());
            break;
        case CLEAN_JSON:
            builder.setPrinterProvider(format.getCleanJSONPrinterFactoryProvider());
            break;
        default:
            throw new AlgebricksException("Unexpected OutputFormat: " + outputFormat);
    }
    builder.setSerializerDeserializerProvider(format.getSerdeProvider());
    builder.setTypeTraitProvider(format.getTypeTraitProvider());
    builder.setNormalizedKeyComputerFactoryProvider(format.getNormalizedKeyComputerFactoryProvider());
    JobEventListenerFactory jobEventListenerFactory =
            new JobEventListenerFactory(asterixJobId, metadataProvider.isWriteTransaction());
    JobSpecification spec = compiler.createJob(metadataProvider.getApplicationContext(), jobEventListenerFactory);
    // When the top-level statement is a query, the statement parameter is null.
    if (statement == null) {
        // Sets the required capacity only for read-only queries; DDLs and DMLs are considered infrequent.
        spec.setRequiredClusterCapacity(ResourceUtils.getRequiredCompacity(plan, computationLocations,
                sortFrameLimit, groupFrameLimit, joinFrameLimit, frameSize));
    }
    if (conf.is(SessionConfig.OOB_HYRACKS_JOB)) {
        printPlanPrefix(output, "Hyracks job");
        if (rwQ != null) {
            try {
                output.out().println(
                        new ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(spec.toJSON()));
            } catch (IOException e) {
                throw new AlgebricksException(e);
            }
            output.out().println(spec.getUserConstraints());
        }
        printPlanPostfix(output);
    }
    return spec;
}
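The sort, group, and join frame limits computed near the top of compileQuery translate per-operator memory budgets into frame counts before they are pushed into the PhysicalOptimizationConfig. As a rough sketch of that translation (the project's actual getFrameLimit is not shown here and may differ, for example by rejecting budgets below the minimum rather than clamping them), one might write:

// Hypothetical sketch of a memory-budget-to-frame-limit conversion; illustrative only.
static int frameLimitSketch(long memoryBudgetInBytes, int frameSize, int minFrameLimit) {
    // Each operator gets budget/frameSize frames, but never fewer than its minimum.
    int frames = (int) (memoryBudgetInBytes / frameSize);
    return Math.max(frames, minFrameLimit);
}

Under this sketch, a 32 MB sort budget with a 32 KB frame size yields 1024 sort frames.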
use of org.apache.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint in project asterixdb by apache.
the class APIFramework method getComputationLocations.
// Computes the location constraints based on the user-configured parallelism parameter.
// Note that the parallelism parameter is only a hint -- it will not be respected if it is too small or too large.
private static AlgebricksAbsolutePartitionConstraint getComputationLocations(Map<String, NodeControllerInfo> ncMap,
        int parallelismHint) {
    // Unifies the handling of non-positive parallelism.
    int parallelism = parallelismHint <= 0 ? -2 * ncMap.size() : parallelismHint;
    // Calculates the per-node parallelism with load balancing, i.e., randomly selecting the nodes that
    // receive one extra partition.
    int numNodes = ncMap.size();
    int numNodesWithOneMorePartition = parallelism % numNodes;
    int perNodeParallelismMin = parallelism / numNodes;
    int perNodeParallelismMax = parallelism / numNodes + 1;
    List<String> allNodes = new ArrayList<>();
    Set<String> selectedNodesWithOneMorePartition = new HashSet<>();
    for (Map.Entry<String, NodeControllerInfo> entry : ncMap.entrySet()) {
        allNodes.add(entry.getKey());
    }
    Random random = new Random();
    for (int index = numNodesWithOneMorePartition; index >= 1; --index) {
        int pick = random.nextInt(index);
        selectedNodesWithOneMorePartition.add(allNodes.get(pick));
        Collections.swap(allNodes, pick, index - 1);
    }
    // Generates the cluster locations, which contain duplicates for a node if it hosts more than one partition.
    List<String> locations = new ArrayList<>();
    for (Map.Entry<String, NodeControllerInfo> entry : ncMap.entrySet()) {
        String nodeId = entry.getKey();
        int availableCores = entry.getValue().getNumAvailableCores();
        int nodeParallelism =
                selectedNodesWithOneMorePartition.contains(nodeId) ? perNodeParallelismMax : perNodeParallelismMin;
        int coresToUse = nodeParallelism >= 0 && nodeParallelism < availableCores ? nodeParallelism : availableCores;
        for (int count = 0; count < coresToUse; ++count) {
            locations.add(nodeId);
        }
    }
    return new AlgebricksAbsolutePartitionConstraint(locations.toArray(new String[0]));
}
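The non-positive-hint path is the subtle part of this method: a hint of 0 on a two-node cluster is rewritten to -4, which makes both perNodeParallelismMin (-2) and perNodeParallelismMax (-1) negative, so the nodeParallelism >= 0 guard fails for every node and coresToUse falls back to availableCores. A minimal sketch isolating that guard (the method name is illustrative):

// Per-node core selection: invalid (negative) or oversized hints use all available cores.
static int coresToUse(int nodeParallelism, int availableCores) {
    return nodeParallelism >= 0 && nodeParallelism < availableCores ? nodeParallelism : availableCores;
}

Here coresToUse(-2, 4) and coresToUse(10, 4) both return 4, while coresToUse(3, 4) returns 3, matching the expectations asserted in testGetComputationLocations above.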
use of org.apache.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint in project asterixdb by apache.
the class FeedOperations method buildRemoveFeedStorageJob.
public static JobSpecification buildRemoveFeedStorageJob(MetadataProvider metadataProvider, Feed feed)
        throws AsterixException {
    JobSpecification spec = RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
    AlgebricksAbsolutePartitionConstraint allCluster = ClusterStateManager.INSTANCE.getClusterLocations();
    Set<String> nodes = new TreeSet<>();
    for (String node : allCluster.getLocations()) {
        nodes.add(node);
    }
    AlgebricksAbsolutePartitionConstraint locations =
            new AlgebricksAbsolutePartitionConstraint(nodes.toArray(new String[nodes.size()]));
    FileSplit[] feedLogFileSplits =
            FeedUtils.splitsForAdapter(feed.getDataverseName(), feed.getFeedName(), locations);
    org.apache.hyracks.algebricks.common.utils.Pair<IFileSplitProvider, AlgebricksPartitionConstraint> spC =
            StoragePathUtil.splitProviderAndPartitionConstraints(feedLogFileSplits);
    FileRemoveOperatorDescriptor frod = new FileRemoveOperatorDescriptor(spec, spC.first, true);
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, frod, spC.second);
    spec.addRoot(frod);
    return spec;
}
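The TreeSet pass exists because a cluster constraint typically lists a node once per partition (as getComputationLocations above illustrates), while the remove job only needs to visit each node once. A small illustration of the deduplication (the input values are hypothetical):

import java.util.Arrays;
import java.util.TreeSet;

public final class DedupLocationsDemo {
    public static void main(String[] args) {
        // One entry per partition, so nodes repeat.
        String[] allLocations = { "nc1", "nc1", "nc2", "nc2" };
        String[] uniqueLocations = new TreeSet<>(Arrays.asList(allLocations)).toArray(new String[0]);
        System.out.println(Arrays.toString(uniqueLocations)); // [nc1, nc2]
    }
}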
use of org.apache.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint in project asterixdb by apache.
the class FeedDataSource method buildDatasourceScanRuntime.
@Override
public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> buildDatasourceScanRuntime(
        MetadataProvider metadataProvider, IDataSource<DataSourceId> dataSource,
        List<LogicalVariable> scanVariables, List<LogicalVariable> projectVariables, boolean projectPushed,
        List<LogicalVariable> minFilterVars, List<LogicalVariable> maxFilterVars, IOperatorSchema opSchema,
        IVariableTypeEnvironment typeEnv, JobGenContext context, JobSpecification jobSpec, Object implConfig)
        throws AlgebricksException {
    try {
        ARecordType feedOutputType = (ARecordType) itemType;
        ISerializerDeserializer payloadSerde =
                NonTaggedDataFormat.INSTANCE.getSerdeProvider().getSerializerDeserializer(feedOutputType);
        ArrayList<ISerializerDeserializer> serdes = new ArrayList<>();
        serdes.add(payloadSerde);
        if (metaItemType != null) {
            serdes.add(SerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(metaItemType));
        }
        if (pkTypes != null) {
            for (IAType type : pkTypes) {
                serdes.add(SerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(type));
            }
        }
        RecordDescriptor feedDesc =
                new RecordDescriptor(serdes.toArray(new ISerializerDeserializer[serdes.size()]));
        FeedPolicyEntity feedPolicy =
                (FeedPolicyEntity) getProperties().get(BuiltinFeedPolicies.CONFIG_FEED_POLICY_KEY);
        if (feedPolicy == null) {
            throw new AlgebricksException("Feed not configured with a policy");
        }
        feedPolicy.getProperties().put(BuiltinFeedPolicies.CONFIG_FEED_POLICY_KEY, feedPolicy.getPolicyName());
        FeedConnectionId feedConnectionId =
                new FeedConnectionId(getId().getDataverseName(), getId().getDatasourceName(), getTargetDataset());
        FeedCollectOperatorDescriptor feedCollector = new FeedCollectOperatorDescriptor(jobSpec, feedConnectionId,
                feedOutputType, feedDesc, feedPolicy.getProperties(), getLocation());
        return new Pair<>(feedCollector, new AlgebricksAbsolutePartitionConstraint(getLocations()));
    } catch (Exception e) {
        throw new AlgebricksException(e);
    }
}
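The RecordDescriptor assembled in the try block fixes the field order that downstream operators see: the payload record first, then the meta record if the feed carries one, then one field per primary-key type if pkTypes is set. A tiny sketch of that layout logic (the boolean and count stand in for metaItemType and pkTypes, which live on the data source):

// Field order of the feed's RecordDescriptor: payload, optional meta, then PK fields.
public final class FeedRecordLayoutDemo {
    public static void main(String[] args) {
        java.util.List<String> fieldOrder = new java.util.ArrayList<>();
        fieldOrder.add("payload");              // always present
        boolean hasMeta = true;                 // stands in for metaItemType != null
        if (hasMeta) {
            fieldOrder.add("meta");
        }
        int numPrimaryKeys = 1;                 // stands in for pkTypes with one entry
        for (int i = 0; i < numPrimaryKeys; i++) {
            fieldOrder.add("pk" + i);
        }
        System.out.println(fieldOrder);         // [payload, meta, pk0]
    }
}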