Use of org.apache.asterix.runtime.job.listener.JobEventListenerFactory in project asterixdb by apache.
In class RebalanceUtil, the method populateDataToRebalanceTarget:
// Populates the data from the source dataset to the rebalance target dataset.
private static void populateDataToRebalanceTarget(Dataset source, Dataset target,
        MetadataProvider metadataProvider, IHyracksClientConnection hcc) throws Exception {
    JobSpecification spec = new JobSpecification();
    JobId jobId = JobIdFactory.generateJobId();
    // 'true' marks this as a write transaction, since the job writes to the target dataset.
    JobEventListenerFactory jobEventListenerFactory = new JobEventListenerFactory(jobId, true);
    spec.setJobletEventListenerFactory(jobEventListenerFactory);
    // The pipeline starter.
    IOperatorDescriptor starter = DatasetUtil.createDummyKeyProviderOp(spec, source, metadataProvider);
    // Creates the primary index scan op.
    IOperatorDescriptor primaryScanOp = DatasetUtil.createPrimaryIndexScanOp(spec, metadataProvider, source, jobId);
    // Creates the primary index upsert op.
    IOperatorDescriptor upsertOp = createPrimaryIndexUpsertOp(spec, metadataProvider, source, target);
    // The final commit operator.
    IOperatorDescriptor commitOp = createUpsertCommitOp(spec, metadataProvider, jobId, target);
    // Connects empty-tuple-source and scan.
    spec.connect(new OneToOneConnectorDescriptor(spec), starter, 0, primaryScanOp, 0);
    // Connects scan and upsert, hash-partitioning tuples on the target's primary keys.
    int numKeys = target.getPrimaryKeys().size();
    int[] keys = IntStream.range(0, numKeys).toArray();
    IConnectorDescriptor connectorDescriptor = new MToNPartitioningConnectorDescriptor(spec,
            new FieldHashPartitionComputerFactory(keys, target.getPrimaryHashFunctionFactories(metadataProvider)));
    spec.connect(connectorDescriptor, primaryScanOp, 0, upsertOp, 0);
    // Connects upsert and commit.
    spec.connect(new OneToOneConnectorDescriptor(spec), upsertOp, 0, commitOp, 0);
    // Executes the job.
    JobUtils.runJob(hcc, spec, true);
}
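The three statements at the top of this method are the same listener-binding pattern that IndexUtil.bindJobEventListener (next snippet) factors out. A minimal sketch of the setup using that helper, assuming the helper is visible here, with two small differences worth noting: the helper reads the write-transaction flag from the metadata provider rather than hard-coding true, and it also records the JobId on the provider.

JobSpecification spec = new JobSpecification();
// Assumes metadataProvider.isWriteTransaction() has been set for this job.
JobId jobId = IndexUtil.bindJobEventListener(spec, metadataProvider);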
Use of org.apache.asterix.runtime.job.listener.JobEventListenerFactory in project asterixdb by apache.
In class IndexUtil, the method bindJobEventListener:
/**
 * Binds a job event listener to the job specification.
 *
 * @param spec the job specification
 * @param metadataProvider the metadata provider
 * @return the AsterixDB job id for transaction management
 */
public static JobId bindJobEventListener(JobSpecification spec, MetadataProvider metadataProvider) {
    JobId jobId = JobIdFactory.generateJobId();
    metadataProvider.setJobId(jobId);
    boolean isWriteTransaction = metadataProvider.isWriteTransaction();
    IJobletEventListenerFactory jobEventListenerFactory = new JobEventListenerFactory(jobId, isWriteTransaction);
    spec.setJobletEventListenerFactory(jobEventListenerFactory);
    return jobId;
}
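Callers that do not hold on to the returned JobId can recover it later from the specification itself; this round trip is exactly the idiom the InvertedIndexPOperator and MetadataProvider snippets below rely on.

// Recovering the transaction JobId from a spec bound via bindJobEventListener.
JobId jobId = ((JobEventListenerFactory) spec.getJobletEventListenerFactory()).getJobId();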
Use of org.apache.asterix.runtime.job.listener.JobEventListenerFactory in project asterixdb by apache.
In class APIFramework, the method compileQuery:
public JobSpecification compileQuery(IClusterInfoCollector clusterInfoCollector, MetadataProvider metadataProvider,
        Query rwQ, int varCounter, String outputDatasetName, SessionOutput output,
        ICompiledDmlStatement statement) throws AlgebricksException, RemoteException, ACIDException {
    SessionConfig conf = output.config();
    if (!conf.is(SessionConfig.FORMAT_ONLY_PHYSICAL_OPS) && conf.is(SessionConfig.OOB_REWRITTEN_EXPR_TREE)) {
        output.out().println();
        printPlanPrefix(output, "Rewritten expression tree");
        if (rwQ != null) {
            rwQ.accept(astPrintVisitorFactory.createLangVisitor(output.out()), 0);
        }
        printPlanPostfix(output);
    }
    org.apache.asterix.common.transactions.JobId asterixJobId = JobIdFactory.generateJobId();
    metadataProvider.setJobId(asterixJobId);
    ILangExpressionToPlanTranslator t =
            translatorFactory.createExpressionToPlanTranslator(metadataProvider, varCounter);
    ILogicalPlan plan;
    // statement == null means the top-level statement is a query.
    if (statement == null || statement.getKind() != Statement.Kind.LOAD) {
        plan = t.translate(rwQ, outputDatasetName, statement);
    } else {
        plan = t.translateLoad(statement);
    }
    if (!conf.is(SessionConfig.FORMAT_ONLY_PHYSICAL_OPS) && conf.is(SessionConfig.OOB_LOGICAL_PLAN)) {
        output.out().println();
        printPlanPrefix(output, "Logical plan");
        if (rwQ != null || (statement != null && statement.getKind() == Statement.Kind.LOAD)) {
            LogicalOperatorPrettyPrintVisitor pvisitor = new LogicalOperatorPrettyPrintVisitor(output.out());
            PlanPrettyPrinter.printPlan(plan, pvisitor, 0);
        }
        printPlanPostfix(output);
    }
    CompilerProperties compilerProperties = metadataProvider.getApplicationContext().getCompilerProperties();
    int frameSize = compilerProperties.getFrameSize();
    Map<String, String> querySpecificConfig = metadataProvider.getConfig();
    // Validates the user-overridden query parameters.
    validateConfig(querySpecificConfig);
    int sortFrameLimit = getFrameLimit(CompilerProperties.COMPILER_SORTMEMORY_KEY,
            querySpecificConfig.get(CompilerProperties.COMPILER_SORTMEMORY_KEY),
            compilerProperties.getSortMemorySize(), frameSize, MIN_FRAME_LIMIT_FOR_SORT);
    int groupFrameLimit = getFrameLimit(CompilerProperties.COMPILER_GROUPMEMORY_KEY,
            querySpecificConfig.get(CompilerProperties.COMPILER_GROUPMEMORY_KEY),
            compilerProperties.getGroupMemorySize(), frameSize, MIN_FRAME_LIMIT_FOR_GROUP_BY);
    int joinFrameLimit = getFrameLimit(CompilerProperties.COMPILER_JOINMEMORY_KEY,
            querySpecificConfig.get(CompilerProperties.COMPILER_JOINMEMORY_KEY),
            compilerProperties.getJoinMemorySize(), frameSize, MIN_FRAME_LIMIT_FOR_JOIN);
    OptimizationConfUtil.getPhysicalOptimizationConfig().setFrameSize(frameSize);
    OptimizationConfUtil.getPhysicalOptimizationConfig().setMaxFramesExternalSort(sortFrameLimit);
    OptimizationConfUtil.getPhysicalOptimizationConfig().setMaxFramesExternalGroupBy(groupFrameLimit);
    OptimizationConfUtil.getPhysicalOptimizationConfig().setMaxFramesForJoin(joinFrameLimit);
    HeuristicCompilerFactoryBuilder builder =
            new HeuristicCompilerFactoryBuilder(OptimizationContextFactory.INSTANCE);
    builder.setPhysicalOptimizationConfig(OptimizationConfUtil.getPhysicalOptimizationConfig());
    builder.setLogicalRewrites(ruleSetFactory.getLogicalRewrites(metadataProvider.getApplicationContext()));
    builder.setPhysicalRewrites(ruleSetFactory.getPhysicalRewrites(metadataProvider.getApplicationContext()));
    IDataFormat format = metadataProvider.getFormat();
    ICompilerFactory compilerFactory = builder.create();
    builder.setExpressionEvalSizeComputer(format.getExpressionEvalSizeComputer());
    builder.setIMergeAggregationExpressionFactory(new MergeAggregationExpressionFactory());
    builder.setPartialAggregationTypeComputer(new PartialAggregationTypeComputer());
    builder.setExpressionTypeComputer(ExpressionTypeComputer.INSTANCE);
    builder.setMissableTypeComputer(MissableTypeComputer.INSTANCE);
    builder.setConflictingTypeResolver(ConflictingTypeResolver.INSTANCE);
    int parallelism = getParallelism(querySpecificConfig.get(CompilerProperties.COMPILER_PARALLELISM_KEY),
            compilerProperties.getParallelism());
    AlgebricksAbsolutePartitionConstraint computationLocations =
            chooseLocations(clusterInfoCollector, parallelism, metadataProvider.getClusterLocations());
    builder.setClusterLocations(computationLocations);
    ICompiler compiler = compilerFactory.createCompiler(plan, metadataProvider, t.getVarCounter());
    if (conf.isOptimize()) {
        compiler.optimize();
        if (conf.is(SessionConfig.OOB_OPTIMIZED_LOGICAL_PLAN)) {
            if (conf.is(SessionConfig.FORMAT_ONLY_PHYSICAL_OPS)) {
                // For Optimizer tests.
                AlgebricksAppendable buffer = new AlgebricksAppendable(output.out());
                PlanPrettyPrinter.printPhysicalOps(plan, buffer, 0);
            } else {
                printPlanPrefix(output, "Optimized logical plan");
                if (rwQ != null || (statement != null && statement.getKind() == Statement.Kind.LOAD)) {
                    LogicalOperatorPrettyPrintVisitor pvisitor =
                            new LogicalOperatorPrettyPrintVisitor(output.out());
                    PlanPrettyPrinter.printPlan(plan, pvisitor, 0);
                }
                printPlanPostfix(output);
            }
        }
    }
    if (rwQ != null && rwQ.isExplain()) {
        try {
            LogicalOperatorPrettyPrintVisitor pvisitor = new LogicalOperatorPrettyPrintVisitor();
            PlanPrettyPrinter.printPlan(plan, pvisitor, 0);
            ResultUtil.printResults(metadataProvider.getApplicationContext(), pvisitor.get().toString(), output,
                    new Stats(), null);
            return null;
        } catch (IOException e) {
            throw new AlgebricksException(e);
        }
    }
    if (!conf.isGenerateJobSpec()) {
        return null;
    }
    builder.setBinaryBooleanInspectorFactory(format.getBinaryBooleanInspectorFactory());
    builder.setBinaryIntegerInspectorFactory(format.getBinaryIntegerInspectorFactory());
    builder.setComparatorFactoryProvider(format.getBinaryComparatorFactoryProvider());
    builder.setExpressionRuntimeProvider(new ExpressionRuntimeProvider(QueryLogicalExpressionJobGen.INSTANCE));
    builder.setHashFunctionFactoryProvider(format.getBinaryHashFunctionFactoryProvider());
    builder.setHashFunctionFamilyProvider(format.getBinaryHashFunctionFamilyProvider());
    builder.setMissingWriterFactory(format.getMissingWriterFactory());
    builder.setPredicateEvaluatorFactoryProvider(format.getPredicateEvaluatorFactoryProvider());
    final SessionConfig.OutputFormat outputFormat = conf.fmt();
    switch (outputFormat) {
        case LOSSLESS_JSON:
            builder.setPrinterProvider(format.getLosslessJSONPrinterFactoryProvider());
            break;
        case CSV:
            builder.setPrinterProvider(format.getCSVPrinterFactoryProvider());
            break;
        case ADM:
            builder.setPrinterProvider(format.getADMPrinterFactoryProvider());
            break;
        case CLEAN_JSON:
            builder.setPrinterProvider(format.getCleanJSONPrinterFactoryProvider());
            break;
        default:
            throw new AlgebricksException("Unexpected OutputFormat: " + outputFormat);
    }
    builder.setSerializerDeserializerProvider(format.getSerdeProvider());
    builder.setTypeTraitProvider(format.getTypeTraitProvider());
    builder.setNormalizedKeyComputerFactoryProvider(format.getNormalizedKeyComputerFactoryProvider());
    JobEventListenerFactory jobEventListenerFactory =
            new JobEventListenerFactory(asterixJobId, metadataProvider.isWriteTransaction());
    JobSpecification spec = compiler.createJob(metadataProvider.getApplicationContext(), jobEventListenerFactory);
    // When the top-level statement is a query, the statement parameter is null.
    if (statement == null) {
        // Sets the required cluster capacity, but only for read-only queries;
        // DDLs and DMLs are considered infrequent enough to skip.
        spec.setRequiredClusterCapacity(ResourceUtils.getRequiredCompacity(plan, computationLocations,
                sortFrameLimit, groupFrameLimit, joinFrameLimit, frameSize));
    }
    if (conf.is(SessionConfig.OOB_HYRACKS_JOB)) {
        printPlanPrefix(output, "Hyracks job");
        if (rwQ != null) {
            try {
                output.out().println(
                        new ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(spec.toJSON()));
            } catch (IOException e) {
                throw new AlgebricksException(e);
            }
            output.out().println(spec.getUserConstraints());
        }
        printPlanPostfix(output);
    }
    return spec;
}
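The three getFrameLimit calls above convert a per-operator memory budget in bytes into a frame count clamped to an operator-specific minimum. getFrameLimit itself is not shown here, so this is only a hypothetical sketch of the arithmetic, assuming straightforward integer division.

// Hypothetical sketch; the real getFrameLimit also parses the user-overridden
// budget string and validates it against the minimum, reporting bad values.
static int frameLimit(long memoryBudgetInBytes, int frameSize, int minFrameLimit) {
    int limit = (int) (memoryBudgetInBytes / frameSize);
    return Math.max(limit, minFrameLimit);
}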
Use of org.apache.asterix.runtime.job.listener.JobEventListenerFactory in project asterixdb by apache.
In class InvertedIndexPOperator, the method buildInvertedIndexRuntime:
public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> buildInvertedIndexRuntime(
        MetadataProvider metadataProvider, JobGenContext context, JobSpecification jobSpec,
        AbstractUnnestMapOperator unnestMap, IOperatorSchema opSchema, boolean retainInput, boolean retainMissing,
        String datasetName, Dataset dataset, String indexName, ATypeTag searchKeyType, int[] keyFields,
        SearchModifierType searchModifierType, IAlgebricksConstantValue similarityThreshold,
        int[] minFilterFieldIndexes, int[] maxFilterFieldIndexes, boolean isFullTextSearchQuery)
        throws AlgebricksException {
    try {
        IAObject simThresh = ((AsterixConstantValue) similarityThreshold).getObject();
        int numPrimaryKeys = dataset.getPrimaryKeys().size();
        Index secondaryIndex = MetadataManager.INSTANCE.getIndex(metadataProvider.getMetadataTxnContext(),
                dataset.getDataverseName(), dataset.getDatasetName(), indexName);
        if (secondaryIndex == null) {
            throw new AlgebricksException(
                    "Code generation error: no index " + indexName + " for dataset " + datasetName);
        }
        IVariableTypeEnvironment typeEnv = context.getTypeEnvironment(unnestMap);
        RecordDescriptor outputRecDesc = JobGenHelper.mkRecordDescriptor(typeEnv, opSchema, context);
        Pair<IFileSplitProvider, AlgebricksPartitionConstraint> secondarySplitsAndConstraint =
                metadataProvider.getSplitProviderAndConstraints(dataset, indexName);
        // TODO: Here we assume there is only one search key field.
        int queryField = keyFields[0];
        // Get tokenizer and search modifier factories.
        IInvertedIndexSearchModifierFactory searchModifierFactory =
                InvertedIndexAccessMethod.getSearchModifierFactory(searchModifierType, simThresh, secondaryIndex);
        IBinaryTokenizerFactory queryTokenizerFactory = InvertedIndexAccessMethod
                .getBinaryTokenizerFactory(searchModifierType, searchKeyType, secondaryIndex);
        IIndexDataflowHelperFactory dataflowHelperFactory = new IndexDataflowHelperFactory(
                metadataProvider.getStorageComponentProvider().getStorageManager(),
                secondarySplitsAndConstraint.first);
        // The transaction JobId needed by the search callback is recovered from the
        // JobEventListenerFactory bound to the job specification.
        JobId jobId = ((JobEventListenerFactory) jobSpec.getJobletEventListenerFactory()).getJobId();
        LSMInvertedIndexSearchOperatorDescriptor invIndexSearchOp = new LSMInvertedIndexSearchOperatorDescriptor(
                jobSpec, outputRecDesc, queryField, dataflowHelperFactory, queryTokenizerFactory,
                searchModifierFactory, retainInput, retainMissing, context.getMissingWriterFactory(),
                dataset.getSearchCallbackFactory(metadataProvider.getStorageComponentProvider(), secondaryIndex,
                        jobId, IndexOperation.SEARCH, null),
                minFilterFieldIndexes, maxFilterFieldIndexes, isFullTextSearchQuery, numPrimaryKeys, false);
        return new Pair<>(invIndexSearchOp, secondarySplitsAndConstraint.second);
    } catch (MetadataException e) {
        throw new AlgebricksException(e);
    }
}
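Note the TODO inside the method: only keyFields[0] is passed on to the operator descriptor, so if the access method ever produced multiple search key fields, all but the first would be silently ignored here.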
Use of org.apache.asterix.runtime.job.listener.JobEventListenerFactory in project asterixdb by apache.
In class MetadataProvider, the method getBTreeRuntime:
private Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getBTreeRuntime(String dataverseName,
        String datasetName, String indexName, IOperatorSchema propagatedSchema, List<LogicalVariable> primaryKeys,
        List<LogicalVariable> secondaryKeys, List<LogicalVariable> additionalNonKeyFields,
        AsterixTupleFilterFactory filterFactory, RecordDescriptor inputRecordDesc, JobGenContext context,
        JobSpecification spec, IndexOperation indexOp, boolean bulkload, List<LogicalVariable> prevSecondaryKeys,
        List<LogicalVariable> prevAdditionalFilteringKeys) throws AlgebricksException {
    Dataset dataset = MetadataManagerUtil.findExistingDataset(mdTxnCtx, dataverseName, datasetName);
    boolean temp = dataset.getDatasetDetails().isTemp();
    isTemporaryDatasetWriteJob = isTemporaryDatasetWriteJob && temp;
    int numKeys = primaryKeys.size() + secondaryKeys.size();
    int numFilterFields = DatasetUtil.getFilterField(dataset) == null ? 0 : 1;
    // Generate the field permutation: [secondary keys ..., primary keys ..., optional filter field].
    int[] fieldPermutation = new int[numKeys + numFilterFields];
    int[] modificationCallbackPrimaryKeyFields = new int[primaryKeys.size()];
    int i = 0;
    int j = 0;
    for (LogicalVariable varKey : secondaryKeys) {
        int idx = propagatedSchema.findVariable(varKey);
        fieldPermutation[i] = idx;
        i++;
    }
    for (LogicalVariable varKey : primaryKeys) {
        int idx = propagatedSchema.findVariable(varKey);
        fieldPermutation[i] = idx;
        // Record where each primary key lands in the permutation for the modification callback.
        modificationCallbackPrimaryKeyFields[j] = i;
        i++;
        j++;
    }
    if (numFilterFields > 0) {
        int idx = propagatedSchema.findVariable(additionalNonKeyFields.get(0));
        fieldPermutation[numKeys] = idx;
    }
    int[] prevFieldPermutation = null;
    if (indexOp == IndexOperation.UPSERT) {
        // Generate the field permutation for the previous record.
        prevFieldPermutation = new int[numKeys + numFilterFields];
        int k = 0;
        for (LogicalVariable varKey : prevSecondaryKeys) {
            int idx = propagatedSchema.findVariable(varKey);
            prevFieldPermutation[k] = idx;
            k++;
        }
        for (LogicalVariable varKey : primaryKeys) {
            int idx = propagatedSchema.findVariable(varKey);
            prevFieldPermutation[k] = idx;
            k++;
        }
        // The filter can only be one field!
        if (numFilterFields > 0) {
            int idx = propagatedSchema.findVariable(prevAdditionalFilteringKeys.get(0));
            prevFieldPermutation[numKeys] = idx;
        }
    }
    try {
        // Index parameters.
        Index secondaryIndex = MetadataManager.INSTANCE.getIndex(mdTxnCtx, dataset.getDataverseName(),
                dataset.getDatasetName(), indexName);
        Pair<IFileSplitProvider, AlgebricksPartitionConstraint> splitsAndConstraint =
                getSplitProviderAndConstraints(dataset, secondaryIndex.getIndexName());
        // Prepare the callback; the transaction JobId is recovered from the
        // JobEventListenerFactory bound to the job specification.
        JobId jobId = ((JobEventListenerFactory) spec.getJobletEventListenerFactory()).getJobId();
        IModificationOperationCallbackFactory modificationCallbackFactory = dataset.getModificationCallbackFactory(
                storaegComponentProvider, secondaryIndex, jobId, indexOp, modificationCallbackPrimaryKeyFields);
        IIndexDataflowHelperFactory idfh = new IndexDataflowHelperFactory(
                storaegComponentProvider.getStorageManager(), splitsAndConstraint.first);
        IOperatorDescriptor op;
        if (bulkload) {
            long numElementsHint = getCardinalityPerPartitionHint(dataset);
            op = new TreeIndexBulkLoadOperatorDescriptor(spec, inputRecordDesc, fieldPermutation,
                    GlobalConfig.DEFAULT_TREE_FILL_FACTOR, false, numElementsHint, false, idfh);
        } else if (indexOp == IndexOperation.UPSERT) {
            op = new LSMSecondaryUpsertOperatorDescriptor(spec, inputRecordDesc, fieldPermutation, idfh,
                    filterFactory, modificationCallbackFactory, prevFieldPermutation);
        } else {
            op = new LSMTreeInsertDeleteOperatorDescriptor(spec, inputRecordDesc, fieldPermutation, indexOp, idfh,
                    filterFactory, false, modificationCallbackFactory);
        }
        return new Pair<>(op, splitsAndConstraint.second);
    } catch (Exception e) {
        throw new AlgebricksException(e);
    }
}
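As a worked example of the permutation layout built above (hypothetical column positions, not taken from the source): two secondary keys found at input columns 3 and 4, one primary key at column 0, and a filter field at column 5 would yield:

// Hypothetical illustration of the [secondary keys, primary keys, filter] layout.
int[] fieldPermutation = { 3, 4, 0, 5 };
// The single primary key lands at permutation index 2, which is the position
// handed to the modification callback.
int[] modificationCallbackPrimaryKeyFields = { 2 };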