use of org.apache.asterix.om.types.ARecordType in project asterixdb by apache.
the class DatasetUtil method createPrimaryIndexUpsertOp.
/**
 * Creates a primary index upsert operator for a given dataset.
 *
 * @param spec
 *            the job specification.
 * @param metadataProvider
 *            the metadata provider.
 * @param dataset
 *            the dataset to upsert.
 * @param inputRecordDesc
 *            the record descriptor for an input tuple.
 * @param fieldPermutation
 *            the field permutation according to the input.
 * @param missingWriterFactory
 *            the factory for customizing missing value serialization.
 * @return a primary index upsert operator and its location constraints.
 * @throws AlgebricksException
 */
public static Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> createPrimaryIndexUpsertOp(
        JobSpecification spec, MetadataProvider metadataProvider, Dataset dataset,
        RecordDescriptor inputRecordDesc, int[] fieldPermutation,
        IMissingWriterFactory missingWriterFactory) throws AlgebricksException {
    int numKeys = dataset.getPrimaryKeys().size();
    int numFilterFields = DatasetUtil.getFilterField(dataset) == null ? 0 : 1;
    ARecordType itemType = (ARecordType) metadataProvider.findType(dataset);
    ARecordType metaItemType = (ARecordType) metadataProvider.findMetaType(dataset);
    try {
        // The primary index carries the same name as the dataset itself.
        Index primaryIndex = metadataProvider.getIndex(dataset.getDataverseName(),
                dataset.getDatasetName(), dataset.getDatasetName());
        Pair<IFileSplitProvider, AlgebricksPartitionConstraint> splitsAndConstraint =
                metadataProvider.getSplitProviderAndConstraints(dataset);
        // Prepare callbacks.
        JobId jobId = ((JobEventListenerFactory) spec.getJobletEventListenerFactory()).getJobId();
        int[] primaryKeyFields = new int[numKeys];
        for (int i = 0; i < numKeys; i++) {
            primaryKeyFields[i] = i;
        }
        boolean hasSecondaries = metadataProvider
                .getDatasetIndexes(dataset.getDataverseName(), dataset.getDatasetName()).size() > 1;
        IStorageComponentProvider storageComponentProvider = metadataProvider.getStorageComponentProvider();
        IModificationOperationCallbackFactory modificationCallbackFactory = dataset.getModificationCallbackFactory(
                storageComponentProvider, primaryIndex, jobId, IndexOperation.UPSERT, primaryKeyFields);
        ISearchOperationCallbackFactory searchCallbackFactory = dataset.getSearchCallbackFactory(
                storageComponentProvider, primaryIndex, jobId, IndexOperation.UPSERT, primaryKeyFields);
        IIndexDataflowHelperFactory idfh = new IndexDataflowHelperFactory(
                storageComponentProvider.getStorageManager(), splitsAndConstraint.first);
        LSMPrimaryUpsertOperatorDescriptor op;
        // Output = previous record (+ previous meta) (+ previous filter) followed by the input fields.
        ITypeTraits[] outputTypeTraits = new ITypeTraits[inputRecordDesc.getFieldCount()
                + (dataset.hasMetaPart() ? 2 : 1) + numFilterFields];
        ISerializerDeserializer<?>[] outputSerDes = new ISerializerDeserializer[inputRecordDesc.getFieldCount()
                + (dataset.hasMetaPart() ? 2 : 1) + numFilterFields];
        // Add the previous record first.
        int f = 0;
        outputSerDes[f] = FormatUtils.getDefaultFormat().getSerdeProvider().getSerializerDeserializer(itemType);
        // Also register the record's type traits (symmetric with the meta and filter fields below;
        // the original snippet omitted this assignment).
        outputTypeTraits[f] = FormatUtils.getDefaultFormat().getTypeTraitProvider().getTypeTrait(itemType);
        f++;
        // Add the previous meta second.
        if (dataset.hasMetaPart()) {
            outputSerDes[f] =
                    FormatUtils.getDefaultFormat().getSerdeProvider().getSerializerDeserializer(metaItemType);
            outputTypeTraits[f] =
                    FormatUtils.getDefaultFormat().getTypeTraitProvider().getTypeTrait(metaItemType);
            f++;
        }
        // Add the previous filter third.
        int fieldIdx = -1;
        if (numFilterFields > 0) {
            String filterField = DatasetUtil.getFilterField(dataset).get(0);
            String[] fieldNames = itemType.getFieldNames();
            int i = 0;
            for (; i < fieldNames.length; i++) {
                if (fieldNames[i].equals(filterField)) {
                    break;
                }
            }
            fieldIdx = i;
            outputTypeTraits[f] = FormatUtils.getDefaultFormat().getTypeTraitProvider()
                    .getTypeTrait(itemType.getFieldTypes()[fieldIdx]);
            outputSerDes[f] = FormatUtils.getDefaultFormat().getSerdeProvider()
                    .getSerializerDeserializer(itemType.getFieldTypes()[fieldIdx]);
            f++;
        }
        // Pass the input fields through after the previous-version fields.
        for (int j = 0; j < inputRecordDesc.getFieldCount(); j++) {
            outputTypeTraits[j + f] = inputRecordDesc.getTypeTraits()[j];
            outputSerDes[j + f] = inputRecordDesc.getFields()[j];
        }
        RecordDescriptor outputRecordDesc = new RecordDescriptor(outputSerDes, outputTypeTraits);
        op = new LSMPrimaryUpsertOperatorDescriptor(spec, outputRecordDesc, fieldPermutation, idfh,
                missingWriterFactory, modificationCallbackFactory, searchCallbackFactory,
                dataset.getFrameOpCallbackFactory(), numKeys, itemType, fieldIdx, hasSecondaries);
        return new Pair<>(op, splitsAndConstraint.second);
    } catch (MetadataException me) {
        throw new AlgebricksException(me);
    }
}
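A hedged caller sketch, not from the source: it assumes spec, metadataProvider, dataset, inputRecordDesc, fieldPermutation, and missingWriterFactory are already in scope, and wires the returned pair into the job the same way createDatasetJobSpec does below.

// Create the upsert operator and pin it to the dataset's partitions.
Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> upsertOpAndConstraint =
        DatasetUtil.createPrimaryIndexUpsertOp(spec, metadataProvider, dataset, inputRecordDesc,
                fieldPermutation, missingWriterFactory);
AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec,
        upsertOpAndConstraint.first, upsertOpAndConstraint.second);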
use of org.apache.asterix.om.types.ARecordType in project asterixdb by apache.
the class InvertedIndexResourceFactoryProvider method getTokenComparatorFactories.
private static IBinaryComparatorFactory[] getTokenComparatorFactories(Dataset dataset, Index index,
        ARecordType recordType, ARecordType metaType) throws AlgebricksException {
    int numPrimaryKeys = dataset.getPrimaryKeys().size();
    int numSecondaryKeys = index.getKeyFieldNames().size();
    IndexType indexType = index.getIndexType();
    // Sanity checks.
    if (numPrimaryKeys > 1) {
        throw new CompilationException(ErrorCode.COMPILATION_ILLEGAL_INDEX_FOR_DATASET_WITH_COMPOSITE_PRIMARY_INDEX,
                indexType, RecordUtil.toFullyQualifiedName(dataset.getDataverseName(), dataset.getDatasetName()));
    }
    if (numSecondaryKeys > 1) {
        throw new CompilationException(ErrorCode.COMPILATION_ILLEGAL_INDEX_NUM_OF_FIELD, numSecondaryKeys,
                indexType, 1);
    }
    boolean isPartitioned = indexType == IndexType.LENGTH_PARTITIONED_WORD_INVIX
            || indexType == IndexType.LENGTH_PARTITIONED_NGRAM_INVIX;
    // A source indicator of 0 means the key comes from the record part; otherwise from the meta part.
    List<Integer> keySourceIndicators = index.getKeyFieldSourceIndicators();
    ARecordType sourceType;
    if (keySourceIndicators == null || keySourceIndicators.get(0) == 0) {
        sourceType = recordType;
    } else {
        sourceType = metaType;
    }
    Pair<IAType, Boolean> keyTypePair = Index.getNonNullableOpenFieldType(index.getKeyFieldTypes().get(0),
            index.getKeyFieldNames().get(0), sourceType);
    IAType secondaryKeyType = keyTypePair.first;
    // Comparators and type traits for tokens.
    int numTokenFields = (!isPartitioned) ? numSecondaryKeys : numSecondaryKeys + 1;
    IBinaryComparatorFactory[] tokenComparatorFactories = new IBinaryComparatorFactory[numTokenFields];
    tokenComparatorFactories[0] = NonTaggedFormatUtil.getTokenBinaryComparatorFactory(secondaryKeyType);
    if (isPartitioned) {
        // The partitioning field is hardcoded to be a short *without* an Asterix type tag.
        tokenComparatorFactories[1] = PointableBinaryComparatorFactory.of(ShortPointable.FACTORY);
    }
    return tokenComparatorFactories;
}
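Length-partitioned indexes store tokens as [token, length] pairs, so every token-level array gains one extra slot. A hedged sketch of the companion type-traits array for the same layout, reusing numTokenFields, secondaryKeyType, and isPartitioned from the method above; NonTaggedFormatUtil.getTokenTypeTrait and ShortPointable.TYPE_TRAITS are assumptions about the surrounding API.

// Type traits mirror the comparator layout above.
ITypeTraits[] tokenTypeTraits = new ITypeTraits[numTokenFields];
tokenTypeTraits[0] = NonTaggedFormatUtil.getTokenTypeTrait(secondaryKeyType);
if (isPartitioned) {
    // The token-length partitioning field is a plain short, as above.
    tokenTypeTraits[1] = ShortPointable.TYPE_TRAITS;
}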
use of org.apache.asterix.om.types.ARecordType in project asterixdb by apache.
the class FeedMetadataUtil method getPrimaryFeedFactoryAndOutput.
@SuppressWarnings("rawtypes")
public static Triple<IAdapterFactory, RecordDescriptor, AdapterType> getPrimaryFeedFactoryAndOutput(Feed feed,
        FeedPolicyAccessor policyAccessor, MetadataTransactionContext mdTxnCtx, ICcApplicationContext appCtx)
        throws AlgebricksException {
    // This method needs to be revisited.
    String adapterName = null;
    DatasourceAdapter adapterEntity = null;
    String adapterFactoryClassname = null;
    IAdapterFactory adapterFactory = null;
    ARecordType adapterOutputType = null;
    ARecordType metaType = null;
    Triple<IAdapterFactory, RecordDescriptor, IDataSourceAdapter.AdapterType> feedProps = null;
    IDataSourceAdapter.AdapterType adapterType = null;
    try {
        adapterName = feed.getAdapterName();
        Map<String, String> configuration = feed.getAdapterConfiguration();
        configuration.putAll(policyAccessor.getFeedPolicy());
        adapterOutputType = getOutputType(feed, configuration, ExternalDataConstants.KEY_TYPE_NAME);
        metaType = getOutputType(feed, configuration, ExternalDataConstants.KEY_META_TYPE_NAME);
        ExternalDataUtils.prepareFeed(configuration, feed.getDataverseName(), feed.getFeedName());
        // Get the adapter from the metadata dataset <Metadata dataverse>.
        adapterEntity = MetadataManager.INSTANCE.getAdapter(mdTxnCtx,
                MetadataConstants.METADATA_DATAVERSE_NAME, adapterName);
        // Fall back to the metadata dataset <The feed dataverse>.
        if (adapterEntity == null) {
            adapterEntity = MetadataManager.INSTANCE.getAdapter(mdTxnCtx, feed.getDataverseName(), adapterName);
        }
        if (adapterEntity != null) {
            adapterType = adapterEntity.getType();
            adapterFactoryClassname = adapterEntity.getClassname();
            switch (adapterType) {
                case INTERNAL:
                    adapterFactory = (IAdapterFactory) Class.forName(adapterFactoryClassname).newInstance();
                    break;
                case EXTERNAL:
                    // An external adapter name has the form "<libraryName>#<adapterName>".
                    String[] anameComponents = adapterName.split("#");
                    String libraryName = anameComponents[0];
                    ClassLoader cl = appCtx.getLibraryManager().getLibraryClassLoader(feed.getDataverseName(),
                            libraryName);
                    adapterFactory = (IAdapterFactory) cl.loadClass(adapterFactoryClassname).newInstance();
                    break;
                default:
                    throw new AsterixException("Unknown Adapter type " + adapterType);
            }
            adapterFactory.setOutputType(adapterOutputType);
            adapterFactory.setMetaType(metaType);
            adapterFactory.configure(appCtx.getServiceContext(), configuration);
        } else {
            adapterFactory = AdapterFactoryProvider.getAdapterFactory(appCtx.getServiceContext(), adapterName,
                    configuration, adapterOutputType, metaType);
            adapterType = IDataSourceAdapter.AdapterType.INTERNAL;
        }
        if (metaType == null) {
            metaType = getOutputType(feed, configuration, ExternalDataConstants.KEY_META_TYPE_NAME);
        }
        if (adapterOutputType == null) {
            if (!configuration.containsKey(ExternalDataConstants.KEY_TYPE_NAME)) {
                throw new AsterixException("Unspecified feed output data type");
            }
            adapterOutputType = getOutputType(feed, configuration, ExternalDataConstants.KEY_TYPE_NAME);
        }
        // Output fields: the record, an optional meta record, and one field per PK for change feeds.
        int numOfOutputs = 1;
        if (metaType != null) {
            numOfOutputs++;
        }
        if (ExternalDataUtils.isChangeFeed(configuration)) {
            // Get the number of PKs.
            numOfOutputs += ExternalDataUtils.getNumberOfKeys(configuration);
        }
        ISerializerDeserializer[] serdes = new ISerializerDeserializer[numOfOutputs];
        int i = 0;
        serdes[i++] = SerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(adapterOutputType);
        if (metaType != null) {
            serdes[i++] = SerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(metaType);
        }
        if (ExternalDataUtils.isChangeFeed(configuration)) {
            getSerdesForPKs(serdes, configuration, metaType, adapterOutputType, i);
        }
        feedProps = new Triple<>(adapterFactory, new RecordDescriptor(serdes), adapterType);
    } catch (Exception e) {
        throw new AlgebricksException("unable to create adapter", e);
    }
    return feedProps;
}
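A hedged caller sketch showing how the returned triple is typically unpacked; the feed, policyAccessor, mdTxnCtx, and appCtx values are assumed to be in scope.

Triple<IAdapterFactory, RecordDescriptor, IDataSourceAdapter.AdapterType> feedProps =
        FeedMetadataUtil.getPrimaryFeedFactoryAndOutput(feed, policyAccessor, mdTxnCtx, appCtx);
IAdapterFactory adapterFactory = feedProps.first;   // configured and ready to create adapter instances
RecordDescriptor feedOutputDesc = feedProps.second; // record (+ meta, + PK fields for change feeds)
IDataSourceAdapter.AdapterType adapterType = feedProps.third;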
use of org.apache.asterix.om.types.ARecordType in project asterixdb by apache.
the class SecondaryBTreeOperationsHelper method setSecondaryRecDescAndComparators.
@Override
@SuppressWarnings("rawtypes")
protected void setSecondaryRecDescAndComparators() throws AlgebricksException {
    int numSecondaryKeys = index.getKeyFieldNames().size();
    secondaryFieldAccessEvalFactories = new IScalarEvaluatorFactory[numSecondaryKeys + numFilterFields];
    secondaryComparatorFactories = new IBinaryComparatorFactory[numSecondaryKeys + numPrimaryKeys];
    secondaryBloomFilterKeyFields = new int[numSecondaryKeys];
    ISerializerDeserializer[] secondaryRecFields =
            new ISerializerDeserializer[numPrimaryKeys + numSecondaryKeys + numFilterFields];
    ISerializerDeserializer[] enforcedRecFields =
            new ISerializerDeserializer[1 + numPrimaryKeys + (dataset.hasMetaPart() ? 1 : 0) + numFilterFields];
    ITypeTraits[] enforcedTypeTraits =
            new ITypeTraits[1 + numPrimaryKeys + (dataset.hasMetaPart() ? 1 : 0) + numFilterFields];
    secondaryTypeTraits = new ITypeTraits[numSecondaryKeys + numPrimaryKeys];
    ISerializerDeserializerProvider serdeProvider = metadataProvider.getFormat().getSerdeProvider();
    ITypeTraitProvider typeTraitProvider = metadataProvider.getFormat().getTypeTraitProvider();
    IBinaryComparatorFactoryProvider comparatorFactoryProvider =
            metadataProvider.getFormat().getBinaryComparatorFactoryProvider();
    // The record column is 0 for external datasets, numPrimaryKeys for internal ones.
    int recordColumn = dataset.getDatasetType() == DatasetType.INTERNAL ? numPrimaryKeys : 0;
    boolean isEnforcingKeyTypes = index.isEnforcingKeyFileds();
    for (int i = 0; i < numSecondaryKeys; i++) {
        ARecordType sourceType;
        int sourceColumn;
        List<Integer> keySourceIndicators = index.getKeyFieldSourceIndicators();
        if (keySourceIndicators == null || keySourceIndicators.get(i) == 0) {
            sourceType = itemType;
            sourceColumn = recordColumn;
        } else {
            sourceType = metaType;
            sourceColumn = recordColumn + 1;
        }
        secondaryFieldAccessEvalFactories[i] = metadataProvider.getFormat().getFieldAccessEvaluatorFactory(
                isEnforcingKeyTypes ? enforcedItemType : sourceType, index.getKeyFieldNames().get(i), sourceColumn);
        Pair<IAType, Boolean> keyTypePair = Index.getNonNullableOpenFieldType(index.getKeyFieldTypes().get(i),
                index.getKeyFieldNames().get(i), sourceType);
        IAType keyType = keyTypePair.first;
        anySecondaryKeyIsNullable = anySecondaryKeyIsNullable || keyTypePair.second;
        ISerializerDeserializer keySerde = serdeProvider.getSerializerDeserializer(keyType);
        secondaryRecFields[i] = keySerde;
        secondaryComparatorFactories[i] = comparatorFactoryProvider.getBinaryComparatorFactory(keyType, true);
        secondaryTypeTraits[i] = typeTraitProvider.getTypeTrait(keyType);
        secondaryBloomFilterKeyFields[i] = i;
    }
    if (dataset.getDatasetType() == DatasetType.INTERNAL) {
        // Add serializers and comparators for primary index fields.
        for (int i = 0; i < numPrimaryKeys; i++) {
            secondaryRecFields[numSecondaryKeys + i] = primaryRecDesc.getFields()[i];
            enforcedRecFields[i] = primaryRecDesc.getFields()[i];
            secondaryTypeTraits[numSecondaryKeys + i] = primaryRecDesc.getTypeTraits()[i];
            enforcedTypeTraits[i] = primaryRecDesc.getTypeTraits()[i];
            secondaryComparatorFactories[numSecondaryKeys + i] = primaryComparatorFactories[i];
        }
    } else {
        // Add serializers and comparators for RID fields.
        for (int i = 0; i < numPrimaryKeys; i++) {
            secondaryRecFields[numSecondaryKeys + i] = IndexingConstants.getSerializerDeserializer(i);
            enforcedRecFields[i] = IndexingConstants.getSerializerDeserializer(i);
            secondaryTypeTraits[numSecondaryKeys + i] = IndexingConstants.getTypeTraits(i);
            enforcedTypeTraits[i] = IndexingConstants.getTypeTraits(i);
            secondaryComparatorFactories[numSecondaryKeys + i] = IndexingConstants.getComparatorFactory(i);
        }
    }
    enforcedRecFields[numPrimaryKeys] = serdeProvider.getSerializerDeserializer(itemType);
    enforcedTypeTraits[numPrimaryKeys] = typeTraitProvider.getTypeTrait(itemType);
    if (dataset.hasMetaPart()) {
        enforcedRecFields[numPrimaryKeys + 1] = serdeProvider.getSerializerDeserializer(metaType);
        enforcedTypeTraits[numPrimaryKeys + 1] = typeTraitProvider.getTypeTrait(metaType);
    }
    if (numFilterFields > 0) {
        secondaryFieldAccessEvalFactories[numSecondaryKeys] = metadataProvider.getFormat()
                .getFieldAccessEvaluatorFactory(itemType, filterFieldName, numPrimaryKeys);
        Pair<IAType, Boolean> keyTypePair = Index.getNonNullableKeyFieldType(filterFieldName, itemType);
        IAType type = keyTypePair.first;
        ISerializerDeserializer serde = serdeProvider.getSerializerDeserializer(type);
        secondaryRecFields[numPrimaryKeys + numSecondaryKeys] = serde;
        enforcedRecFields[numPrimaryKeys + 1 + (dataset.hasMetaPart() ? 1 : 0)] = serde;
        enforcedTypeTraits[numPrimaryKeys + 1 + (dataset.hasMetaPart() ? 1 : 0)] =
                typeTraitProvider.getTypeTrait(type);
    }
    secondaryRecDesc = new RecordDescriptor(secondaryRecFields, secondaryTypeTraits);
    enforcedRecDesc = new RecordDescriptor(enforcedRecFields, enforcedTypeTraits);
}
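The resulting secondaryRecDesc groups fields as secondary keys, then primary keys (or RIDs for external datasets), then an optional filter field. An illustrative sketch of the offsets implied by the loops above (derived from the code, not part of the class):

// Offsets into secondaryRecFields / secondaryRecDesc.
int secondaryKeysStart = 0;                              // [0, numSecondaryKeys)
int primaryKeysStart = numSecondaryKeys;                 // [numSecondaryKeys, numSecondaryKeys + numPrimaryKeys)
int filterFieldPos = numSecondaryKeys + numPrimaryKeys;  // present only when numFilterFields > 0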
use of org.apache.asterix.om.types.ARecordType in project asterixdb by apache.
the class DatasetUtil method createDatasetJobSpec.
public static JobSpecification createDatasetJobSpec(Dataset dataset, MetadataProvider metadataProvider)
        throws AlgebricksException {
    Index index = IndexUtil.getPrimaryIndex(dataset);
    ARecordType itemType = (ARecordType) metadataProvider.findType(dataset);
    // Get the meta item type.
    ARecordType metaItemType = null;
    if (dataset.hasMetaPart()) {
        metaItemType = (ARecordType) metadataProvider.findMetaType(dataset);
    }
    JobSpecification spec = RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
    Pair<IFileSplitProvider, AlgebricksPartitionConstraint> splitsAndConstraint =
            metadataProvider.getSplitProviderAndConstraints(dataset);
    FileSplit[] fs = splitsAndConstraint.first.getFileSplits();
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < fs.length; i++) {
        sb.append(fs[i]).append(' ');
    }
    LOGGER.info("CREATING File Splits: " + sb.toString());
    Pair<ILSMMergePolicyFactory, Map<String, String>> compactionInfo =
            DatasetUtil.getMergePolicyFactory(dataset, metadataProvider.getMetadataTxnContext());
    // Prepare a LocalResourceMetadata which will be stored in the NC's local resource repository.
    IResourceFactory resourceFactory = dataset.getResourceFactory(metadataProvider, index, itemType, metaItemType,
            compactionInfo.first, compactionInfo.second);
    IndexBuilderFactory indexBuilderFactory = new IndexBuilderFactory(
            metadataProvider.getStorageComponentProvider().getStorageManager(), splitsAndConstraint.first,
            resourceFactory, !dataset.isTemp());
    IndexCreateOperatorDescriptor indexCreateOp = new IndexCreateOperatorDescriptor(spec, indexBuilderFactory);
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, indexCreateOp,
            splitsAndConstraint.second);
    spec.addRoot(indexCreateOp);
    return spec;
}
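A hedged usage sketch for running the returned job; hcc is an assumed IHyracksClientConnection, not shown in the source.

// Build the dataset-creation job and run it to completion. Note this JobId is
// the Hyracks job id, distinct from the transaction JobId used further above.
JobSpecification spec = DatasetUtil.createDatasetJobSpec(dataset, metadataProvider);
org.apache.hyracks.api.job.JobId jobId = hcc.startJob(spec);
hcc.waitForCompletion(jobId);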