Use of org.apache.asterix.metadata.entities.ExternalDatasetDetails in project asterixdb by apache.
The class DatasetTupleTranslator, method createDatasetFromARecord.
protected Dataset createDatasetFromARecord(ARecord datasetRecord) throws HyracksDataException {
String dataverseName = ((AString) datasetRecord.getValueByPos(MetadataRecordTypes.DATASET_ARECORD_DATAVERSENAME_FIELD_INDEX)).getStringValue();
String datasetName = ((AString) datasetRecord.getValueByPos(MetadataRecordTypes.DATASET_ARECORD_DATASETNAME_FIELD_INDEX)).getStringValue();
String typeName = ((AString) datasetRecord.getValueByPos(MetadataRecordTypes.DATASET_ARECORD_DATATYPENAME_FIELD_INDEX)).getStringValue();
String typeDataverseName = ((AString) datasetRecord.getValueByPos(MetadataRecordTypes.DATASET_ARECORD_DATATYPEDATAVERSENAME_FIELD_INDEX)).getStringValue();
DatasetType datasetType = DatasetType.valueOf(((AString) datasetRecord.getValueByPos(MetadataRecordTypes.DATASET_ARECORD_DATASETTYPE_FIELD_INDEX)).getStringValue());
IDatasetDetails datasetDetails = null;
int datasetId = ((AInt32) datasetRecord.getValueByPos(MetadataRecordTypes.DATASET_ARECORD_DATASETID_FIELD_INDEX)).getIntegerValue();
int pendingOp = ((AInt32) datasetRecord.getValueByPos(MetadataRecordTypes.DATASET_ARECORD_PENDINGOP_FIELD_INDEX)).getIntegerValue();
String nodeGroupName = ((AString) datasetRecord.getValueByPos(MetadataRecordTypes.DATASET_ARECORD_GROUPNAME_FIELD_INDEX)).getStringValue();
String compactionPolicy = ((AString) datasetRecord.getValueByPos(MetadataRecordTypes.DATASET_ARECORD_COMPACTION_POLICY_FIELD_INDEX)).getStringValue();
IACursor cursor = ((AOrderedList) datasetRecord.getValueByPos(MetadataRecordTypes.DATASET_ARECORD_COMPACTION_POLICY_PROPERTIES_FIELD_INDEX)).getCursor();
Map<String, String> compactionPolicyProperties = new LinkedHashMap<>();
String key;
String value;
while (cursor.next()) {
ARecord field = (ARecord) cursor.get();
key = ((AString) field.getValueByPos(MetadataRecordTypes.PROPERTIES_NAME_FIELD_INDEX)).getStringValue();
value = ((AString) field.getValueByPos(MetadataRecordTypes.PROPERTIES_VALUE_FIELD_INDEX)).getStringValue();
compactionPolicyProperties.put(key, value);
}
switch(datasetType) {
case INTERNAL:
{
ARecord datasetDetailsRecord = (ARecord) datasetRecord.getValueByPos(MetadataRecordTypes.DATASET_ARECORD_INTERNALDETAILS_FIELD_INDEX);
FileStructure fileStructure = FileStructure.valueOf(((AString) datasetDetailsRecord.getValueByPos(MetadataRecordTypes.INTERNAL_DETAILS_ARECORD_FILESTRUCTURE_FIELD_INDEX)).getStringValue());
PartitioningStrategy partitioningStrategy = PartitioningStrategy.valueOf(((AString) datasetDetailsRecord.getValueByPos(MetadataRecordTypes.INTERNAL_DETAILS_ARECORD_PARTITIONSTRATEGY_FIELD_INDEX)).getStringValue());
cursor = ((AOrderedList) datasetDetailsRecord.getValueByPos(MetadataRecordTypes.INTERNAL_DETAILS_ARECORD_PARTITIONKEY_FIELD_INDEX)).getCursor();
List<List<String>> partitioningKey = new ArrayList<>();
List<IAType> partitioningKeyType = new ArrayList<>();
AOrderedList fieldNameList;
while (cursor.next()) {
fieldNameList = (AOrderedList) cursor.get();
IACursor nestedFieldNameCursor = (fieldNameList.getCursor());
List<String> nestedFieldName = new ArrayList<>();
while (nestedFieldNameCursor.next()) {
nestedFieldName.add(((AString) nestedFieldNameCursor.get()).getStringValue());
}
partitioningKey.add(nestedFieldName);
partitioningKeyType.add(BuiltinType.ASTRING);
}
boolean autogenerated = ((ABoolean) datasetDetailsRecord.getValueByPos(MetadataRecordTypes.INTERNAL_DETAILS_ARECORD_AUTOGENERATED_FIELD_INDEX)).getBoolean();
// Check if there is a filter field.
List<String> filterField = null;
int filterFieldPos = datasetDetailsRecord.getType().getFieldIndex(InternalDatasetDetails.FILTER_FIELD_NAME);
if (filterFieldPos >= 0) {
filterField = new ArrayList<>();
cursor = ((AOrderedList) datasetDetailsRecord.getValueByPos(filterFieldPos)).getCursor();
while (cursor.next()) {
filterField.add(((AString) cursor.get()).getStringValue());
}
}
// Read a field-source-indicator field.
List<Integer> keyFieldSourceIndicator = new ArrayList<>();
int keyFieldSourceIndicatorIndex = datasetDetailsRecord.getType().getFieldIndex(InternalDatasetDetails.KEY_FILD_SOURCE_INDICATOR_FIELD_NAME);
if (keyFieldSourceIndicatorIndex >= 0) {
cursor = ((AOrderedList) datasetDetailsRecord.getValueByPos(keyFieldSourceIndicatorIndex)).getCursor();
while (cursor.next()) {
keyFieldSourceIndicator.add((int) ((AInt8) cursor.get()).getByteValue());
}
} else {
for (int index = 0; index < partitioningKey.size(); ++index) {
keyFieldSourceIndicator.add(0);
}
}
// A temporary dataset only lives in the compiler, therefore the temp field is false.
// DatasetTupleTranslator always reads from the metadata node, so the temp flag should always be false.
datasetDetails = new InternalDatasetDetails(fileStructure, partitioningStrategy, partitioningKey, partitioningKey, keyFieldSourceIndicator, partitioningKeyType, autogenerated, filterField, false);
break;
}
case EXTERNAL:
ARecord datasetDetailsRecord = (ARecord) datasetRecord.getValueByPos(MetadataRecordTypes.DATASET_ARECORD_EXTERNALDETAILS_FIELD_INDEX);
String adapter = ((AString) datasetDetailsRecord.getValueByPos(MetadataRecordTypes.EXTERNAL_DETAILS_ARECORD_DATASOURCE_ADAPTER_FIELD_INDEX)).getStringValue();
cursor = ((AOrderedList) datasetDetailsRecord.getValueByPos(MetadataRecordTypes.EXTERNAL_DETAILS_ARECORD_PROPERTIES_FIELD_INDEX)).getCursor();
Map<String, String> properties = new HashMap<>();
while (cursor.next()) {
ARecord field = (ARecord) cursor.get();
key = ((AString) field.getValueByPos(MetadataRecordTypes.PROPERTIES_NAME_FIELD_INDEX)).getStringValue();
value = ((AString) field.getValueByPos(MetadataRecordTypes.PROPERTIES_VALUE_FIELD_INDEX)).getStringValue();
properties.put(key, value);
}
// Timestamp
Date timestamp = new Date((((ADateTime) datasetDetailsRecord.getValueByPos(MetadataRecordTypes.EXTERNAL_DETAILS_ARECORD_LAST_REFRESH_TIME_FIELD_INDEX))).getChrononTime());
// State
TransactionState state = TransactionState.values()[((AInt32) datasetDetailsRecord.getValueByPos(MetadataRecordTypes.EXTERNAL_DETAILS_ARECORD_TRANSACTION_STATE_FIELD_INDEX)).getIntegerValue()];
datasetDetails = new ExternalDatasetDetails(adapter, properties, timestamp, state);
}
Map<String, String> hints = getDatasetHints(datasetRecord);
String metaTypeDataverseName = null;
String metaTypeName = null;
int metaTypeDataverseNameIndex = datasetRecord.getType().getFieldIndex(MetadataRecordTypes.FIELD_NAME_METADATA_DATAVERSE);
if (metaTypeDataverseNameIndex >= 0) {
metaTypeDataverseName = ((AString) datasetRecord.getValueByPos(metaTypeDataverseNameIndex)).getStringValue();
int metaTypeNameIndex = datasetRecord.getType().getFieldIndex(MetadataRecordTypes.FIELD_NAME_METATYPE_NAME);
metaTypeName = ((AString) datasetRecord.getValueByPos(metaTypeNameIndex)).getStringValue();
}
// Read the rebalance count if there is one.
int rebalanceCountIndex = datasetRecord.getType().getFieldIndex(REBALANCE_ID_FIELD_NAME);
long rebalanceCount = rebalanceCountIndex >= 0 ? ((AInt64) datasetRecord.getValueByPos(rebalanceCountIndex)).getLongValue() : 0;
return new Dataset(dataverseName, datasetName, typeDataverseName, typeName, metaTypeDataverseName, metaTypeName, nodeGroupName, compactionPolicy, compactionPolicyProperties, datasetDetails, hints, datasetType, datasetId, pendingOp, rebalanceCount);
}
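For reference, the EXTERNAL branch above boils down to packaging the adapter name, its configuration map, the last refresh time, and the transaction state into an ExternalDatasetDetails. A minimal, hedged sketch of that construction (the adapter name and the property keys/values are illustrative, not taken from the snippet above):
Map<String, String> adapterProperties = new HashMap<>();
// Hypothetical adapter configuration; the real keys depend on the adapter being used.
adapterProperties.put("path", "hdfs://namenode:9000/data/sales");
adapterProperties.put("format", "delimited-text");
IDatasetDetails externalDetails = new ExternalDatasetDetails("hdfs", adapterProperties, new Date(), TransactionState.COMMIT);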
Use of org.apache.asterix.metadata.entities.ExternalDatasetDetails in project asterixdb by apache.
The class DatasetDataSource, method buildDatasourceScanRuntime.
@Override
public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> buildDatasourceScanRuntime(MetadataProvider metadataProvider, IDataSource<DataSourceId> dataSource, List<LogicalVariable> scanVariables, List<LogicalVariable> projectVariables, boolean projectPushed, List<LogicalVariable> minFilterVars, List<LogicalVariable> maxFilterVars, IOperatorSchema opSchema, IVariableTypeEnvironment typeEnv, JobGenContext context, JobSpecification jobSpec, Object implConfig) throws AlgebricksException {
switch(dataset.getDatasetType()) {
case EXTERNAL:
Dataset externalDataset = ((DatasetDataSource) dataSource).getDataset();
String itemTypeName = externalDataset.getItemTypeName();
IAType itemType = MetadataManager.INSTANCE.getDatatype(metadataProvider.getMetadataTxnContext(), externalDataset.getItemTypeDataverseName(), itemTypeName).getDatatype();
ExternalDatasetDetails edd = (ExternalDatasetDetails) externalDataset.getDatasetDetails();
IAdapterFactory adapterFactory = metadataProvider.getConfiguredAdapterFactory(externalDataset, edd.getAdapter(), edd.getProperties(), (ARecordType) itemType, null);
return metadataProvider.buildExternalDatasetDataScannerRuntime(jobSpec, itemType, adapterFactory, NonTaggedDataFormat.INSTANCE);
case INTERNAL:
DataSourceId id = getId();
String dataverseName = id.getDataverseName();
String datasetName = id.getDatasourceName();
Index primaryIndex = MetadataManager.INSTANCE.getIndex(metadataProvider.getMetadataTxnContext(), dataverseName, datasetName, datasetName);
int[] minFilterFieldIndexes = createFilterIndexes(minFilterVars, opSchema);
int[] maxFilterFieldIndexes = createFilterIndexes(maxFilterVars, opSchema);
return metadataProvider.buildBtreeRuntime(jobSpec, opSchema, typeEnv, context, true, false, ((DatasetDataSource) dataSource).getDataset(), primaryIndex.getIndexName(), null, null, true, true, minFilterFieldIndexes, maxFilterFieldIndexes);
default:
throw new AlgebricksException("Unknown datasource type");
}
}
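In the EXTERNAL case above, the cast to ExternalDatasetDetails is what exposes the adapter name and its configuration to the scan builder. A hedged sketch of just that step, assuming the dataset really is external (otherwise the cast fails):
ExternalDatasetDetails edd = (ExternalDatasetDetails) externalDataset.getDatasetDetails();
String adapterName = edd.getAdapter(); // e.g. "hdfs" or "localfs" (illustrative values)
Map<String, String> adapterConfiguration = edd.getProperties(); // adapter-specific key/value pairs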
Use of org.apache.asterix.metadata.entities.ExternalDatasetDetails in project asterixdb by apache.
The class QueryTranslator, method handleCreateIndexStatement.
protected void handleCreateIndexStatement(MetadataProvider metadataProvider, Statement stmt, IHyracksClientConnection hcc) throws Exception {
ProgressState progress = ProgressState.NO_PROGRESS;
CreateIndexStatement stmtCreateIndex = (CreateIndexStatement) stmt;
String dataverseName = getActiveDataverse(stmtCreateIndex.getDataverseName());
String datasetName = stmtCreateIndex.getDatasetName().getValue();
List<Integer> keySourceIndicators = stmtCreateIndex.getFieldSourceIndicators();
MetadataTransactionContext mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
boolean bActiveTxn = true;
metadataProvider.setMetadataTxnContext(mdTxnCtx);
MetadataLockManager.INSTANCE.createIndexBegin(metadataProvider.getLocks(), dataverseName, dataverseName + "." + datasetName);
String indexName = null;
JobSpecification spec = null;
Dataset ds = null;
// For external datasets
List<ExternalFile> externalFilesSnapshot = null;
boolean firstExternalDatasetIndex = false;
boolean filesIndexReplicated = false;
Index filesIndex = null;
boolean datasetLocked = false;
Index index = null;
try {
ds = metadataProvider.findDataset(dataverseName, datasetName);
if (ds == null) {
throw new AlgebricksException("There is no dataset with this name " + datasetName + " in dataverse " + dataverseName);
}
indexName = stmtCreateIndex.getIndexName().getValue();
index = MetadataManager.INSTANCE.getIndex(metadataProvider.getMetadataTxnContext(), dataverseName, datasetName, indexName);
Datatype dt = MetadataManager.INSTANCE.getDatatype(metadataProvider.getMetadataTxnContext(), ds.getItemTypeDataverseName(), ds.getItemTypeName());
ARecordType aRecordType = (ARecordType) dt.getDatatype();
ARecordType metaRecordType = null;
if (ds.hasMetaPart()) {
Datatype metaDt = MetadataManager.INSTANCE.getDatatype(metadataProvider.getMetadataTxnContext(), ds.getMetaItemTypeDataverseName(), ds.getMetaItemTypeName());
metaRecordType = (ARecordType) metaDt.getDatatype();
}
List<List<String>> indexFields = new ArrayList<>();
List<IAType> indexFieldTypes = new ArrayList<>();
int keyIndex = 0;
for (Pair<List<String>, TypeExpression> fieldExpr : stmtCreateIndex.getFieldExprs()) {
IAType fieldType = null;
ARecordType subType = KeyFieldTypeUtil.chooseSource(keySourceIndicators, keyIndex, aRecordType, metaRecordType);
boolean isOpen = subType.isOpen();
int i = 0;
if (fieldExpr.first.size() > 1 && !isOpen) {
while (i < fieldExpr.first.size() - 1 && !isOpen) {
subType = (ARecordType) subType.getFieldType(fieldExpr.first.get(i));
i++;
isOpen = subType.isOpen();
}
}
if (fieldExpr.second == null) {
fieldType = subType.getSubFieldType(fieldExpr.first.subList(i, fieldExpr.first.size()));
} else {
if (!stmtCreateIndex.isEnforced()) {
throw new AlgebricksException("Cannot create typed index on \"" + fieldExpr.first + "\" field without enforcing it's type");
}
if (!isOpen) {
throw new AlgebricksException("Typed index on \"" + fieldExpr.first + "\" field could be created only for open datatype");
}
if (stmtCreateIndex.hasMetaField()) {
throw new AlgebricksException("Typed open index can only be created on the record part");
}
Map<TypeSignature, IAType> typeMap = TypeTranslator.computeTypes(mdTxnCtx, fieldExpr.second, indexName, dataverseName);
TypeSignature typeSignature = new TypeSignature(dataverseName, indexName);
fieldType = typeMap.get(typeSignature);
}
if (fieldType == null) {
throw new AlgebricksException("Unknown type " + (fieldExpr.second == null ? fieldExpr.first : fieldExpr.second));
}
indexFields.add(fieldExpr.first);
indexFieldTypes.add(fieldType);
++keyIndex;
}
ValidateUtil.validateKeyFields(aRecordType, metaRecordType, indexFields, keySourceIndicators, indexFieldTypes, stmtCreateIndex.getIndexType());
if (index != null) {
if (stmtCreateIndex.getIfNotExists()) {
MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
return;
} else {
throw new AlgebricksException("An index with this name " + indexName + " already exists.");
}
}
// If an inverted (keyword or ngram) index is requested on a dataset whose primary key contains a variable-length field, report an error message and stop.
if (stmtCreateIndex.getIndexType() == IndexType.SINGLE_PARTITION_WORD_INVIX || stmtCreateIndex.getIndexType() == IndexType.SINGLE_PARTITION_NGRAM_INVIX || stmtCreateIndex.getIndexType() == IndexType.LENGTH_PARTITIONED_WORD_INVIX || stmtCreateIndex.getIndexType() == IndexType.LENGTH_PARTITIONED_NGRAM_INVIX) {
List<List<String>> partitioningKeys = ds.getPrimaryKeys();
for (List<String> partitioningKey : partitioningKeys) {
IAType keyType = aRecordType.getSubFieldType(partitioningKey);
ITypeTraits typeTrait = TypeTraitProvider.INSTANCE.getTypeTrait(keyType);
// If it is not a fixed length
if (typeTrait.getFixedLength() < 0) {
throw new AlgebricksException("The keyword or ngram index -" + indexName + " cannot be created on the dataset -" + datasetName + " due to its variable-length primary key field - " + partitioningKey);
}
}
}
if (ds.getDatasetType() == DatasetType.INTERNAL) {
validateIfResourceIsActiveInFeed(ds);
} else {
// Check if the dataset is indexible
if (!ExternalIndexingOperations.isIndexible((ExternalDatasetDetails) ds.getDatasetDetails())) {
throw new AlgebricksException("dataset using " + ((ExternalDatasetDetails) ds.getDatasetDetails()).getAdapter() + " Adapter can't be indexed");
}
// Check if the name of the index is valid
if (!ExternalIndexingOperations.isValidIndexName(datasetName, indexName)) {
throw new AlgebricksException("external dataset index name is invalid");
}
// Check if the files index exists
filesIndex = MetadataManager.INSTANCE.getIndex(metadataProvider.getMetadataTxnContext(), dataverseName, datasetName, IndexingConstants.getFilesIndexName(datasetName));
firstExternalDatasetIndex = filesIndex == null;
// Lock external dataset
ExternalDatasetsRegistry.INSTANCE.buildIndexBegin(ds, firstExternalDatasetIndex);
datasetLocked = true;
if (firstExternalDatasetIndex) {
// Verify that no one has created an index before we acquire the lock
filesIndex = MetadataManager.INSTANCE.getIndex(metadataProvider.getMetadataTxnContext(), dataverseName, datasetName, IndexingConstants.getFilesIndexName(datasetName));
if (filesIndex != null) {
ExternalDatasetsRegistry.INSTANCE.buildIndexEnd(ds, firstExternalDatasetIndex);
firstExternalDatasetIndex = false;
ExternalDatasetsRegistry.INSTANCE.buildIndexBegin(ds, firstExternalDatasetIndex);
}
}
if (firstExternalDatasetIndex) {
// Get snapshot from External File System
externalFilesSnapshot = ExternalIndexingOperations.getSnapshotFromExternalFileSystem(ds);
// Add an entry for the files index
filesIndex = new Index(dataverseName, datasetName, IndexingConstants.getFilesIndexName(datasetName), IndexType.BTREE, ExternalIndexingOperations.FILE_INDEX_FIELD_NAMES, null, ExternalIndexingOperations.FILE_INDEX_FIELD_TYPES, false, false, MetadataUtil.PENDING_ADD_OP);
MetadataManager.INSTANCE.addIndex(metadataProvider.getMetadataTxnContext(), filesIndex);
// Add files to the external files index
for (ExternalFile file : externalFilesSnapshot) {
MetadataManager.INSTANCE.addExternalFile(mdTxnCtx, file);
}
// This is the first index for the external dataset, so replicate the files index
spec = ExternalIndexingOperations.buildFilesIndexCreateJobSpec(ds, externalFilesSnapshot, metadataProvider);
if (spec == null) {
throw new CompilationException("Failed to create job spec for replicating Files Index For external dataset");
}
filesIndexReplicated = true;
JobUtils.runJob(hcc, spec, true);
}
}
// check whether there exists another enforced index on the same field
if (stmtCreateIndex.isEnforced()) {
List<Index> indexes = MetadataManager.INSTANCE.getDatasetIndexes(metadataProvider.getMetadataTxnContext(), dataverseName, datasetName);
for (Index existingIndex : indexes) {
if (existingIndex.getKeyFieldNames().equals(indexFields) && !existingIndex.getKeyFieldTypes().equals(indexFieldTypes) && existingIndex.isEnforcingKeyFileds()) {
throw new CompilationException("Cannot create index " + indexName + " , enforced index " + existingIndex.getIndexName() + " on field \"" + StringUtils.join(indexFields, ',') + "\" is already defined with type \"" + existingIndex.getKeyFieldTypes() + "\"");
}
}
}
// #. add a new index with PendingAddOp
index = new Index(dataverseName, datasetName, indexName, stmtCreateIndex.getIndexType(), indexFields, keySourceIndicators, indexFieldTypes, stmtCreateIndex.getGramLength(), stmtCreateIndex.isEnforced(), false, MetadataUtil.PENDING_ADD_OP);
MetadataManager.INSTANCE.addIndex(metadataProvider.getMetadataTxnContext(), index);
// #. prepare to create the index artifact in NC.
spec = IndexUtil.buildSecondaryIndexCreationJobSpec(ds, index, metadataProvider);
if (spec == null) {
throw new CompilationException("Failed to create job spec for creating index '" + stmtCreateIndex.getDatasetName() + "." + stmtCreateIndex.getIndexName() + "'");
}
MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
bActiveTxn = false;
progress = ProgressState.ADDED_PENDINGOP_RECORD_TO_METADATA;
// #. create the index artifact in NC.
JobUtils.runJob(hcc, spec, true);
mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
bActiveTxn = true;
metadataProvider.setMetadataTxnContext(mdTxnCtx);
// #. load data into the index in NC.
spec = IndexUtil.buildSecondaryIndexLoadingJobSpec(ds, index, metadataProvider);
MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
bActiveTxn = false;
JobUtils.runJob(hcc, spec, true);
// #. begin new metadataTxn
mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
bActiveTxn = true;
metadataProvider.setMetadataTxnContext(mdTxnCtx);
// #. add another new index with PendingNoOp after deleting the index with PendingAddOp
MetadataManager.INSTANCE.dropIndex(metadataProvider.getMetadataTxnContext(), dataverseName, datasetName, indexName);
index.setPendingOp(MetadataUtil.PENDING_NO_OP);
MetadataManager.INSTANCE.addIndex(metadataProvider.getMetadataTxnContext(), index);
// For the first external-dataset index, also replace the files index entry added with PendingAddOp by a PendingNoOp entry
if (firstExternalDatasetIndex) {
MetadataManager.INSTANCE.dropIndex(metadataProvider.getMetadataTxnContext(), dataverseName, datasetName, filesIndex.getIndexName());
filesIndex.setPendingOp(MetadataUtil.PENDING_NO_OP);
MetadataManager.INSTANCE.addIndex(metadataProvider.getMetadataTxnContext(), filesIndex);
// update transaction timestamp
((ExternalDatasetDetails) ds.getDatasetDetails()).setRefreshTimestamp(new Date());
MetadataManager.INSTANCE.updateDataset(mdTxnCtx, ds);
}
MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
} catch (Exception e) {
if (bActiveTxn) {
abort(e, e, mdTxnCtx);
}
// If the files index was replicated for an external dataset, it should be cleaned up on the NC side
if (filesIndexReplicated) {
mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
bActiveTxn = true;
try {
JobSpecification jobSpec = ExternalIndexingOperations.buildDropFilesIndexJobSpec(metadataProvider, ds);
MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
bActiveTxn = false;
JobUtils.runJob(hcc, jobSpec, true);
} catch (Exception e2) {
e.addSuppressed(e2);
if (bActiveTxn) {
abort(e, e2, mdTxnCtx);
}
}
}
if (progress == ProgressState.ADDED_PENDINGOP_RECORD_TO_METADATA) {
// #. execute compensation operations
// remove the index in NC
mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
bActiveTxn = true;
metadataProvider.setMetadataTxnContext(mdTxnCtx);
try {
JobSpecification jobSpec = IndexUtil.buildDropIndexJobSpec(index, metadataProvider, ds);
MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
bActiveTxn = false;
JobUtils.runJob(hcc, jobSpec, true);
} catch (Exception e2) {
e.addSuppressed(e2);
if (bActiveTxn) {
abort(e, e2, mdTxnCtx);
}
}
if (firstExternalDatasetIndex) {
mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
metadataProvider.setMetadataTxnContext(mdTxnCtx);
try {
// Drop External Files from metadata
MetadataManager.INSTANCE.dropDatasetExternalFiles(mdTxnCtx, ds);
MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
} catch (Exception e2) {
e.addSuppressed(e2);
abort(e, e2, mdTxnCtx);
throw new IllegalStateException("System is inconsistent state: pending files for(" + dataverseName + "." + datasetName + ") couldn't be removed from the metadata", e);
}
mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
metadataProvider.setMetadataTxnContext(mdTxnCtx);
try {
// Drop the files index from metadata
MetadataManager.INSTANCE.dropIndex(metadataProvider.getMetadataTxnContext(), dataverseName, datasetName, IndexingConstants.getFilesIndexName(datasetName));
MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
} catch (Exception e2) {
e.addSuppressed(e2);
abort(e, e2, mdTxnCtx);
throw new IllegalStateException("System is inconsistent state: pending index(" + dataverseName + "." + datasetName + "." + IndexingConstants.getFilesIndexName(datasetName) + ") couldn't be removed from the metadata", e);
}
}
// remove the record from the metadata.
mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
metadataProvider.setMetadataTxnContext(mdTxnCtx);
try {
MetadataManager.INSTANCE.dropIndex(metadataProvider.getMetadataTxnContext(), dataverseName, datasetName, indexName);
MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
} catch (Exception e2) {
e.addSuppressed(e2);
abort(e, e2, mdTxnCtx);
throw new IllegalStateException("System is in inconsistent state: pending index(" + dataverseName + "." + datasetName + "." + indexName + ") couldn't be removed from the metadata", e);
}
}
throw e;
} finally {
metadataProvider.getLocks().unlock();
if (datasetLocked) {
ExternalDatasetsRegistry.INSTANCE.buildIndexEnd(ds, firstExternalDatasetIndex);
}
}
}
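The method above follows the metadata pending-operation protocol: the index entry is first committed with PENDING_ADD_OP, the physical artifact is then created and loaded on the node controllers by Hyracks jobs, and only afterwards is the entry replaced with a PENDING_NO_OP record. A condensed, hedged sketch of that happy path (error handling, locking, and the external-dataset branches are omitted; createSpec and loadSpec stand in for the job specifications built above):
// 1. Commit the index entry with a pending-add flag so a later failure leaves a cleanable record.
index = new Index(dataverseName, datasetName, indexName, stmtCreateIndex.getIndexType(), indexFields, keySourceIndicators, indexFieldTypes, stmtCreateIndex.getGramLength(), stmtCreateIndex.isEnforced(), false, MetadataUtil.PENDING_ADD_OP);
MetadataManager.INSTANCE.addIndex(mdTxnCtx, index);
MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
// 2. Create and load the index artifact on the node controllers.
JobUtils.runJob(hcc, createSpec, true);
JobUtils.runJob(hcc, loadSpec, true);
// 3. In a new metadata transaction, replace the pending entry with a committed (PENDING_NO_OP) one.
mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
MetadataManager.INSTANCE.dropIndex(mdTxnCtx, dataverseName, datasetName, indexName);
index.setPendingOp(MetadataUtil.PENDING_NO_OP);
MetadataManager.INSTANCE.addIndex(mdTxnCtx, index);
MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);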
Use of org.apache.asterix.metadata.entities.ExternalDatasetDetails in project asterixdb by apache.
The class QueryTranslator, method handleCreateDatasetStatement.
public void handleCreateDatasetStatement(MetadataProvider metadataProvider, Statement stmt, IHyracksClientConnection hcc) throws CompilationException, Exception {
MutableObject<ProgressState> progress = new MutableObject<>(ProgressState.NO_PROGRESS);
DatasetDecl dd = (DatasetDecl) stmt;
String dataverseName = getActiveDataverse(dd.getDataverse());
String datasetName = dd.getName().getValue();
DatasetType dsType = dd.getDatasetType();
String itemTypeDataverseName = getActiveDataverse(dd.getItemTypeDataverse());
String itemTypeName = dd.getItemTypeName().getValue();
String metaItemTypeDataverseName = getActiveDataverse(dd.getMetaItemTypeDataverse());
String metaItemTypeName = dd.getMetaItemTypeName().getValue();
Identifier ngNameId = dd.getNodegroupName();
String nodegroupName = ngNameId == null ? null : ngNameId.getValue();
String compactionPolicy = dd.getCompactionPolicy();
Map<String, String> compactionPolicyProperties = dd.getCompactionPolicyProperties();
boolean defaultCompactionPolicy = compactionPolicy == null;
boolean temp = dd.getDatasetDetailsDecl().isTemp();
MetadataTransactionContext mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
boolean bActiveTxn = true;
metadataProvider.setMetadataTxnContext(mdTxnCtx);
MetadataLockManager.INSTANCE.createDatasetBegin(metadataProvider.getLocks(), dataverseName, itemTypeDataverseName, itemTypeDataverseName + "." + itemTypeName, metaItemTypeDataverseName, metaItemTypeDataverseName + "." + metaItemTypeName, nodegroupName, compactionPolicy, dataverseName + "." + datasetName, defaultCompactionPolicy);
Dataset dataset = null;
try {
IDatasetDetails datasetDetails = null;
Dataset ds = metadataProvider.findDataset(dataverseName, datasetName);
if (ds != null) {
if (dd.getIfNotExists()) {
MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
return;
} else {
throw new AlgebricksException("A dataset with this name " + datasetName + " already exists.");
}
}
Datatype dt = MetadataManager.INSTANCE.getDatatype(metadataProvider.getMetadataTxnContext(), itemTypeDataverseName, itemTypeName);
if (dt == null) {
throw new AlgebricksException(": type " + itemTypeName + " could not be found.");
}
String ngName = ngNameId != null ? ngNameId.getValue() : configureNodegroupForDataset(appCtx, dd.getHints(), dataverseName, datasetName, metadataProvider);
if (compactionPolicy == null) {
compactionPolicy = GlobalConfig.DEFAULT_COMPACTION_POLICY_NAME;
compactionPolicyProperties = GlobalConfig.DEFAULT_COMPACTION_POLICY_PROPERTIES;
} else {
validateCompactionPolicy(compactionPolicy, compactionPolicyProperties, mdTxnCtx, false);
}
switch(dd.getDatasetType()) {
case INTERNAL:
IAType itemType = dt.getDatatype();
if (itemType.getTypeTag() != ATypeTag.OBJECT) {
throw new AlgebricksException("Dataset type has to be a record type.");
}
IAType metaItemType = null;
if (metaItemTypeDataverseName != null && metaItemTypeName != null) {
metaItemType = metadataProvider.findType(metaItemTypeDataverseName, metaItemTypeName);
}
if (metaItemType != null && metaItemType.getTypeTag() != ATypeTag.OBJECT) {
throw new AlgebricksException("Dataset meta type has to be a record type.");
}
ARecordType metaRecType = (ARecordType) metaItemType;
List<List<String>> partitioningExprs = ((InternalDetailsDecl) dd.getDatasetDetailsDecl()).getPartitioningExprs();
List<Integer> keySourceIndicators = ((InternalDetailsDecl) dd.getDatasetDetailsDecl()).getKeySourceIndicators();
boolean autogenerated = ((InternalDetailsDecl) dd.getDatasetDetailsDecl()).isAutogenerated();
ARecordType aRecordType = (ARecordType) itemType;
List<IAType> partitioningTypes = ValidateUtil.validatePartitioningExpressions(aRecordType, metaRecType, partitioningExprs, keySourceIndicators, autogenerated);
List<String> filterField = ((InternalDetailsDecl) dd.getDatasetDetailsDecl()).getFilterField();
if (filterField != null) {
ValidateUtil.validateFilterField(aRecordType, filterField);
}
if (compactionPolicy == null && filterField != null) {
// If the dataset has a filter and the user didn't specify a merge
// policy, then we will pick the
// correlated-prefix as the default merge policy.
compactionPolicy = GlobalConfig.DEFAULT_FILTERED_DATASET_COMPACTION_POLICY_NAME;
compactionPolicyProperties = GlobalConfig.DEFAULT_COMPACTION_POLICY_PROPERTIES;
}
datasetDetails = new InternalDatasetDetails(InternalDatasetDetails.FileStructure.BTREE, InternalDatasetDetails.PartitioningStrategy.HASH, partitioningExprs, partitioningExprs, keySourceIndicators, partitioningTypes, autogenerated, filterField, temp);
break;
case EXTERNAL:
String adapter = ((ExternalDetailsDecl) dd.getDatasetDetailsDecl()).getAdapter();
Map<String, String> properties = ((ExternalDetailsDecl) dd.getDatasetDetailsDecl()).getProperties();
datasetDetails = new ExternalDatasetDetails(adapter, properties, new Date(), TransactionState.COMMIT);
break;
default:
throw new CompilationException("Unknown datatype " + dd.getDatasetType());
}
// #. initialize DatasetIdFactory if it is not initialized.
if (!DatasetIdFactory.isInitialized()) {
DatasetIdFactory.initialize(MetadataManager.INSTANCE.getMostRecentDatasetId());
}
// #. add a new dataset with PendingAddOp
dataset = new Dataset(dataverseName, datasetName, itemTypeDataverseName, itemTypeName, metaItemTypeDataverseName, metaItemTypeName, ngName, compactionPolicy, compactionPolicyProperties, datasetDetails, dd.getHints(), dsType, DatasetIdFactory.generateDatasetId(), MetadataUtil.PENDING_ADD_OP);
MetadataManager.INSTANCE.addDataset(metadataProvider.getMetadataTxnContext(), dataset);
if (dd.getDatasetType() == DatasetType.INTERNAL) {
JobSpecification jobSpec = DatasetUtil.createDatasetJobSpec(dataset, metadataProvider);
// #. make metadataTxn commit before calling runJob.
MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
bActiveTxn = false;
progress.setValue(ProgressState.ADDED_PENDINGOP_RECORD_TO_METADATA);
// #. runJob
JobUtils.runJob(hcc, jobSpec, true);
// #. begin new metadataTxn
mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
bActiveTxn = true;
metadataProvider.setMetadataTxnContext(mdTxnCtx);
}
// #. add a new dataset with PendingNoOp after deleting the dataset with PendingAddOp
MetadataManager.INSTANCE.dropDataset(metadataProvider.getMetadataTxnContext(), dataverseName, datasetName);
dataset.setPendingOp(MetadataUtil.PENDING_NO_OP);
MetadataManager.INSTANCE.addDataset(metadataProvider.getMetadataTxnContext(), dataset);
MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
} catch (Exception e) {
if (bActiveTxn) {
abort(e, e, mdTxnCtx);
}
if (progress.getValue() == ProgressState.ADDED_PENDINGOP_RECORD_TO_METADATA) {
// #. execute compensation operations
// remove the dataset artifact created on the NCs
// [Notice]
// As long as we updated (and committed) the metadata, we should remove any effect of the job,
// because an exception occurred during runJob.
mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
bActiveTxn = true;
metadataProvider.setMetadataTxnContext(mdTxnCtx);
try {
JobSpecification jobSpec = DatasetUtil.dropDatasetJobSpec(dataset, metadataProvider);
MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
bActiveTxn = false;
JobUtils.runJob(hcc, jobSpec, true);
} catch (Exception e2) {
e.addSuppressed(e2);
if (bActiveTxn) {
abort(e, e2, mdTxnCtx);
}
}
// remove the record from the metadata.
mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
metadataProvider.setMetadataTxnContext(mdTxnCtx);
try {
MetadataManager.INSTANCE.dropDataset(metadataProvider.getMetadataTxnContext(), dataverseName, datasetName);
MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
} catch (Exception e2) {
e.addSuppressed(e2);
abort(e, e2, mdTxnCtx);
throw new IllegalStateException("System is inconsistent state: pending dataset(" + dataverseName + "." + datasetName + ") couldn't be removed from the metadata", e);
}
}
throw e;
} finally {
metadataProvider.getLocks().unlock();
}
}
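For external datasets, the switch above simply forwards the adapter name and properties from the ExternalDetailsDecl into a new ExternalDatasetDetails, stamping the current time as the initial refresh time and COMMIT as the initial transaction state. A hedged restatement of that branch (decl stands for the statement's ExternalDetailsDecl):
ExternalDetailsDecl decl = (ExternalDetailsDecl) dd.getDatasetDetailsDecl();
IDatasetDetails externalDetails = new ExternalDatasetDetails(decl.getAdapter(), decl.getProperties(), new Date(), TransactionState.COMMIT);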
Use of org.apache.asterix.metadata.entities.ExternalDatasetDetails in project asterixdb by apache.
The class ExternalIndexingOperations, method getIndexingOperator.
/**
* This method creates an indexing operator that indexes the records of an external dataset stored in HDFS.
*
* @param metadataProvider the metadata provider used to obtain the indexing adapter factory
* @param jobSpec the job specification the scan operator is built for
* @param itemType the record type of the dataset
* @param dataset the external dataset whose records are indexed
* @param files the snapshot of external files to index
* @param indexerDesc the record descriptor produced by the indexing scan
* @return the external scan operator descriptor paired with its partition constraint
* @throws HyracksDataException
* @throws AlgebricksException
*/
private static Pair<ExternalScanOperatorDescriptor, AlgebricksPartitionConstraint> getIndexingOperator(MetadataProvider metadataProvider, JobSpecification jobSpec, IAType itemType, Dataset dataset, List<ExternalFile> files, RecordDescriptor indexerDesc) throws HyracksDataException, AlgebricksException {
ExternalDatasetDetails externalDatasetDetails = (ExternalDatasetDetails) dataset.getDatasetDetails();
Map<String, String> configuration = externalDatasetDetails.getProperties();
IAdapterFactory adapterFactory = AdapterFactoryProvider.getIndexingAdapterFactory(metadataProvider.getApplicationContext().getServiceContext(), externalDatasetDetails.getAdapter(), configuration, (ARecordType) itemType, files, true, null);
return new Pair<>(new ExternalScanOperatorDescriptor(jobSpec, indexerDesc, adapterFactory), adapterFactory.getPartitionConstraint());
}
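The returned pair couples the external scan operator with the partition constraint derived from the indexing adapter factory. A hedged sketch of how a caller inside the same class might wire it into the job specification (the variable names are illustrative, and the Algebricks helper is assumed to be on the classpath):
Pair<ExternalScanOperatorDescriptor, AlgebricksPartitionConstraint> scanOpAndConstraint = getIndexingOperator(metadataProvider, jobSpec, itemType, dataset, files, indexerDesc);
AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(jobSpec, scanOpAndConstraint.first, scanOpAndConstraint.second);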