use of org.apache.asterix.metadata.entities.Dataset in project asterixdb by apache.
the class LogMarkerTest method testInsertWithSnapshot.
@Test
public void testInsertWithSnapshot() {
try {
TestNodeController nc = new TestNodeController(null, false);
nc.init();
StorageComponentProvider storageManager = new StorageComponentProvider();
List<List<String>> partitioningKeys = new ArrayList<>();
partitioningKeys.add(Collections.singletonList("key"));
Dataset dataset = new Dataset(DATAVERSE_NAME, DATASET_NAME, DATAVERSE_NAME, DATA_TYPE_NAME, NODE_GROUP_NAME, null, null, new InternalDatasetDetails(null, PartitioningStrategy.HASH, partitioningKeys, null, null, null, false, null, false), null, DatasetType.INTERNAL, DATASET_ID, 0);
try {
nc.createPrimaryIndex(dataset, KEY_TYPES, RECORD_TYPE, META_TYPE, new NoMergePolicyFactory(), null, null, storageManager, KEY_INDEXES, KEY_INDICATORS_LIST);
IHyracksTaskContext ctx = nc.createTestContext(true);
nc.newJobId();
ITransactionContext txnCtx = nc.getTransactionManager().getTransactionContext(nc.getTxnJobId(), true);
LSMInsertDeleteOperatorNodePushable insertOp = nc.getInsertPipeline(ctx, dataset, KEY_TYPES, RECORD_TYPE, META_TYPE, new NoMergePolicyFactory(), null, null, KEY_INDEXES, KEY_INDICATORS_LIST, storageManager).getLeft();
insertOp.open();
TupleGenerator tupleGenerator = new TupleGenerator(RECORD_TYPE, META_TYPE, KEY_INDEXES, KEY_INDICATORS, RECORD_GEN_FUNCTION, UNIQUE_RECORD_FIELDS, META_GEN_FUNCTION, UNIQUE_META_FIELDS);
VSizeFrame frame = new VSizeFrame(ctx);
VSizeFrame marker = new VSizeFrame(ctx);
FrameTupleAppender tupleAppender = new FrameTupleAppender(frame);
long markerId = 0L;
for (int j = 0; j < NUM_OF_RECORDS; j++) {
if (j % SNAPSHOT_SIZE == 0) {
marker.reset();
marker.getBuffer().put(MessagingFrameTupleAppender.MARKER_MESSAGE);
marker.getBuffer().putLong(markerId);
marker.getBuffer().flip();
markerId++;
TaskUtil.putInSharedMap(HyracksConstants.KEY_MESSAGE, marker, ctx);
tupleAppender.flush(insertOp);
}
ITupleReference tuple = tupleGenerator.next();
DataflowUtils.addTupleToFrame(tupleAppender, tuple, insertOp);
}
if (tupleAppender.getTupleCount() > 0) {
tupleAppender.write(insertOp, true);
}
insertOp.close();
nc.getTransactionManager().completedTransaction(txnCtx, DatasetId.NULL, -1, true);
IIndexDataflowHelper dataflowHelper = nc.getPrimaryIndexDataflowHelper(dataset, KEY_TYPES, RECORD_TYPE, META_TYPE, new NoMergePolicyFactory(), null, null, storageManager, KEY_INDEXES, KEY_INDICATORS_LIST);
dataflowHelper.open();
LSMBTree btree = (LSMBTree) dataflowHelper.getIndexInstance();
LongPointable longPointable = LongPointable.FACTORY.createPointable();
ComponentMetadataUtil.get(btree, ComponentMetadataUtil.MARKER_LSN_KEY, longPointable);
long lsn = longPointable.getLong();
int numOfMarkers = 0;
LogReader logReader = (LogReader) nc.getTransactionSubsystem().getLogManager().getLogReader(false);
long expectedMarkerId = markerId - 1;
while (lsn >= 0) {
numOfMarkers++;
ILogRecord logRecord = logReader.read(lsn);
lsn = logRecord.getPreviousMarkerLSN();
long logMarkerId = logRecord.getMarker().getLong();
Assert.assertEquals(expectedMarkerId, logMarkerId);
expectedMarkerId--;
}
logReader.close();
dataflowHelper.close();
Assert.assertEquals(markerId, numOfMarkers);
nc.newJobId();
TestTupleCounterFrameWriter countOp = create(nc.getSearchOutputDesc(KEY_TYPES, RECORD_TYPE, META_TYPE), Collections.emptyList(), Collections.emptyList(), false);
IPushRuntime emptyTupleOp = nc.getFullScanPipeline(countOp, ctx, dataset, KEY_TYPES, RECORD_TYPE, META_TYPE, new NoMergePolicyFactory(), null, null, KEY_INDEXES, KEY_INDICATORS_LIST, storageManager);
emptyTupleOp.open();
emptyTupleOp.close();
Assert.assertEquals(NUM_OF_RECORDS, countOp.getCount());
} finally {
nc.deInit();
}
} catch (Throwable e) {
e.printStackTrace();
Assert.fail(e.getMessage());
}
}
use of org.apache.asterix.metadata.entities.Dataset in project asterixdb by apache.
the class MetadataCache method cleanupTempDatasets.
/**
* Clean up temp datasets that are expired.
* The garbage collection will pause other dataset operations.
*/
public void cleanupTempDatasets() {
synchronized (datasets) {
for (Map<String, Dataset> map : datasets.values()) {
Iterator<Dataset> datasetIterator = map.values().iterator();
while (datasetIterator.hasNext()) {
Dataset dataset = datasetIterator.next();
if (dataset.getDatasetDetails().isTemp()) {
long currentTime = System.currentTimeMillis();
long duration = currentTime - dataset.getDatasetDetails().getLastAccessTime();
if (duration > TEMP_DATASET_INACTIVE_TIME_THRESHOLD) {
datasetIterator.remove();
}
}
}
}
}
}
use of org.apache.asterix.metadata.entities.Dataset in project asterixdb by apache.
the class MetadataCache method addDatasetIfNotExists.
public Dataset addDatasetIfNotExists(Dataset dataset) {
synchronized (datasets) {
synchronized (indexes) {
// internal dataset.
if (dataset.getDatasetType() == DatasetType.INTERNAL) {
Index index = IndexUtil.getPrimaryIndex(dataset);
addIndexIfNotExistsInternal(index);
}
Map<String, Dataset> m = datasets.get(dataset.getDataverseName());
if (m == null) {
m = new HashMap<>();
datasets.put(dataset.getDataverseName(), m);
}
if (!m.containsKey(dataset.getDatasetName())) {
return m.put(dataset.getDatasetName(), dataset);
}
return null;
}
}
}
use of org.apache.asterix.metadata.entities.Dataset in project asterixdb by apache.
the class MetadataManager method getDataset.
@Override
public Dataset getDataset(MetadataTransactionContext ctx, String dataverseName, String datasetName) throws MetadataException {
// First look in the context to see if this transaction created the
// requested dataset itself (but the dataset is still uncommitted).
Dataset dataset = ctx.getDataset(dataverseName, datasetName);
if (dataset != null) {
// uncommitted.
return dataset;
}
if (ctx.datasetIsDropped(dataverseName, datasetName)) {
// in the cache.
return null;
}
dataset = cache.getDataset(dataverseName, datasetName);
if (dataset != null) {
// Dataset is already in the cache, don't add it again.
return dataset;
}
try {
dataset = metadataNode.getDataset(ctx.getJobId(), dataverseName, datasetName);
} catch (RemoteException e) {
throw new MetadataException(e);
}
// when this transaction commits.
if (dataset != null) {
ctx.addDataset(dataset);
}
return dataset;
}
use of org.apache.asterix.metadata.entities.Dataset in project asterixdb by apache.
the class QueryTranslator method handleCreateIndexStatement.
protected void handleCreateIndexStatement(MetadataProvider metadataProvider, Statement stmt, IHyracksClientConnection hcc) throws Exception {
ProgressState progress = ProgressState.NO_PROGRESS;
CreateIndexStatement stmtCreateIndex = (CreateIndexStatement) stmt;
String dataverseName = getActiveDataverse(stmtCreateIndex.getDataverseName());
String datasetName = stmtCreateIndex.getDatasetName().getValue();
List<Integer> keySourceIndicators = stmtCreateIndex.getFieldSourceIndicators();
MetadataTransactionContext mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
boolean bActiveTxn = true;
metadataProvider.setMetadataTxnContext(mdTxnCtx);
MetadataLockManager.INSTANCE.createIndexBegin(metadataProvider.getLocks(), dataverseName, dataverseName + "." + datasetName);
String indexName = null;
JobSpecification spec = null;
Dataset ds = null;
// For external datasets
List<ExternalFile> externalFilesSnapshot = null;
boolean firstExternalDatasetIndex = false;
boolean filesIndexReplicated = false;
Index filesIndex = null;
boolean datasetLocked = false;
Index index = null;
try {
ds = metadataProvider.findDataset(dataverseName, datasetName);
if (ds == null) {
throw new AlgebricksException("There is no dataset with this name " + datasetName + " in dataverse " + dataverseName);
}
indexName = stmtCreateIndex.getIndexName().getValue();
index = MetadataManager.INSTANCE.getIndex(metadataProvider.getMetadataTxnContext(), dataverseName, datasetName, indexName);
Datatype dt = MetadataManager.INSTANCE.getDatatype(metadataProvider.getMetadataTxnContext(), ds.getItemTypeDataverseName(), ds.getItemTypeName());
ARecordType aRecordType = (ARecordType) dt.getDatatype();
ARecordType metaRecordType = null;
if (ds.hasMetaPart()) {
Datatype metaDt = MetadataManager.INSTANCE.getDatatype(metadataProvider.getMetadataTxnContext(), ds.getMetaItemTypeDataverseName(), ds.getMetaItemTypeName());
metaRecordType = (ARecordType) metaDt.getDatatype();
}
List<List<String>> indexFields = new ArrayList<>();
List<IAType> indexFieldTypes = new ArrayList<>();
int keyIndex = 0;
for (Pair<List<String>, TypeExpression> fieldExpr : stmtCreateIndex.getFieldExprs()) {
IAType fieldType = null;
ARecordType subType = KeyFieldTypeUtil.chooseSource(keySourceIndicators, keyIndex, aRecordType, metaRecordType);
boolean isOpen = subType.isOpen();
int i = 0;
if (fieldExpr.first.size() > 1 && !isOpen) {
while (i < fieldExpr.first.size() - 1 && !isOpen) {
subType = (ARecordType) subType.getFieldType(fieldExpr.first.get(i));
i++;
isOpen = subType.isOpen();
}
}
if (fieldExpr.second == null) {
fieldType = subType.getSubFieldType(fieldExpr.first.subList(i, fieldExpr.first.size()));
} else {
if (!stmtCreateIndex.isEnforced()) {
throw new AlgebricksException("Cannot create typed index on \"" + fieldExpr.first + "\" field without enforcing it's type");
}
if (!isOpen) {
throw new AlgebricksException("Typed index on \"" + fieldExpr.first + "\" field could be created only for open datatype");
}
if (stmtCreateIndex.hasMetaField()) {
throw new AlgebricksException("Typed open index can only be created on the record part");
}
Map<TypeSignature, IAType> typeMap = TypeTranslator.computeTypes(mdTxnCtx, fieldExpr.second, indexName, dataverseName);
TypeSignature typeSignature = new TypeSignature(dataverseName, indexName);
fieldType = typeMap.get(typeSignature);
}
if (fieldType == null) {
throw new AlgebricksException("Unknown type " + (fieldExpr.second == null ? fieldExpr.first : fieldExpr.second));
}
indexFields.add(fieldExpr.first);
indexFieldTypes.add(fieldType);
++keyIndex;
}
ValidateUtil.validateKeyFields(aRecordType, metaRecordType, indexFields, keySourceIndicators, indexFieldTypes, stmtCreateIndex.getIndexType());
if (index != null) {
if (stmtCreateIndex.getIfNotExists()) {
MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
return;
} else {
throw new AlgebricksException("An index with this name " + indexName + " already exists.");
}
}
// error message and stop.
if (stmtCreateIndex.getIndexType() == IndexType.SINGLE_PARTITION_WORD_INVIX || stmtCreateIndex.getIndexType() == IndexType.SINGLE_PARTITION_NGRAM_INVIX || stmtCreateIndex.getIndexType() == IndexType.LENGTH_PARTITIONED_WORD_INVIX || stmtCreateIndex.getIndexType() == IndexType.LENGTH_PARTITIONED_NGRAM_INVIX) {
List<List<String>> partitioningKeys = ds.getPrimaryKeys();
for (List<String> partitioningKey : partitioningKeys) {
IAType keyType = aRecordType.getSubFieldType(partitioningKey);
ITypeTraits typeTrait = TypeTraitProvider.INSTANCE.getTypeTrait(keyType);
// If it is not a fixed length
if (typeTrait.getFixedLength() < 0) {
throw new AlgebricksException("The keyword or ngram index -" + indexName + " cannot be created on the dataset -" + datasetName + " due to its variable-length primary key field - " + partitioningKey);
}
}
}
if (ds.getDatasetType() == DatasetType.INTERNAL) {
validateIfResourceIsActiveInFeed(ds);
} else {
// Check if the dataset is indexible
if (!ExternalIndexingOperations.isIndexible((ExternalDatasetDetails) ds.getDatasetDetails())) {
throw new AlgebricksException("dataset using " + ((ExternalDatasetDetails) ds.getDatasetDetails()).getAdapter() + " Adapter can't be indexed");
}
// Check if the name of the index is valid
if (!ExternalIndexingOperations.isValidIndexName(datasetName, indexName)) {
throw new AlgebricksException("external dataset index name is invalid");
}
// Check if the files index exist
filesIndex = MetadataManager.INSTANCE.getIndex(metadataProvider.getMetadataTxnContext(), dataverseName, datasetName, IndexingConstants.getFilesIndexName(datasetName));
firstExternalDatasetIndex = filesIndex == null;
// Lock external dataset
ExternalDatasetsRegistry.INSTANCE.buildIndexBegin(ds, firstExternalDatasetIndex);
datasetLocked = true;
if (firstExternalDatasetIndex) {
// Verify that no one has created an index before we acquire the lock
filesIndex = MetadataManager.INSTANCE.getIndex(metadataProvider.getMetadataTxnContext(), dataverseName, datasetName, IndexingConstants.getFilesIndexName(datasetName));
if (filesIndex != null) {
ExternalDatasetsRegistry.INSTANCE.buildIndexEnd(ds, firstExternalDatasetIndex);
firstExternalDatasetIndex = false;
ExternalDatasetsRegistry.INSTANCE.buildIndexBegin(ds, firstExternalDatasetIndex);
}
}
if (firstExternalDatasetIndex) {
// Get snapshot from External File System
externalFilesSnapshot = ExternalIndexingOperations.getSnapshotFromExternalFileSystem(ds);
// Add an entry for the files index
filesIndex = new Index(dataverseName, datasetName, IndexingConstants.getFilesIndexName(datasetName), IndexType.BTREE, ExternalIndexingOperations.FILE_INDEX_FIELD_NAMES, null, ExternalIndexingOperations.FILE_INDEX_FIELD_TYPES, false, false, MetadataUtil.PENDING_ADD_OP);
MetadataManager.INSTANCE.addIndex(metadataProvider.getMetadataTxnContext(), filesIndex);
// Add files to the external files index
for (ExternalFile file : externalFilesSnapshot) {
MetadataManager.INSTANCE.addExternalFile(mdTxnCtx, file);
}
// This is the first index for the external dataset, replicate the files index
spec = ExternalIndexingOperations.buildFilesIndexCreateJobSpec(ds, externalFilesSnapshot, metadataProvider);
if (spec == null) {
throw new CompilationException("Failed to create job spec for replicating Files Index For external dataset");
}
filesIndexReplicated = true;
JobUtils.runJob(hcc, spec, true);
}
}
// check whether there exists another enforced index on the same field
if (stmtCreateIndex.isEnforced()) {
List<Index> indexes = MetadataManager.INSTANCE.getDatasetIndexes(metadataProvider.getMetadataTxnContext(), dataverseName, datasetName);
for (Index existingIndex : indexes) {
if (existingIndex.getKeyFieldNames().equals(indexFields) && !existingIndex.getKeyFieldTypes().equals(indexFieldTypes) && existingIndex.isEnforcingKeyFileds()) {
throw new CompilationException("Cannot create index " + indexName + " , enforced index " + existingIndex.getIndexName() + " on field \"" + StringUtils.join(indexFields, ',') + "\" is already defined with type \"" + existingIndex.getKeyFieldTypes() + "\"");
}
}
}
// #. add a new index with PendingAddOp
index = new Index(dataverseName, datasetName, indexName, stmtCreateIndex.getIndexType(), indexFields, keySourceIndicators, indexFieldTypes, stmtCreateIndex.getGramLength(), stmtCreateIndex.isEnforced(), false, MetadataUtil.PENDING_ADD_OP);
MetadataManager.INSTANCE.addIndex(metadataProvider.getMetadataTxnContext(), index);
// #. prepare to create the index artifact in NC.
spec = IndexUtil.buildSecondaryIndexCreationJobSpec(ds, index, metadataProvider);
if (spec == null) {
throw new CompilationException("Failed to create job spec for creating index '" + stmtCreateIndex.getDatasetName() + "." + stmtCreateIndex.getIndexName() + "'");
}
MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
bActiveTxn = false;
progress = ProgressState.ADDED_PENDINGOP_RECORD_TO_METADATA;
// #. create the index artifact in NC.
JobUtils.runJob(hcc, spec, true);
mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
bActiveTxn = true;
metadataProvider.setMetadataTxnContext(mdTxnCtx);
// #. load data into the index in NC.
spec = IndexUtil.buildSecondaryIndexLoadingJobSpec(ds, index, metadataProvider);
MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
bActiveTxn = false;
JobUtils.runJob(hcc, spec, true);
// #. begin new metadataTxn
mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
bActiveTxn = true;
metadataProvider.setMetadataTxnContext(mdTxnCtx);
// #. add another new index with PendingNoOp after deleting the index with PendingAddOp
MetadataManager.INSTANCE.dropIndex(metadataProvider.getMetadataTxnContext(), dataverseName, datasetName, indexName);
index.setPendingOp(MetadataUtil.PENDING_NO_OP);
MetadataManager.INSTANCE.addIndex(metadataProvider.getMetadataTxnContext(), index);
// PendingAddOp
if (firstExternalDatasetIndex) {
MetadataManager.INSTANCE.dropIndex(metadataProvider.getMetadataTxnContext(), dataverseName, datasetName, filesIndex.getIndexName());
filesIndex.setPendingOp(MetadataUtil.PENDING_NO_OP);
MetadataManager.INSTANCE.addIndex(metadataProvider.getMetadataTxnContext(), filesIndex);
// update transaction timestamp
((ExternalDatasetDetails) ds.getDatasetDetails()).setRefreshTimestamp(new Date());
MetadataManager.INSTANCE.updateDataset(mdTxnCtx, ds);
}
MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
} catch (Exception e) {
if (bActiveTxn) {
abort(e, e, mdTxnCtx);
}
// If files index was replicated for external dataset, it should be cleaned up on NC side
if (filesIndexReplicated) {
mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
bActiveTxn = true;
try {
JobSpecification jobSpec = ExternalIndexingOperations.buildDropFilesIndexJobSpec(metadataProvider, ds);
MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
bActiveTxn = false;
JobUtils.runJob(hcc, jobSpec, true);
} catch (Exception e2) {
e.addSuppressed(e2);
if (bActiveTxn) {
abort(e, e2, mdTxnCtx);
}
}
}
if (progress == ProgressState.ADDED_PENDINGOP_RECORD_TO_METADATA) {
// #. execute compensation operations
// remove the index in NC
mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
bActiveTxn = true;
metadataProvider.setMetadataTxnContext(mdTxnCtx);
try {
JobSpecification jobSpec = IndexUtil.buildDropIndexJobSpec(index, metadataProvider, ds);
MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
bActiveTxn = false;
JobUtils.runJob(hcc, jobSpec, true);
} catch (Exception e2) {
e.addSuppressed(e2);
if (bActiveTxn) {
abort(e, e2, mdTxnCtx);
}
}
if (firstExternalDatasetIndex) {
mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
metadataProvider.setMetadataTxnContext(mdTxnCtx);
try {
// Drop External Files from metadata
MetadataManager.INSTANCE.dropDatasetExternalFiles(mdTxnCtx, ds);
MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
} catch (Exception e2) {
e.addSuppressed(e2);
abort(e, e2, mdTxnCtx);
throw new IllegalStateException("System is inconsistent state: pending files for(" + dataverseName + "." + datasetName + ") couldn't be removed from the metadata", e);
}
mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
metadataProvider.setMetadataTxnContext(mdTxnCtx);
try {
// Drop the files index from metadata
MetadataManager.INSTANCE.dropIndex(metadataProvider.getMetadataTxnContext(), dataverseName, datasetName, IndexingConstants.getFilesIndexName(datasetName));
MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
} catch (Exception e2) {
e.addSuppressed(e2);
abort(e, e2, mdTxnCtx);
throw new IllegalStateException("System is inconsistent state: pending index(" + dataverseName + "." + datasetName + "." + IndexingConstants.getFilesIndexName(datasetName) + ") couldn't be removed from the metadata", e);
}
}
// remove the record from the metadata.
mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
metadataProvider.setMetadataTxnContext(mdTxnCtx);
try {
MetadataManager.INSTANCE.dropIndex(metadataProvider.getMetadataTxnContext(), dataverseName, datasetName, indexName);
MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
} catch (Exception e2) {
e.addSuppressed(e2);
abort(e, e2, mdTxnCtx);
throw new IllegalStateException("System is in inconsistent state: pending index(" + dataverseName + "." + datasetName + "." + indexName + ") couldn't be removed from the metadata", e);
}
}
throw e;
} finally {
metadataProvider.getLocks().unlock();
if (datasetLocked) {
ExternalDatasetsRegistry.INSTANCE.buildIndexEnd(ds, firstExternalDatasetIndex);
}
}
}
Aggregations