Example 56 with Dataset

use of org.apache.asterix.metadata.entities.Dataset in project asterixdb by apache.

the class FlushDatasetUtil method flushDataset.

public static void flushDataset(IHyracksClientConnection hcc, MetadataProvider metadataProvider, String dataverseName, String datasetName, String indexName) throws Exception {
    CompilerProperties compilerProperties = metadataProvider.getApplicationContext().getCompilerProperties();
    int frameSize = compilerProperties.getFrameSize();
    JobSpecification spec = new JobSpecification(frameSize);
    // An empty tuple source that produces no records; it only exists to drive the flush operator.
    RecordDescriptor[] rDescs = new RecordDescriptor[] { new RecordDescriptor(new ISerializerDeserializer[] {}) };
    AlgebricksMetaOperatorDescriptor emptySource = new AlgebricksMetaOperatorDescriptor(spec, 0, 1, new IPushRuntimeFactory[] { new EmptyTupleSourceRuntimeFactory() }, rDescs);
    org.apache.asterix.common.transactions.JobId jobId = JobIdFactory.generateJobId();
    // Resolve the dataset and create a flush operator bound to its dataset id.
    Dataset dataset = metadataProvider.findDataset(dataverseName, datasetName);
    FlushDatasetOperatorDescriptor flushOperator = new FlushDatasetOperatorDescriptor(spec, jobId, dataset.getDatasetId());
    spec.connect(new OneToOneConnectorDescriptor(spec), emptySource, 0, flushOperator, 0);
    // Constrain the empty source to the partitions that hold the given index, so the flush runs on every partition.
    Pair<IFileSplitProvider, AlgebricksPartitionConstraint> primarySplitsAndConstraint = metadataProvider.getSplitProviderAndConstraints(dataset, indexName);
    AlgebricksPartitionConstraint primaryPartitionConstraint = primarySplitsAndConstraint.second;
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, emptySource, primaryPartitionConstraint);
    // The job event listener ties the transaction identified by jobId to the job's lifecycle.
    JobEventListenerFactory jobEventListenerFactory = new JobEventListenerFactory(jobId, true);
    spec.setJobletEventListenerFactory(jobEventListenerFactory);
    // Run the flush job and wait for it to complete.
    JobUtils.runJob(hcc, spec, true);
}
Also used : RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) Dataset(org.apache.asterix.metadata.entities.Dataset) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) AlgebricksMetaOperatorDescriptor(org.apache.hyracks.algebricks.runtime.operators.meta.AlgebricksMetaOperatorDescriptor) CompilerProperties(org.apache.asterix.common.config.CompilerProperties) OneToOneConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor) JobEventListenerFactory(org.apache.asterix.runtime.job.listener.JobEventListenerFactory) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint) FlushDatasetOperatorDescriptor(org.apache.asterix.runtime.operators.std.FlushDatasetOperatorDescriptor) EmptyTupleSourceRuntimeFactory(org.apache.hyracks.algebricks.runtime.operators.std.EmptyTupleSourceRuntimeFactory) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint) JobSpecification(org.apache.hyracks.api.job.JobSpecification)
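
A minimal caller-side sketch of how this utility might be invoked. The package of FlushDatasetUtil, the dataverse/dataset names, and the way the Hyracks client connection and metadata provider are obtained are assumptions, not taken from the example above; the primary index is assumed to share the dataset's name, which is why the dataset name is also passed as the index name.

import org.apache.asterix.metadata.declared.MetadataProvider;
// Assumed package for FlushDatasetUtil.
import org.apache.asterix.utils.FlushDatasetUtil;
import org.apache.hyracks.api.client.IHyracksClientConnection;

public final class FlushDatasetExample {

    private FlushDatasetExample() {
    }

    // Flushes the in-memory component of a hypothetical dataset's primary index.
    public static void flushPrimary(IHyracksClientConnection hcc, MetadataProvider metadataProvider) throws Exception {
        String dataverseName = "MyDataverse"; // hypothetical
        String datasetName = "MyDataset"; // hypothetical
        FlushDatasetUtil.flushDataset(hcc, metadataProvider, dataverseName, datasetName, datasetName);
    }
}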

Example 57 with Dataset

use of org.apache.asterix.metadata.entities.Dataset in project asterixdb by apache.

the class RebalanceUtil method rebalanceSwitch.

private static void rebalanceSwitch(Dataset source, Dataset target, MetadataProvider metadataProvider, IHyracksClientConnection hcc) throws Exception {
    MetadataTransactionContext mdTxnCtx = metadataProvider.getMetadataTxnContext();
    // Acquires the metadata write lock for the source/target dataset.
    writeLockDataset(metadataProvider.getLocks(), source);
    Dataset sourceDataset = MetadataManagerUtil.findDataset(mdTxnCtx, source.getDataverseName(), source.getDatasetName());
    if (sourceDataset == null) {
        // The dataset has already been dropped.
        // In this case, we should drop the generated target dataset files.
        dropDatasetFiles(target, metadataProvider, hcc);
        return;
    }
    // Drops the source dataset files.
    dropDatasetFiles(source, metadataProvider, hcc);
    // Updates the dataset entry in the metadata storage
    MetadataManager.INSTANCE.updateDataset(mdTxnCtx, target);
    // Drops the metadata entry of source dataset's node group.
    String sourceNodeGroup = source.getNodeGroupName();
    MetadataLockManager.INSTANCE.acquireNodeGroupWriteLock(metadataProvider.getLocks(), sourceNodeGroup);
    MetadataManager.INSTANCE.dropNodegroup(mdTxnCtx, sourceNodeGroup, true);
}
Also used : Dataset(org.apache.asterix.metadata.entities.Dataset) MetadataTransactionContext(org.apache.asterix.metadata.MetadataTransactionContext)

Example 58 with Dataset

use of org.apache.asterix.metadata.entities.Dataset in project asterixdb by apache.

the class RebalanceUtil method rebalance.

/**
 * Rebalances an existing dataset to a list of target nodes.
 *
 * @param dataverseName the dataverse name
 * @param datasetName the dataset name
 * @param targetNcNames the list of target nodes
 * @param metadataProvider the metadata provider
 * @param hcc the reusable Hyracks connection
 * @throws Exception if the rebalance fails
 */
public static void rebalance(String dataverseName, String datasetName, Set<String> targetNcNames, MetadataProvider metadataProvider, IHyracksClientConnection hcc) throws Exception {
    MetadataTransactionContext mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
    metadataProvider.setMetadataTxnContext(mdTxnCtx);
    Dataset sourceDataset;
    Dataset targetDataset;
    // Generates the rebalance target files while holding locks on the dataset so
    // that no one can drop the rebalance source dataset.
    try {
        // The source dataset.
        sourceDataset = metadataProvider.findDataset(dataverseName, datasetName);
        // If the source dataset doesn't exist, then it's a no-op.
        if (sourceDataset == null) {
            return;
        }
        Set<String> sourceNodes = new HashSet<>(metadataProvider.findNodes(sourceDataset.getNodeGroupName()));
        // If the source nodes are identical to the target nodes, this is a no-op.
        if (sourceNodes.equals(targetNcNames)) {
            return;
        }
        // Creates a node group for rebalance.
        String nodeGroupName = DatasetUtil.createNodeGroupForNewDataset(sourceDataset.getDataverseName(), sourceDataset.getDatasetName(), sourceDataset.getRebalanceCount() + 1, targetNcNames, metadataProvider);
        // The target dataset for rebalance.
        targetDataset = new Dataset(sourceDataset, true, nodeGroupName);
        // Rebalances the source dataset into the target dataset.
        rebalance(sourceDataset, targetDataset, metadataProvider, hcc);
        // Complete the metadata transaction.
        MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
    } catch (Exception e) {
        abort(e, e, mdTxnCtx);
        throw e;
    } finally {
        metadataProvider.getLocks().reset();
    }
    // Starts another transaction for switching the metadata entity.
    mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
    metadataProvider.setMetadataTxnContext(mdTxnCtx);
    try {
        // Atomically switches the rebalance target to become the source dataset.
        rebalanceSwitch(sourceDataset, targetDataset, metadataProvider, hcc);
        // Complete the metadata transaction.
        MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
    } catch (Exception e) {
        abort(e, e, mdTxnCtx);
        throw e;
    } finally {
        metadataProvider.getLocks().reset();
    }
}
Also used : Dataset(org.apache.asterix.metadata.entities.Dataset) MetadataTransactionContext(org.apache.asterix.metadata.MetadataTransactionContext) AsterixException(org.apache.asterix.common.exceptions.AsterixException) AlgebricksException(org.apache.hyracks.algebricks.common.exceptions.AlgebricksException) HashSet(java.util.HashSet)
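
A minimal sketch of driving a rebalance from caller code. Only the rebalance(...) signature comes from the example above; RebalanceUtil's package, the node controller ids, and the dataverse/dataset names are assumptions.

import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
import org.apache.asterix.metadata.declared.MetadataProvider;
// Assumed package for RebalanceUtil.
import org.apache.asterix.utils.RebalanceUtil;
import org.apache.hyracks.api.client.IHyracksClientConnection;

public final class RebalanceExample {

    private RebalanceExample() {
    }

    // Moves a hypothetical dataset onto two hypothetical node controllers.
    // Per the code above, the call is a no-op if the dataset does not exist
    // or already lives on exactly these nodes.
    public static void rebalanceToTwoNodes(MetadataProvider metadataProvider, IHyracksClientConnection hcc) throws Exception {
        Set<String> targetNcNames = new HashSet<>(Arrays.asList("asterix_nc1", "asterix_nc2"));
        RebalanceUtil.rebalance("MyDataverse", "MyDataset", targetNcNames, metadataProvider, hcc);
    }
}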

Example 59 with Dataset

use of org.apache.asterix.metadata.entities.Dataset in project asterixdb by apache.

the class CheckpointingTest method testDeleteOldLogFiles.

@Test
public void testDeleteOldLogFiles() {
    try {
        TestNodeController nc = new TestNodeController(new File(TEST_CONFIG_FILE_PATH).getAbsolutePath(), false);
        StorageComponentProvider storageManager = new StorageComponentProvider();
        nc.init();
        List<List<String>> partitioningKeys = new ArrayList<>();
        partitioningKeys.add(Collections.singletonList("key"));
        Dataset dataset = new Dataset(DATAVERSE_NAME, DATASET_NAME, DATAVERSE_NAME, DATA_TYPE_NAME, NODE_GROUP_NAME, null, null, new InternalDatasetDetails(null, PartitioningStrategy.HASH, partitioningKeys, null, null, null, false, null, false), null, DatasetType.INTERNAL, DATASET_ID, 0);
        try {
            nc.createPrimaryIndex(dataset, KEY_TYPES, RECORD_TYPE, META_TYPE, new NoMergePolicyFactory(), null, null, storageManager, KEY_INDEXES, KEY_INDICATOR_LIST);
            IHyracksTaskContext ctx = nc.createTestContext(false);
            nc.newJobId();
            ITransactionContext txnCtx = nc.getTransactionManager().getTransactionContext(nc.getTxnJobId(), true);
            // Prepare insert operation
            LSMInsertDeleteOperatorNodePushable insertOp = nc.getInsertPipeline(ctx, dataset, KEY_TYPES, RECORD_TYPE, META_TYPE, new NoMergePolicyFactory(), null, null, KEY_INDEXES, KEY_INDICATOR_LIST, storageManager).getLeft();
            insertOp.open();
            TupleGenerator tupleGenerator = new TupleGenerator(RECORD_TYPE, META_TYPE, KEY_INDEXES, KEY_INDICATOR, RECORD_GEN_FUNCTION, UNIQUE_RECORD_FIELDS, META_GEN_FUNCTION, UNIQUE_META_FIELDS);
            VSizeFrame frame = new VSizeFrame(ctx);
            FrameTupleAppender tupleAppender = new FrameTupleAppender(frame);
            IRecoveryManager recoveryManager = nc.getTransactionSubsystem().getRecoveryManager();
            ICheckpointManager checkpointManager = nc.getTransactionSubsystem().getCheckpointManager();
            LogManager logManager = (LogManager) nc.getTransactionSubsystem().getLogManager();
            // Number of log files after node startup should be one
            int numberOfLogFiles = logManager.getLogFileIds().size();
            Assert.assertEquals(1, numberOfLogFiles);
            // Low-water mark LSN
            long lowWaterMarkLSN = recoveryManager.getMinFirstLSN();
            // Low-water mark log file id
            long initialLowWaterMarkFileId = logManager.getLogFileId(lowWaterMarkLSN);
            // Initial Low-water mark should be in the only available log file
            Assert.assertEquals(initialLowWaterMarkFileId, logManager.getLogFileIds().get(0).longValue());
            // Insert records until a new log file is created
            while (logManager.getLogFileIds().size() == 1) {
                ITupleReference tuple = tupleGenerator.next();
                DataflowUtils.addTupleToFrame(tupleAppender, tuple, insertOp);
            }
            // Check if the new low-water mark is still in the initial low-water mark log file
            lowWaterMarkLSN = recoveryManager.getMinFirstLSN();
            long currentLowWaterMarkLogFileId = logManager.getLogFileId(lowWaterMarkLSN);
            if (currentLowWaterMarkLogFileId == initialLowWaterMarkFileId) {
                /*
                     * Make sure checkpoint will not delete the initial log file since
                     * the low-water mark is still in it (i.e. it is still required for
                     * recovery)
                     */
                int numberOfLogFilesBeforeCheckpoint = logManager.getLogFileIds().size();
                checkpointManager.tryCheckpoint(logManager.getAppendLSN());
                int numberOfLogFilesAfterCheckpoint = logManager.getLogFileIds().size();
                Assert.assertEquals(numberOfLogFilesBeforeCheckpoint, numberOfLogFilesAfterCheckpoint);
                /*
                     * Insert records until the low-water mark is not in the initialLowWaterMarkFileId
                     * either because of the asynchronous flush caused by the previous checkpoint or a flush
                     * due to the dataset memory budget getting full.
                     */
                while (currentLowWaterMarkLogFileId == initialLowWaterMarkFileId) {
                    ITupleReference tuple = tupleGenerator.next();
                    DataflowUtils.addTupleToFrame(tupleAppender, tuple, insertOp);
                    lowWaterMarkLSN = recoveryManager.getMinFirstLSN();
                    currentLowWaterMarkLogFileId = logManager.getLogFileId(lowWaterMarkLSN);
                }
            }
            /*
                 * At this point, the low-water mark is not in the initialLowWaterMarkFileId, so
                 * a checkpoint should delete it.
                 */
            checkpointManager.tryCheckpoint(recoveryManager.getMinFirstLSN());
            // Validate initialLowWaterMarkFileId was deleted
            for (Long fileId : logManager.getLogFileIds()) {
                Assert.assertNotEquals(initialLowWaterMarkFileId, fileId.longValue());
            }
            if (tupleAppender.getTupleCount() > 0) {
                tupleAppender.write(insertOp, true);
            }
            insertOp.close();
            nc.getTransactionManager().completedTransaction(txnCtx, DatasetId.NULL, -1, true);
        } finally {
            nc.deInit();
        }
    } catch (Throwable e) {
        e.printStackTrace();
        Assert.fail(e.getMessage());
    }
}
Also used : LSMInsertDeleteOperatorNodePushable(org.apache.asterix.common.dataflow.LSMInsertDeleteOperatorNodePushable) ICheckpointManager(org.apache.asterix.common.transactions.ICheckpointManager) Dataset(org.apache.asterix.metadata.entities.Dataset) InternalDatasetDetails(org.apache.asterix.metadata.entities.InternalDatasetDetails) ITransactionContext(org.apache.asterix.common.transactions.ITransactionContext) ArrayList(java.util.ArrayList) TupleGenerator(org.apache.asterix.app.data.gen.TupleGenerator) StorageComponentProvider(org.apache.asterix.file.StorageComponentProvider) VSizeFrame(org.apache.hyracks.api.comm.VSizeFrame) NoMergePolicyFactory(org.apache.hyracks.storage.am.lsm.common.impls.NoMergePolicyFactory) IRecoveryManager(org.apache.asterix.common.transactions.IRecoveryManager) IHyracksTaskContext(org.apache.hyracks.api.context.IHyracksTaskContext) TestNodeController(org.apache.asterix.app.bootstrap.TestNodeController) FrameTupleAppender(org.apache.hyracks.dataflow.common.comm.io.FrameTupleAppender) ITupleReference(org.apache.hyracks.dataflow.common.data.accessors.ITupleReference) ArrayList(java.util.ArrayList) List(java.util.List) File(java.io.File) LogManager(org.apache.asterix.transaction.management.service.logging.LogManager) Test(org.junit.Test)
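
The test hinges on one invariant: a checkpoint may delete a log file only after the low-water mark LSN (the smallest LSN still required for recovery) has moved into a newer file. Below is a tiny hypothetical helper, not part of the test, stating that check in code; logManager and recoveryManager stand for the instances obtained from the transaction subsystem above.

// Returns true if the given log file is no longer needed for recovery and may be
// deleted by a checkpoint, i.e. the file holding the low-water mark LSN is newer.
private static boolean isDeletableByCheckpoint(LogManager logManager, IRecoveryManager recoveryManager, long logFileId) throws Exception {
    long lowWaterMarkLSN = recoveryManager.getMinFirstLSN();
    long lowWaterMarkLogFileId = logManager.getLogFileId(lowWaterMarkLSN);
    return logFileId < lowWaterMarkLogFileId;
}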

Example 60 with Dataset

use of org.apache.asterix.metadata.entities.Dataset in project asterixdb by apache.

the class DatasetTupleTranslatorTest method test.

@Test
public void test() throws MetadataException, IOException {
    Integer[] indicators = { 0, 1, null };
    for (Integer indicator : indicators) {
        Map<String, String> compactionPolicyProperties = new HashMap<>();
        compactionPolicyProperties.put("max-mergable-component-size", "1073741824");
        compactionPolicyProperties.put("max-tolerance-component-count", "3");
        InternalDatasetDetails details = new InternalDatasetDetails(FileStructure.BTREE, PartitioningStrategy.HASH, Collections.singletonList(Collections.singletonList("row_id")), Collections.singletonList(Collections.singletonList("row_id")), indicator == null ? null : Collections.singletonList(indicator), Collections.singletonList(BuiltinType.AINT64), false, Collections.emptyList(), false);
        Dataset dataset = new Dataset("test", "log", "foo", "LogType", "CB", "MetaType", "DEFAULT_NG_ALL_NODES", "prefix", compactionPolicyProperties, details, Collections.emptyMap(), DatasetType.INTERNAL, 115, 0);
        DatasetTupleTranslator dtTranslator = new DatasetTupleTranslator(true);
        ITupleReference tuple = dtTranslator.getTupleFromMetadataEntity(dataset);
        Dataset deserializedDataset = dtTranslator.getMetadataEntityFromTuple(tuple);
        Assert.assertEquals(dataset.getMetaItemTypeDataverseName(), deserializedDataset.getMetaItemTypeDataverseName());
        Assert.assertEquals(dataset.getMetaItemTypeName(), deserializedDataset.getMetaItemTypeName());
        if (indicator == null) {
            Assert.assertEquals(Collections.singletonList(new Integer(0)), ((InternalDatasetDetails) deserializedDataset.getDatasetDetails()).getKeySourceIndicator());
        } else {
            Assert.assertEquals(((InternalDatasetDetails) dataset.getDatasetDetails()).getKeySourceIndicator(), ((InternalDatasetDetails) deserializedDataset.getDatasetDetails()).getKeySourceIndicator());
        }
    }
}
Also used : HashMap(java.util.HashMap) Dataset(org.apache.asterix.metadata.entities.Dataset) InternalDatasetDetails(org.apache.asterix.metadata.entities.InternalDatasetDetails) ITupleReference(org.apache.hyracks.dataflow.common.data.accessors.ITupleReference) Test(org.junit.Test)
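
A minimal sketch of the round-trip idiom the test exercises: serialize a Dataset to a metadata tuple and read it back with the same translator before comparing fields. The package of DatasetTupleTranslator is an assumption; the boolean constructor argument mirrors the test above.

import org.apache.asterix.metadata.entities.Dataset;
// Assumed package for the translator.
import org.apache.asterix.metadata.entitytupletranslators.DatasetTupleTranslator;
import org.apache.hyracks.dataflow.common.data.accessors.ITupleReference;

public final class DatasetRoundTrip {

    private DatasetRoundTrip() {
    }

    // Writes the dataset to a metadata tuple and deserializes it back, as the test does
    // before checking the meta item type fields and key source indicators.
    public static Dataset roundTrip(Dataset dataset) throws Exception {
        DatasetTupleTranslator translator = new DatasetTupleTranslator(true);
        ITupleReference tuple = translator.getTupleFromMetadataEntity(dataset);
        return translator.getMetadataEntityFromTuple(tuple);
    }
}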

Aggregations

Dataset (org.apache.asterix.metadata.entities.Dataset) 77
ArrayList (java.util.ArrayList) 33
AlgebricksException (org.apache.hyracks.algebricks.common.exceptions.AlgebricksException) 32
Index (org.apache.asterix.metadata.entities.Index) 25
LogicalVariable (org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable) 23
MetadataException (org.apache.asterix.metadata.MetadataException) 19
ARecordType (org.apache.asterix.om.types.ARecordType) 19
IAType (org.apache.asterix.om.types.IAType) 18
ILogicalExpression (org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression) 18
List (java.util.List) 17
ILogicalOperator (org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator) 16
RemoteException (java.rmi.RemoteException) 15
AsterixException (org.apache.asterix.common.exceptions.AsterixException) 15
MetadataProvider (org.apache.asterix.metadata.declared.MetadataProvider) 15
HyracksDataException (org.apache.hyracks.api.exceptions.HyracksDataException) 15
IOException (java.io.IOException) 14
MetadataTransactionContext (org.apache.asterix.metadata.MetadataTransactionContext) 14
CompilationException (org.apache.asterix.common.exceptions.CompilationException) 13
AlgebricksPartitionConstraint (org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint) 12
ACIDException (org.apache.asterix.common.exceptions.ACIDException) 11