Example 1 with SystemDatasetInstantiator

Use of co.cask.cdap.data.dataset.SystemDatasetInstantiator in project cdap by caskdata.

From class ExploreExecutorHttpHandler, the method doDropPartition:

private void doDropPartition(HttpRequest request, HttpResponder responder, DatasetId datasetId) {
    Dataset dataset;
    try (SystemDatasetInstantiator datasetInstantiator = datasetInstantiatorFactory.create()) {
        dataset = datasetInstantiator.getDataset(datasetId);
        if (dataset == null) {
            responder.sendString(HttpResponseStatus.NOT_FOUND, "Cannot load dataset " + datasetId);
            return;
        }
    } catch (IOException e) {
        String classNotFoundMessage = isClassNotFoundException(e);
        if (classNotFoundMessage != null) {
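            // a class-not-found while instantiating the dataset most likely means the
            // dataset type is no longer available, so there is no partition to drop;
            // respond with a no-op query handle (inference from QueryHandle.NO_OP)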
            JsonObject json = new JsonObject();
            json.addProperty("handle", QueryHandle.NO_OP.getHandle());
            responder.sendJson(HttpResponseStatus.OK, json);
            return;
        }
        LOG.error("Exception instantiating dataset {}.", datasetId, e);
        responder.sendString(HttpResponseStatus.INTERNAL_SERVER_ERROR, "Exception instantiating dataset " + datasetId);
        return;
    }
    try {
        if (!(dataset instanceof PartitionedFileSet)) {
            responder.sendString(HttpResponseStatus.BAD_REQUEST, "not a partitioned dataset.");
            return;
        }
        Partitioning partitioning = ((PartitionedFileSet) dataset).getPartitioning();
        Reader reader = new InputStreamReader(new ChannelBufferInputStream(request.getContent()));
        Map<String, String> properties = GSON.fromJson(reader, new TypeToken<Map<String, String>>() {
        }.getType());
        PartitionKey partitionKey;
        try {
            partitionKey = PartitionedFileSetArguments.getOutputPartitionKey(properties, partitioning);
        } catch (Exception e) {
            responder.sendString(HttpResponseStatus.BAD_REQUEST, "invalid partition key: " + e.getMessage());
            return;
        }
        if (partitionKey == null) {
            responder.sendString(HttpResponseStatus.BAD_REQUEST, "no partition key was given.");
            return;
        }
        QueryHandle handle = exploreTableManager.dropPartition(datasetId, properties, partitionKey);
        JsonObject json = new JsonObject();
        json.addProperty("handle", handle.getHandle());
        responder.sendJson(HttpResponseStatus.OK, json);
    } catch (Throwable e) {
        LOG.error("Got exception:", e);
        responder.sendString(HttpResponseStatus.INTERNAL_SERVER_ERROR, e.getMessage());
    }
}
Also used: InputStreamReader (java.io.InputStreamReader), Dataset (co.cask.cdap.api.dataset.Dataset), JsonObject (com.google.gson.JsonObject), Reader (java.io.Reader), PartitionedFileSet (co.cask.cdap.api.dataset.lib.PartitionedFileSet), IOException (java.io.IOException), BadRequestException (co.cask.cdap.common.BadRequestException), ExploreException (co.cask.cdap.explore.service.ExploreException), SQLException (java.sql.SQLException), DatasetManagementException (co.cask.cdap.api.dataset.DatasetManagementException), JsonSyntaxException (com.google.gson.JsonSyntaxException), UnsupportedTypeException (co.cask.cdap.api.data.schema.UnsupportedTypeException), Partitioning (co.cask.cdap.api.dataset.lib.Partitioning), SystemDatasetInstantiator (co.cask.cdap.data.dataset.SystemDatasetInstantiator), TypeToken (com.google.common.reflect.TypeToken), PartitionKey (co.cask.cdap.api.dataset.lib.PartitionKey), ChannelBufferInputStream (org.jboss.netty.buffer.ChannelBufferInputStream), QueryHandle (co.cask.cdap.proto.QueryHandle)
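The pattern above (open the instantiator in try-with-resources, call getDataset, check for null, handle IOException) recurs throughout these examples. Below is a minimal sketch of it in isolation, using the three-argument constructor from Example 2 and only the calls shown above; loadDataset is an illustrative name, not CDAP API:

private Dataset loadDataset(DatasetFramework dsFramework, DatasetId datasetId) throws IOException {
    // SystemDatasetInstantiator is Closeable; try-with-resources releases any
    // resources it acquired while instantiating datasets
    try (SystemDatasetInstantiator instantiator =
             new SystemDatasetInstantiator(dsFramework, getClass().getClassLoader(), null)) {
        // as in Example 1: may return null if the dataset does not exist, and
        // instantiation failures (such as class loading) surface as IOException
        return instantiator.getDataset(datasetId);
    }
}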

Example 2 with SystemDatasetInstantiator

Use of co.cask.cdap.data.dataset.SystemDatasetInstantiator in project cdap by caskdata.

From class DynamicDatasetCacheTest, the method initCache:

@Before
public void initCache() {
    SystemDatasetInstantiator instantiator = new SystemDatasetInstantiator(dsFramework, getClass().getClassLoader(), null);
    cache = createCache(instantiator, ARGUMENTS, ImmutableMap.of("b", B_ARGUMENTS));
}
Also used: SystemDatasetInstantiator (co.cask.cdap.data.dataset.SystemDatasetInstantiator), Before (org.junit.Before)

Example 3 with SystemDatasetInstantiator

Use of co.cask.cdap.data.dataset.SystemDatasetInstantiator in project cdap by caskdata.

From class LogFileManagerTest, the method testLogFileManager:

@Test
public void testLogFileManager() throws Exception {
    int syncInterval = 1024 * 1024;
    long maxLifeTimeMs = 50;
    long maxFileSizeInBytes = 104857600;
    DatasetManager datasetManager = new DefaultDatasetManager(
        injector.getInstance(DatasetFramework.class), NamespaceId.SYSTEM,
        co.cask.cdap.common.service.RetryStrategies.noRetry(), null);
    Transactional transactional = Transactions.createTransactionalWithRetry(
        Transactions.createTransactional(new MultiThreadDatasetCache(
            new SystemDatasetInstantiator(injector.getInstance(DatasetFramework.class)),
            injector.getInstance(TransactionSystemClient.class), NamespaceId.SYSTEM,
            ImmutableMap.<String, String>of(), null, null)),
        RetryStrategies.retryOnConflict(20, 100));
    FileMetaDataWriter fileMetaDataWriter = new FileMetaDataWriter(datasetManager, transactional);
    LogFileManager logFileManager = new LogFileManager("700", "600", maxLifeTimeMs, maxFileSizeInBytes, syncInterval, fileMetaDataWriter, injector.getInstance(LocationFactory.class));
    LogPathIdentifier logPathIdentifier = new LogPathIdentifier("test", "testApp", "testFlow");
    long timestamp = System.currentTimeMillis();
    LogFileOutputStream outputStream = logFileManager.getLogFileOutputStream(logPathIdentifier, timestamp);
    LoggingEvent event1 = getLoggingEvent("co.cask.Test1", (ch.qos.logback.classic.Logger) LoggerFactory.getLogger(Logger.ROOT_LOGGER_NAME), Level.ERROR, "test message 1");
    outputStream.append(event1);
    // check the active output stream instead of calling getLogFileOutputStream again,
    // to avoid a race if the test machine is slow
    Assert.assertNotNull((logFileManager.getActiveOutputStream(logPathIdentifier)));
    TimeUnit.MILLISECONDS.sleep(60);
    logFileManager.flush();
    // the stream's max lifetime has elapsed, so flush should close it and
    // getActiveOutputStream should return null
    Assert.assertNull((logFileManager.getActiveOutputStream(logPathIdentifier)));
    LogFileOutputStream newLogOutStream = logFileManager.getLogFileOutputStream(logPathIdentifier, timestamp);
    // make sure the new location we got is different
    Assert.assertNotEquals(outputStream.getLocation(), newLogOutStream.getLocation());
}
Also used: FileMetaDataWriter (co.cask.cdap.logging.meta.FileMetaDataWriter), DefaultDatasetManager (co.cask.cdap.data2.datafabric.dataset.DefaultDatasetManager), DatasetManager (co.cask.cdap.api.dataset.DatasetManager), LocationFactory (org.apache.twill.filesystem.LocationFactory), DatasetFramework (co.cask.cdap.data2.dataset2.DatasetFramework), LoggingEvent (ch.qos.logback.classic.spi.LoggingEvent), TransactionSystemClient (org.apache.tephra.TransactionSystemClient), MultiThreadDatasetCache (co.cask.cdap.data2.dataset2.MultiThreadDatasetCache), SystemDatasetInstantiator (co.cask.cdap.data.dataset.SystemDatasetInstantiator), Transactional (co.cask.cdap.api.Transactional), Test (org.junit.Test)
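Examples 3 and 4 build an identical Transactional by wrapping a MultiThreadDatasetCache around a SystemDatasetInstantiator. As a hedged sketch, that shared wiring can be factored into a helper; the method name createSystemTransactional is illustrative, Injector is assumed to be the Guice injector the tests use, and every call inside appears verbatim in the two examples:

private Transactional createSystemTransactional(Injector injector) {
    return Transactions.createTransactionalWithRetry(
        // the cache instantiates system datasets on demand, per thread
        Transactions.createTransactional(new MultiThreadDatasetCache(
            new SystemDatasetInstantiator(injector.getInstance(DatasetFramework.class)),
            injector.getInstance(TransactionSystemClient.class), NamespaceId.SYSTEM,
            ImmutableMap.<String, String>of(), null, null)),
        // retry on transaction conflicts, with the same arguments as Examples 3 and 4
        RetryStrategies.retryOnConflict(20, 100));
}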

Example 4 with SystemDatasetInstantiator

Use of co.cask.cdap.data.dataset.SystemDatasetInstantiator in project cdap by caskdata.

From class FileMetadataTest, the method testFileMetadataReadWrite:

@Test
public void testFileMetadataReadWrite() throws Exception {
    DatasetFramework datasetFramework = injector.getInstance(DatasetFramework.class);
    DatasetManager datasetManager = new DefaultDatasetManager(
        datasetFramework, NamespaceId.SYSTEM,
        co.cask.cdap.common.service.RetryStrategies.noRetry(), null);
    Transactional transactional = Transactions.createTransactionalWithRetry(
        Transactions.createTransactional(new MultiThreadDatasetCache(
            new SystemDatasetInstantiator(datasetFramework),
            injector.getInstance(TransactionSystemClient.class), NamespaceId.SYSTEM,
            ImmutableMap.<String, String>of(), null, null)),
        RetryStrategies.retryOnConflict(20, 100));
    FileMetaDataWriter fileMetaDataWriter = new FileMetaDataWriter(datasetManager, transactional);
    LogPathIdentifier logPathIdentifier = new LogPathIdentifier(NamespaceId.DEFAULT.getNamespace(), "testApp", "testFlow");
    LocationFactory locationFactory = injector.getInstance(LocationFactory.class);
    Location location = locationFactory.create(TMP_FOLDER.newFolder().getPath()).append("/logs");
    long currentTime = System.currentTimeMillis();
    for (int i = 10; i <= 100; i += 10) {
        // i is the event time
        fileMetaDataWriter.writeMetaData(logPathIdentifier, i, currentTime, location.append(Integer.toString(i)));
    }
    // for event time 80, write two more meta entries for the same log path id
    // with newer current times
    fileMetaDataWriter.writeMetaData(logPathIdentifier, 80, currentTime + 1, location.append("81"));
    fileMetaDataWriter.writeMetaData(logPathIdentifier, 80, currentTime + 2, location.append("82"));
    // reader test
    FileMetaDataReader fileMetadataReader = injector.getInstance(FileMetaDataReader.class);
    Assert.assertEquals(12, fileMetadataReader.listFiles(logPathIdentifier, 0, 100).size());
    Assert.assertEquals(5, fileMetadataReader.listFiles(logPathIdentifier, 20, 50).size());
    Assert.assertEquals(2, fileMetadataReader.listFiles(logPathIdentifier, 100, 150).size());
    // should include the latest file with event start time 80.
    List<LogLocation> locationList = fileMetadataReader.listFiles(logPathIdentifier, 81, 85);
    Assert.assertEquals(1, locationList.size());
    Assert.assertEquals(80, locationList.get(0).getEventTimeMs());
    Assert.assertEquals(location.append("82"), locationList.get(0).getLocation());
    Assert.assertEquals(1, fileMetadataReader.listFiles(logPathIdentifier, 150, 1000).size());
}
Also used: FileMetaDataWriter (co.cask.cdap.logging.meta.FileMetaDataWriter), DefaultDatasetManager (co.cask.cdap.data2.datafabric.dataset.DefaultDatasetManager), DatasetManager (co.cask.cdap.api.dataset.DatasetManager), LocationFactory (org.apache.twill.filesystem.LocationFactory), DatasetFramework (co.cask.cdap.data2.dataset2.DatasetFramework), TransactionSystemClient (org.apache.tephra.TransactionSystemClient), MultiThreadDatasetCache (co.cask.cdap.data2.dataset2.MultiThreadDatasetCache), SystemDatasetInstantiator (co.cask.cdap.data.dataset.SystemDatasetInstantiator), LogLocation (co.cask.cdap.logging.write.LogLocation), LogPathIdentifier (co.cask.cdap.logging.appender.system.LogPathIdentifier), FileMetaDataReader (co.cask.cdap.logging.meta.FileMetaDataReader), Transactional (co.cask.cdap.api.Transactional), Location (org.apache.twill.filesystem.Location), Test (org.junit.Test)

Example 5 with SystemDatasetInstantiator

Use of co.cask.cdap.data.dataset.SystemDatasetInstantiator in project cdap by caskdata.

From class DatasetSerDe, the method getDatasetSchema:

private void getDatasetSchema(Configuration conf, DatasetId datasetId) throws SerDeException {
    try (ContextManager.Context hiveContext = ContextManager.getContext(conf)) {
        // the context can be null when Hive provides a null conf, which happens
        // when Hive calls initialize just to get the object inspector
        if (hiveContext == null) {
            LOG.info("Hive provided a null conf, will not be able to get dataset schema.");
            return;
        }
        // some datasets, such as Table and ObjectMappedTable, store their schema in the dataset properties
        try {
            DatasetSpecification datasetSpec = hiveContext.getDatasetSpec(datasetId);
            String schemaStr = datasetSpec.getProperty("schema");
            if (schemaStr != null) {
                schema = Schema.parseJson(schemaStr);
                return;
            }
        } catch (DatasetManagementException | ServiceUnavailableException e) {
            throw new SerDeException("Could not instantiate dataset " + datasetId, e);
        } catch (IOException e) {
            throw new SerDeException("Exception getting schema for dataset " + datasetId, e);
        }
        // other datasets must be instantiated to get their schema
        // conf is null if this is a query that writes to a dataset
        ClassLoader parentClassLoader = conf == null ? null : conf.getClassLoader();
        try (SystemDatasetInstantiator datasetInstantiator = hiveContext.createDatasetInstantiator(parentClassLoader)) {
            Dataset dataset = datasetInstantiator.getDataset(datasetId);
            if (dataset == null) {
                throw new SerDeException("Could not find dataset " + datasetId);
            }
            Type recordType;
            if (dataset instanceof RecordScannable) {
                recordType = ((RecordScannable) dataset).getRecordType();
            } else if (dataset instanceof RecordWritable) {
                recordType = ((RecordWritable) dataset).getRecordType();
            } else {
                throw new SerDeException("Dataset " + datasetId + " is not explorable.");
            }
            schema = schemaGenerator.generate(recordType);
        } catch (UnsupportedTypeException e) {
            throw new SerDeException("Dataset " + datasetId + " has an unsupported schema.", e);
        } catch (IOException e) {
            throw new SerDeException("Exception while trying to instantiate dataset " + datasetId, e);
        }
    } catch (IOException e) {
        throw new SerDeException("Could not get hive context from configuration.", e);
    }
}
Also used: RecordWritable (co.cask.cdap.api.data.batch.RecordWritable), Dataset (co.cask.cdap.api.dataset.Dataset), DatasetSpecification (co.cask.cdap.api.dataset.DatasetSpecification), ServiceUnavailableException (co.cask.cdap.common.ServiceUnavailableException), IOException (java.io.IOException), RecordScannable (co.cask.cdap.api.data.batch.RecordScannable), DatasetManagementException (co.cask.cdap.api.dataset.DatasetManagementException), Type (java.lang.reflect.Type), SystemDatasetInstantiator (co.cask.cdap.data.dataset.SystemDatasetInstantiator), ContextManager (co.cask.cdap.hive.context.ContextManager), UnsupportedTypeException (co.cask.cdap.api.data.schema.UnsupportedTypeException), SerDeException (org.apache.hadoop.hive.serde2.SerDeException)
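The explorability check in this example reduces to a dispatch on the two record interfaces. A minimal sketch extracting it, with the helper name recordTypeOf as an illustrative choice and every call taken from the example above:

private static Type recordTypeOf(Dataset dataset, DatasetId datasetId) throws SerDeException {
    // RecordScannable datasets are readable by Explore and RecordWritable ones are
    // writable; both expose the record type from which the schema is generated
    if (dataset instanceof RecordScannable) {
        return ((RecordScannable) dataset).getRecordType();
    }
    if (dataset instanceof RecordWritable) {
        return ((RecordWritable) dataset).getRecordType();
    }
    throw new SerDeException("Dataset " + datasetId + " is not explorable.");
}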

Aggregations

SystemDatasetInstantiator (co.cask.cdap.data.dataset.SystemDatasetInstantiator): 20
DatasetFramework (co.cask.cdap.data2.dataset2.DatasetFramework): 10
MultiThreadDatasetCache (co.cask.cdap.data2.dataset2.MultiThreadDatasetCache): 9
IOException (java.io.IOException): 8
TransactionSystemClient (org.apache.tephra.TransactionSystemClient): 8
Test (org.junit.Test): 8
Transactional (co.cask.cdap.api.Transactional): 7
Dataset (co.cask.cdap.api.dataset.Dataset): 7
DatasetManager (co.cask.cdap.api.dataset.DatasetManager): 7
DefaultDatasetManager (co.cask.cdap.data2.datafabric.dataset.DefaultDatasetManager): 7
FileMetaDataWriter (co.cask.cdap.logging.meta.FileMetaDataWriter): 7
LocationFactory (org.apache.twill.filesystem.LocationFactory): 7
DatasetManagementException (co.cask.cdap.api.dataset.DatasetManagementException): 5
LogPathIdentifier (co.cask.cdap.logging.appender.system.LogPathIdentifier): 5
Location (org.apache.twill.filesystem.Location): 5
UnsupportedTypeException (co.cask.cdap.api.data.schema.UnsupportedTypeException): 4
PartitionedFileSet (co.cask.cdap.api.dataset.lib.PartitionedFileSet): 4
FileMetaDataReader (co.cask.cdap.logging.meta.FileMetaDataReader): 4
LogLocation (co.cask.cdap.logging.write.LogLocation): 4
PartitionKey (co.cask.cdap.api.dataset.lib.PartitionKey): 3