Use of io.cdap.cdap.data2.dataset2.lib.table.Update in project cdap by caskdata.
The class DatasetAdminService, method createOrUpdate.
/**
* Configures and creates (or updates) a dataset
*
* @param datasetInstanceId dataset instance to be created
* @param typeMeta type meta for the dataset
* @param props dataset instance properties
* @param existing the existing specification, if the dataset already exists (in case of an update)
* @return dataset specification
*/
public DatasetCreationResponse createOrUpdate(final DatasetId datasetInstanceId, final DatasetTypeMeta typeMeta, final DatasetProperties props, @Nullable final DatasetSpecification existing) throws Exception {
if (existing == null) {
LOG.info("Creating dataset instance {}, type meta: {}", datasetInstanceId, typeMeta);
} else {
LOG.info("Updating dataset instance {}, type meta: {}, existing: {}", datasetInstanceId, typeMeta, existing);
}
try (DatasetClassLoaderProvider classLoaderProvider = new DirectoryClassLoaderProvider(cConf, locationFactory)) {
final DatasetContext context = DatasetContext.from(datasetInstanceId.getNamespace());
UserGroupInformation ugi = getUgiForDataset(impersonator, datasetInstanceId);
final DatasetType type = ImpersonationUtils.doAs(ugi, () -> {
LOG.trace("Getting dataset type {}", typeMeta.getName());
DatasetType type1 = dsFramework.getDatasetType(typeMeta, null, classLoaderProvider);
if (type1 == null) {
throw new BadRequestException(String.format("Cannot instantiate dataset type using provided type meta: %s", typeMeta));
}
LOG.trace("Got dataset type {}", typeMeta.getName());
return type1;
});
DatasetSpecification spec = ImpersonationUtils.doAs(ugi, () -> {
LOG.trace("Configuring dataset {} of type {}", datasetInstanceId.getDataset(), typeMeta.getName());
DatasetSpecification spec1 = existing == null ? type.configure(datasetInstanceId.getEntityName(), props) : type.reconfigure(datasetInstanceId.getEntityName(), props, existing);
LOG.trace("Configured dataset {} of type {}", datasetInstanceId.getDataset(), typeMeta.getName());
DatasetAdmin admin = type.getAdmin(context, spec1);
try {
if (existing != null) {
if (admin instanceof Updatable) {
((Updatable) admin).update(existing);
} else {
admin.upgrade();
}
} else {
LOG.trace("Creating dataset {} of type {}", datasetInstanceId.getDataset(), typeMeta.getName());
admin.create();
LOG.trace("Created dataset {} of type {}", datasetInstanceId.getDataset(), typeMeta.getName());
}
} finally {
Closeables.closeQuietly(admin);
}
return spec1;
});
// Writing system metadata should be done without impersonation since user may not have access to system tables.
LOG.trace("Computing metadata for dataset {}", datasetInstanceId.getDataset());
SystemMetadata metadata = computeSystemMetadata(datasetInstanceId, spec, props, typeMeta, type, context, existing != null, ugi);
LOG.trace("Computed metadata for dataset {}", datasetInstanceId.getDataset());
return new DatasetCreationResponse(spec, metadata);
} catch (Exception e) {
if (e instanceof IncompatibleUpdateException) {
// this is expected to happen if user provides bad update properties, so we log this as debug
LOG.debug("Incompatible update for dataset '{}'", datasetInstanceId, e);
} else {
LOG.error("Error {} dataset '{}': {}", existing == null ? "creating" : "updating", datasetInstanceId, e.getMessage(), e);
}
throw e;
}
}
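The update-versus-upgrade branch above is driven by whether the dataset's admin implements Updatable. Below is a minimal sketch, not actual CDAP code: ExampleDatasetAdmin and its behavior are hypothetical, and the DatasetAdmin/Updatable method signatures are assumed from cdap-api as used in the snippet.

import java.io.IOException;
import io.cdap.cdap.api.dataset.DatasetAdmin;
import io.cdap.cdap.api.dataset.DatasetSpecification;
import io.cdap.cdap.api.dataset.Updatable;

public class ExampleDatasetAdmin implements DatasetAdmin, Updatable {

  private final DatasetSpecification spec;

  public ExampleDatasetAdmin(DatasetSpecification spec) {
    this.spec = spec;
  }

  @Override
  public void update(DatasetSpecification oldSpec) throws IOException {
    // createOrUpdate() calls this (instead of upgrade()) when an existing spec is present,
    // passing the old spec so storage can be reconciled with the reconfigured one
  }

  @Override
  public void upgrade() throws IOException {
    // fallback taken by createOrUpdate() when an admin does not implement Updatable
  }

  @Override
  public boolean exists() throws IOException { return true; }

  @Override
  public void create() throws IOException { /* provision underlying storage */ }

  @Override
  public void drop() throws IOException { /* remove underlying storage */ }

  @Override
  public void truncate() throws IOException { /* delete all data, keep the dataset */ }

  @Override
  public void close() throws IOException {
    // nothing to release in this sketch
  }
}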
Use of io.cdap.cdap.data2.dataset2.lib.table.Update in project cdap by caskdata.
The class DatasetTypeManager, method addModule.
/**
* Adds a dataset module in a namespace
*
* @param datasetModuleId the {@link DatasetModuleId} to add
* @param className module class
* @param jarLocation location of the module jar
* @param force if true, an update will be allowed even if there are conflicts with other modules, or if
* removal of a type would break other modules' dependencies.
*/
public void addModule(final DatasetModuleId datasetModuleId, final String className, final Location jarLocation, final boolean force) throws DatasetModuleConflictException {
LOG.debug("adding module: {}, className: {}, jarLocation: {}", datasetModuleId, className, jarLocation == null ? "[local]" : jarLocation);
try {
TransactionRunners.run(transactionRunner, context -> {
final DatasetTypeTable datasetTypeTable = DatasetTypeTable.create(context);
final DatasetInstanceTable datasetInstanceTable = new DatasetInstanceTable(context);
// 1. get existing module with all its types
DatasetModuleMeta existing = datasetTypeTable.getModule(datasetModuleId);
DependencyTrackingRegistry reg;
// 2. unpack jar and create class loader
ClassLoaderFolder classLoaderFolder = null;
DirectoryClassLoader cl = null;
try {
// NOTE: if jarLocation is null, we assume that this is a system module, i.e. always present in the classpath
if (jarLocation != null) {
classLoaderFolder = BundleJarUtil.prepareClassLoaderFolder(jarLocation, () -> Files.createTempDirectory(Files.createDirectories(systemTempPath), datasetModuleId.getEntityName()).toFile());
cl = new DirectoryClassLoader(classLoaderFolder.getDir(), cConf.get(Constants.AppFabric.PROGRAM_EXTRA_CLASSPATH), FilterClassLoader.create(getClass().getClassLoader()), "lib");
}
reg = new DependencyTrackingRegistry(datasetModuleId, datasetTypeTable, cl, force);
// 3. register the new module while tracking dependencies.
// this will fail if a type exists in a different module
DatasetDefinitionRegistries.register(className, cl, reg);
} catch (TypeConflictException e) {
// type conflict from the registry, we want to throw that as is
throw e;
} catch (Exception e) {
LOG.error("Could not instantiate instance of dataset module class {} for module {} using jarLocation {}", className, datasetModuleId, jarLocation);
throw Throwables.propagate(e);
} finally {
// Close the ProgramClassLoader
Closeables.closeQuietly(cl);
Closeables.closeQuietly(classLoaderFolder);
}
// 4. determine whether any types were removed from the module, and whether any other modules depend on them
if (existing != null) {
Set<String> removedTypes = new HashSet<>(existing.getTypes());
removedTypes.removeAll(reg.getTypes());
// TODO (CDAP-6294): track dependencies at the type level
if (!force && !removedTypes.isEmpty() && !existing.getUsedByModules().isEmpty()) {
throw new DatasetModuleConflictException(String.format("Cannot update module '%s' to remove types %s: Modules %s may depend on it. Delete them first", datasetModuleId, removedTypes, existing.getUsedByModules()));
}
Collection<DatasetSpecification> instances = datasetInstanceTable.getByTypes(datasetModuleId.getParent(), removedTypes);
if (!instances.isEmpty()) {
throw new DatasetModuleConflictException(String.format("Attempt to remove dataset types %s from module '%s' that have existing instances: %s. " + "Delete them first.", removedTypes, datasetModuleId, instances.stream().map(input -> input.getName() + ":" + input.getType()).collect(Collectors.joining(", "))));
}
}
// NOTE: we use set to avoid duplicated dependencies
// NOTE: we use LinkedHashSet to preserve order in which dependencies must be loaded
Set<String> moduleDependencies = new LinkedHashSet<String>();
for (DatasetTypeId usedType : reg.getUsedTypes()) {
DatasetModuleMeta usedModule = datasetTypeTable.getModuleByType(usedType);
if (usedModule == null) {
throw new IllegalStateException(String.format("Found a null used module for type %s while adding module %s", usedType, datasetModuleId));
}
// adding all used types and the module itself, in this very order to keep the order of loading modules
// for instantiating a type
moduleDependencies.addAll(usedModule.getUsesModules());
boolean added = moduleDependencies.add(usedModule.getName());
if (added) {
// also adding this module as a dependent for all modules it uses
usedModule.addUsedByModule(datasetModuleId.getEntityName());
datasetTypeTable.writeModule(usedType.getParent(), usedModule);
}
}
URI jarURI = jarLocation == null ? null : jarLocation.toURI();
DatasetModuleMeta moduleMeta = existing == null ? new DatasetModuleMeta(datasetModuleId.getEntityName(), className, jarURI, reg.getTypes(), Lists.newArrayList(moduleDependencies)) : new DatasetModuleMeta(datasetModuleId.getEntityName(), className, jarURI, reg.getTypes(), Lists.newArrayList(moduleDependencies), Lists.newArrayList(existing.getUsedByModules()));
datasetTypeTable.writeModule(datasetModuleId.getParent(), moduleMeta);
});
} catch (RuntimeException e) {
for (Throwable cause : Throwables.getCausalChain(e)) {
if (cause instanceof DatasetModuleConflictException) {
throw (DatasetModuleConflictException) cause;
} else if (cause instanceof TypeConflictException) {
throw new DatasetModuleConflictException(cause.getMessage(), cause);
}
}
throw Throwables.propagate(e);
} catch (Exception e) {
LOG.error("Operation failed", e);
throw Throwables.propagate(e);
}
}
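For context, the modules this method registers are classes implementing the CDAP DatasetModule interface. The sketch below is hypothetical (the class name, the "table" type name, and ExampleDatasetDefinition are illustrative, and the DatasetDefinitionRegistry get/add signatures are assumptions based on cdap-api); it shows what the DependencyTrackingRegistry above observes while DatasetDefinitionRegistries.register() runs.

import io.cdap.cdap.api.dataset.DatasetDefinition;
import io.cdap.cdap.api.dataset.module.DatasetDefinitionRegistry;
import io.cdap.cdap.api.dataset.module.DatasetModule;

public class ExampleDatasetModule implements DatasetModule {

  @Override
  public void register(DatasetDefinitionRegistry registry) {
    // each lookup of an existing type is recorded by DependencyTrackingRegistry as a "used type";
    // addModule() turns those into module dependencies plus used-by back references
    DatasetDefinition<?, ?> tableDef = registry.get("table");

    // each definition added here becomes a type owned by this module (reg.getTypes()),
    // and ends up in the DatasetModuleMeta written at the end of addModule():
    // registry.add(new ExampleDatasetDefinition("exampleType", tableDef));
  }
}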
Use of io.cdap.cdap.data2.dataset2.lib.table.Update in project cdap by caskdata.
The class AbstractHBaseDataSetAdmin, method updateTable.
/**
* Performs update on a given HBase table. It will be updated if either its spec has
* changed since the HBase table was created or updated, or if the CDAP version recorded
* in the HTable descriptor is less than the current CDAP version.
*
* @param force forces update regardless of whether the table needs it.
* @throws IOException If update failed.
*/
public void updateTable(boolean force) throws IOException {
try (HBaseDDLExecutor ddlExecutor = ddlExecutorFactory.get()) {
HTableDescriptor tableDescriptor;
HTableDescriptorBuilder newDescriptor;
try (HBaseAdmin admin = new HBaseAdmin(hConf)) {
tableDescriptor = tableUtil.getHTableDescriptor(admin, tableId);
// create a new descriptor for the table update
newDescriptor = tableUtil.buildHTableDescriptor(tableDescriptor);
}
// update any table properties if necessary
boolean needUpdate = needsUpdate(tableDescriptor, newDescriptor) || force;
// Get the cdap version from the table
ProjectInfo.Version version = HBaseTableUtil.getVersion(tableDescriptor);
String hbaseVersion = HBaseTableUtil.getHBaseVersion(tableDescriptor);
if (!needUpdate && hbaseVersion != null && hbaseVersion.equals(HBaseVersion.getVersionString()) && version.compareTo(ProjectInfo.getVersion()) >= 0) {
// If neither the table spec nor the cdap version have changed, no need to update
LOG.info("Table '{}' has not changed and its version '{}' is same or greater " + "than current CDAP version '{}'. The underlying HBase version {} has also not changed.", tableId, version, ProjectInfo.getVersion(), hbaseVersion);
return;
}
// Generate the coprocessor jar
CoprocessorJar coprocessorJar = createCoprocessorJar();
Location jarLocation = coprocessorJar.getJarLocation();
// Check if coprocessor upgrade is needed
Map<String, HBaseTableUtil.CoprocessorInfo> coprocessorInfo = HBaseTableUtil.getCoprocessorInfo(tableDescriptor);
// For all required coprocessors, check if they need to be upgraded.
for (Class<? extends Coprocessor> coprocessor : coprocessorJar.getCoprocessors()) {
HBaseTableUtil.CoprocessorInfo info = coprocessorInfo.get(coprocessor.getName());
if (info != null) {
// The same coprocessor has been configured, check by the file name to see if they are the same.
if (!jarLocation.getName().equals(info.getPath().getName())) {
// Remove old one and add the new one.
newDescriptor.removeCoprocessor(info.getClassName());
addCoprocessor(newDescriptor, coprocessor, coprocessorJar.getPriority(coprocessor));
}
} else {
// The coprocessor is missing from the table, add it.
addCoprocessor(newDescriptor, coprocessor, coprocessorJar.getPriority(coprocessor));
}
}
// Removes all old coprocessors
Set<String> coprocessorNames = ImmutableSet.copyOf(Iterables.transform(coprocessorJar.coprocessors, CLASS_TO_NAME));
for (String remove : Sets.difference(coprocessorInfo.keySet(), coprocessorNames)) {
newDescriptor.removeCoprocessor(remove);
}
HBaseTableUtil.setVersion(newDescriptor);
HBaseTableUtil.setHBaseVersion(newDescriptor);
HBaseTableUtil.setTablePrefix(newDescriptor, cConf);
LOG.info("Updating table '{}'...", tableId);
TableName tableName = HTableNameConverter.toTableName(cConf.get(Constants.Dataset.TABLE_PREFIX), tableId);
boolean enableTable = false;
try {
ddlExecutor.disableTableIfEnabled(tableName.getNamespaceAsString(), tableName.getQualifierAsString());
enableTable = true;
} catch (TableNotEnabledException e) {
// If the table is in the cdap_system namespace, enable it regardless so that it can be used later. See CDAP-7324
if (isSystemTable()) {
enableTable = true;
} else {
LOG.debug("Table '{}' was not enabled before update and will not be enabled after update.", tableId);
}
}
tableUtil.modifyTable(ddlExecutor, newDescriptor.build());
if (enableTable) {
LOG.debug("Enabling table '{}'...", tableId);
ddlExecutor.enableTableIfDisabled(tableName.getNamespaceAsString(), tableName.getQualifierAsString());
}
}
LOG.info("Table '{}' update completed.", tableId);
}
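The coprocessor reconciliation in the middle of this method can be summarized as a plain set/map diff. The stand-alone sketch below (class and parameter names invented for illustration, no HBase types involved) restates that logic: required-but-missing coprocessors are added, configured-but-no-longer-required ones are removed, and a required one whose jar file name changed is replaced.

import java.util.HashSet;
import java.util.Map;
import java.util.Set;

final class CoprocessorDiff {

  /**
   * @param configured coprocessor class name -> jar file name currently on the table
   * @param required   coprocessor class name -> jar file name from the freshly built jar
   * @param toRemove   receives class names to remove from the descriptor
   * @param toAdd      receives class name -> jar file name pairs to add to the descriptor
   */
  static void reconcile(Map<String, String> configured, Map<String, String> required,
                        Set<String> toRemove, Map<String, String> toAdd) {
    for (Map.Entry<String, String> req : required.entrySet()) {
      String existingJar = configured.get(req.getKey());
      if (existingJar == null) {
        // coprocessor missing from the table: add it
        toAdd.put(req.getKey(), req.getValue());
      } else if (!existingJar.equals(req.getValue())) {
        // same coprocessor configured against an older jar: replace it
        toRemove.add(req.getKey());
        toAdd.put(req.getKey(), req.getValue());
      }
    }
    // anything configured that is no longer required gets removed
    Set<String> obsolete = new HashSet<>(configured.keySet());
    obsolete.removeAll(required.keySet());
    toRemove.addAll(obsolete);
  }
}

In updateTable() above the same decisions are applied directly to the HTableDescriptorBuilder through addCoprocessor() and removeCoprocessor().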
Use of io.cdap.cdap.data2.dataset2.lib.table.Update in project cdap by caskdata.
The class InMemoryTableServiceTest, method testInternalsNotLeaking.
@Test
public void testInternalsNotLeaking() {
// Test that there's no way to break the state of InMemoryTableService by changing parameters of update
// methods (after method invocation) or by changing returned values "in-place"
InMemoryTableService.create("table");
// verify writing thru merge is guarded
NavigableMap<byte[], NavigableMap<byte[], Update>> updates = Maps.newTreeMap(Bytes.BYTES_COMPARATOR);
NavigableMap<byte[], Update> rowUpdate = Maps.newTreeMap(Bytes.BYTES_COMPARATOR);
byte[] rowParam = new byte[] { 1 };
byte[] columnParam = new byte[] { 2 };
byte[] valParam = new byte[] { 3 };
rowUpdate.put(columnParam, new PutValue(valParam));
updates.put(rowParam, rowUpdate);
InMemoryTableService.merge("table", updates, 1L);
verify123();
updates.remove(rowParam);
rowUpdate.remove(columnParam);
rowParam[0]++;
columnParam[0]++;
valParam[0]++;
verify123();
// verify changing returned data from get doesn't affect the stored data
NavigableMap<byte[], NavigableMap<Long, byte[]>> rowFromGet = InMemoryTableService.get("table", new byte[] { 1 }, new Transaction(1L, 2L, new long[0], new long[0], 1L));
Assert.assertEquals(1, rowFromGet.size());
byte[] columnFromGet = rowFromGet.firstEntry().getKey();
Assert.assertArrayEquals(new byte[] { 2 }, columnFromGet);
byte[] valFromGet = rowFromGet.firstEntry().getValue().get(1L);
Assert.assertArrayEquals(new byte[] { 3 }, valFromGet);
rowFromGet.firstEntry().getValue().remove(1L);
rowFromGet.remove(columnFromGet);
columnFromGet[0]++;
valFromGet[0]++;
verify123();
// verify changing data returned from getRowRange doesn't affect the stored data
NavigableMap<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>> fromGetRange = InMemoryTableService.getRowRange("table", null, null, new Transaction(1L, 2L, new long[0], new long[0], 1L));
Assert.assertEquals(1, fromGetRange.size());
byte[] keyFromGetRange = fromGetRange.firstEntry().getKey();
Assert.assertArrayEquals(new byte[] { 1 }, keyFromGetRange);
NavigableMap<byte[], NavigableMap<Long, byte[]>> rowFromGetRange = fromGetRange.get(new byte[] { 1 });
Assert.assertEquals(1, rowFromGetRange.size());
byte[] columnFromGetRange = rowFromGetRange.firstEntry().getKey();
Assert.assertArrayEquals(new byte[] { 2 }, columnFromGetRange);
byte[] valFromGetRange = rowFromGetRange.firstEntry().getValue().get(1L);
Assert.assertArrayEquals(new byte[] { 3 }, valFromGetRange);
rowFromGetRange.firstEntry().getValue().remove(1L);
rowFromGetRange.remove(columnFromGetRange);
fromGetRange.remove(keyFromGetRange);
keyFromGetRange[0]++;
columnFromGetRange[0]++;
valFromGetRange[0]++;
verify123();
}
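What this test asserts is a copy-on-boundary discipline: the table must copy byte arrays both when applying updates and when returning data. The sketch below is not the InMemoryTableService implementation, just a minimal illustration of that discipline with made-up class and method names.

import java.util.Arrays;
import java.util.Map;
import java.util.NavigableMap;
import java.util.TreeMap;

final class CopyOnBoundaryStore {

  private final NavigableMap<Long, byte[]> store = new TreeMap<>();

  void merge(long version, byte[] value) {
    // copy on the way in: later mutation of the caller's array cannot change stored state
    store.put(version, Arrays.copyOf(value, value.length));
  }

  NavigableMap<Long, byte[]> get() {
    // copy on the way out: mutating the returned map or arrays cannot change stored state
    NavigableMap<Long, byte[]> copy = new TreeMap<>();
    for (Map.Entry<Long, byte[]> entry : store.entrySet()) {
      copy.put(entry.getKey(), Arrays.copyOf(entry.getValue(), entry.getValue().length));
    }
    return copy;
  }
}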
Use of io.cdap.cdap.data2.dataset2.lib.table.Update in project cdap by caskdata.
The class FactTable, method add.
public void add(List<Fact> facts) {
// Simply collecting all rows/cols/values that need to be put to the underlying table.
NavigableMap<byte[], NavigableMap<byte[], Long>> gaugesTable = Maps.newTreeMap(Bytes.BYTES_COMPARATOR);
NavigableMap<byte[], NavigableMap<byte[], Long>> incrementsTable = Maps.newTreeMap(Bytes.BYTES_COMPARATOR);
// this map is used to store metrics that were of COUNTER type but can be considered as GAUGE, which means each is
// guaranteed to be a new row key in the underlying table.
NavigableMap<byte[], NavigableMap<byte[], Long>> incGaugeTable = Maps.newTreeMap(Bytes.BYTES_COMPARATOR);
// this map is used to store the updated timestamp for the cache
Map<FactCacheKey, Long> cacheUpdates = new HashMap<>();
for (Fact fact : facts) {
for (Measurement measurement : fact.getMeasurements()) {
byte[] rowKey = codec.createRowKey(fact.getDimensionValues(), measurement.getName(), fact.getTimestamp());
byte[] column = codec.createColumn(fact.getTimestamp());
if (MeasureType.COUNTER == measurement.getType()) {
if (factCounterCache != null) {
// round to the resolution timestamp
long tsToResolution = fact.getTimestamp() / resolution * resolution;
FactCacheKey cacheKey = new FactCacheKey(fact.getDimensionValues(), measurement.getName());
Long existingTs = factCounterCache.getIfPresent(cacheKey);
// if the cache holds an equal or newer timestamp for this series (or no entry yet), the value
// cannot be considered as a gauge, and we should update the incrementsTable
if (existingTs == null || existingTs >= tsToResolution) {
inc(incrementsTable, rowKey, column, measurement.getValue());
} else {
// the current ts is greater than the existing ts, so this metric can be treated as newly seen
// for this resolution and written as a gauge
inc(incGaugeTable, rowKey, column, measurement.getValue());
}
// cache the timestamp if there is no entry yet or the current ts is newer
if (existingTs == null || existingTs < tsToResolution) {
cacheUpdates.compute(cacheKey, (key, oldValue) -> oldValue == null || tsToResolution > oldValue ? tsToResolution : oldValue);
}
} else {
inc(incrementsTable, rowKey, column, measurement.getValue());
}
} else {
gaugesTable.computeIfAbsent(rowKey, k -> Maps.newTreeMap(Bytes.BYTES_COMPARATOR)).put(column, measurement.getValue());
}
}
}
if (factCounterCache != null) {
gaugesTable.putAll(incGaugeTable);
factCounterCache.putAll(cacheUpdates);
}
// todo: replace with single call, to be able to optimize rpcs in underlying table
timeSeriesTable.put(gaugesTable);
timeSeriesTable.increment(incrementsTable);
if (metrics != null) {
metrics.increment(putCountMetric, gaugesTable.size());
metrics.increment(incrementCountMetric, incrementsTable.size());
}
}
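The inc(...) helper used above is not shown in this excerpt. A hedged sketch of its expected behavior follows (the real FactTable has its own private implementation, which may differ; Maps and Bytes are the same Guava/CDAP helpers already used in add()):

private static void inc(NavigableMap<byte[], NavigableMap<byte[], Long>> table,
                        byte[] rowKey, byte[] column, long delta) {
  // create the row map on first use, keyed and sorted the same way as the outer table
  NavigableMap<byte[], Long> row = table.computeIfAbsent(rowKey, k -> Maps.newTreeMap(Bytes.BYTES_COMPARATOR));
  // sum deltas per column so repeated counters in one batch collapse into a single increment
  row.merge(column, delta, Long::sum);
}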