Use of com.thinkbiganalytics.kylo.catalog.api.KyloCatalogException in project kylo by Teradata.
From the class AbstractJdbcDataSetProvider, method createHighWaterMark.
/**
 * Builds a {@link JdbcHighWaterMark} for the given key.
 *
 * <p>The initial value is seeded from the high water marks tracked by the
 * {@link KyloCatalogClient}, when one has been recorded for the key.</p>
 *
 * @param highWaterMarkKey key identifying the high water mark
 * @param client           catalog client holding previously recorded values
 * @return a new high water mark, possibly pre-populated with a stored value
 * @throws KyloCatalogException if the stored value is not a parseable ISO date-time
 */
@Nonnull
@VisibleForTesting
JdbcHighWaterMark createHighWaterMark(@Nonnull final String highWaterMarkKey, @Nonnull final KyloCatalogClient<T> client) {
final JdbcHighWaterMark highWaterMark = new JdbcHighWaterMark(highWaterMarkKey, client);
highWaterMark.setFormatter(new LongToDateTime());

// Seed the initial value from any previously recorded high water mark
final String storedValue = client.getHighWaterMarks().get(highWaterMarkKey);
if (storedValue == null) {
return highWaterMark;
}

try {
final long millis = ISODateTimeFormat.dateTimeParser().withZoneUTC().parseMillis(storedValue);
highWaterMark.accumulate(millis);
} catch (final IllegalArgumentException e) {
throw new KyloCatalogException("Invalid value for high water mark " + highWaterMarkKey + ": " + storedValue, e);
}
return highWaterMark;
}
Use of com.thinkbiganalytics.kylo.catalog.api.KyloCatalogException in project kylo by Teradata.
From the class SparkDataSetContext, method resolveHighWaterMarkPaths.
/**
 * Resolves the specified URIs by removing files that have been previously read.
 *
 * <p>The high water mark, max file age, and min file age may each be supplied under either the
 * {@link HighWaterMarkInputFormat} option name or the data set option name; the input-format
 * name takes precedence. After listing, the client's high water mark is advanced to the latest
 * value reported by the input format.</p>
 *
 * @param uris the input URIs to filter
 * @return the fully-qualified paths of unread files, or a {@code file:/dev/null} placeholder if none remain
 * @throws KyloCatalogException if a data set option is invalid
 * @throws IOException if an I/O error occurs
 */
@Nonnull
@SuppressWarnings({ "squid:HiddenFieldCheck", "squid:S1192" })
private List<String> resolveHighWaterMarkPaths(@Nonnull final List<String> uris) throws IOException {
// Get configuration
final Configuration conf = delegate.getHadoopConfiguration(client);
final String highWaterMarkName = SparkUtil.getOrElse(getOption(HighWaterMarkInputFormat.HIGH_WATER_MARK), SparkUtil.getOrElse(getOption(HIGH_WATER_MARK_OPTION), null));
final Job job = Job.getInstance(conf);

// Apply the stored high water mark and any file-age bounds to the job
final String highWaterMarkValue = client.getHighWaterMarks().get(highWaterMarkName);
if (highWaterMarkValue != null) {
HighWaterMarkInputFormat.setHighWaterMark(job, parseLongOption(HIGH_WATER_MARK_OPTION, highWaterMarkValue));
}
final String maxFileAge = SparkUtil.getOrElse(getOption(HighWaterMarkInputFormat.MAX_FILE_AGE), SparkUtil.getOrElse(getOption(MAX_AGE_OPTION), null));
if (maxFileAge != null) {
HighWaterMarkInputFormat.setMaxFileAge(job, parseLongOption(MAX_AGE_OPTION, maxFileAge));
}
final String minFileAge = SparkUtil.getOrElse(getOption(HighWaterMarkInputFormat.MIN_FILE_AGE), SparkUtil.getOrElse(getOption(MIN_AGE_OPTION), null));
if (minFileAge != null) {
HighWaterMarkInputFormat.setMinFileAge(job, parseLongOption(MIN_AGE_OPTION, minFileAge));
}

// Convert URIs to Paths, fully qualified against their file system
final Path[] paths = new Path[uris.size()];
for (int i = 0; i < uris.size(); ++i) {
final Path path = new Path(uris.get(i));
final FileSystem fs = path.getFileSystem(conf);
paths[i] = path.makeQualified(fs.getUri(), fs.getWorkingDirectory());
}
HighWaterMarkInputFormat.setInputPaths(job, paths);

// Get high water mark paths and record the advanced mark on the client
final HighWaterMarkInputFormat inputFormat = new HighWaterMarkInputFormat();
final List<FileStatus> files = inputFormat.listStatus(job);
client.setHighWaterMarks(Collections.singletonMap(highWaterMarkName, Long.toString(inputFormat.getLastHighWaterMark())));

// Return resolved paths; an empty listing yields a /dev/null placeholder so the read produces no rows
final List<String> resolvedPaths = new ArrayList<>(files.size());
if (files.isEmpty()) {
resolvedPaths.add("file:/dev/null");
} else {
for (final FileStatus file : files) {
resolvedPaths.add(file.getPath().toString());
}
}
return resolvedPaths;
}

/**
 * Parses the specified option value as a {@code long}.
 *
 * @param optionName option name used in the error message
 * @param value      value to parse; must not be {@code null}
 * @return the parsed value
 * @throws KyloCatalogException if the value is not a valid long
 */
private static long parseLongOption(@Nonnull final String optionName, @Nonnull final String value) {
try {
return Long.parseLong(value);
} catch (final NumberFormatException e) {
throw new KyloCatalogException("Invalid " + optionName + " value: " + value, e);
}
}
Use of com.thinkbiganalytics.kylo.catalog.api.KyloCatalogException in project kylo by Teradata.
From the class DefaultKyloCatalogReader, method load.
@Nonnull
@Override
public T load() {
// A format is mandatory; fail fast before looking up a provider
Preconditions.checkNotNull(options.getFormat(), "Format must be defined");

// Find the data set provider registered for this format
final Option<DataSetProvider<T>> providerOption = client.getDataSetProvider(options.getFormat());
if (providerOption.isEmpty()) {
throw new KyloCatalogException("Format is not supported: " + options.getFormat());
}
final DataSetProvider<T> provider = providerOption.get();

// Load the data set with the resource loader's thread context in place
try {
return resourceLoader.runWithThreadContext(new Callable<T>() {
@Override
public T call() {
return provider.read(client, options);
}
});
} catch (final Exception e) {
throw new KyloCatalogException("Unable to load '" + options.getFormat() + "' source: " + e, e);
}
}
Use of com.thinkbiganalytics.kylo.catalog.api.KyloCatalogException in project kylo by Teradata.
From the class AbstractKyloCatalogClient, method write.
@Nonnull
@Override
public KyloCatalogWriter<T> write(@Nonnull final T source, @Nonnull final String targetId) {
// Look up the target data set; the registry itself may be absent
final DataSetTemplate template = (dataSets != null) ? dataSets.get(targetId) : null;
if (template == null) {
throw new KyloCatalogException("Data set does not exist: " + targetId);
}

// Pre-configure a writer with the target's template
final DefaultKyloCatalogWriter<T> writer = new DefaultKyloCatalogWriter<>(this, hadoopConfiguration, resourceLoader, source);
writer.dataSet(template);
return writer;
}
Use of com.thinkbiganalytics.kylo.catalog.api.KyloCatalogException in project kylo by Teradata.
From the class AbstractJdbcDataSetProvider, method updateHighWaterMark.
/**
 * Scans the specified field of the data set and feeds its values into the
 * specified high water mark.
 *
 * @param dataSet       data set to scan
 * @param fieldName     name of the date or timestamp column to track
 * @param highWaterMark high water mark to update
 * @param client        catalog client used to register the accumulable
 * @return the data set with the scanning visitor applied to the field
 * @throws KyloCatalogException if the column is not a date or timestamp type
 */
@Nonnull
@VisibleForTesting
T updateHighWaterMark(@Nonnull final T dataSet, @Nonnull final String fieldName, @Nonnull final JdbcHighWaterMark highWaterMark, @Nonnull final KyloCatalogClient<T> client) {
// Pick the converter that maps the column's values to Long
final DataType fieldType = schema(dataSet).apply(fieldName).dataType();
final Function1<?, Long> toLong;
if (fieldType == DataTypes.TimestampType) {
toLong = new TimestampToLong();
} else if (fieldType == DataTypes.DateType) {
toLong = new DateToLong();
} else {
throw new KyloCatalogException("Unsupported column type for high water mark: " + fieldType);
}

// Register an accumulable for the mark, then visit the field through a UDF
final String markName = highWaterMark.getName();
final String accumulableId = (markName != null) ? markName : UUID.randomUUID().toString();
final Accumulable<JdbcHighWaterMark, Long> accumulable = accumulable(highWaterMark, accumulableId, new JdbcHighWaterMarkAccumulableParam(), client);
final JdbcHighWaterMarkVisitor<?> visitor = new JdbcHighWaterMarkVisitor<>(accumulable, toLong);
return map(dataSet, fieldName, visitor, fieldType);
}
Aggregations