
Example 1 with KyloCatalogException

Use of com.thinkbiganalytics.kylo.catalog.api.KyloCatalogException in project kylo by Teradata.

From the class AbstractJdbcDataSetProvider, the method createHighWaterMark:

/**
 * Creates a {@link JdbcHighWaterMark} for the specified high water mark key.
 *
 * <p>The initial value, if any, is loaded from the {@link KyloCatalogClient}.</p>
 */
@Nonnull
@VisibleForTesting
JdbcHighWaterMark createHighWaterMark(@Nonnull final String highWaterMarkKey, @Nonnull final KyloCatalogClient<T> client) {
    final JdbcHighWaterMark highWaterMark = new JdbcHighWaterMark(highWaterMarkKey, client);
    highWaterMark.setFormatter(new LongToDateTime());
    // Initialize from any value previously stored in the client (parsed as an ISO-8601 date/time)
    final String value = client.getHighWaterMarks().get(highWaterMarkKey);
    if (value != null) {
        try {
            highWaterMark.accumulate(ISODateTimeFormat.dateTimeParser().withZoneUTC().parseMillis(value));
        } catch (final IllegalArgumentException e) {
            throw new KyloCatalogException("Invalid value for high water mark " + highWaterMarkKey + ": " + value, e);
        }
    }
    return highWaterMark;
}
Also used: JdbcHighWaterMark (com.thinkbiganalytics.kylo.catalog.spark.sources.jdbc.JdbcHighWaterMark), KyloCatalogException (com.thinkbiganalytics.kylo.catalog.api.KyloCatalogException), VisibleForTesting (com.google.common.annotations.VisibleForTesting), Nonnull (javax.annotation.Nonnull)
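
For orientation, here is a caller sketch that is not from the Kylo sources: the Mockito setup, the provider variable, and the Map<String, String> return type of getHighWaterMarks() are assumptions inferred from the calls above.

import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import java.util.Collections;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;

// Hypothetical test sketch: seed the client with a stored ISO-8601 value and build the mark.
final KyloCatalogClient<Dataset<Row>> client = mock(KyloCatalogClient.class);
when(client.getHighWaterMarks()).thenReturn(Collections.singletonMap("water.mark", "2018-01-01T00:00:00Z"));

final JdbcHighWaterMark mark = provider.createHighWaterMark("water.mark", client);
// A value that is not ISO-8601 (e.g. "not-a-date") would raise KyloCatalogException instead.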

Example 2 with KyloCatalogException

Use of com.thinkbiganalytics.kylo.catalog.api.KyloCatalogException in project kylo by Teradata.

From the class SparkDataSetContext, the method resolveHighWaterMarkPaths:

/**
 * Resolves the specified URIs by removing files that have been previously read.
 *
 * @throws KyloCatalogException if a data set option is invalid
 * @throws IOException          if an I/O error occurs
 */
@Nonnull
@SuppressWarnings({ "squid:HiddenFieldCheck", "squid:S1192" })
private List<String> resolveHighWaterMarkPaths(@Nonnull final List<String> uris) throws IOException {
    // Get configuration
    final Configuration conf = delegate.getHadoopConfiguration(client);
    final String highWaterMarkName = SparkUtil.getOrElse(getOption(HighWaterMarkInputFormat.HIGH_WATER_MARK), SparkUtil.getOrElse(getOption(HIGH_WATER_MARK_OPTION), null));
    final Job job = Job.getInstance(conf);
    final String highWaterMarkValue = client.getHighWaterMarks().get(highWaterMarkName);
    if (highWaterMarkValue != null) {
        try {
            HighWaterMarkInputFormat.setHighWaterMark(job, Long.parseLong(highWaterMarkValue));
        } catch (final NumberFormatException e) {
            throw new KyloCatalogException("Invalid " + HIGH_WATER_MARK_OPTION + " value: " + highWaterMarkValue, e);
        }
    }
    final String maxFileAge = SparkUtil.getOrElse(getOption(HighWaterMarkInputFormat.MAX_FILE_AGE), SparkUtil.getOrElse(getOption(MAX_AGE_OPTION), null));
    if (maxFileAge != null) {
        try {
            HighWaterMarkInputFormat.setMaxFileAge(job, Long.parseLong(maxFileAge));
        } catch (final NumberFormatException e) {
            throw new KyloCatalogException("Invalid " + MAX_AGE_OPTION + " value: " + maxFileAge, e);
        }
    }
    final String minFileAge = SparkUtil.getOrElse(getOption(HighWaterMarkInputFormat.MIN_FILE_AGE), SparkUtil.getOrElse(getOption(MIN_AGE_OPTION), null));
    if (minFileAge != null) {
        try {
            HighWaterMarkInputFormat.setMinFileAge(job, Long.parseLong(minFileAge));
        } catch (final NumberFormatException e) {
            throw new KyloCatalogException("Invalid " + MIN_AGE_OPTION + " value: " + minFileAge, e);
        }
    }
    // Convert URIs to Paths
    final Path[] paths = new Path[uris.size()];
    for (int i = 0; i < uris.size(); ++i) {
        final Path path = new Path(uris.get(i));
        final FileSystem fs = path.getFileSystem(conf);
        paths[i] = path.makeQualified(fs.getUri(), fs.getWorkingDirectory());
    }
    HighWaterMarkInputFormat.setInputPaths(job, paths);
    // Get high water mark paths
    final HighWaterMarkInputFormat inputFormat = new HighWaterMarkInputFormat();
    final List<FileStatus> files = inputFormat.listStatus(job);
    client.setHighWaterMarks(Collections.singletonMap(highWaterMarkName, Long.toString(inputFormat.getLastHighWaterMark())));
    // Return resolved paths
    final List<String> resolvedPaths = new ArrayList<>(files.size());
    if (files.isEmpty()) {
        resolvedPaths.add("file:/dev/null");
    } else {
        for (final FileStatus file : files) {
            resolvedPaths.add(file.getPath().toString());
        }
    }
    return resolvedPaths;
}
Also used: Path (org.apache.hadoop.fs.Path), FileStatus (org.apache.hadoop.fs.FileStatus), Configuration (org.apache.hadoop.conf.Configuration), KyloCatalogException (com.thinkbiganalytics.kylo.catalog.api.KyloCatalogException), ArrayList (java.util.ArrayList), FileSystem (org.apache.hadoop.fs.FileSystem), Job (org.apache.hadoop.mapreduce.Job), Nonnull (javax.annotation.Nonnull)
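
The three try/catch blocks above repeat one validation pattern: parse a numeric option and wrap the NumberFormatException in a KyloCatalogException that names the offending option. A hypothetical helper (parseLongOption is not part of Kylo; it is shown only to illustrate the pattern) could express it once:

// Hypothetical refactoring sketch of the repeated option validation above.
private static long parseLongOption(@Nonnull final String optionName, @Nonnull final String value) {
    try {
        return Long.parseLong(value);
    } catch (final NumberFormatException e) {
        throw new KyloCatalogException("Invalid " + optionName + " value: " + value, e);
    }
}

// Usage, replacing the max-file-age block:
// HighWaterMarkInputFormat.setMaxFileAge(job, parseLongOption(MAX_AGE_OPTION, maxFileAge));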

Example 3 with KyloCatalogException

Use of com.thinkbiganalytics.kylo.catalog.api.KyloCatalogException in project kylo by Teradata.

From the class DefaultKyloCatalogReader, the method load:

@Nonnull
@Override
public T load() {
    Preconditions.checkNotNull(options.getFormat(), "Format must be defined");
    // Find data set provider
    final Option<DataSetProvider<T>> provider = client.getDataSetProvider(options.getFormat());
    if (!provider.isDefined()) {
        throw new KyloCatalogException("Format is not supported: " + options.getFormat());
    }
    // Load data set
    try {
        return resourceLoader.runWithThreadContext(new Callable<T>() {

            @Override
            public T call() {
                return provider.get().read(client, options);
            }
        });
    } catch (final Exception e) {
        throw new KyloCatalogException("Unable to load '" + options.getFormat() + "' source: " + e, e);
    }
}
Also used: KyloCatalogException (com.thinkbiganalytics.kylo.catalog.api.KyloCatalogException), DataSetProvider (com.thinkbiganalytics.kylo.catalog.spi.DataSetProvider), Nonnull (javax.annotation.Nonnull)
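
Because KyloCatalogException is unchecked (load() declares no throws clause), callers typically guard it explicitly. A minimal caller sketch, assuming a Spark Dataset<Row> reader and a logger; only load() and the exception itself come from the snippet above:

try {
    final Dataset<Row> dataSet = reader.load();
    dataSet.show();
} catch (final KyloCatalogException e) {
    // Raised when the format has no registered DataSetProvider, or when the
    // provider's read fails; the original cause is preserved as e.getCause().
    log.error("Unable to load data set", e);
}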

Example 4 with KyloCatalogException

Use of com.thinkbiganalytics.kylo.catalog.api.KyloCatalogException in project kylo by Teradata.

From the class AbstractKyloCatalogClient, the method write:

@Nonnull
@Override
public KyloCatalogWriter<T> write(@Nonnull final T source, @Nonnull final String targetId) {
    final DataSetTemplate dataSet = (dataSets != null) ? dataSets.get(targetId) : null;
    if (dataSet != null) {
        final DefaultKyloCatalogWriter<T> writer = new DefaultKyloCatalogWriter<>(this, hadoopConfiguration, resourceLoader, source);
        writer.dataSet(dataSet);
        return writer;
    } else {
        throw new KyloCatalogException("Data set does not exist: " + targetId);
    }
}
Also used: KyloCatalogException (com.thinkbiganalytics.kylo.catalog.api.KyloCatalogException), DataSetTemplate (com.thinkbiganalytics.kylo.catalog.rest.model.DataSetTemplate), Nonnull (javax.annotation.Nonnull)
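
A matching caller sketch for write (the dataFrame variable and the "orders-target" id are hypothetical, and the writer configuration calls are elided because they do not appear in these sources):

try {
    final KyloCatalogWriter<Dataset<Row>> writer = client.write(dataFrame, "orders-target");
    // ... configure the returned writer and save; those calls are not shown in this source.
} catch (final KyloCatalogException e) {
    // "orders-target" is not among the client's registered data sets.
}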

Example 5 with KyloCatalogException

Use of com.thinkbiganalytics.kylo.catalog.api.KyloCatalogException in project kylo by Teradata.

From the class AbstractJdbcDataSetProvider, the method updateHighWaterMark:

/**
 * Scans the specified field and updates the specified high water mark.
 */
@Nonnull
@VisibleForTesting
T updateHighWaterMark(@Nonnull final T dataSet, @Nonnull final String fieldName, @Nonnull final JdbcHighWaterMark highWaterMark, @Nonnull final KyloCatalogClient<T> client) {
    // Determine function to convert column to Long
    final DataType fieldType = schema(dataSet).apply(fieldName).dataType();
    final Function1<?, Long> toLong;
    if (fieldType == DataTypes.DateType) {
        toLong = new DateToLong();
    } else if (fieldType == DataTypes.TimestampType) {
        toLong = new TimestampToLong();
    } else {
        throw new KyloCatalogException("Unsupported column type for high water mark: " + fieldType);
    }
    // Create UDF and apply to field
    final String accumulableId = (highWaterMark.getName() != null) ? highWaterMark.getName() : UUID.randomUUID().toString();
    final Accumulable<JdbcHighWaterMark, Long> accumulable = accumulable(highWaterMark, accumulableId, new JdbcHighWaterMarkAccumulableParam(), client);
    final JdbcHighWaterMarkVisitor<?> visitor = new JdbcHighWaterMarkVisitor<>(accumulable, toLong);
    return map(dataSet, fieldName, visitor, fieldType);
}
Also used: JdbcHighWaterMarkVisitor (com.thinkbiganalytics.kylo.catalog.spark.sources.jdbc.JdbcHighWaterMarkVisitor), JdbcHighWaterMark (com.thinkbiganalytics.kylo.catalog.spark.sources.jdbc.JdbcHighWaterMark), KyloCatalogException (com.thinkbiganalytics.kylo.catalog.api.KyloCatalogException), DataType (org.apache.spark.sql.types.DataType), JdbcHighWaterMarkAccumulableParam (com.thinkbiganalytics.kylo.catalog.spark.sources.jdbc.JdbcHighWaterMarkAccumulableParam), VisibleForTesting (com.google.common.annotations.VisibleForTesting), Nonnull (javax.annotation.Nonnull)
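
Putting Examples 1 and 5 together, a provider-internal sketch (the key and field names are hypothetical, and T is assumed to be Spark's Dataset<Row>, which the DataType/DataTypes usage suggests):

// Track the maximum value of a date/timestamp column as the data set is consumed.
final JdbcHighWaterMark mark = createHighWaterMark("orders.water.mark", client);
final Dataset<Row> tracked = updateHighWaterMark(dataSet, "last_modified", mark, client);
// Any other column type (e.g. StringType) raises KyloCatalogException, as shown above.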

Aggregations

KyloCatalogException (com.thinkbiganalytics.kylo.catalog.api.KyloCatalogException): 6
Nonnull (javax.annotation.Nonnull): 6
VisibleForTesting (com.google.common.annotations.VisibleForTesting): 2
DataSetTemplate (com.thinkbiganalytics.kylo.catalog.rest.model.DataSetTemplate): 2
JdbcHighWaterMark (com.thinkbiganalytics.kylo.catalog.spark.sources.jdbc.JdbcHighWaterMark): 2
JdbcHighWaterMarkAccumulableParam (com.thinkbiganalytics.kylo.catalog.spark.sources.jdbc.JdbcHighWaterMarkAccumulableParam): 1
JdbcHighWaterMarkVisitor (com.thinkbiganalytics.kylo.catalog.spark.sources.jdbc.JdbcHighWaterMarkVisitor): 1
DataSetProvider (com.thinkbiganalytics.kylo.catalog.spi.DataSetProvider): 1
ArrayList (java.util.ArrayList): 1
Configuration (org.apache.hadoop.conf.Configuration): 1
FileStatus (org.apache.hadoop.fs.FileStatus): 1
FileSystem (org.apache.hadoop.fs.FileSystem): 1
Path (org.apache.hadoop.fs.Path): 1
Job (org.apache.hadoop.mapreduce.Job): 1
DataType (org.apache.spark.sql.types.DataType): 1