
Example 26 with HoodieIOException

Use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.

The class HoodieAvroDataBlock, method decompress.

private static String decompress(byte[] bytes) {
    // try-with-resources closes the inflater stream even if a read fails.
    try (InputStream in = new InflaterInputStream(new ByteArrayInputStream(bytes))) {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        byte[] buffer = new byte[8192];
        int len;
        // Drain the inflated stream into the buffer until EOF.
        while ((len = in.read(buffer)) > 0) {
            baos.write(buffer, 0, len);
        }
        return new String(baos.toByteArray(), StandardCharsets.UTF_8);
    } catch (IOException e) {
        throw new HoodieIOException("IOException while decompressing text", e);
    }
}
Also used : HoodieIOException(org.apache.hudi.exception.HoodieIOException) ByteArrayInputStream(java.io.ByteArrayInputStream) DataInputStream(java.io.DataInputStream) InflaterInputStream(java.util.zip.InflaterInputStream) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) SizeAwareDataInputStream(org.apache.hudi.common.fs.SizeAwareDataInputStream) InputStream(java.io.InputStream) ByteArrayOutputStream(java.io.ByteArrayOutputStream) IOException(java.io.IOException)
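
The inflate-based reader above implies a deflate-based writer. The compress helper below is a minimal sketch, not taken from the Hudi source (its name and visibility are assumptions); it uses java.util.zip.DeflaterOutputStream, the standard counterpart of InflaterInputStream, so it would round-trip with decompress.

private static byte[] compress(String text) {
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    // DeflaterOutputStream deflates on write; closing it finishes the stream.
    try (OutputStream out = new DeflaterOutputStream(baos)) {
        out.write(text.getBytes(StandardCharsets.UTF_8));
    } catch (IOException e) {
        throw new HoodieIOException("IOException while compressing text", e);
    }
    return baos.toByteArray();
}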

Example 27 with HoodieIOException

Use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.

The class HoodieDeleteBlock, method getKeysToDelete.

public HoodieKey[] getKeysToDelete() {
    try {
        // Deserialize lazily on first access and cache the result.
        if (keysToDelete == null) {
            if (!getContent().isPresent() && readBlockLazily) {
                // Content was not loaded eagerly; read it from disk now.
                inflate();
            }
            SizeAwareDataInputStream dis = new SizeAwareDataInputStream(new DataInputStream(new ByteArrayInputStream(getContent().get())));
            // Block layout: format version, payload length, serialized HoodieKey[].
            int version = dis.readInt();
            int dataLength = dis.readInt();
            byte[] data = new byte[dataLength];
            dis.readFully(data);
            this.keysToDelete = SerializationUtils.<HoodieKey[]>deserialize(data);
            // Drop the raw content now that the keys are materialized.
            deflate();
        }
        return keysToDelete;
    } catch (IOException io) {
        throw new HoodieIOException("Unable to generate keys to delete from block content", io);
    }
}
Also used : HoodieIOException(org.apache.hudi.exception.HoodieIOException) ByteArrayInputStream(java.io.ByteArrayInputStream) HoodieKey(org.apache.hudi.common.model.HoodieKey) IOException(java.io.IOException) DataInputStream(java.io.DataInputStream) SizeAwareDataInputStream(org.apache.hudi.common.fs.SizeAwareDataInputStream) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream)
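
The read order in getKeysToDelete() fixes the block layout: an int format version, an int payload length, then the serialized HoodieKey[] bytes. The writer below is a hypothetical sketch of that inverse, assuming SerializationUtils.serialize(...) mirrors the deserialize call above; it is not the actual HoodieDeleteBlock serializer.

private static byte[] serializeKeysToDelete(HoodieKey[] keys, int version) throws IOException {
    byte[] data = SerializationUtils.serialize(keys);
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    try (DataOutputStream dos = new DataOutputStream(baos)) {
        // Each write matches a read in getKeysToDelete():
        dos.writeInt(version);     // dis.readInt() -> version
        dos.writeInt(data.length); // dis.readInt() -> dataLength
        dos.write(data);           // dis.readFully(data)
    }
    return baos.toByteArray();
}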

Example 28 with HoodieIOException

Use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.

The class TableOptionProperties, method loadFromProperties.

/**
 * Reads the table options map from the given table base path.
 */
public static Map<String, String> loadFromProperties(String basePath, Configuration hadoopConf) {
    Path propertiesFilePath = getPropertiesFilePath(basePath);
    Map<String, String> options = new HashMap<>();
    Properties props = new Properties();
    FileSystem fs = FSUtils.getFs(basePath, hadoopConf);
    // try-with-resources guarantees the input stream is closed on all paths.
    try (FSDataInputStream inputStream = fs.open(propertiesFilePath)) {
        props.load(inputStream);
        for (final String name : props.stringPropertyNames()) {
            options.put(name, props.getProperty(name));
        }
    } catch (IOException e) {
        throw new HoodieIOException(String.format("Could not load table option properties from %s", propertiesFilePath), e);
    }
    LOG.info(String.format("Loaded table option properties from %s successfully.", propertiesFilePath));
    return options;
}
Also used : Path(org.apache.hadoop.fs.Path) HoodieIOException(org.apache.hudi.exception.HoodieIOException) HashMap(java.util.HashMap) FileSystem(org.apache.hadoop.fs.FileSystem) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) IOException(java.io.IOException) Properties(java.util.Properties)
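
Calling the method needs only a table base path and a Hadoop Configuration. A minimal usage sketch (the base path below is a placeholder, not a real table):

Configuration hadoopConf = new Configuration();
// HoodieIOException is unchecked, so it propagates if the properties file
// cannot be opened or parsed.
Map<String, String> options = TableOptionProperties.loadFromProperties("/tmp/hoodie/my_table", hadoopConf);
options.forEach((k, v) -> System.out.println(k + " = " + v));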

Example 29 with HoodieIOException

Use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.

The class SchemaRegistryProvider, method getTargetSchema.

@Override
public Schema getTargetSchema() {
    String registryUrl = config.getString(Config.SRC_SCHEMA_REGISTRY_URL_PROP);
    // Fall back to the source registry URL when no target-specific URL is set.
    String targetRegistryUrl = config.getString(Config.TARGET_SCHEMA_REGISTRY_URL_PROP, registryUrl);
    try {
        return getSchema(targetRegistryUrl);
    } catch (IOException ioe) {
        // Report the URL that was actually queried, not the source registry URL.
        throw new HoodieIOException("Error reading target schema from registry: " + targetRegistryUrl, ioe);
    }
}
Also used : HoodieIOException(org.apache.hudi.exception.HoodieIOException) IOException(java.io.IOException)
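
Because HoodieIOException is unchecked, a registry outage only surfaces at the first call site that asks for the schema. A hedged caller sketch, assuming the provider also exposes a getSourceSchema() to fall back on:

Schema target;
try {
    target = schemaProvider.getTargetSchema();
} catch (HoodieIOException e) {
    // Assumed fallback: reuse the source schema when the target registry is unreachable.
    target = schemaProvider.getSourceSchema();
}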

Example 30 with HoodieIOException

Use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.

The class TestOrcBootstrap, method generateInputBatch.

private static JavaRDD<HoodieRecord> generateInputBatch(JavaSparkContext jsc, List<Pair<String, List<HoodieFileStatus>>> partitionPaths, Schema writerSchema) {
    // Flatten (partition -> file statuses) into (partition, file path) pairs.
    List<Pair<String, Path>> fullFilePathsWithPartition = partitionPaths.stream().flatMap(p -> p.getValue().stream().map(x -> Pair.of(p.getKey(), FileStatusUtils.toPath(x.getPath())))).collect(Collectors.toList());
    return jsc.parallelize(fullFilePathsWithPartition.stream().flatMap(p -> {
        try {
            Configuration conf = jsc.hadoopConfiguration();
            AvroReadSupport.setAvroReadSchema(conf, writerSchema);
            // Open the ORC file and derive an Avro schema from its type description.
            Reader orcReader = OrcFile.createReader(p.getValue(), new OrcFile.ReaderOptions(conf));
            RecordReader recordReader = orcReader.rows();
            TypeDescription orcSchema = orcReader.getSchema();
            Schema avroSchema = AvroOrcUtils.createAvroSchemaWithDefaultValue(orcSchema, "test_orc_record", null, true);
            Iterator<GenericRecord> recIterator = new OrcReaderIterator(recordReader, avroSchema, orcSchema);
            // Stream each ORC row as a GenericRecord and wrap it in a HoodieAvroRecord.
            return StreamSupport.stream(Spliterators.spliteratorUnknownSize(recIterator, 0), false).map(gr -> {
                try {
                    String key = gr.get("_row_key").toString();
                    String pPath = p.getKey();
                    return new HoodieAvroRecord<>(new HoodieKey(key, pPath), new RawTripTestPayload(gr.toString(), key, pPath, HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA));
                } catch (IOException e) {
                    // Lambdas cannot throw checked exceptions; rethrow as unchecked.
                    throw new HoodieIOException(e.getMessage(), e);
                }
            });
        } catch (IOException ioe) {
            throw new HoodieIOException(ioe.getMessage(), ioe);
        }
    }).collect(Collectors.toList()));
}
Also used : BootstrapUtils(org.apache.hudi.table.action.bootstrap.BootstrapUtils) BeforeEach(org.junit.jupiter.api.BeforeEach) Arrays(java.util.Arrays) BootstrapMode(org.apache.hudi.client.bootstrap.BootstrapMode) Spliterators(java.util.Spliterators) HoodieTestDataGenerator(org.apache.hudi.common.testutils.HoodieTestDataGenerator) Random(java.util.Random) HoodieParquetRealtimeInputFormat(org.apache.hudi.hadoop.realtime.HoodieParquetRealtimeInputFormat) DataSourceWriteOptions(org.apache.hudi.DataSourceWriteOptions) HoodieTableType(org.apache.hudi.common.model.HoodieTableType) NonpartitionedKeyGenerator(org.apache.hudi.keygen.NonpartitionedKeyGenerator) Assertions.assertFalse(org.junit.jupiter.api.Assertions.assertFalse) HoodieFileStatus(org.apache.hudi.avro.model.HoodieFileStatus) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) HoodieSparkEngineContext(org.apache.hudi.client.common.HoodieSparkEngineContext) Tag(org.junit.jupiter.api.Tag) DataTypes(org.apache.spark.sql.types.DataTypes) Schema(org.apache.avro.Schema) IndexType(org.apache.hudi.index.HoodieIndex.IndexType) RawTripTestPayload(org.apache.hudi.common.testutils.RawTripTestPayload) MetadataOnlyBootstrapModeSelector(org.apache.hudi.client.bootstrap.selector.MetadataOnlyBootstrapModeSelector) SimpleKeyGenerator(org.apache.hudi.keygen.SimpleKeyGenerator) Instant(java.time.Instant) Collectors(java.util.stream.Collectors) Test(org.junit.jupiter.api.Test) HoodieFileFormat(org.apache.hudi.common.model.HoodieFileFormat) List(java.util.List) TempDir(org.junit.jupiter.api.io.TempDir) Assertions.assertTrue(org.junit.jupiter.api.Assertions.assertTrue) FullRecordBootstrapDataProvider(org.apache.hudi.client.bootstrap.FullRecordBootstrapDataProvider) HoodieClientTestBase(org.apache.hudi.testutils.HoodieClientTestBase) IntStream(java.util.stream.IntStream) AvroReadSupport(org.apache.parquet.avro.AvroReadSupport) AvroOrcUtils(org.apache.hudi.common.util.AvroOrcUtils) Dataset(org.apache.spark.sql.Dataset) OrcReaderIterator(org.apache.hudi.common.util.OrcReaderIterator) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) Option(org.apache.hudi.common.util.Option) OrcFile(org.apache.orc.OrcFile) ArrayList(java.util.ArrayList) Reader(org.apache.orc.Reader) Collectors.mapping(java.util.stream.Collectors.mapping) StreamSupport(java.util.stream.StreamSupport) HoodieParquetInputFormat(org.apache.hudi.hadoop.HoodieParquetInputFormat) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) FullRecordBootstrapModeSelector(org.apache.hudi.client.bootstrap.selector.FullRecordBootstrapModeSelector) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) JavaRDD(org.apache.spark.api.java.JavaRDD) SparkSession(org.apache.spark.sql.SparkSession) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) GenericRecord(org.apache.avro.generic.GenericRecord) SaveMode(org.apache.spark.sql.SaveMode) BootstrapModeSelector(org.apache.hudi.client.bootstrap.selector.BootstrapModeSelector) BootstrapIndex(org.apache.hudi.common.bootstrap.index.BootstrapIndex) TypedProperties(org.apache.hudi.common.config.TypedProperties) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) Iterator(java.util.Iterator) Column(org.apache.spark.sql.Column) RecordReader(org.apache.orc.RecordReader) SQLContext(org.apache.spark.sql.SQLContext) TypeDescription(org.apache.orc.TypeDescription) IOException(java.io.IOException) Row(org.apache.spark.sql.Row) HoodieAvroRecord(org.apache.hudi.common.model.HoodieAvroRecord) FileCreateUtils(org.apache.hudi.common.testutils.FileCreateUtils) HoodieCompactionConfig(org.apache.hudi.config.HoodieCompactionConfig) JobConf(org.apache.hadoop.mapred.JobConf) AfterEach(org.junit.jupiter.api.AfterEach) Collectors.toList(java.util.stream.Collectors.toList) UDF1(org.apache.spark.sql.api.java.UDF1) HoodieKey(org.apache.hudi.common.model.HoodieKey) FileStatusUtils(org.apache.hudi.common.bootstrap.FileStatusUtils) HoodieIOException(org.apache.hudi.exception.HoodieIOException) PartitionPathEncodeUtils(org.apache.hudi.common.util.PartitionPathEncodeUtils) HoodieTestUtils(org.apache.hudi.common.testutils.HoodieTestUtils) org.apache.spark.sql.functions.callUDF(org.apache.spark.sql.functions.callUDF) Pair(org.apache.hudi.common.util.collection.Pair) HoodieBootstrapConfig(org.apache.hudi.config.HoodieBootstrapConfig)
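
The nested try/catch blocks above exist because java.util.function.Function cannot throw checked exceptions, so each lambda must rethrow IOException as the unchecked HoodieIOException. A hypothetical generic helper (not part of Hudi) that factors this pattern out:

@FunctionalInterface
interface IOFunction<T, R> {
    R apply(T t) throws IOException;
}

// Wraps an IO-throwing function into a plain java.util.function.Function.
static <T, R> Function<T, R> unchecked(IOFunction<T, R> f) {
    return t -> {
        try {
            return f.apply(t);
        } catch (IOException e) {
            // Rethrow as Hudi's unchecked wrapper so stream pipelines compile.
            throw new HoodieIOException(e.getMessage(), e);
        }
    };
}

With it, the inner map call above could read .map(unchecked(gr -> ...)) instead of carrying its own try/catch.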

Aggregations

HoodieIOException (org.apache.hudi.exception.HoodieIOException): 139
IOException (java.io.IOException): 127
Path (org.apache.hadoop.fs.Path): 45
List (java.util.List): 31
ArrayList (java.util.ArrayList): 30
Option (org.apache.hudi.common.util.Option): 27
Collectors (java.util.stream.Collectors): 26
HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant): 26
Pair (org.apache.hudi.common.util.collection.Pair): 25
LogManager (org.apache.log4j.LogManager): 25
Logger (org.apache.log4j.Logger): 25
Map (java.util.Map): 21
FileSystem (org.apache.hadoop.fs.FileSystem): 20
GenericRecord (org.apache.avro.generic.GenericRecord): 19
HashSet (java.util.HashSet): 18
HoodieRecord (org.apache.hudi.common.model.HoodieRecord): 18
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient): 18
Set (java.util.Set): 17
HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline): 17
HoodieException (org.apache.hudi.exception.HoodieException): 17