
Example 26 with WritableComparable

use of org.apache.hadoop.io.WritableComparable in project hive by apache.

the class RCFileMapReduceOutputFormat method getRecordWriter.

/* (non-Javadoc)
  * @see org.apache.hadoop.mapreduce.lib.output.FileOutputFormat#getRecordWriter(org.apache.hadoop.mapreduce.TaskAttemptContext)
  */
@Override
public org.apache.hadoop.mapreduce.RecordWriter<WritableComparable<?>, BytesRefArrayWritable> getRecordWriter(TaskAttemptContext task) throws IOException, InterruptedException {
    // FileOutputFormat.getWorkOutputPath takes TaskInputOutputContext instead of
    // TaskAttemptContext, so can't use that here
    FileOutputCommitter committer = (FileOutputCommitter) getOutputCommitter(task);
    Path outputPath = committer.getWorkPath();
    FileSystem fs = outputPath.getFileSystem(task.getConfiguration());
    if (!fs.exists(outputPath)) {
        fs.mkdirs(outputPath);
    }
    Path file = getDefaultWorkFile(task, "");
    CompressionCodec codec = null;
    if (getCompressOutput(task)) {
        Class<?> codecClass = getOutputCompressorClass(task, DefaultCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, task.getConfiguration());
    }
    final RCFile.Writer out = new RCFile.Writer(fs, task.getConfiguration(), file, task, codec);
    return new RecordWriter<WritableComparable<?>, BytesRefArrayWritable>() {

        /* (non-Javadoc)
         * @see org.apache.hadoop.mapreduce.RecordWriter#write(java.lang.Object, java.lang.Object)
         */
        @Override
        public void write(WritableComparable<?> key, BytesRefArrayWritable value) throws IOException {
            out.append(value);
        }

        /* (non-Javadoc)
         * @see org.apache.hadoop.mapreduce.RecordWriter#close(org.apache.hadoop.mapreduce.TaskAttemptContext)
         */
        @Override
        public void close(TaskAttemptContext task) throws IOException, InterruptedException {
            out.close();
        }
    };
}
Also used : Path(org.apache.hadoop.fs.Path) RCFile(org.apache.hadoop.hive.ql.io.RCFile) RecordWriter(org.apache.hadoop.mapreduce.RecordWriter) BytesRefArrayWritable(org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable) WritableComparable(org.apache.hadoop.io.WritableComparable) FileOutputCommitter(org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter) FileSystem(org.apache.hadoop.fs.FileSystem) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec) RecordWriter(org.apache.hadoop.mapreduce.RecordWriter)
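
The record writer above ignores the key and only appends the BytesRefArrayWritable columns, so any WritableComparable key type (commonly NullWritable) will do. Below is a minimal sketch of wiring a MapReduce job to this output format; the package of RCFileMapReduceOutputFormat, the column-count configuration key, and the output path are assumptions for illustration.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
// location of the output format shown above (package assumed)
import org.apache.hive.hcatalog.rcfile.RCFileMapReduceOutputFormat;

public class RCFileJobSetup {
    public static Job configure(Configuration conf) throws Exception {
        // RCFile.Writer reads the column count from the configuration;
        // "hive.io.rcfile.column.number" is the key Hive's RCFile uses (assumption).
        conf.setInt("hive.io.rcfile.column.number", 3);

        Job job = Job.getInstance(conf, "rcfile-write");
        job.setOutputFormatClass(RCFileMapReduceOutputFormat.class);
        // The record writer ignores the key, so NullWritable is a natural choice.
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(BytesRefArrayWritable.class);
        FileOutputFormat.setOutputPath(job, new Path("/tmp/rcfile-out")); // hypothetical path
        return job;
    }
}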

Example 27 with WritableComparable

use of org.apache.hadoop.io.WritableComparable in project nutch by apache.

the class SegmentReader method getSeqRecords.

private List<Writable> getSeqRecords(Path dir, Text key) throws Exception {
    MapFile.Reader[] readers = MapFileOutputFormat.getReaders(dir, getConf());
    ArrayList<Writable> res = new ArrayList<>();
    Class<?> keyClass = readers[0].getKeyClass();
    Class<?> valueClass = readers[0].getValueClass();
    if (!keyClass.getName().equals("org.apache.hadoop.io.Text"))
        throw new IOException("Incompatible key (" + keyClass.getName() + ")");
    WritableComparable aKey = (WritableComparable) keyClass.newInstance();
    Writable value = (Writable) valueClass.newInstance();
    for (int i = 0; i < readers.length; i++) {
        while (readers[i].next(aKey, value)) {
            if (aKey.equals(key)) {
                res.add(value);
                value = (Writable) valueClass.newInstance();
            }
        }
        readers[i].close();
    }
    return res;
}
Also used : WritableComparable(org.apache.hadoop.io.WritableComparable) ArrayList(java.util.ArrayList) InputStreamReader(java.io.InputStreamReader) BufferedReader(java.io.BufferedReader) Writable(org.apache.hadoop.io.Writable) NutchWritable(org.apache.nutch.crawl.NutchWritable) IOException(java.io.IOException)
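
The scan above reads every entry of every part file because it must collect all values stored under the key. When a single value per part is enough, MapFile's sorted index allows a direct lookup instead; a minimal sketch, assuming a standard MapFile part directory such as <segment>/crawl_fetch/part-00000:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.MapFile;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.util.ReflectionUtils;

public class MapFileLookup {
    /**
     * Point lookup in one MapFile part. Unlike the full scan above, this uses
     * the MapFile index and returns at most one value, or null when the key
     * is absent from this part.
     */
    public static Writable lookup(Configuration conf, Path partDir,
                                  WritableComparable<?> key) throws IOException {
        try (MapFile.Reader reader = new MapFile.Reader(partDir, conf)) {
            // instantiate a reusable value of whatever class this MapFile stores
            Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
            return reader.get(key, value);
        }
    }
}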

Example 28 with WritableComparable

use of org.apache.hadoop.io.WritableComparable in project incubator-systemml by apache.

the class SamplingSortMRInputFormat method writePartitionFile.

/**
 * Use the input splits to take samples of the input and generate sample
 * keys. By default reads 100,000 keys from 10 locations in the input, sorts
 * them and picks N-1 keys to generate N equally sized partitions.
 *
 * @param conf the job to sample
 * @param partFile where to write the output file to
 * @return index value
 * @throws IOException if something goes wrong
 * @throws InstantiationException if InstantiationException occurs
 * @throws IllegalAccessException if IllegalAccessException occurs
 */
@SuppressWarnings({ "unchecked", "unused", "deprecation" })
public static int writePartitionFile(JobConf conf, Path partFile) throws IOException, InstantiationException, IllegalAccessException {
    SamplingSortMRInputFormat inFormat = new SamplingSortMRInputFormat();
    Sampler sampler = new Sampler();
    Class<? extends WritableComparable> targetKeyClass;
    targetKeyClass = (Class<? extends WritableComparable>) conf.getClass(TARGET_KEY_CLASS, WritableComparable.class);
    // get input converter information
    int brlen = MRJobConfiguration.getNumRowsPerBlock(conf, (byte) 0);
    int bclen = MRJobConfiguration.getNumColumnsPerBlock(conf, (byte) 0);
    // indicate whether the matrix value in this mapper is a matrix cell or a matrix block
    int partitions = conf.getNumReduceTasks();
    long sampleSize = conf.getLong(SAMPLE_SIZE, 1000);
    InputSplit[] splits = inFormat.getSplits(conf, conf.getNumMapTasks());
    int samples = Math.min(10, splits.length);
    long recordsPerSample = sampleSize / samples;
    int sampleStep = splits.length / samples;
    // take N samples from different parts of the input
    int totalcount = 0;
    for (int i = 0; i < samples; i++) {
        SequenceFileRecordReader reader = (SequenceFileRecordReader) inFormat.getRecordReader(splits[sampleStep * i], conf, null);
        int count = 0;
        WritableComparable key = (WritableComparable) reader.createKey();
        Writable value = (Writable) reader.createValue();
        while (reader.next(key, value) && count < recordsPerSample) {
            Converter inputConverter = MRJobConfiguration.getInputConverter(conf, (byte) 0);
            inputConverter.setBlockSize(brlen, bclen);
            inputConverter.convert(key, value);
            while (inputConverter.hasNext()) {
                Pair pair = inputConverter.next();
                if (pair.getKey() instanceof DoubleWritable) {
                    sampler.addValue(new DoubleWritable(((DoubleWritable) pair.getKey()).get()));
                } else if (pair.getValue() instanceof MatrixCell) {
                    sampler.addValue(new DoubleWritable(((MatrixCell) pair.getValue()).getValue()));
                } else
                    throw new IOException("SamplingSortMRInputFormat unsupported key/value class: " + pair.getKey().getClass() + ":" + pair.getValue().getClass());
                count++;
            }
            key = (WritableComparable) reader.createKey();
            value = (Writable) reader.createValue();
        }
        totalcount += count;
    }
    // empty input files
    if (totalcount == 0)
        sampler.addValue(new DoubleWritable(0));
    FileSystem outFs = partFile.getFileSystem(conf);
    if (outFs.exists(partFile)) {
        outFs.delete(partFile, false);
    }
    // note: key value always double/null as expected by partitioner
    SequenceFile.Writer writer = null;
    int index0 = -1;
    try {
        writer = SequenceFile.createWriter(outFs, conf, partFile, DoubleWritable.class, NullWritable.class);
        NullWritable nullValue = NullWritable.get();
        int i = 0;
        boolean lessthan0 = true;
        for (WritableComparable splitValue : sampler.createPartitions(partitions)) {
            writer.append(splitValue, nullValue);
            if (lessthan0 && ((DoubleWritable) splitValue).get() >= 0) {
                index0 = i;
                lessthan0 = false;
            }
            i++;
        }
        if (lessthan0)
            index0 = partitions - 1;
    } finally {
        IOUtilFunctions.closeSilently(writer);
    }
    return index0;
}
Also used : SequenceFileRecordReader(org.apache.hadoop.mapred.SequenceFileRecordReader) NullWritable(org.apache.hadoop.io.NullWritable) Writable(org.apache.hadoop.io.Writable) DoubleWritable(org.apache.hadoop.io.DoubleWritable) DoubleWritable(org.apache.hadoop.io.DoubleWritable) IOException(java.io.IOException) NullWritable(org.apache.hadoop.io.NullWritable) SequenceFile(org.apache.hadoop.io.SequenceFile) WritableComparable(org.apache.hadoop.io.WritableComparable) FileSystem(org.apache.hadoop.fs.FileSystem) MatrixCell(org.apache.sysml.runtime.matrix.data.MatrixCell) Converter(org.apache.sysml.runtime.matrix.data.Converter) InputSplit(org.apache.hadoop.mapred.InputSplit) Pair(org.apache.sysml.runtime.matrix.data.Pair)
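
The DoubleWritable/NullWritable split points written here are meant to drive a total-order partitioner on the reduce side. SystemML's sort job wires this through its own partitioner and configuration; the sketch below only illustrates the pattern, and the "sort.partition.file" property name is hypothetical.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Partitioner;

// Minimal sketch of a total-order partitioner consuming the partition file.
public class DoubleRangePartitioner implements Partitioner<DoubleWritable, Writable> {
    private double[] boundaries = new double[0];

    @Override
    public void configure(JobConf job) {
        try {
            Path partFile = new Path(job.get("sort.partition.file")); // hypothetical key
            FileSystem fs = partFile.getFileSystem(job);
            List<Double> splits = new ArrayList<>();
            try (SequenceFile.Reader reader = new SequenceFile.Reader(fs, partFile, job)) {
                DoubleWritable key = new DoubleWritable();
                NullWritable value = NullWritable.get();
                while (reader.next(key, value)) {
                    splits.add(key.get());
                }
            }
            boundaries = splits.stream().mapToDouble(Double::doubleValue).toArray();
        } catch (IOException e) {
            throw new RuntimeException("Failed to read partition file", e);
        }
    }

    @Override
    public int getPartition(DoubleWritable key, Writable value, int numPartitions) {
        // the first boundary strictly greater than the key decides the partition
        int p = 0;
        while (p < boundaries.length && key.get() >= boundaries[p]) {
            p++;
        }
        return Math.min(p, numPartitions - 1);
    }
}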

Example 29 with WritableComparable

use of org.apache.hadoop.io.WritableComparable in project systemml by apache.

the class SamplingSortMRInputFormat method writePartitionFile.

/**
 * Use the input splits to take samples of the input and generate sample
 * keys. By default reads 100,000 keys from 10 locations in the input, sorts
 * them and picks N-1 keys to generate N equally sized partitions.
 *
 * @param conf the job to sample
 * @param partFile where to write the output file to
 * @return index value
 * @throws IOException if something goes wrong
 * @throws InstantiationException if InstantiationException occurs
 * @throws IllegalAccessException if IllegalAccessException occurs
 */
@SuppressWarnings({ "unchecked", "unused", "deprecation" })
public static int writePartitionFile(JobConf conf, Path partFile) throws IOException, InstantiationException, IllegalAccessException {
    SamplingSortMRInputFormat inFormat = new SamplingSortMRInputFormat();
    Sampler sampler = new Sampler();
    Class<? extends WritableComparable> targetKeyClass;
    targetKeyClass = (Class<? extends WritableComparable>) conf.getClass(TARGET_KEY_CLASS, WritableComparable.class);
    // get input converter information
    int brlen = MRJobConfiguration.getNumRowsPerBlock(conf, (byte) 0);
    int bclen = MRJobConfiguration.getNumColumnsPerBlock(conf, (byte) 0);
    // indicate whether the matrix value in this mapper is a matrix cell or a matrix block
    int partitions = conf.getNumReduceTasks();
    long sampleSize = conf.getLong(SAMPLE_SIZE, 1000);
    InputSplit[] splits = inFormat.getSplits(conf, conf.getNumMapTasks());
    int samples = Math.min(10, splits.length);
    long recordsPerSample = sampleSize / samples;
    int sampleStep = splits.length / samples;
    // take N samples from different parts of the input
    int totalcount = 0;
    for (int i = 0; i < samples; i++) {
        SequenceFileRecordReader reader = (SequenceFileRecordReader) inFormat.getRecordReader(splits[sampleStep * i], conf, null);
        int count = 0;
        WritableComparable key = (WritableComparable) reader.createKey();
        Writable value = (Writable) reader.createValue();
        while (reader.next(key, value) && count < recordsPerSample) {
            Converter inputConverter = MRJobConfiguration.getInputConverter(conf, (byte) 0);
            inputConverter.setBlockSize(brlen, bclen);
            inputConverter.convert(key, value);
            while (inputConverter.hasNext()) {
                Pair pair = inputConverter.next();
                if (pair.getKey() instanceof DoubleWritable) {
                    sampler.addValue(new DoubleWritable(((DoubleWritable) pair.getKey()).get()));
                } else if (pair.getValue() instanceof MatrixCell) {
                    sampler.addValue(new DoubleWritable(((MatrixCell) pair.getValue()).getValue()));
                } else
                    throw new IOException("SamplingSortMRInputFormat unsupported key/value class: " + pair.getKey().getClass() + ":" + pair.getValue().getClass());
                count++;
            }
            key = (WritableComparable) reader.createKey();
            value = (Writable) reader.createValue();
        }
        totalcount += count;
    }
    // empty input files
    if (totalcount == 0)
        sampler.addValue(new DoubleWritable(0));
    FileSystem outFs = partFile.getFileSystem(conf);
    if (outFs.exists(partFile)) {
        outFs.delete(partFile, false);
    }
    // note: key value always double/null as expected by partitioner
    SequenceFile.Writer writer = null;
    int index0 = -1;
    try {
        writer = SequenceFile.createWriter(outFs, conf, partFile, DoubleWritable.class, NullWritable.class);
        NullWritable nullValue = NullWritable.get();
        int i = 0;
        boolean lessthan0 = true;
        for (WritableComparable splitValue : sampler.createPartitions(partitions)) {
            writer.append(splitValue, nullValue);
            if (lessthan0 && ((DoubleWritable) splitValue).get() >= 0) {
                index0 = i;
                lessthan0 = false;
            }
            i++;
        }
        if (lessthan0)
            index0 = partitions - 1;
    } finally {
        IOUtilFunctions.closeSilently(writer);
    }
    return index0;
}
Also used : SequenceFileRecordReader(org.apache.hadoop.mapred.SequenceFileRecordReader) NullWritable(org.apache.hadoop.io.NullWritable) Writable(org.apache.hadoop.io.Writable) DoubleWritable(org.apache.hadoop.io.DoubleWritable) DoubleWritable(org.apache.hadoop.io.DoubleWritable) IOException(java.io.IOException) NullWritable(org.apache.hadoop.io.NullWritable) SequenceFile(org.apache.hadoop.io.SequenceFile) WritableComparable(org.apache.hadoop.io.WritableComparable) FileSystem(org.apache.hadoop.fs.FileSystem) MatrixCell(org.apache.sysml.runtime.matrix.data.MatrixCell) Converter(org.apache.sysml.runtime.matrix.data.Converter) InputSplit(org.apache.hadoop.mapred.InputSplit) Pair(org.apache.sysml.runtime.matrix.data.Pair)
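
Example 29 is the same method as Example 28, carried over after the project's move from incubator-systemml to systemml. For completeness, a minimal sketch of reading the resulting partition file back to inspect the DoubleWritable split points; the file path is an assumption.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.SequenceFile;

public class PartitionFileDump {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path partFile = new Path(args.length > 0 ? args[0] : "/tmp/sort.partitions"); // hypothetical path
        FileSystem fs = partFile.getFileSystem(conf);
        try (SequenceFile.Reader reader = new SequenceFile.Reader(fs, partFile, conf)) {
            DoubleWritable boundary = new DoubleWritable();
            NullWritable nothing = NullWritable.get();
            int i = 0;
            while (reader.next(boundary, nothing)) {
                System.out.println("split point " + (i++) + " = " + boundary.get());
            }
        }
    }
}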

Example 30 with WritableComparable

use of org.apache.hadoop.io.WritableComparable in project presto by prestodb.

the class HiveUtil method createRecordReader.

public static RecordReader<?, ?> createRecordReader(Configuration configuration, Path path, long start, long length, Properties schema, List<HiveColumnHandle> columns, Map<String, String> customSplitInfo) {
    // determine which hive columns we will read
    List<HiveColumnHandle> readColumns = ImmutableList.copyOf(filter(columns, column -> column.getColumnType() == REGULAR));
    List<Integer> readHiveColumnIndexes = ImmutableList.copyOf(transform(readColumns, HiveColumnHandle::getHiveColumnIndex));
    // Tell hive the columns we would like to read, this lets hive optimize reading column oriented files
    setReadColumns(configuration, readHiveColumnIndexes);
    // Only propagate serialization schema configs by default
    Predicate<String> schemaFilter = schemaProperty -> schemaProperty.startsWith("serialization.");
    InputFormat<?, ?> inputFormat = getInputFormat(configuration, getInputFormatName(schema), true);
    JobConf jobConf = toJobConf(configuration);
    FileSplit fileSplit = new FileSplit(path, start, length, (String[]) null);
    if (!customSplitInfo.isEmpty() && isHudiRealtimeSplit(customSplitInfo)) {
        fileSplit = recreateSplitWithCustomInfo(fileSplit, customSplitInfo);
        // Add additional column information for record reader
        List<String> readHiveColumnNames = ImmutableList.copyOf(transform(readColumns, HiveColumnHandle::getName));
        jobConf.set(READ_COLUMN_NAMES_CONF_STR, Joiner.on(',').join(readHiveColumnNames));
        // Remove filter when using customSplitInfo as the record reader requires complete schema configs
        schemaFilter = schemaProperty -> true;
    }
    schema.stringPropertyNames().stream().filter(schemaFilter).forEach(name -> jobConf.set(name, schema.getProperty(name)));
    // add Airlift LZO and LZOP to head of codecs list so as to not override existing entries
    List<String> codecs = newArrayList(Splitter.on(",").trimResults().omitEmptyStrings().split(jobConf.get("io.compression.codecs", "")));
    if (!codecs.contains(LzoCodec.class.getName())) {
        codecs.add(0, LzoCodec.class.getName());
    }
    if (!codecs.contains(LzopCodec.class.getName())) {
        codecs.add(0, LzopCodec.class.getName());
    }
    jobConf.set("io.compression.codecs", codecs.stream().collect(joining(",")));
    try {
        RecordReader<WritableComparable, Writable> recordReader = (RecordReader<WritableComparable, Writable>) inputFormat.getRecordReader(fileSplit, jobConf, Reporter.NULL);
        int headerCount = getHeaderCount(schema);
        // Only skip header rows when the split is at the beginning of the file
        if (start == 0 && headerCount > 0) {
            Utilities.skipHeader(recordReader, headerCount, recordReader.createKey(), recordReader.createValue());
        }
        int footerCount = getFooterCount(schema);
        if (footerCount > 0) {
            recordReader = new FooterAwareRecordReader<>(recordReader, footerCount, jobConf);
        }
        return recordReader;
    } catch (IOException e) {
        if (e instanceof TextLineLengthLimitExceededException) {
            throw new PrestoException(HIVE_BAD_DATA, "Line too long in text file: " + path, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, format("Error opening Hive split %s (offset=%s, length=%s) using %s: %s", path, start, length, getInputFormatName(schema), firstNonNull(e.getMessage(), e.getClass().getName())), e);
    }
}
Also used : HIVE_TABLE_BUCKETING_IS_IGNORED(com.facebook.presto.hive.HiveErrorCode.HIVE_TABLE_BUCKETING_IS_IGNORED) DateTimeZone(org.joda.time.DateTimeZone) Arrays(java.util.Arrays) FileSystem(org.apache.hadoop.fs.FileSystem) DECIMAL_TYPE_NAME(org.apache.hadoop.hive.serde.serdeConstants.DECIMAL_TYPE_NAME) GENERIC_INTERNAL_ERROR(com.facebook.presto.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR) Writable(org.apache.hadoop.io.Writable) Short.parseShort(java.lang.Short.parseShort) TypeSignature(com.facebook.presto.common.type.TypeSignature) AbstractSerDe(org.apache.hadoop.hive.serde2.AbstractSerDe) MAX_PARTITION_KEY_COLUMN_INDEX(com.facebook.presto.hive.HiveColumnHandle.MAX_PARTITION_KEY_COLUMN_INDEX) BigDecimal(java.math.BigDecimal) FileSplit(org.apache.hadoop.mapred.FileSplit) Matcher(java.util.regex.Matcher) Slices(io.airlift.slice.Slices) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) Varchars.isVarcharType(com.facebook.presto.common.type.Varchars.isVarcharType) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) BigInteger(java.math.BigInteger) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) HiveColumnHandle.isFileModifiedTimeColumnHandle(com.facebook.presto.hive.HiveColumnHandle.isFileModifiedTimeColumnHandle) Double.parseDouble(java.lang.Double.parseDouble) SERIALIZATION_LIB(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_LIB) PageInputFormat(com.facebook.presto.hive.pagefile.PageInputFormat) LzoCodec(io.airlift.compress.lzo.LzoCodec) NullableValue(com.facebook.presto.common.predicate.NullableValue) HIVE_INVALID_METADATA(com.facebook.presto.hive.HiveErrorCode.HIVE_INVALID_METADATA) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) HIVE_SERDE_NOT_FOUND(com.facebook.presto.hive.HiveErrorCode.HIVE_SERDE_NOT_FOUND) READ_ALL_COLUMNS(org.apache.hadoop.hive.serde2.ColumnProjectionUtils.READ_ALL_COLUMNS) HiveColumnHandle.bucketColumnHandle(com.facebook.presto.hive.HiveColumnHandle.bucketColumnHandle) Collectors.joining(java.util.stream.Collectors.joining) InvocationTargetException(java.lang.reflect.InvocationTargetException) TypeUtils.isEnumType(com.facebook.presto.common.type.TypeUtils.isEnumType) UncheckedIOException(java.io.UncheckedIOException) Decimals.isShortDecimal(com.facebook.presto.common.type.Decimals.isShortDecimal) Lists.newArrayList(com.google.common.collect.Lists.newArrayList) FooterAwareRecordReader(com.facebook.presto.hive.util.FooterAwareRecordReader) Predicate(com.google.common.base.Predicate) INTEGER(com.facebook.presto.common.type.IntegerType.INTEGER) DateTimePrinter(org.joda.time.format.DateTimePrinter) RecordReader(org.apache.hadoop.mapred.RecordReader) Iterables.filter(com.google.common.collect.Iterables.filter) Joiner(com.google.common.base.Joiner) StandardTypes(com.facebook.presto.common.type.StandardTypes) DecimalType(com.facebook.presto.common.type.DecimalType) Table(com.facebook.presto.hive.metastore.Table) Slice(io.airlift.slice.Slice) DateTimeFormatterBuilder(org.joda.time.format.DateTimeFormatterBuilder) ByteArrayOutputStream(java.io.ByteArrayOutputStream) TypeSignatureParameter(com.facebook.presto.common.type.TypeSignatureParameter) Chars.isCharType(com.facebook.presto.common.type.Chars.isCharType) REGULAR(com.facebook.presto.hive.HiveColumnHandle.ColumnType.REGULAR) TINYINT(com.facebook.presto.common.type.TinyintType.TINYINT) WritableComparable(org.apache.hadoop.io.WritableComparable) 
TIMESTAMP(com.facebook.presto.common.type.TimestampType.TIMESTAMP) DATE(com.facebook.presto.common.type.DateType.DATE) REAL(com.facebook.presto.common.type.RealType.REAL) Float.floatToRawIntBits(java.lang.Float.floatToRawIntBits) Byte.parseByte(java.lang.Byte.parseByte) ConfigurationUtils.toJobConf(com.facebook.presto.hive.util.ConfigurationUtils.toJobConf) COLLECTION_DELIM(org.apache.hadoop.hive.serde.serdeConstants.COLLECTION_DELIM) BOOLEAN(com.facebook.presto.common.type.BooleanType.BOOLEAN) HIVE_BAD_DATA(com.facebook.presto.hive.HiveErrorCode.HIVE_BAD_DATA) CharType(com.facebook.presto.common.type.CharType) Nullable(javax.annotation.Nullable) BIGINT(com.facebook.presto.common.type.BigintType.BIGINT) CompressionCodecFactory(org.apache.hadoop.io.compress.CompressionCodecFactory) Properties(java.util.Properties) HiveColumnHandle.fileSizeColumnHandle(com.facebook.presto.hive.HiveColumnHandle.fileSizeColumnHandle) Reporter(org.apache.hadoop.mapred.Reporter) HiveColumnHandle.pathColumnHandle(com.facebook.presto.hive.HiveColumnHandle.pathColumnHandle) IOException(java.io.IOException) HoodieRealtimeFileSplit(org.apache.hudi.hadoop.realtime.HoodieRealtimeFileSplit) Field(java.lang.reflect.Field) HiveColumnHandle.isBucketColumnHandle(com.facebook.presto.hive.HiveColumnHandle.isBucketColumnHandle) Chars.trimTrailingSpaces(com.facebook.presto.common.type.Chars.trimTrailingSpaces) SMALLINT(com.facebook.presto.common.type.SmallintType.SMALLINT) HiveColumnHandle.fileModifiedTimeColumnHandle(com.facebook.presto.hive.HiveColumnHandle.fileModifiedTimeColumnHandle) Deserializer(org.apache.hadoop.hive.serde2.Deserializer) CustomSplitConversionUtils.recreateSplitWithCustomInfo(com.facebook.presto.hive.util.CustomSplitConversionUtils.recreateSplitWithCustomInfo) TextLineLengthLimitExceededException(com.facebook.presto.hadoop.TextLineLengthLimitExceededException) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) Long.parseLong(java.lang.Long.parseLong) ReflectionUtils(org.apache.hadoop.util.ReflectionUtils) ZstdInputStreamNoFinalizer(com.github.luben.zstd.ZstdInputStreamNoFinalizer) DateTimeParser(org.joda.time.format.DateTimeParser) HIVE_INVALID_VIEW_DATA(com.facebook.presto.hive.HiveErrorCode.HIVE_INVALID_VIEW_DATA) HiveColumnHandle.isFileSizeColumnHandle(com.facebook.presto.hive.HiveColumnHandle.isFileSizeColumnHandle) HoodieParquetRealtimeInputFormat(org.apache.hudi.hadoop.realtime.HoodieParquetRealtimeInputFormat) READ_COLUMN_NAMES_CONF_STR(org.apache.hadoop.hive.serde2.ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR) Float.parseFloat(java.lang.Float.parseFloat) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) ByteArrayInputStream(java.io.ByteArrayInputStream) ParquetHiveSerDe(org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe) InputFormat(org.apache.hadoop.mapred.InputFormat) Path(org.apache.hadoop.fs.Path) ConfigurationUtils.copy(com.facebook.presto.hive.util.ConfigurationUtils.copy) Splitter(com.google.common.base.Splitter) Method(java.lang.reflect.Method) SliceUtf8(io.airlift.slice.SliceUtf8) DateTimeFormat(org.joda.time.format.DateTimeFormat) ISODateTimeFormat(org.joda.time.format.ISODateTimeFormat) HIVE_UNSUPPORTED_FORMAT(com.facebook.presto.hive.HiveErrorCode.HIVE_UNSUPPORTED_FORMAT) ZstdOutputStreamNoFinalizer(com.github.luben.zstd.ZstdOutputStreamNoFinalizer) ImmutableMap(com.google.common.collect.ImmutableMap) DOUBLE(com.facebook.presto.common.type.DoubleType.DOUBLE) 
VarcharType(com.facebook.presto.common.type.VarcharType) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Codec(com.facebook.airlift.json.Codec) Decimals.isLongDecimal(com.facebook.presto.common.type.Decimals.isLongDecimal) String.format(java.lang.String.format) PrestoAvroSerDe(com.facebook.presto.hive.avro.PrestoAvroSerDe) RecordCursor(com.facebook.presto.spi.RecordCursor) Base64(java.util.Base64) List(java.util.List) ColumnMetadata(com.facebook.presto.spi.ColumnMetadata) NOT_SUPPORTED(com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED) Annotation(java.lang.annotation.Annotation) Optional(java.util.Optional) MoreObjects.firstNonNull(com.google.common.base.MoreObjects.firstNonNull) READ_COLUMN_IDS_CONF_STR(org.apache.hadoop.hive.serde2.ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR) Pattern(java.util.regex.Pattern) PARTITION_KEY(com.facebook.presto.hive.HiveColumnHandle.ColumnType.PARTITION_KEY) TextInputFormat(org.apache.hadoop.mapred.TextInputFormat) Column(com.facebook.presto.hive.metastore.Column) ROUND_UNNECESSARY(java.math.BigDecimal.ROUND_UNNECESSARY) HIVE_INVALID_PARTITION_VALUE(com.facebook.presto.hive.HiveErrorCode.HIVE_INVALID_PARTITION_VALUE) Lists.transform(com.google.common.collect.Lists.transform) PrestoException(com.facebook.presto.spi.PrestoException) OptionalInt(java.util.OptionalInt) HiveColumnHandle.isPathColumnHandle(com.facebook.presto.hive.HiveColumnHandle.isPathColumnHandle) HIVE_CANNOT_OPEN_SPLIT(com.facebook.presto.hive.HiveErrorCode.HIVE_CANNOT_OPEN_SPLIT) LzopCodec(io.airlift.compress.lzo.LzopCodec) SymlinkTextInputFormat(org.apache.hadoop.hive.ql.io.SymlinkTextInputFormat) Utilities(org.apache.hadoop.hive.ql.exec.Utilities) Category(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category) MapredParquetInputFormat(org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat) ImmutableList(com.google.common.collect.ImmutableList) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) TypeManager(com.facebook.presto.common.type.TypeManager) HIVE_FILE_MISSING_COLUMN_NAMES(com.facebook.presto.hive.HiveErrorCode.HIVE_FILE_MISSING_COLUMN_NAMES) Objects.requireNonNull(java.util.Objects.requireNonNull) OrcType(com.facebook.presto.orc.metadata.OrcType) HoodieParquetInputFormat(org.apache.hudi.hadoop.HoodieParquetInputFormat) Type(com.facebook.presto.common.type.Type) NamedTypeSignature(com.facebook.presto.common.type.NamedTypeSignature) VerifyException(com.google.common.base.VerifyException) Storage(com.facebook.presto.hive.metastore.Storage) DateTimeFormatter(org.joda.time.format.DateTimeFormatter) UTF_8(java.nio.charset.StandardCharsets.UTF_8) HIVE_DEFAULT_DYNAMIC_PARTITION(com.facebook.presto.hive.metastore.MetastoreUtil.HIVE_DEFAULT_DYNAMIC_PARTITION) Decimals(com.facebook.presto.common.type.Decimals) Integer.parseInt(java.lang.Integer.parseInt) JavaUtils(org.apache.hadoop.hive.common.JavaUtils) JobConf(org.apache.hadoop.mapred.JobConf) TimeUnit(java.util.concurrent.TimeUnit) FILE_INPUT_FORMAT(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.FILE_INPUT_FORMAT) RowFieldName(com.facebook.presto.common.type.RowFieldName) MetastoreUtil.checkCondition(com.facebook.presto.hive.metastore.MetastoreUtil.checkCondition) InputStream(java.io.InputStream) DecimalType.createDecimalType(com.facebook.presto.common.type.DecimalType.createDecimalType) FooterAwareRecordReader(com.facebook.presto.hive.util.FooterAwareRecordReader) RecordReader(org.apache.hadoop.mapred.RecordReader) 
Writable(org.apache.hadoop.io.Writable) LzoCodec(io.airlift.compress.lzo.LzoCodec) PrestoException(com.facebook.presto.spi.PrestoException) UncheckedIOException(java.io.UncheckedIOException) IOException(java.io.IOException) FileSplit(org.apache.hadoop.mapred.FileSplit) HoodieRealtimeFileSplit(org.apache.hudi.hadoop.realtime.HoodieRealtimeFileSplit) BigInteger(java.math.BigInteger) LzopCodec(io.airlift.compress.lzo.LzopCodec) TextLineLengthLimitExceededException(com.facebook.presto.hadoop.TextLineLengthLimitExceededException) WritableComparable(org.apache.hadoop.io.WritableComparable) ConfigurationUtils.toJobConf(com.facebook.presto.hive.util.ConfigurationUtils.toJobConf) JobConf(org.apache.hadoop.mapred.JobConf)
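
A minimal sketch of draining the reader returned by createRecordReader, following the old-mapred RecordReader contract (key and value objects are created once and reused). The HiveColumnHandle list and schema Properties are assumed to come from Presto's split handling; the counting helper itself is hypothetical.

import java.io.IOException;
import java.util.List;
import java.util.Properties;

import com.facebook.presto.hive.HiveColumnHandle;
import com.facebook.presto.hive.HiveUtil;
import com.google.common.collect.ImmutableMap;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.RecordReader;

public class SplitScanner {
    @SuppressWarnings("unchecked")
    static long countRows(Configuration conf, Path path, long length,
                          Properties schema, List<HiveColumnHandle> columns) throws IOException {
        RecordReader<WritableComparable, Writable> reader =
                (RecordReader<WritableComparable, Writable>) HiveUtil.createRecordReader(
                        conf, path, 0, length, schema, columns, ImmutableMap.of());
        WritableComparable key = reader.createKey();
        Writable value = reader.createValue();
        long rows = 0;
        try {
            while (reader.next(key, value)) {
                rows++; // a real consumer would deserialize 'value' with the table's SerDe
            }
        } finally {
            reader.close();
        }
        return rows;
    }
}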

Aggregations

WritableComparable (org.apache.hadoop.io.WritableComparable)34 IOException (java.io.IOException)14 Writable (org.apache.hadoop.io.Writable)14 Path (org.apache.hadoop.fs.Path)13 FileSystem (org.apache.hadoop.fs.FileSystem)11 JobConf (org.apache.hadoop.mapred.JobConf)6 CompressionCodec (org.apache.hadoop.io.compress.CompressionCodec)5 ArrayList (java.util.ArrayList)4 IntWritable (org.apache.hadoop.io.IntWritable)4 NullWritable (org.apache.hadoop.io.NullWritable)4 SequenceFile (org.apache.hadoop.io.SequenceFile)4 TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext)4 PCollection (com.tdunning.plume.PCollection)3 OutputChannel (com.tdunning.plume.local.lazy.MSCR.OutputChannel)3 PlumeObject (com.tdunning.plume.local.lazy.MapRedExecutor.PlumeObject)3 HashMap (java.util.HashMap)3 BytesWritable (org.apache.hadoop.io.BytesWritable)3 FloatWritable (org.apache.hadoop.io.FloatWritable)3 HCatRecord (org.apache.hive.hcatalog.data.HCatRecord)3 DoFn (com.tdunning.plume.DoFn)2