Use of org.apache.avro.mapred.FsInput in project incubator-gobblin by apache.
From class FsSpecConsumer, method changedSpecs:
/**
 * List of newly changed {@link Spec}s for execution on {@link SpecExecutor}.
 * The {@link Spec}s are returned in the increasing order of their modification times.
 */
@Override
public Future<? extends List<Pair<SpecExecutor.Verb, Spec>>> changedSpecs() {
  List<Pair<SpecExecutor.Verb, Spec>> specList = new ArrayList<>();
  FileStatus[] fileStatuses;
  try {
    fileStatuses = this.fs.listStatus(this.specDirPath, new AndPathFilter(new HiddenFilter(), new AvroUtils.AvroPathFilter()));
  } catch (IOException e) {
    log.error("Error when listing files at path: {}", this.specDirPath.toString(), e);
    return null;
  }
  log.info("Found {} files at path {}", fileStatuses.length, this.specDirPath.toString());

  // Sort the {@link JobSpec}s in increasing order of their modification times.
  // This is done so that the {@link JobSpec}s can be handled in FIFO order by the
  // JobConfigurationManager and, eventually, the GobblinHelixJobScheduler.
  Arrays.sort(fileStatuses, Comparator.comparingLong(FileStatus::getModificationTime));

  for (FileStatus fileStatus : fileStatuses) {
    DataFileReader<AvroJobSpec> dataFileReader;
    try {
      dataFileReader = new DataFileReader<>(new FsInput(fileStatus.getPath(), this.fs.getConf()), new SpecificDatumReader<>());
    } catch (IOException e) {
      log.error("Error creating DataFileReader for: {}", fileStatus.getPath().toString(), e);
      continue;
    }

    AvroJobSpec avroJobSpec = null;
    while (dataFileReader.hasNext()) {
      avroJobSpec = dataFileReader.next();
      break;
    }

    if (avroJobSpec != null) {
      JobSpec.Builder jobSpecBuilder = new JobSpec.Builder(avroJobSpec.getUri());
      Properties props = new Properties();
      props.putAll(avroJobSpec.getProperties());
      jobSpecBuilder.withJobCatalogURI(avroJobSpec.getUri())
          .withVersion(avroJobSpec.getVersion())
          .withDescription(avroJobSpec.getDescription())
          .withConfigAsProperties(props)
          .withConfig(ConfigUtils.propertiesToConfig(props));
      try {
        if (!avroJobSpec.getTemplateUri().isEmpty()) {
          jobSpecBuilder.withTemplate(new URI(avroJobSpec.getTemplateUri()));
        }
      } catch (URISyntaxException u) {
        log.error("Error building a job spec: ", u);
        continue;
      }

      String verbName = avroJobSpec.getMetadata().get(SpecExecutor.VERB_KEY);
      SpecExecutor.Verb verb = SpecExecutor.Verb.valueOf(verbName);
      JobSpec jobSpec = jobSpecBuilder.build();
      log.debug("Successfully built jobspec: {}", jobSpec.getUri().toString());
      specList.add(new ImmutablePair<SpecExecutor.Verb, Spec>(verb, jobSpec));
      this.specToPathMap.put(jobSpec.getUri(), fileStatus.getPath());
    }
  }
  return new CompletedFuture<>(specList, null);
}
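Since the result is wrapped in a CompletedFuture, a caller normally just unwraps it with Future.get(). The following is a minimal sketch of such a caller, not code from the project; the consumer field, the log instance, and the logging done per entry are assumptions.

// Hypothetical caller (not from the project); assumes an FsSpecConsumer field named 'consumer'
// and an SLF4J 'log'. Future.get() may throw InterruptedException or ExecutionException.
private void pollChangedSpecs() throws InterruptedException, ExecutionException {
  List<Pair<SpecExecutor.Verb, Spec>> changed = this.consumer.changedSpecs().get();
  for (Pair<SpecExecutor.Verb, Spec> entry : changed) {
    SpecExecutor.Verb verb = entry.getKey();
    Spec spec = entry.getValue();
    // Dispatch on the verb (e.g. ADD, UPDATE, DELETE) as the caller sees fit.
    log.info("Received spec {} with verb {}", spec.getUri(), verb);
  }
}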
Use of org.apache.avro.mapred.FsInput in project crunch by cloudera.
From class AvroRecordReader, method initialize:
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException, InterruptedException {
  FileSplit split = (FileSplit) genericSplit;
  Configuration conf = context.getConfiguration();
  SeekableInput in = new FsInput(split.getPath(), conf);
  DatumReader<T> datumReader = null;
  if (context.getConfiguration().getBoolean(AvroJob.INPUT_IS_REFLECT, true)) {
    ReflectDataFactory factory = Avros.getReflectDataFactory(conf);
    datumReader = factory.getReader(schema);
  } else {
    datumReader = new SpecificDatumReader<T>(schema);
  }
  this.reader = DataFileReader.openReader(in, datumReader);
  // sync to start
  reader.sync(split.getStart());
  this.start = reader.tell();
  this.end = split.getStart() + split.getLength();
}
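The sync/tell bookkeeping above is the standard Avro pattern for restricting a reader to the blocks that belong to one input split. As a hedged illustration (not crunch's actual record-fetch code), the corresponding read loop usually checks pastSync(end) so it stops at the first sync marker beyond the split boundary:

// Illustrative only: bounded iteration over an Avro container file for one split.
// 'reader', and 'end' correspond to the fields set in initialize() above.
while (reader.hasNext() && !reader.pastSync(end)) {
  T record = reader.next();
  // process 'record' ...
}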
Use of org.apache.avro.mapred.FsInput in project gora by apache.
From class DataFileAvroStore, method executePartial:
@Override
protected Result<K, T> executePartial(FileSplitPartitionQuery<K, T> query) throws IOException {
  FsInput fsInput = createFsInput();
  DataFileReader<T> reader = createReader(fsInput);
  return new DataFileAvroResult<>(this, query, reader, fsInput, query.getStart(), query.getLength());
}
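createFsInput and createReader are helpers of the store and their bodies are not shown here. Purely as a hypothetical sketch of the shape such helpers take when backed by FsInput (the field names, getConf(), and the choice of SpecificDatumReader are assumptions, not Gora's actual implementation):

// Hypothetical helpers, for illustration only; Gora may use a different DatumReader.
private FsInput createFsInput() throws IOException {
  // 'outputPath' and 'getConf()' are assumed members of the store.
  return new FsInput(outputPath, getConf());
}

private DataFileReader<T> createReader(FsInput fsInput) throws IOException {
  return new DataFileReader<>(fsInput, new SpecificDatumReader<T>());
}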
Use of org.apache.avro.mapred.FsInput in project incubator-gobblin by apache.
From class TestAvroExtractor, method getRecordFromFile:
public static List<GenericRecord> getRecordFromFile(String path) throws IOException {
  Configuration config = new Configuration();
  SeekableInput input = new FsInput(new Path(path), config);
  DatumReader<GenericRecord> reader1 = new GenericDatumReader<>();
  FileReader<GenericRecord> fileReader = DataFileReader.openReader(input, reader1);
  List<GenericRecord> records = new ArrayList<>();
  for (GenericRecord datum : fileReader) {
    records.add(datum);
  }
  fileReader.close();
  return records;
}
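A test would then call the helper and inspect the decoded records. Hypothetical usage only; the file path below is a placeholder, not a path from the project:

// Hypothetical usage of the helper above; the path is a placeholder.
List<GenericRecord> records = getRecordFromFile("/tmp/test-data.avro");
for (GenericRecord record : records) {
  System.out.println(record);
}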
Use of org.apache.avro.mapred.FsInput in project incubator-gobblin by apache.
From class HdfsReader, method getFsInput:
public FsInput getFsInput() throws IOException {
  Path path = new Path(this.filePathInHdfs);
  Configuration conf = getConfiguration();
  return new FsInput(path, conf);
}
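FsInput implements Avro's SeekableInput, so the usual next step is to wrap the returned object in a DataFileReader and close it when done. A minimal sketch, assuming an HdfsReader instance named hdfsReader and generic records (not code from the project):

// Hypothetical usage; 'hdfsReader' is an HdfsReader instance (assumption).
try (DataFileReader<GenericRecord> avroReader =
         new DataFileReader<>(hdfsReader.getFsInput(), new GenericDatumReader<GenericRecord>())) {
  while (avroReader.hasNext()) {
    GenericRecord record = avroReader.next();
    // process 'record' ...
  }
}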