use of org.apache.gobblin.source.extractor.filebased.FileBasedHelperException in project incubator-gobblin by apache.
the class GoogleDriveSource method getExtractor.
/**
 * Creates the Google Drive extractor for the supplied work unit.
 *
 * <p>The extractor is constructed with the file system helper, so the helper is initialized
 * here first; a {@link FileBasedHelperException} raised during that initialization is rethrown
 * wrapped in an {@link IOException}.
 *
 * {@inheritDoc}
 * @see org.apache.gobblin.source.Source#getExtractor(org.apache.gobblin.configuration.WorkUnitState)
 */
@Override
public Extractor<S, D> getExtractor(WorkUnitState state) throws IOException {
  Preconditions.checkNotNull(state, "WorkUnitState should not be null");
  LOG.info("WorkUnitState from getExtractor: " + state);

  // The extractor cannot be built without a file system helper, so set it up now.
  try {
    initFileSystemHelper(state);
  } catch (FileBasedHelperException helperFailure) {
    throw new IOException(helperFailure);
  }

  // Fail fast if initialization left the helper unset, then hand it to the extractor.
  return new GoogleDriveExtractor<>(state,
      Preconditions.checkNotNull(fsHelper, "File system helper should not be null"));
}
use of org.apache.gobblin.source.extractor.filebased.FileBasedHelperException in project incubator-gobblin by apache.
the class DatePartitionedNestedRetriever method getFilesToProcess.
/**
 * Collects the files found in the date-partitioned folders, starting at the low watermark and
 * stepping forward one {@code incrementalUnit} at a time until the current time minus
 * {@code leadTimeDuration} has been passed.
 *
 * <p>The {@code maxFilesToReturn} limit is evaluated once per date folder, before that folder is
 * listed; every matching file of a listed folder is added, so the result can hold more than
 * {@code maxFilesToReturn} entries.
 *
 * @param minWatermark epoch milliseconds of the first date folder to inspect
 * @param maxFilesToReturn once this many files are collected, no further date folder is listed
 * @return the collected files, each tagged with the millisecond timestamp of its date folder
 * @throws IOException if the file system cannot be initialized or queried
 */
@Override
public List<FileInfo> getFilesToProcess(long minWatermark, int maxFilesToReturn) throws IOException {
  DateTime latestDay = new DateTime().minus(leadTimeDuration);
  DateTime cursor = new DateTime(minWatermark);
  List<FileInfo> collected = new ArrayList<>();

  try {
    helper.connect();
    this.fs = helper.getFileSystem();
  } catch (FileBasedHelperException e) {
    throw new IOException("Error initializing FileSystem", e);
  }

  while (!cursor.isAfter(latestDay) && collected.size() < maxFilesToReturn) {
    // Constructs the path folder - e.g. /my/data/prefix/2015/01/01/suffix
    Path partitionDir = constructSourcePath(cursor);
    if (this.fs.exists(partitionDir)) {
      long partitionMillis = cursor.getMillis();
      for (FileStatus entry : this.fs.listStatus(partitionDir, getFileFilter())) {
        Path filePath = entry.getPath();
        LOG.info("Will process file " + filePath);
        collected.add(new FileInfo(filePath.toString(), entry.getLen(), partitionMillis));
      }
    }
    cursor = cursor.withFieldAdded(incrementalUnit, 1);
  }

  return collected;
}
use of org.apache.gobblin.source.extractor.filebased.FileBasedHelperException in project incubator-gobblin by apache.
the class PartitionedFileSourceBase method init.
/**
 * Gobblin calls the {@link Source#getWorkunits(SourceState)} method after creating a {@link Source} object with a
 * blank constructor, so any custom initialization of the object needs to be done here.
 *
 * <p>Initializes the retriever and the file system helper, then reads the watermark, per-job
 * limits, table type and source directory from {@code state}.
 *
 * @param state job-level state supplying the configuration properties and previous work unit states
 * @throws RuntimeException if the file system helper fails to initialize (the
 *         {@link FileBasedHelperException} is propagated unchecked)
 * @throws NullPointerException if the extract table type property is not set
 */
protected void init(SourceState state) {
  retriever.init(state);
  try {
    initFileSystemHelper(state);
  } catch (FileBasedHelperException e) {
    // propagate() always throws; throwing its return value makes the abrupt exit explicit
    // to both readers and the compiler.
    throw Throwables.propagate(e);
  }

  AvroFsHelper fsHelper = (AvroFsHelper) this.fsHelper;
  this.fs = fsHelper.getFileSystem();
  this.sourceState = state;

  this.lowWaterMark = getLowWaterMark(state.getPreviousWorkUnitStates(),
      state.getProp(DATE_PARTITIONED_SOURCE_MIN_WATERMARK_VALUE,
          String.valueOf(DEFAULT_DATE_PARTITIONED_SOURCE_MIN_WATERMARK_VALUE)));
  this.maxFilesPerJob = state.getPropAsInt(DATE_PARTITIONED_SOURCE_MAX_FILES_PER_JOB,
      DEFAULT_DATE_PARTITIONED_SOURCE_MAX_FILES_PER_JOB);
  this.maxWorkUnitsPerJob = state.getPropAsInt(DATE_PARTITIONED_SOURCE_MAX_WORKUNITS_PER_JOB,
      DEFAULT_DATE_PARTITIONED_SOURCE_MAX_WORKUNITS_PER_JOB);

  // A missing table type still fails with a NullPointerException, but one that names the key.
  String tableTypeName = java.util.Objects.requireNonNull(
      state.getProp(ConfigurationKeys.EXTRACT_TABLE_TYPE_KEY),
      ConfigurationKeys.EXTRACT_TABLE_TYPE_KEY + " must be set");
  // Upper-case with Locale.ROOT: the default-locale variant maps 'i' to a dotted capital I
  // under e.g. a Turkish locale, which would make valueOf() reject an otherwise valid name.
  this.tableType = TableType.valueOf(tableTypeName.toUpperCase(java.util.Locale.ROOT));

  this.fileCount = 0;
  this.sourceDir = new Path(state.getProp(ConfigurationKeys.SOURCE_FILEBASED_DATA_DIRECTORY));
}
use of org.apache.gobblin.source.extractor.filebased.FileBasedHelperException in project incubator-gobblin by apache.
the class GoogleDriveFsHelper method getFileMTime.
/**
 * Looks up the modification time of the file identified by {@code fileId}.
 *
 * @param fileId identifier of the file; it is used as the file system path and must not be null
 * @return the modification time reported by the file system for that path
 * @throws FileBasedHelperException if the file status cannot be read from the file system
 */
@Override
public long getFileMTime(String fileId) throws FileBasedHelperException {
  Preconditions.checkNotNull(fileId, "fileId is required");

  final Path p = new Path(fileId);
  try {
    return fileSystem.getFileStatus(p).getModificationTime();
  } catch (IOException ioFailure) {
    throw new FileBasedHelperException(
        "Failed to retrieve getModificationTime on path: " + p + " , fileId: " + fileId, ioFailure);
  }
}
use of org.apache.gobblin.source.extractor.filebased.FileBasedHelperException in project incubator-gobblin by apache.
the class GoogleDriveFsHelper method getFileStream.
/**
 * Opens an input stream on the file identified by {@code fileId}.
 *
 * <p>When {@code bufferSizeByte} holds a value it is passed to the file system as the buffer
 * size; otherwise the file system's single-argument {@code open} is used.
 *
 * @param fileId identifier of the file; it is used as the file system path and must not be null
 * @return the stream returned by the file system for that path
 * @throws FileBasedHelperException if the file system fails to open the path
 */
@Override
public InputStream getFileStream(String fileId) throws FileBasedHelperException {
  Preconditions.checkNotNull(fileId, "fileId is required");

  final Path p = new Path(fileId);
  try {
    return bufferSizeByte.isPresent()
        ? fileSystem.open(p, bufferSizeByte.get())
        : fileSystem.open(p);
  } catch (IOException ioFailure) {
    throw new FileBasedHelperException(
        "Failed to open files stream on path: " + p + " , fileId: " + fileId, ioFailure);
  }
}
Aggregations