use of org.apache.asterix.external.indexing.ExternalFile in project asterixdb by apache.
the class ExternalIndexingOperations method getSnapshotFromExternalFileSystem.
/**
 * Builds a snapshot of the files currently found under the dataset's external paths.
 * <p>
 * The dataset's {@code ExternalDataConstants.KEY_PATH} property is split on commas; every entry is
 * listed on the file system and each resulting status is passed to {@code handleFile}, which is
 * given the current size of the result list as the next file number.
 *
 * @param dataset
 *            the dataset whose details are {@code ExternalDatasetDetails}
 * @return the non-empty list of files collected from the external file system
 * @throws AlgebricksException
 *             if the file system cannot be listed or closed, or if no file was collected
 */
public static List<ExternalFile> getSnapshotFromExternalFileSystem(Dataset dataset) throws AlgebricksException {
    ArrayList<ExternalFile> files = new ArrayList<>();
    ExternalDatasetDetails datasetDetails = (ExternalDatasetDetails) dataset.getDatasetDetails();
    try {
        // try-with-resources guarantees the file system handle is closed even when listing or
        // handling a file fails part-way through (previously it was only closed on success)
        try (FileSystem fs = getFileSystemObject(datasetDetails.getProperties())) {
            // Get paths of dataset
            String path = datasetDetails.getProperties().get(ExternalDataConstants.KEY_PATH);
            // Add fileStatuses to files
            for (String aPath : path.split(",")) {
                for (FileStatus fileStatus : fs.listStatus(new Path(aPath))) {
                    handleFile(dataset, files, fs, fileStatus, files.size());
                }
            }
        }
        // Checked after the file system is closed, and inside the outer try so that the failure is
        // logged and reported through the same wrapped message as every other failure
        if (files.isEmpty()) {
            throw new AlgebricksException("File Snapshot retrieved from external file system is empty");
        }
        return files;
    } catch (Exception e) {
        LOGGER.log(Level.WARNING, "Exception while trying to get snapshot from external system", e);
        // Keep the original message text but also chain the cause so the stack trace is not lost
        throw new AlgebricksException("Unable to get list of HDFS files " + e, e);
    }
}
use of org.apache.asterix.external.indexing.ExternalFile in project asterixdb by apache.
the class ExternalIndexingOperations method isDatasetUptodate.
/**
 * Compares the file list stored in metadata against a fresh snapshot of the external file system
 * and sorts the differences into the caller-supplied lists.
 * <p>
 * When this method returns:
 * <ul>
 * <li>metadataFiles no longer contains the files whose name, modification time and size all match
 * the snapshot; the entries that remain are marked APPEND_OP (same time, different size) or
 * DROP_OP (different time, or missing from the snapshot)</li>
 * <li>addedFiles contains the snapshot files that are new or whose modification time changed,
 * numbered starting one past the highest file number found in metadataFiles</li>
 * <li>deletedFiles contains a DROP_OP copy of every metadata file that was replaced or is no longer
 * in the file system</li>
 * <li>appendedFiles contains the new (snapshot) information of files whose size changed</li>
 * </ul>
 * Entries of deletedFiles and appendedFiles are given fresh file numbers following those handed to
 * the added files.
 *
 * @param dataset
 *            the dataset whose external files are compared
 * @param metadataFiles
 *            the files recorded in metadata; modified in place as described above
 * @param addedFiles
 *            output list receiving files to add
 * @param deletedFiles
 *            output list receiving files to drop
 * @param appendedFiles
 *            output list receiving files that were appended to
 * @return true when no difference was found (zero delta), false otherwise
 * @throws AlgebricksException
 *             if the snapshot of the external file system cannot be obtained
 */
public static boolean isDatasetUptodate(Dataset dataset, List<ExternalFile> metadataFiles, List<ExternalFile> addedFiles, List<ExternalFile> deletedFiles, List<ExternalFile> appendedFiles) throws AlgebricksException {
    boolean uptodate = true;
    // New numbers must start after the highest number already in use. Taking the maximum (rather
    // than the last element's number) keeps this correct for unsorted input, and an empty metadata
    // list simply starts numbering at 0.
    int newFileNumber = 0;
    for (ExternalFile metadataFile : metadataFiles) {
        newFileNumber = Math.max(newFileNumber, metadataFile.getFileNumber() + 1);
    }
    List<ExternalFile> fileSystemFiles = getSnapshotFromExternalFileSystem(dataset);
    // Loop over file system files < taking care of added files >
    for (ExternalFile fileSystemFile : fileSystemFiles) {
        boolean fileFound = false;
        Iterator<ExternalFile> mdFilesIterator = metadataFiles.iterator();
        // Only the first metadata entry with the same name is considered
        while (!fileFound && mdFilesIterator.hasNext()) {
            ExternalFile metadataFile = mdFilesIterator.next();
            if (!fileSystemFile.getFileName().equals(metadataFile.getFileName())) {
                continue;
            }
            // Same file name
            fileFound = true;
            if (!fileSystemFile.getLastModefiedTime().equals(metadataFile.getLastModefiedTime())) {
                // Different file mod date -> delete and add
                metadataFile.setPendingOp(ExternalFilePendingOp.DROP_OP);
                // The placeholder number 0 is replaced once all added files have been numbered
                deletedFiles.add(new ExternalFile(metadataFile.getDataverseName(), metadataFile.getDatasetName(), 0, metadataFile.getFileName(), metadataFile.getLastModefiedTime(), metadataFile.getSize(), ExternalFilePendingOp.DROP_OP));
                fileSystemFile.setPendingOp(ExternalFilePendingOp.ADD_OP);
                fileSystemFile.setFileNumber(newFileNumber);
                addedFiles.add(fileSystemFile);
                newFileNumber++;
                uptodate = false;
            } else if (fileSystemFile.getSize() == metadataFile.getSize()) {
                // Same timestamp, same size -> no op; the file needs no further processing
                mdFilesIterator.remove();
            } else {
                // Same timestamp, different size -> append op
                metadataFile.setPendingOp(ExternalFilePendingOp.APPEND_OP);
                fileSystemFile.setPendingOp(ExternalFilePendingOp.APPEND_OP);
                appendedFiles.add(fileSystemFile);
                uptodate = false;
            }
        }
        if (!fileFound) {
            // File not stored previously in metadata -> pending add op
            fileSystemFile.setPendingOp(ExternalFilePendingOp.ADD_OP);
            fileSystemFile.setFileNumber(newFileNumber);
            addedFiles.add(fileSystemFile);
            newFileNumber++;
            uptodate = false;
        }
    }
    // first, correct number assignment to deleted and updated files
    for (ExternalFile deletedFile : deletedFiles) {
        deletedFile.setFileNumber(newFileNumber);
        newFileNumber++;
    }
    for (ExternalFile appendedFile : appendedFiles) {
        appendedFile.setFileNumber(newFileNumber);
        newFileNumber++;
    }
    // include the remaining deleted files: anything still marked NO_OP was never matched by name
    // against the snapshot, so it no longer exists in the file system
    for (ExternalFile metadataFile : metadataFiles) {
        if (metadataFile.getPendingOp() == ExternalFilePendingOp.NO_OP) {
            metadataFile.setPendingOp(ExternalFilePendingOp.DROP_OP);
            deletedFiles.add(new ExternalFile(metadataFile.getDataverseName(), metadataFile.getDatasetName(), newFileNumber, metadataFile.getFileName(), metadataFile.getLastModefiedTime(), metadataFile.getSize(), metadataFile.getPendingOp()));
            newFileNumber++;
            uptodate = false;
        }
    }
    return uptodate;
}
use of org.apache.asterix.external.indexing.ExternalFile in project asterixdb by apache.
the class ExternalIndexingOperations method listSubFiles.
/**
 * Recursively collects every non-directory entry found beneath {@code src} into {@code files}.
 * Each collected file is numbered with the size of {@code files} at the moment it is added and is
 * created with a pending operation of NO_OP.
 *
 * @param dataset
 *            supplies the dataverse and dataset names recorded on each file
 * @param srcFs
 *            the file system used for listing
 * @param src
 *            the entry to list; expected to be a folder
 * @param files
 *            the list the discovered files are appended to
 * @throws IOException
 *             if listing a directory fails
 */
private static void listSubFiles(Dataset dataset, FileSystem srcFs, FileStatus src, List<ExternalFile> files) throws IOException {
    for (FileStatus child : srcFs.listStatus(src.getPath())) {
        if (child.isDirectory()) {
            // Descend into the sub-folder; its files are appended to the same list
            listSubFiles(dataset, srcFs, child, files);
            continue;
        }
        String childPath = child.getPath().toUri().getPath();
        Date modifiedOn = new Date(child.getModificationTime());
        files.add(new ExternalFile(dataset.getDataverseName(), dataset.getDatasetName(), files.size(), childPath, modifiedOn, child.getLen(), ExternalFilePendingOp.NO_OP));
    }
}
use of org.apache.asterix.external.indexing.ExternalFile in project asterixdb by apache.
the class ExternalIndexingOperations method buildIndexUpdateOp.
/**
 * Builds the job that loads a secondary index from the combined list of external files.
 * <p>
 * The list passed to the loading job is, in order: every entry of {@code metadataFiles} (entries
 * marked APPEND_OP are reset to NO_OP first), then {@code addedFiles}, then {@code appendedFiles}.
 *
 * @param ds
 *            the dataset the index belongs to
 * @param index
 *            the index to load
 * @param metadataFiles
 *            files recorded in metadata; APPEND_OP entries are modified in place
 * @param addedFiles
 *            files to be added
 * @param appendedFiles
 *            files that were appended to
 * @param metadataProvider
 *            passed through to the job builder
 * @return the job specification produced by {@code IndexUtil.buildSecondaryIndexLoadingJobSpec}
 * @throws AlgebricksException
 *             if the job specification cannot be built
 */
public static JobSpecification buildIndexUpdateOp(Dataset ds, Index index, List<ExternalFile> metadataFiles, List<ExternalFile> addedFiles, List<ExternalFile> appendedFiles, MetadataProvider metadataProvider) throws AlgebricksException {
    ArrayList<ExternalFile> allFiles = new ArrayList<>();
    for (ExternalFile existing : metadataFiles) {
        // The original state of an appended file is treated as a regular (NO_OP) file
        if (existing.getPendingOp() == ExternalFilePendingOp.APPEND_OP) {
            existing.setPendingOp(ExternalFilePendingOp.NO_OP);
        }
        allFiles.add(existing);
    }
    allFiles.addAll(addedFiles);
    allFiles.addAll(appendedFiles);
    return IndexUtil.buildSecondaryIndexLoadingJobSpec(ds, index, metadataProvider, allFiles);
}
use of org.apache.asterix.external.indexing.ExternalFile in project asterixdb by apache.
the class ExternalFileTupleTranslator method createExternalFileFromARecord.
/**
 * Converts a metadata record into an {@code ExternalFile}.
 * <p>
 * Each field is read by its position constant from {@code MetadataRecordTypes}. The pending
 * operation is stored as an integer and is used as an index into
 * {@code ExternalFilePendingOp.values()}.
 *
 * @param externalFileRecord
 *            the record holding the external file fields
 * @return the external file described by the record
 */
private ExternalFile createExternalFileFromARecord(ARecord externalFileRecord) {
    String dataverse = ((AString) externalFileRecord
            .getValueByPos(MetadataRecordTypes.EXTERNAL_FILE_ARECORD_DATAVERSENAME_FIELD_INDEX)).getStringValue();
    String datasetLabel = ((AString) externalFileRecord
            .getValueByPos(MetadataRecordTypes.EXTERNAL_FILE_ARECORD_DATASET_NAME_FIELD_INDEX)).getStringValue();
    int number = ((AInt32) externalFileRecord
            .getValueByPos(MetadataRecordTypes.EXTERNAL_FILE_ARECORD_FILE_NUMBER_FIELD_INDEX)).getIntegerValue();
    String name = ((AString) externalFileRecord
            .getValueByPos(MetadataRecordTypes.EXTERNAL_FILE_ARECORD_FILE_NAME_FIELD_INDEX)).getStringValue();
    long sizeInBytes = ((AInt64) externalFileRecord
            .getValueByPos(MetadataRecordTypes.EXTERNAL_FILE_ARECORD_FILE_SIZE_FIELD_INDEX)).getLongValue();
    long modifiedChronon = ((ADateTime) externalFileRecord
            .getValueByPos(MetadataRecordTypes.EXTERNAL_FILE_ARECORD_FILE_MOD_DATE_FIELD_INDEX)).getChrononTime();
    Date modifiedOn = new Date(modifiedChronon);
    int opIndex = ((AInt32) externalFileRecord
            .getValueByPos(MetadataRecordTypes.EXTERNAL_FILE_ARECORD_FILE_PENDING_OP_FIELD_INDEX)).getIntegerValue();
    // The stored integer is the position of the operation within the enum's values array
    ExternalFilePendingOp op = ExternalFilePendingOp.values()[opIndex];
    return new ExternalFile(dataverse, datasetLabel, number, name, modifiedOn, sizeInBytes, op);
}
Aggregations