Use of org.apache.parquet.SemanticVersion.SemanticVersionParseException in the parquet-mr project by Apache.
Class CorruptStatistics, method shouldIgnoreStatistics.
/**
 * Determines whether the statistics of a file written by {@code createdBy} (the created_by
 * field from parquet format) have to be disregarded because they are potentially corrupt.
 *
 * <p>Only BINARY and FIXED_LEN_BYTE_ARRAY columns are ever distrusted. For those columns the
 * statistics are ignored when created_by is null or empty, when it cannot be parsed, when it
 * names parquet-mr without a version, or when the parquet-mr version is older than
 * PARQUET_251_FIXED_VERSION and also lies outside the half-open range
 * [CDH_5_PARQUET_251_FIXED_START, CDH_5_PARQUET_251_FIXED_END). Files written by any
 * application other than parquet-mr are assumed to be unaffected.
 *
 * @param createdBy the created-by string from a file footer
 * @param columnType the type of the column that this is checking
 * @return true if the statistics may be invalid and should be ignored, false otherwise
 */
public static boolean shouldIgnoreStatistics(String createdBy, PrimitiveTypeName columnType) {
  boolean binaryLikeColumn = columnType == PrimitiveTypeName.BINARY
      || columnType == PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY;
  if (!binaryLikeColumn) {
    // every other column type is outside the scope of the bug
    return false;
  }

  if (Strings.isNullOrEmpty(createdBy)) {
    // A missing created_by may itself be the work of parquet-mr from the
    // PARQUET-251 era (see PARQUET-297), so the stats cannot be trusted.
    warnOnce("Ignoring statistics because created_by is null or empty! See PARQUET-251 and PARQUET-297");
    return true;
  }

  // Any failure to parse created_by below is logged and answered with "ignore the
  // stats" rather than being propagated: a bad footer string must not be fatal.
  try {
    ParsedVersion writer = VersionParser.parse(createdBy);

    boolean writtenByParquetMr = "parquet-mr".equals(writer.application);
    if (!writtenByParquetMr) {
      // other writers are assumed to be free of this bug
      return false;
    }

    if (Strings.isNullOrEmpty(writer.version)) {
      warnOnce("Ignoring statistics because created_by did not contain a semver (see PARQUET-251): " + createdBy);
      return true;
    }

    SemanticVersion writerVersion = SemanticVersion.parse(writer.version);

    if (writerVersion.compareTo(PARQUET_251_FIXED_VERSION) >= 0) {
      // written at or after the release that contains the fix
      return false;
    }

    // Older than the fixed release: still trustworthy if the version falls in
    // the range delimited by the CDH_5 start (inclusive) / end (exclusive) constants.
    boolean withinCdhFixedRange = writerVersion.compareTo(CDH_5_PARQUET_251_FIXED_START) >= 0
        && writerVersion.compareTo(CDH_5_PARQUET_251_FIXED_END) < 0;
    if (withinCdhFixedRange) {
      return false;
    }

    warnOnce("Ignoring statistics because this file was created prior to " + PARQUET_251_FIXED_VERSION + ", see PARQUET-251");
    return true;
  } catch (RuntimeException e) {
    warnParseErrorOnce(createdBy, e);
    return true;
  } catch (SemanticVersionParseException e) {
    warnParseErrorOnce(createdBy, e);
    return true;
  } catch (VersionParseException e) {
    warnParseErrorOnce(createdBy, e);
    return true;
  }
}
Aggregations