Use of de.jplag.JPlagResult in project ArTEMiS by ls1intum.
From the class TextPlagiarismDetectionService, method checkPlagiarism.
/**
 * Saves the eligible text submissions of the exercise to a temporary folder, runs JPlag on that folder, and returns the converted result.
 * <p>
 * While the submissions are written, a progress message ("Getting submission: i/n") is sent on the exercise's plagiarism check topic;
 * once the comparison has finished, the COMPLETED state is sent on the same topic.
 * If fewer than two submissions are eligible, JPlag is not started and an empty result is returned.
 * The temporary folder is removed again even when JPlag or the export fails.
 *
 * @param textExercise        to detect plagiarism for
 * @param similarityThreshold ignore comparisons whose similarity is below this threshold (%)
 * @param minimumScore        consider only submissions whose score is greater or equal to this value
 * @param minimumSize         consider only submissions whose size is greater or equal to this value; also passed to JPlag as the minimum token match
 * @return the plagiarism result of the exercise; it has an empty similarity distribution if fewer than two submissions were eligible
 * @throws ExitException is thrown if JPlag exits unexpectedly
 */
public TextPlagiarismResult checkPlagiarism(TextExercise textExercise, float similarityThreshold, int minimumScore, int minimumSize) throws ExitException {
    long start = System.nanoTime();
    String topic = plagiarismWebsocketService.getTextExercisePlagiarismCheckTopic(textExercise.getId());

    final List<TextSubmission> textSubmissions = textSubmissionsForComparison(textExercise, minimumScore, minimumSize);
    final var submissionsSize = textSubmissions.size();
    log.info("Save text submissions for JPlag text comparison with {} submissions", submissionsSize);

    if (submissionsSize < 2) {
        log.info("Insufficient amount of submissions for plagiarism detection. Return empty result.");
        TextPlagiarismResult textPlagiarismResult = new TextPlagiarismResult();
        textPlagiarismResult.setExercise(textExercise);
        textPlagiarismResult.setSimilarityDistribution(new int[0]);
        return textPlagiarismResult;
    }

    // The folder is only created once we know JPlag will actually run, so the early return above leaves nothing behind.
    // TODO: why do we have such a strange folder name?
    // NOTE(review): the path is a fixed constant, so two checks running at the same time would share (and delete) the same folder - confirm that callers serialize.
    final var submissionsFolderName = "./tmp/submissions";
    final var submissionFolderFile = new File(submissionsFolderName);
    submissionFolderFile.mkdirs();

    final JPlagResult jPlagResult;
    try {
        int processedSubmissionCount = 1;
        for (TextSubmission submission : textSubmissions) {
            var progressMessage = "Getting submission: " + processedSubmissionCount + "/" + submissionsSize;
            plagiarismWebsocketService.notifyInstructorAboutPlagiarismState(topic, PlagiarismCheckState.RUNNING, List.of(progressMessage));

            // Detach results, exercise and sibling submissions from the objects before they are handed to the export service.
            submission.setResults(new ArrayList<>());
            StudentParticipation participation = (StudentParticipation) submission.getParticipation();
            participation.setExercise(null);
            participation.setSubmissions(null);

            String participantIdentifier = participation.getParticipantIdentifier();
            if (participantIdentifier == null) {
                participantIdentifier = "unknown";
            }

            try {
                textSubmissionExportService.saveSubmissionToFile(submission, participantIdentifier, submissionsFolderName);
            }
            catch (IOException e) {
                // Best effort: a submission that cannot be written is skipped, but the full exception is logged.
                log.error("Could not save text submission for JPlag text comparison: {}", e.getMessage(), e);
            }
            processedSubmissionCount++;
        }
        log.info("Saving text submissions done");

        JPlagOptions options = new JPlagOptions(submissionsFolderName, LanguageOption.TEXT);
        options.setMinimumTokenMatch(minimumSize);
        // Important: for large courses with more than 1000 students, we might get more than one million results and 10 million files in the file system due to many 0% results,
        // therefore we limit the results to at least 50% or 0.5 similarity, the passed threshold is between 0 and 100%
        options.setSimilarityThreshold(similarityThreshold);

        log.info("Start JPlag Text comparison");
        JPlag jplag = new JPlag(options);
        jPlagResult = jplag.run();
        log.info("JPlag Text comparison finished with {} comparisons. Will limit the number of comparisons to 500", jPlagResult.getComparisons().size());
    }
    finally {
        // Always remove the exported submissions, also when JPlag throws, so that no student data stays on disk.
        log.info("Delete submission folder");
        if (submissionFolderFile.exists()) {
            FileSystemUtils.deleteRecursively(submissionFolderFile);
        }
    }

    TextPlagiarismResult textPlagiarismResult = new TextPlagiarismResult();
    textPlagiarismResult.convertJPlagResult(jPlagResult);
    textPlagiarismResult.setExercise(textExercise);
    log.info("JPlag text comparison for {} submissions done in {}", submissionsSize, TimeLogUtil.formatDurationFrom(start));
    plagiarismWebsocketService.notifyInstructorAboutPlagiarismState(topic, PlagiarismCheckState.COMPLETED, List.of());
    return textPlagiarismResult;
}
Use of de.jplag.JPlagResult in project Artemis by ls1intum.
From the class TextPlagiarismDetectionService, method checkPlagiarism.
/**
 * Saves the eligible text submissions of the exercise to a temporary folder, runs JPlag on that folder, and returns the converted result.
 * <p>
 * While the submissions are written, a progress message ("Getting submission: i/n") is sent on the exercise's plagiarism check topic;
 * once the comparison has finished, the COMPLETED state is sent on the same topic.
 * If fewer than two submissions are eligible, JPlag is not started and an empty result is returned.
 * The temporary folder is removed again even when JPlag or the export fails.
 *
 * @param textExercise        to detect plagiarism for
 * @param similarityThreshold ignore comparisons whose similarity is below this threshold (%)
 * @param minimumScore        consider only submissions whose score is greater or equal to this value
 * @param minimumSize         consider only submissions whose size is greater or equal to this value; also passed to JPlag as the minimum token match
 * @return the plagiarism result of the exercise; it has an empty similarity distribution if fewer than two submissions were eligible
 * @throws ExitException is thrown if JPlag exits unexpectedly
 */
public TextPlagiarismResult checkPlagiarism(TextExercise textExercise, float similarityThreshold, int minimumScore, int minimumSize) throws ExitException {
    long start = System.nanoTime();
    String topic = plagiarismWebsocketService.getTextExercisePlagiarismCheckTopic(textExercise.getId());

    final List<TextSubmission> textSubmissions = textSubmissionsForComparison(textExercise, minimumScore, minimumSize);
    final var submissionsSize = textSubmissions.size();
    log.info("Save text submissions for JPlag text comparison with {} submissions", submissionsSize);

    if (submissionsSize < 2) {
        log.info("Insufficient amount of submissions for plagiarism detection. Return empty result.");
        TextPlagiarismResult textPlagiarismResult = new TextPlagiarismResult();
        textPlagiarismResult.setExercise(textExercise);
        textPlagiarismResult.setSimilarityDistribution(new int[0]);
        return textPlagiarismResult;
    }

    // The folder is only created once we know JPlag will actually run, so the early return above leaves nothing behind.
    // TODO: why do we have such a strange folder name?
    // NOTE(review): the path is a fixed constant, so two checks running at the same time would share (and delete) the same folder - confirm that callers serialize.
    final var submissionsFolderName = "./tmp/submissions";
    final var submissionFolderFile = new File(submissionsFolderName);
    submissionFolderFile.mkdirs();

    final JPlagResult jPlagResult;
    try {
        int processedSubmissionCount = 1;
        for (TextSubmission submission : textSubmissions) {
            var progressMessage = "Getting submission: " + processedSubmissionCount + "/" + submissionsSize;
            plagiarismWebsocketService.notifyInstructorAboutPlagiarismState(topic, PlagiarismCheckState.RUNNING, List.of(progressMessage));

            // Detach results, exercise and sibling submissions from the objects before they are handed to the export service.
            submission.setResults(new ArrayList<>());
            StudentParticipation participation = (StudentParticipation) submission.getParticipation();
            participation.setExercise(null);
            participation.setSubmissions(null);

            String participantIdentifier = participation.getParticipantIdentifier();
            if (participantIdentifier == null) {
                participantIdentifier = "unknown";
            }

            try {
                textSubmissionExportService.saveSubmissionToFile(submission, participantIdentifier, submissionsFolderName);
            }
            catch (IOException e) {
                // Best effort: a submission that cannot be written is skipped, but the full exception is logged.
                log.error("Could not save text submission for JPlag text comparison: {}", e.getMessage(), e);
            }
            processedSubmissionCount++;
        }
        log.info("Saving text submissions done");

        JPlagOptions options = new JPlagOptions(submissionsFolderName, LanguageOption.TEXT);
        options.setMinimumTokenMatch(minimumSize);
        // Important: for large courses with more than 1000 students, we might get more than one million results and 10 million files in the file system due to many 0% results,
        // therefore we limit the results to at least 50% or 0.5 similarity, the passed threshold is between 0 and 100%
        options.setSimilarityThreshold(similarityThreshold);

        log.info("Start JPlag Text comparison");
        JPlag jplag = new JPlag(options);
        jPlagResult = jplag.run();
        log.info("JPlag Text comparison finished with {} comparisons. Will limit the number of comparisons to 500", jPlagResult.getComparisons().size());
    }
    finally {
        // Always remove the exported submissions, also when JPlag throws, so that no student data stays on disk.
        log.info("Delete submission folder");
        if (submissionFolderFile.exists()) {
            FileSystemUtils.deleteRecursively(submissionFolderFile);
        }
    }

    TextPlagiarismResult textPlagiarismResult = new TextPlagiarismResult();
    textPlagiarismResult.convertJPlagResult(jPlagResult);
    textPlagiarismResult.setExercise(textExercise);
    log.info("JPlag text comparison for {} submissions done in {}", submissionsSize, TimeLogUtil.formatDurationFrom(start));
    plagiarismWebsocketService.notifyInstructorAboutPlagiarismState(topic, PlagiarismCheckState.COMPLETED, List.of());
    return textPlagiarismResult;
}
Use of de.jplag.JPlagResult in project JPlag by jplag.
From the class NormalComparisonStrategy, method compareSubmissions.
/**
 * Compares every pair of submissions in the given set and collects the comparisons that were produced.
 * <p>
 * If the set has base code, the submissions are compared to the base code first. Submissions whose token list is
 * {@code null} are left out of the pairwise comparison. The reported duration covers the pairwise loop only.
 *
 * @param submissionSet the submissions to compare
 * @return the result holding the collected comparisons, the submission set, the elapsed time in milliseconds and the options
 */
@Override
public JPlagResult compareSubmissions(SubmissionSet submissionSet) {
    final boolean baseCodePresent = submissionSet.hasBaseCode();
    if (baseCodePresent) {
        compareSubmissionsToBaseCode(submissionSet);
    }

    final List<Submission> candidates = submissionSet.getSubmissions();
    final long startedAtMillis = System.currentTimeMillis();
    final int total = candidates.size();
    final List<JPlagComparison> collected = new ArrayList<>();

    // Visit each unordered pair (left, right) with left < right exactly once.
    for (int left = 0; left < total - 1; left++) {
        final Submission leftSubmission = candidates.get(left);
        if (leftSubmission.getTokenList() != null) {
            for (int right = left + 1; right < total; right++) {
                final Submission rightSubmission = candidates.get(right);
                if (rightSubmission.getTokenList() != null) {
                    compareSubmissions(leftSubmission, rightSubmission, baseCodePresent).ifPresent(collected::add);
                }
            }
        }
    }

    final long elapsedMillis = System.currentTimeMillis() - startedAtMillis;
    return new JPlagResult(collected, submissionSet, elapsedMillis, options);
}
Aggregations