====== Script to restart stuck tasks - extras status lrt cancel ======
This script finds long running tasks whose descriptions match the given strings and, if a task appears to be stuck (more than the specified time has passed since it last processed an item), cancels and restarts it.\\
This is necessary because some long running tasks may fail to complete their run and waiting for them blocks the whole cascade of scheduled tasks.
To use the script, overwrite the **//tasks//** field with a map from task descriptions to the cutoff time in minutes after which a task is considered stuck and is cancelled and restarted. Note that the script uses full text search, so each entry matches every long running task whose description contains the given string as a substring.\\
\\
Since the script uses fulltext search when looking for tasks, it is possible to use descriptions that capture multiple tasks. For example if you have three tasks with description "restartableTask1", "restartableTask2" and "restartableTask3", they'll all be found if you put one entry with the key "restartableTask" in the **//tasks//** field.\\
\\
The entries in the **//tasks//** field should look like
Map tasks = [
//note that the time offsets are in the Long data type, so you need to add 'l' after the number of minutes
"myTaskDescription": 30l, //this will restart any task with the description myTaskDescription once 30 minutes have passed since its last audit item
"restartableTask1": 15l,
"restartableTask2": 20l,
"restartableTask3": 25l
]
==== How to use ====
The script itself runs for a very short time, so it is necessary to schedule it as a regular task. To do so, perform the following steps:
- identify which long running tasks have trouble with getting stuck and how long it takes for them to finish a successful run.
- give each of those tasks a unique description
- fill the **//tasks//** field in the script with pairs of each task's description and the time after which you're reasonably sure that the task is now stuck
- create a long running task ExecuteScript with ExtrasStatusLRTCancel as its script\\
{{.:pasted:20260407-142429.png}}
- schedule this task to run regularly. How often you run it depends on how often tasks get stuck, typically tens of minutes to hours. In the worst case, the longest time a task can stay stuck is its cutoff time in the //tasks// field + the scheduling period of this script.
==== Conditions the task needs to satisfy to be restarted by this script ====
Before restarting a task, the script checks whether it satisfies the following conditions:
* it has non-empty count (number of items to process) and counter (number of items processed so far)
* counter is equal to the count
* it has already been longer than the cutoff time specified in the //tasks// field since the task created its last audit item
{{.:pasted:20260420-080222.png}}
===== Script =====
import eu.bcvsolutions.idm.core.api.audit.dto.IdmAuditDto;
import eu.bcvsolutions.idm.core.api.audit.dto.filter.IdmAuditFilter;
import eu.bcvsolutions.idm.core.api.audit.service.IdmAuditService;
import eu.bcvsolutions.idm.core.api.domain.OperationState;
import eu.bcvsolutions.idm.core.api.dto.IdmEntityEventDto;
import eu.bcvsolutions.idm.core.api.dto.IdmIdentityDto;
import eu.bcvsolutions.idm.core.api.dto.filter.IdmEntityEventFilter;
import eu.bcvsolutions.idm.core.api.service.EntityEventManager;
import eu.bcvsolutions.idm.core.api.service.IdmEntityEventService;
import eu.bcvsolutions.idm.core.api.service.IdmIdentityService;
import eu.bcvsolutions.idm.core.api.utils.AutowireHelper;
import eu.bcvsolutions.idm.core.eav.entity.IdmFormAttribute_;
import eu.bcvsolutions.idm.core.notification.api.domain.NotificationLevel;
import eu.bcvsolutions.idm.core.notification.api.dto.IdmMessageDto;
import eu.bcvsolutions.idm.core.notification.api.service.NotificationManager;
import eu.bcvsolutions.idm.core.scheduler.api.dto.IdmLongRunningTaskDto;
import eu.bcvsolutions.idm.core.scheduler.api.dto.IdmScheduledTaskDto;
import eu.bcvsolutions.idm.core.scheduler.api.dto.Task;
import eu.bcvsolutions.idm.core.scheduler.api.dto.filter.IdmLongRunningTaskFilter;
import eu.bcvsolutions.idm.core.scheduler.api.service.IdmLongRunningTaskService;
import eu.bcvsolutions.idm.core.scheduler.api.service.IdmScheduledTaskService;
import eu.bcvsolutions.idm.core.scheduler.api.service.LongRunningTaskManager;
import eu.bcvsolutions.idm.core.scheduler.api.service.SchedulerManager;
import java.time.Instant;
import java.time.LocalDate;
import java.time.temporal.ChronoUnit;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.commons.collections4.CollectionUtils;
import org.springframework.data.domain.PageRequest;
import org.springframework.data.domain.Sort;
import com.google.common.collect.Lists;
// Watchdog for stuck long running tasks (LRT).
// For every configured description it looks up RUNNING LRTs and cancels + re-runs
// those that have processed all their items (counter == count) but whose last
// audit record is older than the configured cutoff.
//
// Key   - fulltext search string for the LRT / please use a unique identifier that finds only one LRT
// Value - long value - cutoff in minutes since the last audit record created in the LRT's transaction
//
// Please fill this like:
// Map tasks = [
//     "myTaskDescription": 30l // restarts any matching task whose last audit record is older than 30 minutes
// ]
// Note: the empty map literal in Groovy is [:]; a bare [] is a list literal.
Map tasks = [:]
// Recipient - usernames of identities that receive the summary notification
List recipients = Lists.newArrayList("admin");
// Dont edit below
Logger log = LoggerFactory.getLogger("eu.bcvsolutions.idm.core.extrasStatusLRTCancel");
// One line per restarted (or failed) task; sent as the "logs" parameter of the notification
List logs = Lists.newArrayList();
IdmLongRunningTaskService longRunningTaskService = AutowireHelper.getBean(IdmLongRunningTaskService.class);
LongRunningTaskManager longRunningTaskManager = AutowireHelper.getBean(LongRunningTaskManager.class);
SchedulerManager schedulerManager = AutowireHelper.getBean(SchedulerManager.class);
IdmScheduledTaskService scheduledTaskService = AutowireHelper.getBean(IdmScheduledTaskService.class);
IdmAuditService auditService = AutowireHelper.getBean(IdmAuditService.class);
IdmEntityEventService entityEventService = AutowireHelper.getBean(IdmEntityEventService.class);
EntityEventManager entityEventManager = AutowireHelper.getBean(EntityEventManager.class);
NotificationManager notificationManager = AutowireHelper.getBean(NotificationManager.class);
IdmIdentityService identityService = AutowireHelper.getBean(IdmIdentityService.class);
// Single reference time for the whole run, so every task is judged against the same instant
Date now = new Date();
// Iterate over configured descriptions
for (Map.Entry entry : tasks.entrySet()) {
    long maximumRunAsMinutes = entry.getValue();
    IdmLongRunningTaskFilter filter = new IdmLongRunningTaskFilter();
    // Only tasks in the RUNNING state are examined
    // (filtering by the "running" flag was not working with waiting LRTs)
    filter.setOperationState(OperationState.RUNNING);
    filter.setText(entry.getKey());
    List runningTasks = longRunningTaskService.find(filter, null).getContent();
    for (IdmLongRunningTaskDto runningTask : runningTasks) {
        try {
            Long count = runningTask.getCount();
            Long counter = runningTask.getCounter();
            if ((count == null && counter == null) || (count == 0 && counter == 0)) {
                // LRT doesn't fill count and counter
                // For now it is safer not to touch such a task
                log.warn("Skip LRT " + runningTask.getTaskType() + " count and counter is not filled.");
                continue;
            } else if (count == null) {
                // Skip the state
                log.warn("Skip LRT " + runningTask.getTaskType() + " count is not filled.");
                continue;
            } else if (counter == null) {
                // Still running / created
                log.info("Skip LRT " + runningTask.getTaskType() + " counter is not filled.");
                continue;
            }
            int compare = Long.compare(count, counter);
            if (compare > 0) {
                // Items are still left to process - running LRT
                log.info("LRT " + runningTask.getTaskType() + " running OK.");
                continue;
            }
            if (compare < 0) {
                // Strange state - counter is higher than count; leave the task alone
                continue;
            }
            // LRT has same count and counter - find the newest audit record of its transaction
            IdmAuditFilter filterAudit = new IdmAuditFilter();
            filterAudit.setTransactionId(runningTask.getTransactionId());
            List audits = auditService.find(filterAudit, PageRequest.of(0, 1, Sort.by(Sort.Direction.DESC, "timestamp"))).getContent();
            if (CollectionUtils.isEmpty(audits)) {
                // Without an audit record there is nothing to measure the cutoff against
                log.info("Skip LRT " + runningTask.getTaskType() + " no audit record found for its transaction.");
                continue;
            }
            IdmAuditDto auditDto = audits.get(0);
            Long toAddAsSecond = maximumRunAsMinutes * 60;
            Date revisionDate = auditDto.getRevisionDate();
            Instant modifiedInstantRevisionDate = revisionDate.toInstant().plusSeconds(toAddAsSecond);
            Date modifiedRevisionDate = Date.from(modifiedInstantRevisionDate);
            if (!now.after(modifiedRevisionDate)) {
                // Last audit record is still within the cutoff
                continue;
            }
            // Last audit created by the transaction is older than the cutoff
            // Manually stop task
            longRunningTaskManager.cancel(runningTask.getId());
            // Cancel running events owned by the task
            IdmEntityEventFilter eventFilter = new IdmEntityEventFilter();
            eventFilter.setOwnerId(runningTask.getId());
            eventFilter.setStates(List.of(OperationState.RUNNING));
            List events = entityEventService.find(eventFilter, null).getContent();
            if (events != null && events.size() > 1) {
                // More running events?
                log.warn("LRT " + runningTask.getTaskType() + " has more than one running event.");
            }
            for (IdmEntityEventDto event : events) {
                entityEventManager.cancelEvent(event);
            }
            // then run again
            IdmScheduledTaskDto scheduledTaskDto = scheduledTaskService.get(runningTask.getScheduledTask());
            Task task = schedulerManager.getTask(scheduledTaskDto.getQuartzTaskName());
            schedulerManager.runTask(task.getId());
            StringBuilder sb = new StringBuilder();
            sb.append("Task: ");
            sb.append(runningTask.getTaskType());
            sb.append(" description: ");
            sb.append(runningTask.getTaskDescription());
            logs.add(sb.toString());
        } catch (Exception e) {
            // A failure on one task must not stop the others; it is logged with its
            // stack trace and reported in the notification
            log.error("Error during process LRT " + runningTask.getTaskType() + " error stack trace: ", e);
            StringBuilder sb = new StringBuilder();
            sb.append("NOT PROCESSED Task: ");
            sb.append(runningTask.getTaskType());
            sb.append(" description: ");
            sb.append(runningTask.getTaskDescription());
            sb.append(" Error message: ");
            sb.append(e.getMessage());
            logs.add(sb.toString());
        }
    }
}
// Notify the recipients only when something was restarted or failed
if (CollectionUtils.isNotEmpty(logs)) {
    List finalRecipients = new ArrayList<>();
    for (String username : recipients) {
        IdmIdentityDto identityDto = identityService.getByUsername(username);
        // Usernames that do not resolve to an identity are skipped
        if (identityDto != null) {
            finalRecipients.add(identityDto);
        }
    }
    log.info("Sending message.");
    notificationManager.send(
        "extras:statusCanceledTasks",
        new IdmMessageDto.Builder()
            .setLevel(NotificationLevel.WARNING)
            .addParameter("logs", logs)
            .build(),
        finalRecipients);
}
===== Script authorities =====
=== Class ===
com.google.common.collect.Lists \\ eu.bcvsolutions.idm.core.api.audit.dto.filter.IdmAuditFilter \\ eu.bcvsolutions.idm.core.api.audit.dto.IdmAuditDto \\ eu.bcvsolutions.idm.core.api.audit.service.IdmAuditService \\ eu.bcvsolutions.idm.core.api.domain.OperationState \\ eu.bcvsolutions.idm.core.api.dto.filter.IdmEntityEventFilter \\ eu.bcvsolutions.idm.core.api.dto.IdmEntityEventDto \\ eu.bcvsolutions.idm.core.api.dto.IdmIdentityDto \\ eu.bcvsolutions.idm.core.api.service.EntityEventManager \\ eu.bcvsolutions.idm.core.api.service.IdmEntityEventService \\ eu.bcvsolutions.idm.core.api.service.IdmIdentityService \\ eu.bcvsolutions.idm.core.api.utils.AutowireHelper \\ eu.bcvsolutions.idm.core.eav.entity.IdmFormAttribute_ \\ eu.bcvsolutions.idm.core.notification.api.domain.NotificationLevel \\ eu.bcvsolutions.idm.core.notification.api.domain.NotificationLevel \\ eu.bcvsolutions.idm.core.notification.api.dto.IdmMessageDto \\ eu.bcvsolutions.idm.core.notification.api.dto.IdmMessageDto \\ eu.bcvsolutions.idm.core.notification.api.dto.IdmMessageDto$Builder \\ eu.bcvsolutions.idm.core.notification.api.service.NotificationManager \\ eu.bcvsolutions.idm.core.notification.api.service.NotificationManager \\ eu.bcvsolutions.idm.core.scheduler.api.dto.filter.IdmLongRunningTaskFilter \\ eu.bcvsolutions.idm.core.scheduler.api.dto.IdmLongRunningTaskDto \\ eu.bcvsolutions.idm.core.scheduler.api.dto.IdmScheduledTaskDto \\ eu.bcvsolutions.idm.core.scheduler.api.dto.SimpleTaskTrigger \\ eu.bcvsolutions.idm.core.scheduler.api.dto.Task \\ eu.bcvsolutions.idm.core.scheduler.api.service.IdmLongRunningTaskService \\ eu.bcvsolutions.idm.core.scheduler.api.service.IdmScheduledTaskService \\ eu.bcvsolutions.idm.core.scheduler.api.service.LongRunningTaskManager \\ eu.bcvsolutions.idm.core.scheduler.api.service.SchedulerManager \\ java.lang.StringBuilder \\ java.time.Instant \\ java.time.LocalDate \\ java.time.temporal.ChronoUnit \\ java.util.ArrayList \\ java.util.Date \\ java.util.KeyValueHolder 
\\ java.util.LinkedHashMap$Entry \\ java.util.List \\ java.util.Map \\ java.util.Set \\ org.apache.commons.collections4.CollectionUtils \\ org.springframework.data.domain.PageImpl \\ org.springframework.data.domain.PageRequest \\ org.springframework.data.domain.Sort