From 3aac1c8b4df261c733fb967007839e5845a33d3a Mon Sep 17 00:00:00 2001
From: Stephane Gamard
Date: Fri, 11 Jul 2014 10:34:52 +0200
Subject: SONAR-5409 - Monitor kills node if child process never checked-in (crash in main for example)

---
 .../main/java/org/sonar/process/MonitorService.java | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

(limited to 'server/sonar-process')

diff --git a/server/sonar-process/src/main/java/org/sonar/process/MonitorService.java b/server/sonar-process/src/main/java/org/sonar/process/MonitorService.java
index 05abdfc8c15..219fb64e332 100644
--- a/server/sonar-process/src/main/java/org/sonar/process/MonitorService.java
+++ b/server/sonar-process/src/main/java/org/sonar/process/MonitorService.java
@@ -31,12 +31,14 @@ public class MonitorService extends Thread {
 
   private final static Logger LOGGER = LoggerFactory.getLogger(ProcessWrapper.class);
 
+  private final static Long MAX_ELAPSED_TIME = 10000L;
+
   final DatagramSocket socket;
   final Map processes;
   final Map processesPing;
 
   public MonitorService(DatagramSocket socket) {
-    LOGGER.info("Monitor listening on socket:{}", socket.getLocalPort());
+    LOGGER.info("Monitor service is listening on socket:{}", socket.getLocalPort());
     this.socket = socket;
     processes = new HashMap();
     processesPing = new HashMap();
@@ -44,6 +46,7 @@ public class MonitorService extends Thread {
 
   public void register(ProcessWrapper process) {
     this.processes.put(process.getName(), process);
+    this.processesPing.put(process.getName(), System.currentTimeMillis());
   }
 
   @Override
@@ -65,7 +68,7 @@ public class MonitorService extends Thread {
         break;
       }
     }
-    LOGGER.info("Some app has not been pinging");
+    LOGGER.error("Not all process have checked-in. Aborting node");
     for (ProcessWrapper process : processes.values()) {
       process.shutdown();
     }
@@ -77,14 +80,21 @@ public class MonitorService extends Thread {
     //check that all thread wrapper are running
     for (Thread thread : processes.values()) {
       if (thread.isInterrupted()) {
+        LOGGER.error("process {} has been interrupted. Aborting node",
+          thread.getName());
         return false;
       }
     }
 
     //check that all heartbeats are OK
-    for (Long ping : processesPing.values()) {
-      if ((now - ping) > 5000) {
+    for (Map.Entry processPing : processesPing.entrySet()) {
+      if ((now - processPing.getValue()) > MAX_ELAPSED_TIME) {
+        LOGGER.error("process {} has not checked-in since {}ms. Aborting node",
+          processPing.getKey(), (now - processPing.getValue()));
         return false;
+      } else {
+        LOGGER.debug("process {} has last checked-in {}ms ago.",
+          processPing.getKey(), (now - processPing.getValue()));
       }
     }
     return true;
-- 
cgit v1.2.3