monit-general
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Monit problem with tomcat


From: Chad Neal
Subject: Monit problem with tomcat
Date: Tue, 24 Apr 2012 09:26:08 -0600

 

I am hoping one of you monit gurus can help me. I am using monit to monitor a tomcat process and having some issues getting actions monit takes to succeed. The tomcat app I am monitoring is a product called Fisheye from Atlassian and comes bundled with a version of tomcat that doesn’t write a pid file. To that create the pid file I use pgrep to locate the running process, collect its pid, and write that into to a file. Using this method Monit is able to tell when my app is running or not. The problem happens if I ask Monit to stop or start tomcat. When this happens monit attempts to stop the wrong pid or at start doesn’t seem to read the pid file I write to test if the start was successful. I am attaching a lot of config and log data for your review. Thanks much for any help you can provide.

 

 

Monitrc:

set daemon  60              # check services at 1-minute intervals

set logfile /apps/monit/var/log/monit.log

set idfile /apps/monit/var/.monit.id

set statefile /apps/monit/var/.monit.state

set mailserver mailhost.xxx.com               # primary mailserver

## --8<--

set mail-format {

from: address@hidden

reply-to: address@hidden

subject: ETG Monit alert --  $EVENT $SERVICE

message: $EVENT Service $SERVICE

Date:        $DATE

Action:      $ACTION

Host:        $HOST

Description: $DESCRIPTION

 

This email was generated by the ETG Monit service.

}

## --8<--

set alert address@hidden only on { instance } # receive all instance alerts

set httpd port 5280 and

    allow localhost        # allow localhost to connect to the server and

    allow 169.143.0.0/16

    allow xxx:xxx      # required for command line use

    allow @etgtools        # allow etg admin team access / also allows access to monit.d directory

    allow @bbaowner        # allow bamboo agent owners control over agents

 

check directory monit.d with path /apps/monit/etc/monit.d

  if changed timestamp then exec "/apps/monit/bin/monit reload"

include /apps/monit/etc/monit.d/*.monit

#

 

Tomcat.monit (/apps/monit/etc/monit.d/tomcat.monit)

check process fisheye_test with pidfile /eng/data/fecru_test/fecru.pid

        alert address@hidden

        start program = "/etc/init.d/fecru_test start" with timeout 300 seconds

        stop program  = "/etc/init.d/fecru_test stop"

 

 

init.d script:

RUN_AS_USER=etgfecru

unset CATALINA_OPTS

CATALINA_HOME=/eng/apps/atlassian/fecru/fecru_test_current

export FISHEYE_INST=/eng/data/fecru_test

export FISHEYE_OPTS="-d64 -Xms6g -Xmx6g -XX:MaxPermSize=768m -Dfile.encoding=UTF-8"

export JAVA_HOME=/eng/apps/oracle/java/jdk/fecru_current

PIDFILE="$FISHEYE_INST/fecru.pid"

 

start() {

        echo "Starting FECRU: "

        if [ "$USER" == "root" ]; then

                su $RUN_AS_USER -c "$CATALINA_HOME/bin/start.sh"

        elif [ "$USER" == "$RUN_AS_USER" ]; then

                $CATALINA_HOME/bin/start.sh

        else

                echo "You must be root or $RUN_AS_USER to start fecru\n"

        fi

        # sleep 20s

        pid=`pgrep -f "java -DSERVER_INFO=FECRU_TEST_15160"`

        if [ $pid ]; then

                echo $pid>${PIDFILE}

        fi

        echo "done."

}

stop() {

        echo "Shutting down FECRU: "

        if [ "$USER" == "root" ]; then

                su $RUN_AS_USER -c "$CATALINA_HOME/bin/stop.sh"

        elif [ "$USER" == "$RUN_AS_USER" ]; then

                $CATALINA_HOME/bin/stop.sh

        else

                echo "You must be root or $RUN_AS_USER to stop fecru\n"

        fi

        if [ -e ${PIDFILE} ]; then

                rm ${PIDFILE}

                echo "rm ${PIDFILE}"

        fi

        echo "done."

}

status(){

        pid=`pgrep -f "java -DSERVER_INFO=FECRU_TEST_15160"`

        if [ ! $pid ]; then

          echo "FECRU Test is not running"

        else

          echo "FECRU Test is running with a pid $pid"

        fi

}

pid(){

        pid=`pgrep -f "java -DSERVER_INFO=FECRU_TEST_15160"`

        if [ ! $pid ]; then

          echo "-1"

        else

          echo "$pid"

        fi

}

backup() {

        echo "backing up FECRU: "

        if [ "$USER" == "root" ]; then

                su $RUN_AS_USER -c "$CATALINA_HOME/bin/fisheyectl.sh backup"

        elif [ "$USER" == "$RUN_AS_USER" ]; then

                        $CATALINA_HOME/bin/fisheyectl.sh backup

        else

                        echo "You must be root or $RUN_AS_USER to backup fecru\n"

        fi

        echo "done."

}

restore() {

       echo "restore FECRU: "

                if [ -f $1 ]; then

                        if [ "$USER" == "root" ]; then

                                su $RUN_AS_USER -c "$CATALINA_HOME/bin/fisheyectl.sh restore --force -f $1"

                        elif [ "$USER" == "$RUN_AS_USER" ]; then

                $CATALINA_HOME/bin/fisheyectl.sh restore --force -f $1

                        else

                echo "You must be root or $RUN_AS_USER to restore fecru\n"

                        fi

                else

                        echo "The file $1 does not exist. Restore will not be completed."

                fi

        echo "done."

}

 

case "$1" in

        start)

                start

                ;;

        stop)

                stop

                ;;

                status)

                                status

                                ;;

                pid)

                                pid

                                ;;

        restart)

                stop

                sleep 10

                start

                ;;

        backup)

                backup

                ;;

        restore)

                restore $2

                ;;

        *)

        echo "Usage: $0 {start|stop|status|pid|restart|backup|restore <filename>}"

esac

exit 0

 

Running tomcat info:

address@hidden /eng/apps/atlassian/fecru

218 > cat /eng/data/fecru_test/fecru.pid

22148

address@hidden /eng/apps/atlassian/fecru

219 > ps -ef | grep -i TEST

etgfecru 22148     1 31 09:11 pts/1    00:00:44 /eng/apps/oracle/java/jdk/fecru_current/bin/java -DSERVER_INFO=FECRU_TEST_15160 -Xms6g -Xmx6g -XX:MaxPermSize=768m -Dfile.encoding=UTF-8 -XX:MaxNewSize=128m -Dfisheye.library.path= -Dfisheye.inst=/eng/data/fecru_test -Djava.awt.headless=true -Djava.endorsed.dirs=/eng/apps/atlassian/fecru/fecru_test_current/lib/endorsed -jar /eng/apps/atlassian/fecru/fecru_test_current/fisheyeboot.jar start

etgfecru 22531 13063  0 09:13 pts/1    00:00:00 grep -i test

address@hidden /eng/apps/atlassian/fecru

220 > ./fecru_test.init status

FECRU Test is running with a pid 22148

 

 

Now when attempting to stop tomcat this is found in the monit log files:

[MDT Apr 24 09:15:58] info     : 'fisheye_test' stop on user request

[MDT Apr 24 09:15:58] info     : monit daemon at 22396 awakened

[MDT Apr 24 09:15:58] info     : Awakened by User defined signal 1

[MDT Apr 24 09:15:58] info     : 'fisheye_test' stop: /eng/apps/atlassian/fecru/fecru_test.init

[MDT Apr 24 09:15:58] debug    : monit: pidfile '/eng/data/fecru_test/fecru.pid' does not exist

[MDT Apr 24 09:15:58] debug    : 'fisheye_test' monitoring disabled

[MDT Apr 24 09:15:58] info     : 'fisheye_test' stop action done

 

What is odd is the pid file was removed however the app remains running:

address@hidden /eng/apps/atlassian/fecru

223 > cat /eng/data/fecru_test/fecru.pid

cat: /eng/data/fecru_test/fecru.pid: No such file or directory

address@hidden /eng/apps/atlassian/fecru

224 > ./fecru_test.init status

FECRU Test is running with a pid 22148

 

Now after using the same init script to stop tomcat I enable monitoring for tomcat and this is found in the logs:

[MDT Apr 24 09:20:13] debug    : monit: pidfile '/eng/data/fecru_test/fecru.pid' does not exist

[MDT Apr 24 09:20:13] error    : 'fisheye_test' process is not running

[MDT Apr 24 09:20:13] debug    : -------------------------------------------------------------------------------

[MDT Apr 24 09:20:13] debug    :     /apps/monit/bin/monit [0x418653]

[MDT Apr 24 09:20:13] debug    :     /apps/monit/bin/monit(LogError+0x9f) [0x418e0f]

[MDT Apr 24 09:20:13] debug    :     /apps/monit/bin/monit(Event_post+0x429) [0x415e79]

[MDT Apr 24 09:20:13] debug    :     /apps/monit/bin/monit(check_process+0xa1) [0x425321]

[MDT Apr 24 09:20:13] debug    :     /apps/monit/bin/monit(validate+0x1f4) [0x4257d4]

[MDT Apr 24 09:20:13] debug    :     /apps/monit/bin/monit [0x41265d]

[MDT Apr 24 09:20:13] debug    :     /apps/monit/bin/monit(main+0x4df) [0x412f2f]

[MDT Apr 24 09:20:13] debug    :     /lib64/libc.so.6(__libc_start_main+0xf4) [0x3c0b41d994]

[MDT Apr 24 09:20:13] debug    :     /apps/monit/bin/monit [0x409a19]

[MDT Apr 24 09:20:13] debug    : -------------------------------------------------------------------------------

[MDT Apr 24 09:20:13] info     : 'fisheye_test' trying to restart

[MDT Apr 24 09:20:13] debug    : monit: pidfile '/eng/data/fecru_test/fecru.pid' does not exist

[MDT Apr 24 09:20:13] debug    : monit: pidfile '/eng/data/fecru_test/fecru.pid' does not exist

[MDT Apr 24 09:20:13] info     : 'fisheye_test' start: /eng/apps/atlassian/fecru/fecru_test.init

[MDT Apr 24 09:20:13] debug    : monit: pidfile '/eng/data/fecru_test/fecru.pid' does not exist

[MDT Apr 24 09:20:13] debug    : monit: pidfile '/eng/data/fecru_test/fecru.pid' does not exist

[MDT Apr 24 09:21:43] error    : 'fisheye_test' failed to start

[MDT Apr 24 09:21:43] debug    : -------------------------------------------------------------------------------

[MDT Apr 24 09:21:43] debug    :     /apps/monit/bin/monit [0x418653]

[MDT Apr 24 09:21:43] debug    :     /apps/monit/bin/monit(LogError+0x9f) [0x418e0f]

[MDT Apr 24 09:21:43] debug    :     /apps/monit/bin/monit(Event_post+0x429) [0x415e79]

[MDT Apr 24 09:21:43] debug    :     /apps/monit/bin/monit [0x413e13]

[MDT Apr 24 09:21:43] debug    :     /apps/monit/bin/monit(control_service+0x137) [0x4140c7]

[MDT Apr 24 09:21:43] debug    :     /apps/monit/bin/monit [0x415774]

[MDT Apr 24 09:21:43] debug    :     /apps/monit/bin/monit(Event_post+0x46a) [0x415eba]

[MDT Apr 24 09:21:43] debug    :     /apps/monit/bin/monit(check_process+0xa1) [0x425321]

[MDT Apr 24 09:21:43] debug    :     /apps/monit/bin/monit(validate+0x1f4) [0x4257d4]

[MDT Apr 24 09:21:43] debug    :     /apps/monit/bin/monit [0x41265d]

[MDT Apr 24 09:21:43] debug    :     /apps/monit/bin/monit(main+0x4df) [0x412f2f]

[MDT Apr 24 09:21:43] debug    :     /lib64/libc.so.6(__libc_start_main+0xf4) [0x3c0b41d994]

[MDT Apr 24 09:21:43] debug    :     /apps/monit/bin/monit [0x409a19]

 

Thanks for any help you can provide-

Chad

 

 

Chad Neal

_________________________________________________

Information Technology Services

Making Every Mission Possible

 

(o): 303.328.6592 | (c) 720.226.8225 | address@hidden

55 Inverness Drive East | Englewood, CO | 80112-5498 | www.jeppesen.com

 


reply via email to

[Prev in Thread] Current Thread [Next in Thread]