Re: [gpsd-dev] Updated docs on NTP segment management


From: Eric S. Raymond
Subject: Re: [gpsd-dev] Updated docs on NTP segment management
Date: Thu, 26 Feb 2015 18:04:04 -0500
User-agent: Mutt/1.5.23 (2014-03-12)

Harlan Stenn <address@hidden>:
> "Eric S. Raymond" writes:
> > Yes.  I wrote a better implementation of NTP's end of the SHM thing
> > yesterday and today, starting from what's in ntpd/refclock_shm.c.  The
> > biggest problematic thing I fixed was the lack of memory barriers to
> > guarantee correctness under concurrent access by multiple writers.  
> > It's disturbing that this hasn't been done in ntpd itself.
> 
> Please open a bug report and submit a patch.

I don't know where your tracker is.  Here's the patch:

--
Refactor the SHM refclock driver and add protective memory barriers.

This change factors the SHM segment querying out of shm_timer() into a new
function, shm_query(), which isolates everything that depends on the SHM
segment data layout. The new function fills in a struct containing the
extracted data and returns a useful status code. Age control and the delta
check remain in shm_timer().
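
For readers without the header in front of them, the layout that shm_query()
reads from is the familiar NTP SHM structure, roughly as sketched below. This
is a reminder of the published interface, not a normative copy of the header,
and the trailing padding is omitted.

    struct shmTime {
        int mode;                   /* 0: use values when valid is set;
                                       1: also require count to be unchanged
                                          across the read */
        volatile int count;         /* changed by the writer around updates */
        time_t clockTimeStampSec;   /* external clock time */
        int clockTimeStampUSec;
        time_t receiveTimeStampSec; /* local receive time */
        int receiveTimeStampUSec;
        int leap;                   /* NTP leap-notification code */
        int precision;
        int nsamples;
        volatile int valid;
        unsigned clockTimeStampNSec;    /* ns-resolution extensions */
        unsigned receiveTimeStampNSec;
        /* ...reserved padding follows in the real header... */
    };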

The memory_barrier() function uses C11's atomic_thread_fence() from
stdatomic.h and is guarded by STD_ATOMIC_H. If the compiler does not have
this feature, but does compile memcpy to an uninterruptible bitblt
instruction, update atomicity should still be guaranteed. If neither of these is true, the
pre-existing count check will still yield a CLASH status when a concurrent
write might screw up a read.
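
For reference, the guard could be wired up roughly as follows. HAVE_STDATOMIC_H
is an assumed configure-time symbol (not part of the patch); only the
STD_ATOMIC_H test and the atomic_thread_fence() call mirror the code in the
diff.

    /* Hypothetical build glue: define STD_ATOMIC_H only when the compiler
     * actually provides C11 <stdatomic.h>.
     */
    #ifdef HAVE_STDATOMIC_H
    # include <stdatomic.h>
    # define STD_ATOMIC_H
    #endif

    static inline void memory_barrier(void)
    {
    #ifdef STD_ATOMIC_H
        /* Full sequentially-consistent fence (C11). */
        atomic_thread_fence(memory_order_seq_cst);
    #endif
        /* Otherwise a no-op; correctness then rests on memcpy being
         * uninterruptible or on the count-based clash check.
         */
    }

Note that the count-based fallback assumes the writer changes the count field
whenever it updates the segment; that is the mode 1 convention, which is why
the clash check is skipped for mode 0.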

The shm_query() code has been tested and works as part of the new ntpmon
utility in GPSD.  The integration with the revised shm_timer() code has
not been tested but should be straightforward to understand and validate.
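
As an illustration of the interface (not code from this patch), a standalone
reader in the spirit of ntpmon might look like the sketch below, assuming
struct shmTime, struct shm_stat_t, and a non-static shm_query() are in scope.
NTPD_BASE is the conventional SHM key base; the rest of the glue (unit
selection, permissions, error handling) is simplified.

    #include <stdio.h>
    #include <unistd.h>
    #include <sys/ipc.h>
    #include <sys/shm.h>

    #define NTPD_BASE 0x4e545030    /* "NTP0"; unit N uses NTPD_BASE + N */

    int main(void)
    {
        /* Attach the existing segment for unit 0; the writer (gpsd or a
         * refclock) must have created it already.
         */
        int id = shmget((key_t)NTPD_BASE, sizeof(struct shmTime), 0);
        if (id == -1) {
            perror("shmget");
            return 1;
        }
        void *p = shmat(id, NULL, 0);
        if (p == (void *)-1) {
            perror("shmat");
            return 1;
        }
        volatile struct shmTime *shm = (volatile struct shmTime *)p;

        for (;;) {
            struct shm_stat_t sample;
            if (shm_query(shm, &sample) == OK)
                printf("clock %lld.%09ld recv %lld.%09ld leap %d\n",
                       (long long)sample.tvt.tv_sec, (long)sample.tvt.tv_nsec,
                       (long long)sample.tvr.tv_sec, (long)sample.tvr.tv_nsec,
                       sample.leap);
            sleep(1);
        }
    }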
---
 ntpd/refclock_shm.c              | 300 +++++++++++++++++++++++++--------------
 1 files changed, 194 insertions(+), 110 deletions(-)

diff --git a/ntpd/refclock_shm.c b/ntpd/refclock_shm.c
index 7174abd..8eccf48 100644
--- a/ntpd/refclock_shm.c
+++ b/ntpd/refclock_shm.c
@@ -348,10 +348,153 @@ shm_poll(
        shm_clockstats(unit, peer);
 }
 
+
+enum segstat_t {
+    OK, NO_SEGMENT, NOT_READY, BAD_MODE, CLASH
+};
+
+struct shm_stat_t {
+    int status;
+    int mode;
+    struct timespec tvc, tvr, tvt;
+    int precision;
+    int leap;
+};
+
+static inline void memory_barrier(void)
+{
+#ifdef STD_ATOMIC_H
+    atomic_thread_fence(memory_order_seq_cst);
+#endif /* STD_ATOMIC_H */
+}
+
+static enum segstat_t shm_query(volatile struct shmTime *shm_in, struct shm_stat_t *shm_stat)
+/* try to grab a sample from the specified SHM segment */
+{
+    volatile struct shmTime shmcopy, *shm = shm_in;
+    volatile int cnt;
+
+    unsigned int cns_new, rns_new;
+
+    /*
+     * This is the main routine. It snatches the time from the shm
+     * board and tacks on a local timestamp.
+     */
+    if (shm == NULL) {
+       shm_stat->status = NO_SEGMENT;
+       return NO_SEGMENT;
+    }
+
+    /*@-type@*//* splint is confused about struct timespec */
+    shm_stat->tvc.tv_sec = shm_stat->tvc.tv_nsec = 0;
+    clock_gettime(CLOCK_REALTIME, &shm_stat->tvc);
+
+    /* relying on word access to be atomic here */
+    if (shm->valid == 0) {
+       shm_stat->status = NOT_READY;
+       return NOT_READY;
+    }
+
+    cnt = shm->count;
+
+    /*
+     * This is proof against concurrency issues if either
+     * (a) the memory_barrier() call works on this host, or
+     * (b) memcpy compiles to an uninterruptible single-instruction bitblt.
+     */
+    memory_barrier();
+    memcpy((void *)&shmcopy, (void *)shm, sizeof(struct shmTime));
+    shm->valid = 0;
+    memory_barrier();
+
+    /* 
+     * Clash detection in case neither (a) nor (b) was true.
+     * Not supported in mode 0, and word access to the count field 
+     * must be atomic for this to work.
+     */
+    if (shmcopy.mode > 0 && cnt != shm->count) {
+       shm_stat->status = CLASH;
+       return shm_stat->status;
+    }
+
+    shm_stat->status = OK;
+    shm_stat->mode = shmcopy.mode;
+
+    switch (shmcopy.mode) {
+    case 0:
+       shm_stat->tvr.tv_sec    = shmcopy.receiveTimeStampSec;
+       shm_stat->tvr.tv_nsec   = shmcopy.receiveTimeStampUSec * 1000;
+       rns_new         = shmcopy.receiveTimeStampNSec;
+       shm_stat->tvt.tv_sec    = shmcopy.clockTimeStampSec;
+       shm_stat->tvt.tv_nsec   = shmcopy.clockTimeStampUSec * 1000;
+       cns_new         = shmcopy.clockTimeStampNSec;
+
+       /* Since the following comparisons are between unsigned
+       ** variables they are always well defined, and any
+       ** (signed) underflow will turn into very large unsigned
+       ** values, well above the 1000 cutoff.
+       **
+       ** Note: The usecs *must* be a *truncated*
+       ** representation of the nsecs. This code will fail for
+       ** *rounded* usecs, and the logic to deal with
+       ** wrap-arounds in the presence of rounded values is
+       ** much more convoluted.
+       */
+       if (   ((cns_new - (unsigned)shm_stat->tvt.tv_nsec) < 1000)
+              && ((rns_new - (unsigned)shm_stat->tvr.tv_nsec) < 1000)) {
+           shm_stat->tvt.tv_nsec = cns_new;
+           shm_stat->tvr.tv_nsec = rns_new;
+       }
+       /* At this point shm_stat->tvr and shm_stat->tvt contain valid ns-level
+       ** timestamps, possibly generated by extending the old
+       ** us-level timestamps
+       */
+       break;
+
+    case 1:
+
+       shm_stat->tvr.tv_sec    = shmcopy.receiveTimeStampSec;
+       shm_stat->tvr.tv_nsec   = shmcopy.receiveTimeStampUSec * 1000;
+       rns_new         = shmcopy.receiveTimeStampNSec;
+       shm_stat->tvt.tv_sec    = shmcopy.clockTimeStampSec;
+       shm_stat->tvt.tv_nsec   = shmcopy.clockTimeStampUSec * 1000;
+       cns_new         = shmcopy.clockTimeStampNSec;
+               
+       /* See the case above for an explanation of the
+       ** following test.
+       */
+       if (   ((cns_new - (unsigned)shm_stat->tvt.tv_nsec) < 1000)
+              && ((rns_new - (unsigned)shm_stat->tvr.tv_nsec) < 1000)) {
+           shm_stat->tvt.tv_nsec = cns_new;
+           shm_stat->tvr.tv_nsec = rns_new;
+       }
+       /* At this point shm_stat->tvr and shm_stat->tvt contain valid ns-level
+       ** timestamps, possibly generated by extending the old
+       ** us-level timestamps
+       */
+       break;
+
+    default:
+       shm_stat->status = BAD_MODE;
+       break;
+    }
+    /*@+type@*/
+
+    /*
+     * leap field is not a leap offset but a leap notification code.
+     * The values are magic numbers used by NTP and set by GPSD, if at all, in
+     * the subframe code.
+     */
+    shm_stat->leap = shmcopy.leap;
+    shm_stat->precision = shmcopy.precision;
+
+    return shm_stat->status;
+}
+
 /*
- * shm_timer - called onece every second.
+ * shm_timer - called once every second.
  *
- * This tries to grab a sample from the SHM segment
+ * This tries to grab a sample from the SHM segment, filtering bad ones
  */
 static void
 shm_timer(
@@ -362,33 +505,20 @@ shm_timer(
        struct refclockproc * const pp = peer->procptr;
        struct shmunit *      const up = pp->unitptr;
 
-       /* access order is important for lock-free SHM access; we
-       ** enforce order by treating the whole structure volatile.
-       **
-       ** IMPORTANT NOTE: This does not protect from reordering on CPU
-       ** level, and it does nothing for cache consistency and
-       ** visibility of changes by other cores. We need atomic and/or
-       ** fence instructions for that.
-       */
        volatile struct shmTime *shm;
 
-       struct timespec tvr;
-       struct timespec tvt;
        l_fp tsrcv;
        l_fp tsref;
-       unsigned int c;
-       unsigned int cns_new, rns_new;
-       int cnt;
+       int c;
 
        /* for formatting 'a_lastcode': */
        struct calendar cd;
-       time_t tt, now;
+       time_t tt;
        vint64 ts;
 
-       /*
-        * This is the main routine. It snatches the time from the shm
-        * board and tacks on a local timestamp.
-        */
+       enum segstat_t status;
+       struct shm_stat_t shm_stat;
+
        up->ticks++;
        if ((shm = up->shm) == NULL) {
                /* try to map again - this may succeed if meanwhile some-
@@ -400,88 +530,43 @@ shm_timer(
                        return;
                }
        }
-       if ( ! shm->valid) {
-               DPRINTF(1, ("%s: SHM not ready\n",
-                           refnumtoa(&peer->srcadr)));
-               up->notready++;
-               return;
-       }
-
-       switch (shm->mode) {
-       case 0:
-               tvr.tv_sec      = shm->receiveTimeStampSec;
-               tvr.tv_nsec     = shm->receiveTimeStampUSec * 1000;
-               rns_new         = shm->receiveTimeStampNSec;
-               tvt.tv_sec      = shm->clockTimeStampSec;
-               tvt.tv_nsec     = shm->clockTimeStampUSec * 1000;
-               cns_new         = shm->clockTimeStampNSec;
-
-               /* Since the following comparisons are between unsigned
-               ** variables they are always well defined, and any
-               ** (signed) underflow will turn into very large unsigned
-               ** values, well above the 1000 cutoff.
-               **
-               ** Note: The usecs *must* be a *truncated*
-               ** representation of the nsecs. This code will fail for
-               ** *rounded* usecs, and the logic to deal with
-               ** wrap-arounds in the presence of rounded values is
-               ** much more convoluted.
-               */
-               if (   ((cns_new - (unsigned)tvt.tv_nsec) < 1000)
-                   && ((rns_new - (unsigned)tvr.tv_nsec) < 1000)) {
-                       tvt.tv_nsec = cns_new;
-                       tvr.tv_nsec = rns_new;
-               }
-               /* At this point tvr and tvt contains valid ns-level
-               ** timestamps, possibly generated by extending the old
-               ** us-level timestamps
-               */
-               DPRINTF(2, ("%s: SHM type 0 sample\n",
-                           refnumtoa(&peer->srcadr)));
-               break;
-
-       case 1:
-               cnt = shm->count;
-
-               tvr.tv_sec      = shm->receiveTimeStampSec;
-               tvr.tv_nsec     = shm->receiveTimeStampUSec * 1000;
-               rns_new         = shm->receiveTimeStampNSec;
-               tvt.tv_sec      = shm->clockTimeStampSec;
-               tvt.tv_nsec     = shm->clockTimeStampUSec * 1000;
-               cns_new         = shm->clockTimeStampNSec;
-               if (cnt != shm->count) {
-                       DPRINTF(1, ("%s: type 1 access clash\n",
-                                   refnumtoa(&peer->srcadr)));
-                       msyslog (LOG_NOTICE, "SHM: access clash in shared memory");
-                       up->clash++;
-                       return;
-               }
-               
-               /* See the case above for an explanation of the
-               ** following test.
-               */
-               if (   ((cns_new - (unsigned)tvt.tv_nsec) < 1000)
-                   && ((rns_new - (unsigned)tvr.tv_nsec) < 1000)) {
-                       tvt.tv_nsec = cns_new;
-                       tvr.tv_nsec = rns_new;
-               }
-               /* At this point tvr and tvt contains valid ns-level
-               ** timestamps, possibly generated by extending the old
-               ** us-level timestamps
-               */
-               DPRINTF(2, ("%s: SHM type 1 sample\n",
-                           refnumtoa(&peer->srcadr)));
-               break;
 
+       /* query the segment, atomically */
+       status = shm_query(shm, &shm_stat);
+
+       switch (status) {
+       case OK:
+           DPRINTF(2, ("%s: SHM type %d sample\n",
+                       refnumtoa(&peer->srcadr), shm_stat.mode));
+           break;
+       case NO_SEGMENT:
+           /* should never happen, but is harmless */
+           return;
+       case NOT_READY:
+           DPRINTF(1, ("%s: SHM not ready\n",refnumtoa(&peer->srcadr)));
+           up->notready++;
+           return;
+       case BAD_MODE:
+           DPRINTF(1, ("%s: SHM type blooper, mode=%d\n",
+                       refnumtoa(&peer->srcadr), shm->mode));
+           up->bad++;
+           msyslog (LOG_ERR, "SHM: bad mode found in shared memory: %d",
+                    shm->mode);
+           return;
+       case CLASH:
+           DPRINTF(1, ("%s: type 1 access clash\n",
+                       refnumtoa(&peer->srcadr)));
+           msyslog (LOG_NOTICE, "SHM: access clash in shared memory");
+           up->clash++;
+           return;
        default:
-               DPRINTF(1, ("%s: SHM type blooper, mode=%d\n",
-                           refnumtoa(&peer->srcadr), shm->mode));
-               up->bad++;
-               msyslog (LOG_ERR, "SHM: bad mode found in shared memory: %d",
-                        shm->mode);
-               return;
+           DPRINTF(1, ("%s: internal error, unknown SHM fetch status\n",
+                       refnumtoa(&peer->srcadr)));
+           msyslog (LOG_NOTICE, "internal error, unknown SHM fetch status");
+           up->bad++;
+           return;
        }
-       shm->valid = 0;
+
 
        /* format the last time code in human-readable form into
         * 'pp->a_lastcode'. Someone claimed: "NetBSD has incompatible
@@ -489,7 +574,7 @@ shm_timer(
         * around that potential problem. BTW, simply casting a pointer
         * is a receipe for disaster on some architectures.
         */
-       tt = (time_t)tvt.tv_sec;
+       tt = (time_t)shm_stat.tvt.tv_sec;
        ts = time_to_vint64(&tt);
        ntpcal_time_to_date(&cd, &ts);
                
@@ -498,12 +583,11 @@ shm_timer(
                     "%04u-%02u-%02uT%02u:%02u:%02u.%09ldZ",
                     cd.year, cd.month, cd.monthday,
                     cd.hour, cd.minute, cd.second,
-                    (long)tvt.tv_nsec);
+                    (long)shm_stat.tvt.tv_nsec);
        pp->lencode = (c < sizeof(pp->a_lastcode)) ? c : 0;
 
        /* check 1: age control of local time stamp */
-       time(&now);
-       tt = now - tvr.tv_sec;
+       tt = shm_stat.tvc.tv_sec - shm_stat.tvr.tv_sec;
        if (tt < 0 || tt > up->max_delay) {
                DPRINTF(1, ("%s:SHM stale/bad receive time, delay=%llds\n",
                            refnumtoa(&peer->srcadr), (long long)tt));
@@ -514,7 +598,7 @@ shm_timer(
        }
 
        /* check 2: delta check */
-       tt = tvr.tv_sec - tvt.tv_sec - (tvr.tv_nsec < tvt.tv_nsec);
+       tt = shm_stat.tvr.tv_sec - shm_stat.tvt.tv_sec - (shm_stat.tvr.tv_nsec < shm_stat.tvt.tv_nsec);
        if (tt < 0)
                tt = -tt;
        if (up->max_delta > 0 && tt > up->max_delta) {
@@ -529,10 +613,10 @@ shm_timer(
        /* if we really made it to this point... we're winners! */
        DPRINTF(2, ("%s: SHM feeding data\n",
                    refnumtoa(&peer->srcadr)));
-       tsrcv = tspec_stamp_to_lfp(tvr);
-       tsref = tspec_stamp_to_lfp(tvt);
-       pp->leap = shm->leap;
-       peer->precision = shm->precision;
+       tsrcv = tspec_stamp_to_lfp(shm_stat.tvr);
+       tsref = tspec_stamp_to_lfp(shm_stat.tvt);
+       pp->leap = shm_stat.leap;
+       peer->precision = shm_stat.precision;
        refclock_process_offset(pp, tsref, tsrcv, pp->fudgetime1);
        up->good++;
 }
 
-- 
2.1.0

-- 
                <a href="http://www.catb.org/~esr/">Eric S. Raymond</a>


