guile-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

srfi-1 delete-duplicates


From: Kevin Ryde
Subject: srfi-1 delete-duplicates
Date: Sun, 22 Jun 2003 10:23:01 +1000
User-agent: Gnus/5.090019 (Oort Gnus v0.19) Emacs/21.2 (gnu/linux)

This is new delete-duplicates and delete-duplicates!, avoiding the
non-tail-recursions of the current implementations.

Code and test cases below for contemplation.  The loops are a bit
hairy, but seem to run ok.  I'm intending to give them a bit more of a
think before actually checking them in.






SCM_DEFINE (scm_srfi1_delete_duplicates, "delete-duplicates", 1, 1, 0,
            (SCM lst, SCM pred),
            "Return a list containing the elements of @var{lst} but without\n"
            "duplicates.\n"
            "\n"
            "When elements are equal, only the first in @var{lst} is\n"
            "retained.  Equal elements can be anywhere in @var{lst}, they\n"
            "don't have to be adjacent.  The returned list will have the\n"
            "retained elements in the same order as they were in @var{lst}.\n"
            "\n"
            "Equality is determined by @var{pred}, or @code{equal?} if not\n"
            "given.  Calls @code{(pred x y)} are made with element @var{x}\n"
            "being before @var{y} in @var{lst}.  A call is made at most once\n"
            "for each combination, but the sequence of the calls across the\n"
            "elements is unspecified.\n"
            "\n"
            "@var{lst} is not modified, but the return might share a common\n"
            "tail with @var{lst}.\n"
            "\n"
            "In the worst case, this is an @math{O(N^2)} algorithm because\n"
            "it must check each element against all those preceding it.  For\n"
            "long lists it is more efficient to sort and then compare only\n"
            "adjacent elements.")
#define FUNC_NAME s_scm_srfi1_delete_duplicates
{
  scm_t_trampoline_2 equal_p;
  SCM  ret, *p, keeplst, item, l;

  /* skip to end if an empty list (or something invalid) */
  ret = lst;
  if (SCM_CONSP (lst))
    {
      if (SCM_UNBNDP (pred))
        equal_p = equal_trampoline;
      else
        {
          equal_p = scm_trampoline_2 (pred);
          SCM_ASSERT (equal_p, pred, SCM_ARG2, FUNC_NAME);
        }

      /* ret is the new list constructed.  p is where to append, initially
         &ret then SCM_CDRLOC of the last pair.  lst is advanced as each
         element is considered.

         Elements retained are not immediately appended to ret, instead
         keeplst is the last pair in lst which is to be kept but is not yet
         copied.  Initially this is the first pair of lst, since the first
         is always retained.  *p is kept set to keeplst, so ret (inclusive)
         to lst (exclusive) is all the elements retained, making the
         equality search easy.

         If an item must be deleted, elements from keeplst (inclusive) to
         lst (exclusive) must be copied and appended to ret.  When there's
         no more deletions, *p is left set to keeplst, so ret shares
         structure with the original lst.  (ret will be the entire original
         lst if there's no deletions.)  */

      keeplst = lst;
      p = &ret;

      /* loop over lst elements starting from second */
      for (;;)
        {
          lst = SCM_CDR (lst);
          if (! SCM_CONSP (lst))
            break;

          item = SCM_CAR (lst);

          /* loop searching ret upto lst */
          for (l = ret; l != lst; l = SCM_CDR (l))
            {
              if (SCM_NFALSEP (equal_p (pred, SCM_CAR (l), item)))
                {
                  /* duplicate, don't want this element, so copy keeplst
                     (inclusive) to lst (exclusive) onto ret */
                  while (keeplst != lst)
                    {
                      SCM c = scm_cons (SCM_CAR (keeplst), SCM_EOL);
                      *p = c;
                      p = SCM_CDRLOC (c);
                      keeplst = SCM_CDR (keeplst);
                    }

                  keeplst = SCM_CDR (keeplst);
                  *p = keeplst;
                  break;
                }
            }
        }
    }

  /* demand that lst was a proper list */
  SCM_ASSERT_TYPE (SCM_NULL_OR_NIL_P (lst), lst, SCM_ARG1, FUNC_NAME,
                   "list");
  return ret;
}
#undef FUNC_NAME


SCM_DEFINE (scm_srfi1_delete_duplicates_x, "delete-duplicates!", 1, 1, 0,
            (SCM lst, SCM pred),
            "Return a list containing the elements of @var{lst} but without\n"
            "duplicates.\n"
            "\n"
            "When elements are equal, only the first in @var{lst} is\n"
            "retained.  Equal elements can be anywhere in @var{lst}, they\n"
            "don't have to be adjacent.  The returned list will have the\n"
            "retained elements in the same order as they were in @var{lst}.\n"
            "\n"
            "Equality is determined by @var{pred}, or @code{equal?} if not\n"
            "given.  Calls @code{(pred x y)} are made with element @var{x}\n"
            "being before @var{y} in @var{lst}.  A call is made at most once\n"
            "for each combination, but the sequence of the calls across the\n"
            "elements is unspecified.\n"
            "\n"
            "@var{lst} may be modified to construct the returned list.\n"
            "\n"
            "In the worst case, this is an @math{O(N^2)} algorithm because\n"
            "it must check each element against all those preceding it.  For\n"
            "long lists it is more efficient to sort and then compare only\n"
            "adjacent elements.")
#define FUNC_NAME s_scm_srfi1_delete_duplicates_x
{
  scm_t_trampoline_2 equal_p;
  SCM  ret, endret, item, l;

  /* skip to end if an empty list (or something invalid) */
  ret = lst;
  if (SCM_CONSP (lst))
    {
      if (SCM_UNBNDP (pred))
        equal_p = equal_trampoline;
      else
        {
          equal_p = scm_trampoline_2 (pred);
          SCM_ASSERT (equal_p, pred, SCM_ARG2, FUNC_NAME);
        }

      /* ret is the return list, constructed from the pairs of lst.  endret
         is the last pair of ret, initially the first.  lst is advanced as
         elements are considered.  */

      endret = ret;
      for (;;)
        {
          lst = SCM_CDR (lst);
          if (! SCM_CONSP (lst))
            break;

          /* is item equal to any element from ret to endret (inclusive)? */
          item = SCM_CAR (lst);
          l = ret;
          for (;;)
            {
              if (SCM_NFALSEP (equal_p (pred, SCM_CAR (l), item)))
                break;  /* equal, forget this element */

              if (l == endret)
                {
                  /* not equal to any, so append this pair */
                  * SCM_CDRLOC (endret) = lst;
                  endret = lst;
                  break;
                }
              l = SCM_CDR (l);
            }
        }

      /* terminate, in case last element was deleted */
      * SCM_CDRLOC (endret) = SCM_EOL;
    }

  /* demand that lst was a proper list */
  SCM_ASSERT_TYPE (SCM_NULL_OR_NIL_P (lst), lst, SCM_ARG1, FUNC_NAME, "list");

  return ret;
}
#undef FUNC_NAME






(define (ref-delete-duplicates lst . proc)
  "Reference version of srfi-1 `delete-duplicates'."
  (set! proc (if (null? proc) equal? (car proc)))
  (if (null? lst)
      '()
      (do ((keep '()))
          ((null? lst)
           (reverse! keep))
        (set! keep (cons (car lst) keep))
        (set! lst  (ref-delete (car lst) lst proc)))))


;;
;; delete-duplicates and delete-duplicates!
;;

(let () 
  ;; Call (PROC lst) for all lists of length n <= 4, with all combinations
  ;; of numbers 1 to n in the elements
  (define (test-lists proc)
    (do ((n 1 (1+ n)))
        ((> n 4))
      (do ((limit (integer-expt n n))
           (i 0 (1+ i)))
          ((>= i limit))
        (let ((lst '()))
          (do ((j 0 (1+ j))
               (rem i (quotient rem n)))
              ((>= j n))
            (set! lst (cons (remainder rem n) lst)))
          (proc lst)))))

  (define (common-tests delete-duplicates-proc)
    (pass-if-exception "too few args" exception:wrong-num-args
      (delete-duplicates-proc))
    
    (pass-if-exception "too many args" exception:wrong-num-args
      (delete-duplicates-proc '() equal? 99))
    
    (pass-if "empty"
      (eq? '() (delete-duplicates-proc '())))
    
    (pass-if "equal? (default)"
      (equal? '((2))
              (delete-duplicates-proc '((2) (2) (2)))))
    
    (pass-if "eq?"
      (equal? '((2) (2) (2))
              (delete-duplicates-proc '((2) (2) (2)) eq?)))

    (pass-if "called arg order"
      (let ((ok #t))
        (delete-duplicates-proc '(1 2 3 4 5)
                                (lambda (x y)
                                  (if (> x y)
                                      (set! ok #f))
                                  #f))
        ok)))
  
  (with-test-prefix "delete-duplicates"
    (common-tests delete-duplicates)
    
    (test-lists
     (lambda (lst)
       (let ((lst-copy (list-copy lst)))
         (with-test-prefix lst-copy
           (pass-if "result"
             (equal? (delete-duplicates     lst)
                     (ref-delete-duplicates lst)))
           (pass-if "non-destructive"
             (equal? lst-copy lst)))))))  
  
  (with-test-prefix "delete-duplicates!"
    (common-tests delete-duplicates!)
    
    (test-lists
     (lambda (lst)
       (pass-if lst
         (equal? (delete-duplicates!    lst)
                 (ref-delete-duplicates lst)))))))




reply via email to

[Prev in Thread] Current Thread [Next in Thread]