gnugo-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[gnugo-devel] yet another one speed optimization


From: Paul Pogonyshev
Subject: [gnugo-devel] yet another one speed optimization
Date: Sun, 26 Jan 2003 23:12:17 +0200
User-agent: KMail/1.4.3

here are the first few lines of a profile of gnugo 3.3.16-pre-3
on nngs2.tst:

Each sample counts as 0.00195312 seconds.
  %   cumulative   self              self     total           
 time   seconds   seconds    calls   s/call   s/call  name    
 12.63 <<  57.15    57.15 133344624     0.00     0.00  scan_for_patterns
  4.98     79.70    22.55 175060566     0.00     0.00  fastlib
  4.16     98.52    18.82   320334     0.00     0.00  
compute_connection_distances
  3.95    116.39    17.87 70621678     0.00     0.00  do_play_move
  3.16    130.68    14.29 63575116     0.00     0.00  hashtable_search
  2.92    143.90    13.22 74884558     0.00     0.00  incremental_order_moves
  2.81    156.63    12.73 55062649     0.00     0.00  order_moves
  2.65    168.64    12.01    98154     0.00     0.00  do_push_owl
  2.30    179.07    10.43       96     0.11     0.13  hashtable_partially_clear
  2.21    189.09    10.02 84961653     0.00     0.00  check_pattern_light

i was quite surprised to see that scan_for_patterns() takes 12.5%
already. in the previous profile i made it was only about 7.5%.
maybe its a side effect on nando's recent patch - since tactical
reading now takes less time, scan_for_patterns() takes relatively
more. or maybe it's just because i now use gcc 3.2.

anyway, i was unsatisfied with 12.5% on a single function and came
up with a simple patch below. it doesn't do wonders, but still saves
about 1% runtime (according to profiles).

here is the "top ten" of the new profile:

Each sample counts as 0.00195312 seconds.
  %   cumulative   self              self     total           
 time   seconds   seconds    calls   s/call   s/call  name    
 11.00 <<  49.29    49.29 133344624     0.00     0.00  scan_for_patterns
  5.06     71.95    22.66 175060566     0.00     0.00  fastlib
  4.30     91.23    19.28   320334     0.00     0.00  
compute_connection_distances
  4.06    109.42    18.20 70621678     0.00     0.00  do_play_move
  3.22    123.86    14.44 63575116     0.00     0.00  hashtable_search
  2.91    136.88    13.02 74884558     0.00     0.00  incremental_order_moves
  2.85    149.64    12.76 55062649     0.00     0.00  order_moves
  2.75    161.96    12.32    98154     0.00     0.00  do_push_owl
  2.28    172.15    10.20       96     0.11     0.13  hashtable_partially_clear
  2.26    182.28    10.13 70614473     0.00     0.00  undo_trymove

unfortunately, do_dfa_matchpat() now takes a little more time itself:

before:
  0.96    332.42     4.36 77181979     0.00     0.00  remove_neighbor
  0.92 << 336.59     4.17 16668078     0.00     0.00  do_dfa_matchpat
  0.83    340.36     3.77 85877637     0.00     0.00  hashdata_invert_stone
-----------------------------------------------
                4.17  145.87 16668078/16668078     dfa_matchpat_loop [15]
[16]    33.2    4.17  145.87 16668078         do_dfa_matchpat [16]
        ^^^^   10.02   78.69 84961653/84961653     check_pattern_light [28]
               57.15    0.00 133344624/133344624     scan_for_patterns [38]

after:
  1.38    279.95     6.18 80023794     0.00     0.00  do_trymove
  1.30 << 285.78     5.83 16668078     0.00     0.00  do_dfa_matchpat
  1.28    291.50     5.72 37912163     0.00     0.00  count_common_libs
-----------------------------------------------
                5.83  138.48 16668078/16668078     dfa_matchpat_loop [15]
[16]    32.2    5.83  138.48 16668078         do_dfa_matchpat [16]
        ^^^^   10.05   79.14 84961653/84961653     check_pattern_light [28]
               49.29    0.00 133344624/133344624     scan_for_patterns [41]

Paul


Index: engine/matchpat.c
===================================================================
RCS file: /cvsroot/gnugo/gnugo/engine/matchpat.c,v
retrieving revision 1.51
diff -u -p -r1.51 matchpat.c
--- engine/matchpat.c   14 Jan 2003 20:50:33 -0000      1.51
+++ engine/matchpat.c   26 Jan 2003 21:02:19 -0000
@@ -790,7 +790,7 @@ extern const int convert[3][4];
 
 /* Forward declarations. */
 static void dfa_prepare_for_match(int color);
-static int scan_for_patterns(dfa_rt_t *pdfa, int l, int dfa_pos,
+static int scan_for_patterns(dfa_rt_t *pdfa, int l, int *dfa_pos,
                             int *pat_list);
 #if DFA_SORT
 static int compare_int(const void *a, const void *b);
@@ -900,7 +900,7 @@ dump_dfa_board(int m, int n)
  * Return the number of patterns found.
  */
 static int
-scan_for_patterns(dfa_rt_t *pdfa, int l, int dfa_pos, int *pat_list)
+scan_for_patterns(dfa_rt_t *pdfa, int l, int *dfa_pos, int *pat_list)
 {
   int delta;
   int state = 1; /* initial state */
@@ -911,27 +911,32 @@ scan_for_patterns(dfa_rt_t *pdfa, int l,
     /* collect patterns indexes */
     int att = pdfa->states[state].att;
     while (att != 0) {
+
+#if DFA_SORT
       if (pdfa->pre_rotated)
         pat_list[id] = pdfa->indexes[att].val;
       else
         pat_list[id] = l + 8 * (int)(pdfa->indexes[att].val);
+#else
+      pat_list[id] = pdfa->indexes[att].val;
+#endif
       id++;
       att = pdfa->indexes[att].next;
     }
       
     /* go to next state */
-    delta = pdfa->states[state].next[dfa_p[dfa_pos + spiral[row][l]]];
+    delta = pdfa->states[state].next[dfa_pos[spiral[row][l]]];
     state += delta;
     row++;
   } while (delta != 0); /* while not on error state */
 
-  ASSERT1(row < MAX_ORDER, dfa_pos);
+  gg_assert(row < MAX_ORDER);
   return id;
 }
 
 
 #if DFA_SORT
-/* used to sort patterns */
+/* Used to sort patterns. */
 static int
 compare_int(const void *a, const void *b)
 {
@@ -940,10 +945,11 @@ compare_int(const void *a, const void *b
      
   return (*da > *db) - (*da < *db);
 }
-#endif
 
 
-/* perform pattern matching with dfa filtering */
+/* Perform pattern matching with dfa filtering. Sort patterns to keep
+ * the same order as standard matcher.
+ */
 static void 
 do_dfa_matchpat(dfa_rt_t *pdfa,
                int anchor, matchpat_callback_fn_ptr callback,
@@ -957,7 +963,7 @@ do_dfa_matchpat(dfa_rt_t *pdfa,
   int reorder[DFA_MAX_MATCHED];
   int *preorder = reorder;
   int maxr = 0, k;
-  int dfa_pos = DFA_POS(I(anchor), J(anchor)) + DFA_OFFSET;
+  int *dfa_pos = dfa_p + DFA_POS(I(anchor), J(anchor)) + DFA_OFFSET;
 
   /* Basic sanity checks. */
   ASSERT_ON_BOARD1(anchor);
@@ -977,10 +983,7 @@ do_dfa_matchpat(dfa_rt_t *pdfa,
 
   /* Sorting patterns keep the same order as 
    * standard pattern matching algorithm */
-#if DFA_SORT
   gg_sort(reorder, maxr, sizeof(int), compare_int);
-#endif /* DFA_SORT */
-
 
   /* Constraints and other tests */
 
@@ -996,6 +999,58 @@ do_dfa_matchpat(dfa_rt_t *pdfa,
                        ll, callback_data, goal, anchor_in_goal);
   }
 }
+
+#else /* !DFA_SORT */
+
+/* Perform pattern matching with dfa filtering. */
+static void 
+do_dfa_matchpat(dfa_rt_t *pdfa,
+               int anchor, matchpat_callback_fn_ptr callback,
+               int color, struct pattern *database,
+               void *callback_data, char goal[BOARDMAX],
+                int anchor_in_goal)
+{
+  int k = 0;
+  int ll;      /* Iterate over transformations (rotations or reflections)  */
+  int patterns[DFA_MAX_MATCHED];
+  int *ll_patterns = patterns;
+  int num_matched = 0;
+  int num_transformations = (pdfa->pre_rotated ? 1 : 8);
+  int transformation_end[8];
+  int *dfa_pos = dfa_p + DFA_POS(I(anchor), J(anchor)) + DFA_OFFSET;
+
+  /* Basic sanity checks. */
+  ASSERT_ON_BOARD1(anchor);
+
+  /* One scan by transformation */
+  for (ll = 0; ll < num_transformations; ll++) {
+    int ll_matched = scan_for_patterns(pdfa, ll, dfa_pos,
+                                      patterns + num_matched);
+    
+    ll_patterns += ll_matched;
+    num_matched += ll_matched;
+    transformation_end[ll] = num_matched;
+  }
+
+  ASSERT1(num_matched <= DFA_MAX_MATCHED, anchor);
+
+  /* Constraints and other tests. */
+  for (ll = 0; ll < num_transformations; ll++) {
+    while (k < transformation_end[ll]) {
+      int matched = patterns[k];
+
+#if PROFILE_PATTERNS
+      database[matched].dfa_hits++;
+#endif
+    
+      check_pattern_light(anchor, callback, color, database + matched,
+                         ll, callback_data, goal, anchor_in_goal);
+      k++;
+    }
+  }
+}
+
+#endif /* !DFA_SORT */
 
 
 /*





reply via email to

[Prev in Thread] Current Thread [Next in Thread]