bug-hurd
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH 4/4] kern: optimize the layout of struct kmem_cache


From: Justus Winter
Subject: Re: [PATCH 4/4] kern: optimize the layout of struct kmem_cache
Date: Mon, 06 Jan 2014 11:47:10 +0100
User-agent: alot/0.3.4

Quoting Samuel Thibault (2014-01-06 02:43:20)
> Justus Winter, le Mon 06 Jan 2014 00:34:55 +0100, a écrit :
> > * kern/slab.h (struct kmem_cache): Reorder the fields so that all hot
> > fields are within the first cache line.
> 
> Did you try to benchmark this a bit?
> 
> I'm unsure about all movements: nr_free_slabs, for instance, should
> probably really go along nr_slabs, as well as nr_objs, etc.

No, but I profiled this.  This is the output of pahole (on the left)
and the results of my profiling on the right.  The profiling columns
repeat the member's offset, and the rightmost number is the access
count (in hexadecimal).  This is the situation before my patch:

struct kmem_cache {
        simple_lock_data_t         lock;                 /*     0     4 */      0                 3ba043
        struct list                node;                 /*     4     8 */      4                     58
        struct list                partial_slabs;        /*    12     8 */      12                3d28af
        struct list                free_slabs;           /*    20     8 */      20                33dcd0
        struct rbtree              active_slabs;         /*    28     4 */      28                1781c6
        int                        flags;                /*    32     4 */      32                6e9ab5
        size_t                     obj_size;             /*    36     4 */      36                    2c
        size_t                     align;                /*    40     4 */      40                   16a
        size_t                     buf_size;             /*    44     4 */      44                   196
        size_t                     bufctl_dist;          /*    48     4 */      48                3b9df3
        size_t                     slab_size;            /*    52     4 */      52                3477ae
        size_t                     color;                /*    56     4 */      56                   572
        size_t                     color_max;            /*    60     4 */      60                   196
        /* --- cacheline 1 boundary (64 bytes) --- */
        long unsigned int          bufs_per_slab;        /*    64     4 */      64                59451d
        long unsigned int          nr_objs;              /*    68     4 */      68                773bba
        long unsigned int          nr_bufs;              /*    72     4 */      72                   524
        long unsigned int          nr_slabs;             /*    76     4 */      76                   2a8
        long unsigned int          nr_free_slabs;        /*    80     4 */      80                4524b8
        kmem_cache_ctor_t          ctor;                 /*    84     4 */      84                1df6c9
        kmem_slab_alloc_fn_t       slab_alloc_fn;        /*    88     4 */      88                   17c
        kmem_slab_free_fn_t        slab_free_fn;         /*    92     4 */      92                    2c
        char                       name[24];             /*    96    24 */      96                    58
        size_t                     buftag_dist;          /*   120     4 */      120                   2c
        size_t                     redzone_pad;          /*   124     4 */      124                   2c
        /* --- cacheline 2 boundary (128 bytes) --- */

        /* size: 128, cachelines: 2, members: 24 */
};

And with my patch:

struct kmem_cache {
        simple_lock_data_t         lock;                 /*     0     4 */      0                 36b2c4
        struct list                node;                 /*     4     8 */      4                     58
        struct list                partial_slabs;        /*    12     8 */      12                37fd71
        struct list                free_slabs;           /*    20     8 */      20                2ff012
        struct rbtree              active_slabs;         /*    28     4 */      28                14a19e
        int                        flags;                /*    32     4 */      32                64c5a1
        size_t                     bufctl_dist;          /*    36     4 */      36                36b132
        size_t                     slab_size;            /*    40     4 */      40                3082ca
        long unsigned int          bufs_per_slab;        /*    44     4 */      44                51efd8
        long unsigned int          nr_objs;              /*    48     4 */      48                6d6238
        long unsigned int          nr_free_slabs;        /*    52     4 */      52                3fe98e
        kmem_cache_ctor_t          ctor;                 /*    56     4 */      56                1b728c
        size_t                     obj_size;             /*    60     4 */      60                    2c
        /* --- cacheline 1 boundary (64 bytes) --- */
        size_t                     align;                /*    64     4 */      64                   10b
        size_t                     buf_size;             /*    68     4 */      68                   137
        size_t                     color;                /*    72     4 */      72                   3d9
        size_t                     color_max;            /*    76     4 */      76                   137
        long unsigned int          nr_bufs;              /*    80     4 */      80                   3a8
        long unsigned int          nr_slabs;             /*    84     4 */      84                   1ea
        kmem_slab_alloc_fn_t       slab_alloc_fn;        /*    88     4 */      88                   117
        kmem_slab_free_fn_t        slab_free_fn;         /*    92     4 */      92                    2c
        char                       name[24];             /*    96    24 */      96                    58
        size_t                     buftag_dist;          /*   120     4 */      120                   2c
        size_t                     redzone_pad;          /*   124     4 */      124                   2c
        /* --- cacheline 2 boundary (128 bytes) --- */

        /* size: 128, cachelines: 2, members: 24 */
};

The access count of node might be underestimated, because node is
actually aliased, so I kept it at its position.

Justus



reply via email to

[Prev in Thread] Current Thread [Next in Thread]