kmem slab magazine ageing deadlock

- The previous magazine ageing scheme relied on the on_each_cpu()
  function to call spl_magazine_age() on each CPU.  It turns out
  this could deadlock with do_flush_tlb_all(), which also relies
  on the IPI-based on_each_cpu().  To avoid this problem a per-
  magazine delayed work item is created and independently
  scheduled to the correct CPU, removing the need for on_each_cpu()
  (a minimal sketch of this pattern appears below, before the diff).
- Additionally, two unused fields were removed from the type
  spl_kmem_cache_t; they were holdovers from a previous cleanup:
    - struct work_struct work
    - struct timer_list timer
Brian Behlendorf 2009-02-17 15:52:18 -08:00
parent 1a944a7d0b
commit 9b1b8e4c24
2 changed files with 27 additions and 7 deletions
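
Before the diff, a minimal sketch of the per-CPU delayed-work pattern the change adopts, written against the stock Linux workqueue API (INIT_DELAYED_WORK(), schedule_delayed_work_on(), cancel_delayed_work_sync()).  The identifiers my_cache, my_mag, my_mag_age(), my_mag_age_start(), my_mag_age_stop() and MY_BIT_DESTROY are illustrative stand-ins rather than SPL names, and the SPL itself reaches the same calls through its spl_init_delayed_work()/spl_get_work_data() compatibility wrappers; treat this as a sketch of the idea, not the committed implementation.

/*
 * Sketch: one delayed work item embedded in each per-CPU magazine,
 * re-armed on its own CPU, so no IPI (and no on_each_cpu()) is needed.
 * All names here are hypothetical, not the SPL identifiers.
 */
#include <linux/kernel.h>
#include <linux/workqueue.h>
#include <linux/cpumask.h>
#include <linux/smp.h>
#include <linux/bitops.h>
#include <linux/jiffies.h>

#define MY_BIT_DESTROY	0			/* cache is being torn down */

struct my_cache;

struct my_mag {
	struct my_cache		*mag_cache;	/* owning cache */
	struct delayed_work	mag_work;	/* per-CPU ageing work */
	unsigned long		mag_age;	/* last access, in jiffies */
};

struct my_cache {
	unsigned int		cache_delay;	/* ageing interval, seconds */
	unsigned long		cache_flags;
	struct my_mag		*cache_mag[NR_CPUS];
};

/* Runs on the CPU the work item was scheduled on; no cross-CPU calls. */
static void my_mag_age(struct work_struct *work)
{
	/* Recover the magazine from the embedded work item. */
	struct my_mag *mag = container_of(work, struct my_mag, mag_work.work);
	struct my_cache *cache = mag->mag_cache;
	int cpu = smp_processor_id();

	/* ... flush the magazine here if it has sat idle too long ... */

	/* Re-arm on the same CPU unless the cache is being destroyed. */
	if (!test_bit(MY_BIT_DESTROY, &cache->cache_flags))
		schedule_delayed_work_on(cpu, &mag->mag_work,
					 cache->cache_delay / 3 * HZ);
}

/* Start-up: arm one work item per online CPU, pinned to that CPU. */
static void my_mag_age_start(struct my_cache *cache)
{
	int cpu;

	for_each_online_cpu(cpu) {
		struct my_mag *mag = cache->cache_mag[cpu];

		INIT_DELAYED_WORK(&mag->mag_work, my_mag_age);
		schedule_delayed_work_on(cpu, &mag->mag_work,
					 cache->cache_delay / 3 * HZ);
	}
}

/* Tear-down: set the destroy bit first so the work items stop re-arming. */
static void my_mag_age_stop(struct my_cache *cache)
{
	int cpu;

	set_bit(MY_BIT_DESTROY, &cache->cache_flags);
	for_each_online_cpu(cpu)
		cancel_delayed_work_sync(&cache->cache_mag[cpu]->mag_work);
}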


@@ -254,6 +254,8 @@ typedef struct spl_kmem_magazine {
 	uint32_t		skm_avail;	/* Available objects */
 	uint32_t		skm_size;	/* Magazine size */
 	uint32_t		skm_refill;	/* Batch refill size */
+	struct spl_kmem_cache	*skm_cache;	/* Owned by cache */
+	struct delayed_work	skm_work;	/* Magazine reclaim work */
 	unsigned long		skm_age;	/* Last cache access */
 	void			*skm_objs[0];	/* Object pointers */
 } spl_kmem_magazine_t;
@@ -296,8 +298,6 @@ typedef struct spl_kmem_cache {
 	uint32_t		skc_reap;	/* Slab reclaim count */
 	atomic_t		skc_ref;	/* Ref count callers */
 	struct delayed_work	skc_work;	/* Slab reclaim work */
-	struct work_struct	work;
-	struct timer_list	timer;
 	struct list_head	skc_list;	/* List of caches linkage */
 	struct list_head	skc_complete_list; /* Completely alloc'ed */
 	struct list_head	skc_partial_list;  /* Partially alloc'ed */


@@ -932,12 +932,22 @@ spl_slab_reclaim(spl_kmem_cache_t *skc, int count, int flag)
 static void
 spl_magazine_age(void *data)
 {
-	spl_kmem_cache_t *skc = data;
-	spl_kmem_magazine_t *skm = skc->skc_mag[smp_processor_id()];
+	spl_kmem_magazine_t *skm =
+		spl_get_work_data(data, spl_kmem_magazine_t, skm_work.work);
+	spl_kmem_cache_t *skc = skm->skm_cache;
+	int i = smp_processor_id();
+
+	ASSERT(skm->skm_magic == SKM_MAGIC);
+	ASSERT(skc->skc_magic == SKC_MAGIC);
+	ASSERT(skc->skc_mag[i] == skm);
 
 	if (skm->skm_avail > 0 &&
 	    time_after(jiffies, skm->skm_age + skc->skc_delay * HZ))
 		(void)spl_cache_flush(skc, skm, skm->skm_refill);
+
+	if (!test_bit(KMC_BIT_DESTROY, &skc->skc_flags))
+		schedule_delayed_work_on(i, &skm->skm_work,
+		    skc->skc_delay / 3 * HZ);
 }
 
 /*
@@ -954,7 +964,6 @@ spl_cache_age(void *data)
 
 	ASSERT(skc->skc_magic == SKC_MAGIC);
 	spl_slab_reclaim(skc, skc->skc_reap, 0);
-	spl_on_each_cpu(spl_magazine_age, skc, 0);
 
 	if (!test_bit(KMC_BIT_DESTROY, &skc->skc_flags))
 		schedule_delayed_work(&skc->skc_work, skc->skc_delay / 3 * HZ);
@@ -1050,6 +1059,8 @@ spl_magazine_alloc(spl_kmem_cache_t *skc, int node)
 		skm->skm_avail = 0;
 		skm->skm_size = skc->skc_mag_size;
 		skm->skm_refill = skc->skc_mag_refill;
+		skm->skm_cache = skc;
+		spl_init_delayed_work(&skm->skm_work, spl_magazine_age, skm);
 		skm->skm_age = jiffies;
 	}
 
@@ -1095,6 +1106,11 @@ spl_magazine_create(spl_kmem_cache_t *skc)
 		}
 	}
 
+	/* Only after everything is allocated schedule magazine work */
+	for_each_online_cpu(i)
+		schedule_delayed_work_on(i, &skc->skc_mag[i]->skm_work,
+		    skc->skc_delay / 3 * HZ);
+
 	RETURN(0);
 }
 
@@ -1245,6 +1261,7 @@ void
 spl_kmem_cache_destroy(spl_kmem_cache_t *skc)
 {
 	DECLARE_WAIT_QUEUE_HEAD(wq);
+	int i;
 	ENTRY;
 
 	ASSERT(skc->skc_magic == SKC_MAGIC);
@@ -1256,6 +1273,9 @@ spl_kmem_cache_destroy(spl_kmem_cache_t *skc)
 	/* Cancel any and wait for any pending delayed work */
 	ASSERT(!test_and_set_bit(KMC_BIT_DESTROY, &skc->skc_flags));
 	cancel_delayed_work(&skc->skc_work);
+	for_each_online_cpu(i)
+		cancel_delayed_work(&skc->skc_mag[i]->skm_work);
+
 	flush_scheduled_work();
 
 	/* Wait until all current callers complete, this is mainly