Merge f004d01ce4
into 63253dbf4f
This commit is contained in:
commit
c0ee77a5f0
|
@ -3087,6 +3087,10 @@ print_status_config(zpool_handle_t *zhp, status_cbdata_t *cb, const char *name,
|
||||||
(void) printf(gettext("invalid label"));
|
(void) printf(gettext("invalid label"));
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case VDEV_AUX_POOL_USES_SHARED_L2ARC:
|
||||||
|
(void) printf(gettext("ignored in favor of cache devices in pool " ZFS_SHARED_L2ARC_POOL_NAME));
|
||||||
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
(void) printf(gettext("corrupted data"));
|
(void) printf(gettext("corrupted data"));
|
||||||
break;
|
break;
|
||||||
|
@ -3247,6 +3251,10 @@ print_import_config(status_cbdata_t *cb, const char *name, nvlist_t *nv,
|
||||||
(void) printf(gettext("invalid label"));
|
(void) printf(gettext("invalid label"));
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case VDEV_AUX_POOL_USES_SHARED_L2ARC:
|
||||||
|
(void) printf(gettext("ignored in favor of cache devices in pool " ZFS_SHARED_L2ARC_POOL_NAME));
|
||||||
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
(void) printf(gettext("corrupted data"));
|
(void) printf(gettext("corrupted data"));
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -337,6 +337,7 @@ void arc_fini(void);
|
||||||
* Level 2 ARC
|
* Level 2 ARC
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#define ZFS_SHARED_L2ARC_POOL_NAME "NTNX-fsvm-local-l2arc"
|
||||||
void l2arc_add_vdev(spa_t *spa, vdev_t *vd);
|
void l2arc_add_vdev(spa_t *spa, vdev_t *vd);
|
||||||
void l2arc_remove_vdev(vdev_t *vd);
|
void l2arc_remove_vdev(vdev_t *vd);
|
||||||
boolean_t l2arc_vdev_present(vdev_t *vd);
|
boolean_t l2arc_vdev_present(vdev_t *vd);
|
||||||
|
|
|
@ -997,6 +997,7 @@ typedef enum vdev_state {
|
||||||
VDEV_STATE_REMOVED, /* Explicitly removed from system */
|
VDEV_STATE_REMOVED, /* Explicitly removed from system */
|
||||||
VDEV_STATE_CANT_OPEN, /* Tried to open, but failed */
|
VDEV_STATE_CANT_OPEN, /* Tried to open, but failed */
|
||||||
VDEV_STATE_FAULTED, /* External request to fault device */
|
VDEV_STATE_FAULTED, /* External request to fault device */
|
||||||
|
VDEV_STATE_UNUSED,
|
||||||
VDEV_STATE_DEGRADED, /* Replicated vdev with unhealthy kids */
|
VDEV_STATE_DEGRADED, /* Replicated vdev with unhealthy kids */
|
||||||
VDEV_STATE_HEALTHY /* Presumed good */
|
VDEV_STATE_HEALTHY /* Presumed good */
|
||||||
} vdev_state_t;
|
} vdev_state_t;
|
||||||
|
@ -1026,6 +1027,7 @@ typedef enum vdev_aux {
|
||||||
VDEV_AUX_SPLIT_POOL, /* vdev was split off into another pool */
|
VDEV_AUX_SPLIT_POOL, /* vdev was split off into another pool */
|
||||||
VDEV_AUX_BAD_ASHIFT, /* vdev ashift is invalid */
|
VDEV_AUX_BAD_ASHIFT, /* vdev ashift is invalid */
|
||||||
VDEV_AUX_EXTERNAL_PERSIST, /* persistent forced fault */
|
VDEV_AUX_EXTERNAL_PERSIST, /* persistent forced fault */
|
||||||
|
VDEV_AUX_POOL_USES_SHARED_L2ARC,
|
||||||
VDEV_AUX_ACTIVE, /* vdev active on a different host */
|
VDEV_AUX_ACTIVE, /* vdev active on a different host */
|
||||||
VDEV_AUX_CHILDREN_OFFLINE, /* all children are offline */
|
VDEV_AUX_CHILDREN_OFFLINE, /* all children are offline */
|
||||||
VDEV_AUX_ASHIFT_TOO_BIG, /* vdev's min block size is too large */
|
VDEV_AUX_ASHIFT_TOO_BIG, /* vdev's min block size is too large */
|
||||||
|
|
|
@ -208,6 +208,8 @@ zpool_state_to_name(vdev_state_t state, vdev_aux_t aux)
|
||||||
return (gettext("DEGRADED"));
|
return (gettext("DEGRADED"));
|
||||||
case VDEV_STATE_HEALTHY:
|
case VDEV_STATE_HEALTHY:
|
||||||
return (gettext("ONLINE"));
|
return (gettext("ONLINE"));
|
||||||
|
case VDEV_STATE_UNUSED:
|
||||||
|
return (gettext("UNUSED"));
|
||||||
|
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
|
|
250
module/zfs/arc.c
250
module/zfs/arc.c
|
@ -823,6 +823,8 @@ typedef struct l2arc_read_callback {
|
||||||
zbookmark_phys_t l2rcb_zb; /* original bookmark */
|
zbookmark_phys_t l2rcb_zb; /* original bookmark */
|
||||||
int l2rcb_flags; /* original flags */
|
int l2rcb_flags; /* original flags */
|
||||||
abd_t *l2rcb_abd; /* temporary buffer */
|
abd_t *l2rcb_abd; /* temporary buffer */
|
||||||
|
spa_t *l2rcb_primary_spa; /* original spa */
|
||||||
|
boolean_t l2rcb_need_exit_scl_l2arc_primary_spa;
|
||||||
} l2arc_read_callback_t;
|
} l2arc_read_callback_t;
|
||||||
|
|
||||||
typedef struct l2arc_data_free {
|
typedef struct l2arc_data_free {
|
||||||
|
@ -880,7 +882,7 @@ static uint32_t arc_bufc_to_flags(arc_buf_contents_t);
|
||||||
static inline void arc_hdr_set_flags(arc_buf_hdr_t *hdr, arc_flags_t flags);
|
static inline void arc_hdr_set_flags(arc_buf_hdr_t *hdr, arc_flags_t flags);
|
||||||
static inline void arc_hdr_clear_flags(arc_buf_hdr_t *hdr, arc_flags_t flags);
|
static inline void arc_hdr_clear_flags(arc_buf_hdr_t *hdr, arc_flags_t flags);
|
||||||
|
|
||||||
static boolean_t l2arc_write_eligible(uint64_t, arc_buf_hdr_t *);
|
static boolean_t l2arc_write_eligible(arc_buf_hdr_t *);
|
||||||
static void l2arc_read_done(zio_t *);
|
static void l2arc_read_done(zio_t *);
|
||||||
static void l2arc_do_free_on_write(void);
|
static void l2arc_do_free_on_write(void);
|
||||||
static void l2arc_hdr_arcstats_update(arc_buf_hdr_t *hdr, boolean_t incr,
|
static void l2arc_hdr_arcstats_update(arc_buf_hdr_t *hdr, boolean_t incr,
|
||||||
|
@ -3790,7 +3792,7 @@ arc_evict_hdr(arc_buf_hdr_t *hdr, uint64_t *real_evicted)
|
||||||
if (HDR_HAS_L2HDR(hdr)) {
|
if (HDR_HAS_L2HDR(hdr)) {
|
||||||
ARCSTAT_INCR(arcstat_evict_l2_cached, HDR_GET_LSIZE(hdr));
|
ARCSTAT_INCR(arcstat_evict_l2_cached, HDR_GET_LSIZE(hdr));
|
||||||
} else {
|
} else {
|
||||||
if (l2arc_write_eligible(hdr->b_spa, hdr)) {
|
if (l2arc_write_eligible(hdr)) {
|
||||||
ARCSTAT_INCR(arcstat_evict_l2_eligible,
|
ARCSTAT_INCR(arcstat_evict_l2_eligible,
|
||||||
HDR_GET_LSIZE(hdr));
|
HDR_GET_LSIZE(hdr));
|
||||||
|
|
||||||
|
@ -5530,6 +5532,24 @@ arc_cached(spa_t *spa, const blkptr_t *bp)
|
||||||
return (flags);
|
return (flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static boolean_t
|
||||||
|
arc_read_l2arc_scl_l2arc_tryenter(vdev_t *l2arc_vdev)
|
||||||
|
{
|
||||||
|
ASSERT(l2arc_vdev);
|
||||||
|
|
||||||
|
/* we only use l2arc vdevs from the dedicated L2ARC pool */
|
||||||
|
ASSERT(l2arc_vdev->vdev_spa);
|
||||||
|
ASSERT(strcmp(spa_name(l2arc_vdev->vdev_spa), ZFS_SHARED_L2ARC_POOL_NAME) == 0);
|
||||||
|
|
||||||
|
return spa_config_tryenter(l2arc_vdev->vdev_spa, SCL_L2ARC, l2arc_vdev, RW_READER);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
arc_read_l2arc_scl_l2arc_exit(vdev_t *l2arc_vdev)
|
||||||
|
{
|
||||||
|
spa_config_exit(l2arc_vdev->vdev_spa, SCL_L2ARC, l2arc_vdev);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* "Read" the block at the specified DVA (in bp) via the
|
* "Read" the block at the specified DVA (in bp) via the
|
||||||
* cache. If the block is found in the cache, invoke the provided
|
* cache. If the block is found in the cache, invoke the provided
|
||||||
|
@ -5927,7 +5947,7 @@ top:
|
||||||
* Lock out L2ARC device removal.
|
* Lock out L2ARC device removal.
|
||||||
*/
|
*/
|
||||||
if (vdev_is_dead(vd) ||
|
if (vdev_is_dead(vd) ||
|
||||||
!spa_config_tryenter(spa, SCL_L2ARC, vd, RW_READER))
|
!arc_read_l2arc_scl_l2arc_tryenter(vd))
|
||||||
vd = NULL;
|
vd = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -5965,8 +5985,13 @@ top:
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Check if the spa even has l2 configured */
|
/* Check if the spa even has l2 configured */
|
||||||
const boolean_t spa_has_l2 = l2arc_ndev != 0 &&
|
/* XXX shared l2arc: if this pool is supposed to use the shared
|
||||||
spa->spa_l2cache.sav_count > 0;
|
* l2arc vdevs, the value should be the result of a check whether
|
||||||
|
* shared l2arc vdevs are available.
|
||||||
|
* On spa's that aren't supposed to use shared l2arc, bring back
|
||||||
|
* the original check from OpenZFS commit 666aa69f.
|
||||||
|
*/
|
||||||
|
const boolean_t spa_has_l2 = B_TRUE;
|
||||||
|
|
||||||
if (vd != NULL && spa_has_l2 && !(l2arc_norw && devw)) {
|
if (vd != NULL && spa_has_l2 && !(l2arc_norw && devw)) {
|
||||||
/*
|
/*
|
||||||
|
@ -5983,16 +6008,15 @@ top:
|
||||||
abd_t *abd;
|
abd_t *abd;
|
||||||
uint64_t asize;
|
uint64_t asize;
|
||||||
|
|
||||||
DTRACE_PROBE1(l2arc__hit, arc_buf_hdr_t *, hdr);
|
|
||||||
ARCSTAT_BUMP(arcstat_l2_hits);
|
|
||||||
hdr->b_l2hdr.b_hits++;
|
|
||||||
|
|
||||||
cb = kmem_zalloc(sizeof (l2arc_read_callback_t),
|
cb = kmem_zalloc(sizeof (l2arc_read_callback_t),
|
||||||
KM_SLEEP);
|
KM_SLEEP);
|
||||||
cb->l2rcb_hdr = hdr;
|
cb->l2rcb_hdr = hdr;
|
||||||
cb->l2rcb_bp = *bp;
|
cb->l2rcb_bp = *bp;
|
||||||
cb->l2rcb_zb = *zb;
|
cb->l2rcb_zb = *zb;
|
||||||
cb->l2rcb_flags = zio_flags;
|
cb->l2rcb_flags = zio_flags;
|
||||||
|
cb->l2rcb_primary_spa = spa;
|
||||||
|
cb->l2rcb_need_exit_scl_l2arc_primary_spa =
|
||||||
|
B_FALSE;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* When Compressed ARC is disabled, but the
|
* When Compressed ARC is disabled, but the
|
||||||
|
@ -6018,6 +6042,38 @@ top:
|
||||||
addr + asize <= vd->vdev_psize -
|
addr + asize <= vd->vdev_psize -
|
||||||
VDEV_LABEL_END_SIZE);
|
VDEV_LABEL_END_SIZE);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Make sure the primary spa is still around
|
||||||
|
* for the fallback read that will be initiated
|
||||||
|
* by l2arc_read_done if the read from the l2arc
|
||||||
|
* vdev fails and it's ARC_FLAG_NOWAIT.
|
||||||
|
* l2arc_read_done is responsible for releasing.
|
||||||
|
* We use `cb` as the tag because unlike `vd`
|
||||||
|
* or `rzio`, we control its lifetime.
|
||||||
|
*/
|
||||||
|
if (spa != vd->vdev_spa) {
|
||||||
|
/*
|
||||||
|
* See comment in l2arc_read_done on how
|
||||||
|
* we are careful to avoid recursive
|
||||||
|
* calls to this function, which would
|
||||||
|
* result in an error.
|
||||||
|
*/
|
||||||
|
int ok = spa_config_tryenter(spa,
|
||||||
|
SCL_L2ARC, cb, RW_READER);
|
||||||
|
if (!ok) {
|
||||||
|
arc_read_l2arc_scl_l2arc_exit(vd);
|
||||||
|
/* XXX separate stat & probe?? */
|
||||||
|
DTRACE_PROBE1(l2arc__miss,
|
||||||
|
arc_buf_hdr_t *, hdr);
|
||||||
|
ARCSTAT_BUMP(arcstat_l2_misses);
|
||||||
|
goto l2arc_read_error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
DTRACE_PROBE1(l2arc__hit, arc_buf_hdr_t *, hdr);
|
||||||
|
ARCSTAT_BUMP(arcstat_l2_hits);
|
||||||
|
hdr->b_l2hdr.b_hits++;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* l2arc read. The SCL_L2ARC lock will be
|
* l2arc read. The SCL_L2ARC lock will be
|
||||||
* released by l2arc_read_done().
|
* released by l2arc_read_done().
|
||||||
|
@ -6026,6 +6082,12 @@ top:
|
||||||
*/
|
*/
|
||||||
ASSERT3U(arc_hdr_get_compress(hdr), !=,
|
ASSERT3U(arc_hdr_get_compress(hdr), !=,
|
||||||
ZIO_COMPRESS_EMPTY);
|
ZIO_COMPRESS_EMPTY);
|
||||||
|
/*
|
||||||
|
* NB: if pio != NULL and spa != vd->vdev_spa,
|
||||||
|
* then this zio tree spans across multiple spa's.
|
||||||
|
* A manual code audit at the time of writing showed
|
||||||
|
* that the zio code is fine with that.
|
||||||
|
*/
|
||||||
rzio = zio_read_phys(pio, vd, addr,
|
rzio = zio_read_phys(pio, vd, addr,
|
||||||
asize, abd,
|
asize, abd,
|
||||||
ZIO_CHECKSUM_OFF,
|
ZIO_CHECKSUM_OFF,
|
||||||
|
@ -6052,6 +6114,7 @@ top:
|
||||||
if (zio_wait(rzio) == 0)
|
if (zio_wait(rzio) == 0)
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
|
l2arc_read_error:
|
||||||
/* l2arc read error; goto zio_read() */
|
/* l2arc read error; goto zio_read() */
|
||||||
if (hash_lock != NULL)
|
if (hash_lock != NULL)
|
||||||
mutex_enter(hash_lock);
|
mutex_enter(hash_lock);
|
||||||
|
@ -6061,11 +6124,11 @@ top:
|
||||||
ARCSTAT_BUMP(arcstat_l2_misses);
|
ARCSTAT_BUMP(arcstat_l2_misses);
|
||||||
if (HDR_L2_WRITING(hdr))
|
if (HDR_L2_WRITING(hdr))
|
||||||
ARCSTAT_BUMP(arcstat_l2_rw_clash);
|
ARCSTAT_BUMP(arcstat_l2_rw_clash);
|
||||||
spa_config_exit(spa, SCL_L2ARC, vd);
|
arc_read_l2arc_scl_l2arc_exit(vd);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (vd != NULL)
|
if (vd != NULL)
|
||||||
spa_config_exit(spa, SCL_L2ARC, vd);
|
arc_read_l2arc_scl_l2arc_exit(vd);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Only a spa with l2 should contribute to l2
|
* Only a spa with l2 should contribute to l2
|
||||||
|
@ -8110,7 +8173,7 @@ arc_fini(void)
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static boolean_t
|
static boolean_t
|
||||||
l2arc_write_eligible(uint64_t spa_guid, arc_buf_hdr_t *hdr)
|
l2arc_write_eligible(arc_buf_hdr_t *hdr)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* A buffer is *not* eligible for the L2ARC if it:
|
* A buffer is *not* eligible for the L2ARC if it:
|
||||||
|
@ -8119,7 +8182,7 @@ l2arc_write_eligible(uint64_t spa_guid, arc_buf_hdr_t *hdr)
|
||||||
* 3. has an I/O in progress (it may be an incomplete read).
|
* 3. has an I/O in progress (it may be an incomplete read).
|
||||||
* 4. is flagged not eligible (zfs property).
|
* 4. is flagged not eligible (zfs property).
|
||||||
*/
|
*/
|
||||||
if (hdr->b_spa != spa_guid || HDR_HAS_L2HDR(hdr) ||
|
if (HDR_HAS_L2HDR(hdr) ||
|
||||||
HDR_IO_IN_PROGRESS(hdr) || !HDR_L2CACHE(hdr))
|
HDR_IO_IN_PROGRESS(hdr) || !HDR_L2CACHE(hdr))
|
||||||
return (B_FALSE);
|
return (B_FALSE);
|
||||||
|
|
||||||
|
@ -8547,9 +8610,31 @@ error:
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static void
|
||||||
|
l2arc_fallback_read_done(zio_t *zio)
|
||||||
|
{
|
||||||
|
l2arc_read_callback_t *cb = zio->io_private;
|
||||||
|
ASSERT3P(zio->io_spa, ==, cb->l2rcb_primary_spa);
|
||||||
|
if (cb->l2rcb_need_exit_scl_l2arc_primary_spa) {
|
||||||
|
/*
|
||||||
|
* Release SCL_L2ARC before arc_read_done to avoid recursively
|
||||||
|
* spa_config_tryenter if arc_read_done calls arc_read again.
|
||||||
|
*/
|
||||||
|
spa_config_exit(cb->l2rcb_primary_spa, SCL_L2ARC, cb);
|
||||||
|
}
|
||||||
|
arc_buf_hdr_t *hdr = cb->l2rcb_hdr;
|
||||||
|
zio->io_private = hdr;
|
||||||
|
kmem_free(cb, sizeof (l2arc_read_callback_t));
|
||||||
|
arc_read_done(zio);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* A read to a cache device completed. Validate buffer contents before
|
* A read to a cache device completed. Validate buffer contents before
|
||||||
* handing over to the regular ARC routines.
|
* handing over to the regular ARC routines.
|
||||||
|
*
|
||||||
|
* We take care of cleaning up any L2ARC read resources before calling
|
||||||
|
* arc_read_done to minimize overhead if the arc_read callbacks issue arc_read
|
||||||
|
* themselves. Ideally, the compiler turns the arc_read_done call into a jump.
|
||||||
*/
|
*/
|
||||||
static void
|
static void
|
||||||
l2arc_read_done(zio_t *zio)
|
l2arc_read_done(zio_t *zio)
|
||||||
|
@ -8565,11 +8650,39 @@ l2arc_read_done(zio_t *zio)
|
||||||
ASSERT3P(zio->io_vd, !=, NULL);
|
ASSERT3P(zio->io_vd, !=, NULL);
|
||||||
ASSERT(zio->io_flags & ZIO_FLAG_DONT_PROPAGATE);
|
ASSERT(zio->io_flags & ZIO_FLAG_DONT_PROPAGATE);
|
||||||
|
|
||||||
spa_config_exit(zio->io_spa, SCL_L2ARC, zio->io_vd);
|
ASSERT(strcmp(spa_name(zio->io_spa), ZFS_SHARED_L2ARC_POOL_NAME) == 0);
|
||||||
|
|
||||||
ASSERT3P(cb, !=, NULL);
|
ASSERT3P(cb, !=, NULL);
|
||||||
hdr = cb->l2rcb_hdr;
|
hdr = cb->l2rcb_hdr;
|
||||||
ASSERT3P(hdr, !=, NULL);
|
ASSERT3P(hdr, !=, NULL);
|
||||||
|
ASSERT(cb->l2rcb_primary_spa);
|
||||||
|
|
||||||
|
boolean_t primay_spa_is_not_l2arc_spa =
|
||||||
|
zio->io_spa != cb->l2rcb_primary_spa;
|
||||||
|
/* Unblock L2ARC device removal. */
|
||||||
|
spa_config_exit(zio->io_spa, SCL_L2ARC, zio->io_vd);
|
||||||
|
if (primay_spa_is_not_l2arc_spa) {
|
||||||
|
/* We must keep holding SCL_L2ARC for l2rcb_primary_spa until
|
||||||
|
* after we have issued the fallback read, in order to prevent
|
||||||
|
* l2rcb_primary_spa from being exported (or worse, freed).
|
||||||
|
|
||||||
|
* However, we must release it before we call the `done`
|
||||||
|
* callback that was passed to the original arc_read. The
|
||||||
|
* reason is that the callback could issue another arc_read,
|
||||||
|
* resulting in a recursive call to spa_config_tryenter(primary
|
||||||
|
* spa, SCL_L2ARC). That call would fail if we still held
|
||||||
|
* SCL_L2ARC, and the code
|
||||||
|
* would treat is like an L2ARC miss, whereas it's most likely
|
||||||
|
* a hit.
|
||||||
|
*/
|
||||||
|
/* XXX: we should actually use refcount_held to assert that `cb`
|
||||||
|
* still holds the config lock */
|
||||||
|
ASSERT(spa_config_held(cb->l2rcb_primary_spa, SCL_L2ARC,
|
||||||
|
RW_READER));
|
||||||
|
} else {
|
||||||
|
/* XXX: assert that `cb` is not holding SCL_L2ARC */
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
hash_lock = HDR_LOCK(hdr);
|
hash_lock = HDR_LOCK(hdr);
|
||||||
mutex_enter(hash_lock);
|
mutex_enter(hash_lock);
|
||||||
|
@ -8622,10 +8735,27 @@ l2arc_read_done(zio_t *zio)
|
||||||
*/
|
*/
|
||||||
ASSERT(zio->io_abd == hdr->b_l1hdr.b_pabd ||
|
ASSERT(zio->io_abd == hdr->b_l1hdr.b_pabd ||
|
||||||
(HDR_HAS_RABD(hdr) && zio->io_abd == hdr->b_crypt_hdr.b_rabd));
|
(HDR_HAS_RABD(hdr) && zio->io_abd == hdr->b_crypt_hdr.b_rabd));
|
||||||
|
/*
|
||||||
|
* Set the zio's bp to the one we're trying to fill from L2ARC.
|
||||||
|
* The bp contains the checksum, and both the arc_cksum_is_equal
|
||||||
|
* and the fallback zio_read depend on it being set correctly.
|
||||||
|
*/
|
||||||
zio->io_bp_copy = cb->l2rcb_bp; /* XXX fix in L2ARC 2.0 */
|
zio->io_bp_copy = cb->l2rcb_bp; /* XXX fix in L2ARC 2.0 */
|
||||||
zio->io_bp = &zio->io_bp_copy; /* XXX fix in L2ARC 2.0 */
|
zio->io_bp = &zio->io_bp_copy; /* XXX fix in L2ARC 2.0 */
|
||||||
zio->io_prop.zp_complevel = hdr->b_complevel;
|
zio->io_prop.zp_complevel = hdr->b_complevel;
|
||||||
|
|
||||||
|
#ifdef _KERNEL
|
||||||
|
if (0) {
|
||||||
|
zio_t *tmp = zio_unique_parent(zio);
|
||||||
|
cmn_err(CE_WARN, "zio->io_waiter=%p zio->io_spa=%s "
|
||||||
|
"zio_unique_parent(zio)=%p parent_is_godfather=%d "
|
||||||
|
"zio_unique_parent(zio)->io_spa=%s ",
|
||||||
|
zio->io_waiter, spa_name(zio->io_spa),
|
||||||
|
tmp, tmp ? !!(tmp->io_flags & ZIO_FLAG_GODFATHER) : -1,
|
||||||
|
tmp ? spa_name(tmp->io_spa) : "<NONE>");
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
valid_cksum = arc_cksum_is_equal(hdr, zio);
|
valid_cksum = arc_cksum_is_equal(hdr, zio);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -8639,6 +8769,10 @@ l2arc_read_done(zio_t *zio)
|
||||||
if (valid_cksum && tfm_error == 0 && zio->io_error == 0 &&
|
if (valid_cksum && tfm_error == 0 && zio->io_error == 0 &&
|
||||||
!HDR_L2_EVICTED(hdr)) {
|
!HDR_L2_EVICTED(hdr)) {
|
||||||
mutex_exit(hash_lock);
|
mutex_exit(hash_lock);
|
||||||
|
if (primay_spa_is_not_l2arc_spa) {
|
||||||
|
spa_config_exit(cb->l2rcb_primary_spa, SCL_L2ARC, cb);
|
||||||
|
}
|
||||||
|
kmem_free(cb, sizeof (l2arc_read_callback_t));
|
||||||
zio->io_private = hdr;
|
zio->io_private = hdr;
|
||||||
arc_read_done(zio);
|
arc_read_done(zio);
|
||||||
} else {
|
} else {
|
||||||
|
@ -8649,6 +8783,10 @@ l2arc_read_done(zio_t *zio)
|
||||||
if (zio->io_error != 0) {
|
if (zio->io_error != 0) {
|
||||||
ARCSTAT_BUMP(arcstat_l2_io_error);
|
ARCSTAT_BUMP(arcstat_l2_io_error);
|
||||||
} else {
|
} else {
|
||||||
|
/*
|
||||||
|
* IO error ensures that arc_read does the fallback
|
||||||
|
* read for the case where zio->io_waiter != NULL.
|
||||||
|
*/
|
||||||
zio->io_error = SET_ERROR(EIO);
|
zio->io_error = SET_ERROR(EIO);
|
||||||
}
|
}
|
||||||
if (!valid_cksum || tfm_error != 0)
|
if (!valid_cksum || tfm_error != 0)
|
||||||
|
@ -8656,20 +8794,62 @@ l2arc_read_done(zio_t *zio)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If there's no waiter, issue an async i/o to the primary
|
* If there's no waiter, issue an async i/o to the primary
|
||||||
* storage now. If there *is* a waiter, the caller must
|
* storage now. If there *is* a waiter, the caller must issue
|
||||||
* issue the i/o in a context where it's OK to block.
|
* the i/o in a context where it's OK to block.
|
||||||
|
*
|
||||||
|
* The above in more technical terms: If the arc_read was
|
||||||
|
* ARC_FLAG_NOWAIT, we issued the zio_read_phys to read from
|
||||||
|
* the l2arc vdev using zio_nowait and immediately returned
|
||||||
|
* from arc_read. But arc_read is supposed to transparently
|
||||||
|
* fall back to the primary storage. So, for the
|
||||||
|
* ARC_FLAG_NOWAIT case, we do that here. However, if there
|
||||||
|
* *is* a waiter, it's the ARC_FLAG_WAIT case in which case
|
||||||
|
* arc_read does the fallback read itself.
|
||||||
*/
|
*/
|
||||||
if (zio->io_waiter == NULL) {
|
if (zio->io_waiter == NULL) {
|
||||||
zio_t *pio = zio_unique_parent(zio);
|
zio_t *pio = zio_unique_parent(zio);
|
||||||
void *abd = (using_rdata) ?
|
void *abd = (using_rdata) ?
|
||||||
hdr->b_crypt_hdr.b_rabd : hdr->b_l1hdr.b_pabd;
|
hdr->b_crypt_hdr.b_rabd : hdr->b_l1hdr.b_pabd;
|
||||||
|
|
||||||
ASSERT(!pio || pio->io_child_type == ZIO_CHILD_LOGICAL);
|
/*
|
||||||
|
* The `pio` is the one that was passed to arc_read.
|
||||||
|
* We added `zio` as its (only) child.
|
||||||
|
* But arc_read can also be called with pio==NULL.
|
||||||
|
* In that case, zio_nowait sets zio's parent to
|
||||||
|
* spa_async_zio_root (aka the grandfather zio).
|
||||||
|
*/
|
||||||
|
if (pio) {
|
||||||
|
ASSERT3S(pio->io_child_type, ==, ZIO_CHILD_LOGICAL);
|
||||||
|
/*
|
||||||
|
* If we violated the follwoing assertion, we
|
||||||
|
* would be reading from the wrong spa in the
|
||||||
|
* fallback zio_read below. The caller of
|
||||||
|
* arc_read would likely observe an EIO due to
|
||||||
|
* invalid checksum. It depends on the caller
|
||||||
|
* what happens then. If the arc_read was for
|
||||||
|
* ZFS metadata it's likely that they have a
|
||||||
|
* VERIFY somewhere which would result in a
|
||||||
|
* kernel panic higher up the stack. If it's
|
||||||
|
* for user data, it's likely that we bubble up
|
||||||
|
* the error to userspace.
|
||||||
|
*
|
||||||
|
* Either way, the resulting behavior is
|
||||||
|
* misleading and would likely spark
|
||||||
|
* misdirected investigations into data
|
||||||
|
* integrity issues. So, just take the hit and
|
||||||
|
* do a VERIFY here to get a precise panic in
|
||||||
|
* case we have an implementation issue.
|
||||||
|
*/
|
||||||
|
VERIFY3P(pio->io_spa, ==, cb->l2rcb_primary_spa);
|
||||||
|
}
|
||||||
|
|
||||||
zio = zio_read(pio, zio->io_spa, zio->io_bp,
|
/* NB: io_bp has been copied from cb->l2rcb_bp above */
|
||||||
abd, zio->io_size, arc_read_done,
|
cb->l2rcb_need_exit_scl_l2arc_primary_spa =
|
||||||
hdr, zio->io_priority, cb->l2rcb_flags,
|
primay_spa_is_not_l2arc_spa;
|
||||||
&cb->l2rcb_zb);
|
zio_t *fallback_read = zio_read(pio,
|
||||||
|
cb->l2rcb_primary_spa, zio->io_bp,
|
||||||
|
abd, zio->io_size, l2arc_fallback_read_done, cb,
|
||||||
|
zio->io_priority, cb->l2rcb_flags, &cb->l2rcb_zb);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Original ZIO will be freed, so we need to update
|
* Original ZIO will be freed, so we need to update
|
||||||
|
@ -8681,13 +8861,22 @@ l2arc_read_done(zio_t *zio)
|
||||||
acb->acb_zio_head = zio;
|
acb->acb_zio_head = zio;
|
||||||
|
|
||||||
mutex_exit(hash_lock);
|
mutex_exit(hash_lock);
|
||||||
zio_nowait(zio);
|
zio_nowait(fallback_read);
|
||||||
|
/*
|
||||||
|
* We release the SCL_L2ARC on l2rcb_primary_spa in
|
||||||
|
* l2arc_fallback_read_done. See comment in there, at
|
||||||
|
* at the beginning of the function, for why we can't
|
||||||
|
* release it here.
|
||||||
|
*/
|
||||||
} else {
|
} else {
|
||||||
mutex_exit(hash_lock);
|
mutex_exit(hash_lock);
|
||||||
|
if (primay_spa_is_not_l2arc_spa) {
|
||||||
|
spa_config_exit(cb->l2rcb_primary_spa, SCL_L2ARC, cb);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
kmem_free(cb, sizeof (l2arc_read_callback_t));
|
kmem_free(cb, sizeof (l2arc_read_callback_t));
|
||||||
|
/* arc_read will do the fallback read */
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -8923,7 +9112,7 @@ retry:
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!HDR_HAS_L1HDR(hdr)) {
|
if (!HDR_HAS_L1HDR(hdr)) {
|
||||||
ASSERT(!HDR_L2_READING(hdr));
|
ASSERT(!HDR_L2_READING(hdr)); // why does this hold?
|
||||||
/*
|
/*
|
||||||
* This doesn't exist in the ARC. Destroy.
|
* This doesn't exist in the ARC. Destroy.
|
||||||
* arc_hdr_destroy() will call list_remove()
|
* arc_hdr_destroy() will call list_remove()
|
||||||
|
@ -9129,7 +9318,6 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
|
||||||
boolean_t full, from_head = !arc_warm;
|
boolean_t full, from_head = !arc_warm;
|
||||||
l2arc_write_callback_t *cb = NULL;
|
l2arc_write_callback_t *cb = NULL;
|
||||||
zio_t *pio, *wzio;
|
zio_t *pio, *wzio;
|
||||||
uint64_t guid = spa_load_guid(spa);
|
|
||||||
l2arc_dev_hdr_phys_t *l2dhdr = dev->l2ad_dev_hdr;
|
l2arc_dev_hdr_phys_t *l2dhdr = dev->l2ad_dev_hdr;
|
||||||
|
|
||||||
ASSERT3P(dev->l2ad_vdev, !=, NULL);
|
ASSERT3P(dev->l2ad_vdev, !=, NULL);
|
||||||
|
@ -9199,7 +9387,7 @@ skip:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!l2arc_write_eligible(guid, hdr)) {
|
if (!l2arc_write_eligible(hdr)) {
|
||||||
mutex_exit(hash_lock);
|
mutex_exit(hash_lock);
|
||||||
goto skip;
|
goto skip;
|
||||||
}
|
}
|
||||||
|
@ -9452,6 +9640,12 @@ l2arc_feed_thread(void *unused)
|
||||||
spa = dev->l2ad_spa;
|
spa = dev->l2ad_spa;
|
||||||
ASSERT3P(spa, !=, NULL);
|
ASSERT3P(spa, !=, NULL);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We only allow add l2arc devices in pool NTNX_L2ARC_POOL_NAME
|
||||||
|
* to l2arc_dev_list.
|
||||||
|
*/
|
||||||
|
ASSERT0(strcmp(spa_name(spa), ZFS_SHARED_L2ARC_POOL_NAME));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If the pool is read-only then force the feed thread to
|
* If the pool is read-only then force the feed thread to
|
||||||
* sleep a little longer.
|
* sleep a little longer.
|
||||||
|
@ -9611,6 +9805,8 @@ l2arc_add_vdev(spa_t *spa, vdev_t *vd)
|
||||||
|
|
||||||
ASSERT(!l2arc_vdev_present(vd));
|
ASSERT(!l2arc_vdev_present(vd));
|
||||||
|
|
||||||
|
VERIFY(strcmp(spa_name(spa), ZFS_SHARED_L2ARC_POOL_NAME) == 0);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Create a new l2arc device entry.
|
* Create a new l2arc device entry.
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -3482,7 +3482,28 @@ dbuf_prefetch_indirect_done(zio_t *zio, const zbookmark_phys_t *zb,
|
||||||
} else {
|
} else {
|
||||||
ASSERT3U(BP_GET_LSIZE(zio->io_bp), ==, zio->io_size);
|
ASSERT3U(BP_GET_LSIZE(zio->io_bp), ==, zio->io_size);
|
||||||
}
|
}
|
||||||
ASSERT3P(zio->io_spa, ==, dpa->dpa_spa);
|
/* XXX: with shared l2arc, this ASSERT is incorrect.
|
||||||
|
* After conversation with Paul Dagnelie, who added it,
|
||||||
|
* we concluded that it's not necessary.
|
||||||
|
* The stack trace was :
|
||||||
|
* VERIFY3(zio->io_spa == dpa->dpa_spa) failed
|
||||||
|
* (ffff8fc81ba6c000== ffff8fc9c4aa8000)
|
||||||
|
* ^ l2arc spa ^primary spa
|
||||||
|
*
|
||||||
|
* dump_stack+0x6d/0x8b
|
||||||
|
* spl_dumpstack+0x29/0x2b [spl]
|
||||||
|
* spl_panic+0xd1/0xd3 [spl]
|
||||||
|
* dbuf_prefetch_indirect_done+0xdd/0x420 [zfs]
|
||||||
|
* arc_read_done+0x3e6/0x990 [zfs]
|
||||||
|
* l2arc_read_done+0x5a9/0x9a0 [zfs]
|
||||||
|
* zio_done+0x54d/0x1680 [zfs]
|
||||||
|
* zio_execute+0xe9/0x2e0 [zfs]
|
||||||
|
* taskq_thread+0x2ec/0x650 [spl]
|
||||||
|
* kthread+0x128/0x140
|
||||||
|
* ret_from_fork+0x35/0x40
|
||||||
|
*
|
||||||
|
* ASSERT3P(zio->io_spa, ==, dpa->dpa_spa);
|
||||||
|
*/
|
||||||
|
|
||||||
dpa->dpa_dnode = NULL;
|
dpa->dpa_dnode = NULL;
|
||||||
} else if (dpa->dpa_dnode != NULL) {
|
} else if (dpa->dpa_dnode != NULL) {
|
||||||
|
|
|
@ -2413,6 +2413,20 @@ spa_load_l2cache(spa_t *spa)
|
||||||
|
|
||||||
(void) vdev_validate_aux(vd);
|
(void) vdev_validate_aux(vd);
|
||||||
|
|
||||||
|
if (!vdev_is_dead(vd) &&
|
||||||
|
strcmp(spa_name(spa), ZFS_SHARED_L2ARC_POOL_NAME) != 0) {
|
||||||
|
cmn_err(CE_WARN, "pool '%s': l2arc devices outside of pool " ZFS_SHARED_L2ARC_POOL_NAME " will not be used and marked offline.", spa_name(spa));
|
||||||
|
/* mark device as dead */
|
||||||
|
vdev_set_state(vd, B_TRUE, VDEV_STATE_UNUSED,
|
||||||
|
VDEV_AUX_POOL_USES_SHARED_L2ARC);
|
||||||
|
/*
|
||||||
|
* Ensure that vdev_is_dead() returns true, so
|
||||||
|
* we don't l2arc_add_vdev below. The whole
|
||||||
|
* Shared L2ARC code relies on this.
|
||||||
|
*/
|
||||||
|
VERIFY(vdev_is_dead(vd));
|
||||||
|
}
|
||||||
|
|
||||||
if (!vdev_is_dead(vd))
|
if (!vdev_is_dead(vd))
|
||||||
l2arc_add_vdev(spa, vd);
|
l2arc_add_vdev(spa, vd);
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue