From 7558997d2f808368867ca7e5234e5793446e8f3f Mon Sep 17 00:00:00 2001 From: Serapheim Dimitropoulos Date: Thu, 31 Jan 2019 09:16:39 -0800 Subject: [PATCH] vs_alloc can underflow in L2ARC vdevs The current L2 ARC device code consistently uses psize to increment vs_alloc but varies between psize and lsize when decrementing it. The result of this behavior is that vs_alloc can be decremented more that it is incremented and underflow. This patch changes the code so asize is used anywhere. In addition, it ensures that vs_alloc gets incremented by the L2 ARC device code as buffers are written and not at the end of the l2arc_write_buffers() routine. The latter (and old) way would temporarily underflow vs_alloc as buffers that were just written, would be destroyed while l2arc_write_buffers() was still looping. Reviewed-by: Matt Ahrens Reviewed-by: Brian Behlendorf Signed-off-by: Serapheim Dimitropoulos Closes #8298 --- module/zfs/arc.c | 16 ++++++++++------ module/zfs/vdev.c | 6 ++++++ 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/module/zfs/arc.c b/module/zfs/arc.c index f5d94cbf90..385a2edec3 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -3786,7 +3786,8 @@ arc_hdr_l2hdr_destroy(arc_buf_hdr_t *hdr) { l2arc_buf_hdr_t *l2hdr = &hdr->b_l2hdr; l2arc_dev_t *dev = l2hdr->b_dev; - uint64_t psize = arc_hdr_size(hdr); + uint64_t psize = HDR_GET_PSIZE(hdr); + uint64_t asize = vdev_psize_to_asize(dev->l2ad_vdev, psize); ASSERT(MUTEX_HELD(&dev->l2ad_mtx)); ASSERT(HDR_HAS_L2HDR(hdr)); @@ -3796,9 +3797,10 @@ arc_hdr_l2hdr_destroy(arc_buf_hdr_t *hdr) ARCSTAT_INCR(arcstat_l2_psize, -psize); ARCSTAT_INCR(arcstat_l2_lsize, -HDR_GET_LSIZE(hdr)); - vdev_space_update(dev->l2ad_vdev, -psize, 0, 0); + vdev_space_update(dev->l2ad_vdev, -asize, 0, 0); - (void) zfs_refcount_remove_many(&dev->l2ad_alloc, psize, hdr); + (void) zfs_refcount_remove_many(&dev->l2ad_alloc, arc_hdr_size(hdr), + hdr); arc_hdr_clear_flags(hdr, ARC_FLAG_HAS_L2HDR); } @@ -8296,10 +8298,12 @@ top: list_remove(buflist, hdr); arc_hdr_clear_flags(hdr, ARC_FLAG_HAS_L2HDR); - ARCSTAT_INCR(arcstat_l2_psize, -arc_hdr_size(hdr)); + uint64_t psize = HDR_GET_PSIZE(hdr); + ARCSTAT_INCR(arcstat_l2_psize, -psize); ARCSTAT_INCR(arcstat_l2_lsize, -HDR_GET_LSIZE(hdr)); - bytes_dropped += arc_hdr_size(hdr); + bytes_dropped += + vdev_psize_to_asize(dev->l2ad_vdev, psize); (void) zfs_refcount_remove_many(&dev->l2ad_alloc, arc_hdr_size(hdr), hdr); } @@ -9015,6 +9019,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz) write_psize += psize; write_asize += asize; dev->l2ad_hand += asize; + vdev_space_update(dev->l2ad_vdev, asize, 0, 0); mutex_exit(hash_lock); @@ -9040,7 +9045,6 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz) ARCSTAT_INCR(arcstat_l2_write_bytes, write_psize); ARCSTAT_INCR(arcstat_l2_lsize, write_lsize); ARCSTAT_INCR(arcstat_l2_psize, write_psize); - vdev_space_update(dev->l2ad_vdev, write_psize, 0, 0); /* * Bump device hand to the device start if it is approaching the end. diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c index 50d230ccb2..def98ad136 100644 --- a/module/zfs/vdev.c +++ b/module/zfs/vdev.c @@ -4162,6 +4162,11 @@ vdev_space_update(vdev_t *vd, int64_t alloc_delta, int64_t defer_delta, dspace_delta = vdev_deflated_space(vd, space_delta); mutex_enter(&vd->vdev_stat_lock); + /* ensure we won't underflow */ + if (alloc_delta < 0) { + ASSERT3U(vd->vdev_stat.vs_alloc, >=, -alloc_delta); + } + vd->vdev_stat.vs_alloc += alloc_delta; vd->vdev_stat.vs_space += space_delta; vd->vdev_stat.vs_dspace += dspace_delta; @@ -4169,6 +4174,7 @@ vdev_space_update(vdev_t *vd, int64_t alloc_delta, int64_t defer_delta, /* every class but log contributes to root space stats */ if (vd->vdev_mg != NULL && !vd->vdev_islog) { + ASSERT(!vd->vdev_isl2cache); mutex_enter(&rvd->vdev_stat_lock); rvd->vdev_stat.vs_alloc += alloc_delta; rvd->vdev_stat.vs_space += space_delta;