ZFS Version 2.2.1

-----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCAAdFiEETzupq20fjWg9wt+1athg7tRZgCcFAmVdHzkACgkQathg7tRZ
 gCdjTQ//e4+vzpqqtZypJzFjc/wJkRIMBnwH+xYkD6IysqXjfmCtPgu0D2BftAEB
 Gfb5c27c7K5Hxj605axlVlpxiPbbCcYm80z/nSwclpHLh+fwf383iyGzXXEbytt9
 t0KuhC+nPxwa5QQs7Q77B+EMpsIv90Zuy+aF8uEsU+jNgG6qgpVSCkS83I7CzKVu
 ooNKP5fzFyZS7sxWAKZb4g5j6DiXY3+f/ZGSNcYcApm1KB8dgmwaqNr4Um0otKVQ
 ZGiCyHNC4I7ig5no4tpLASepfV9pqSOeHmx+0PZPRGuwKCLmEyEtCJz+OkM1Ylc2
 AyjXUlfw8ZIKUWJ+ihA/gQb9YgKsRDpKAeUJNTDiUA5dqy/CJUIgZpEK+sXUvWJR
 kFzhpgkIImd+IAu57TTwNALJsjNJJ6ZREqbyLm9z4UF7vj+WOQk8IClY41IsC8Mj
 FNBz86INtiqzlLdTdLKUmWfL0v5WTV00pzWAqYjJ/R8PBoVS0TPV1M2n6YSxQBX0
 CJ64y8WKoaRXj1zbLJ+etNlaSXB+WZaMgr44GM/k9HIwGyTXRxFC+WG68RspUzNw
 q8Utu0tU7pP/1knKyX+7c/R3P7LmRVsRYdX0RDroQXClxeTuuqkMdmbngykKC+K9
 crf8PnbJJ0wRpZ4128KphjB42T45AL/Ojlx6jDVXY8wQ8MgGRkM=
 =F7aE
 -----END PGP SIGNATURE-----

Merge tag 'zfs-2.2.1' into truenas/zfs-2.2-release

ZFS Version 2.2.1

Signed-off-by: Ameer Hamza <ahamza@ixsystems.com>
This commit is contained in:
Ameer Hamza 2023-11-22 02:54:32 +05:00
commit 20331b2aec
127 changed files with 2801 additions and 823 deletions

21
.cirrus.yml Normal file
View File

@@ -0,0 +1,21 @@
env:
CIRRUS_CLONE_DEPTH: 1
ARCH: amd64
build_task:
matrix:
freebsd_instance:
image_family: freebsd-12-4
freebsd_instance:
image_family: freebsd-13-2
freebsd_instance:
image_family: freebsd-14-0-snap
prepare_script:
- pkg install -y autoconf automake libtool gettext-runtime gmake ksh93 py39-packaging py39-cffi py39-sysctl
configure_script:
- env MAKE=gmake ./autogen.sh
- env MAKE=gmake ./configure --with-config="user" --with-python=3.9
build_script:
- gmake -j `sysctl -n kern.smp.cpus`
install_script:
- gmake install

3
.gitignore vendored
View File

@@ -42,8 +42,10 @@
!udev/** !udev/**
!.editorconfig !.editorconfig
!.cirrus.yml
!.gitignore !.gitignore
!.gitmodules !.gitmodules
!.mailmap
!AUTHORS !AUTHORS
!autogen.sh !autogen.sh
!CODE_OF_CONDUCT.md !CODE_OF_CONDUCT.md
@@ -60,7 +62,6 @@
!TEST !TEST
!zfs.release.in !zfs.release.in
# #
# Normal rules # Normal rules
# #

189
.mailmap Normal file
View File

@@ -0,0 +1,189 @@
# This file maps the name+email seen in a commit back to a canonical
# name+email. Git will replace the commit name/email with the canonical version
# wherever it sees it.
#
# If there is a commit in the history with a "wrong" name or email, list it
# here. If you regularly commit with an alternate name or email address and
# would like to ensure that you are always listed consistently in the repo, add
# mapping here.
#
# On the other hand, if you use multiple names or email addresses legitimately
# (eg you use a company email address for your paid OpenZFS work, and a
# personal address for your evening side projects), then don't map one to the
# other here.
#
# The most common formats are:
#
# Canonical Name <canonical-email>
# Canonical Name <canonical-email> <commit-email>
# Canonical Name <canonical-email> Commit Name <commit-email>
#
# See https://git-scm.com/docs/gitmailmap for more info.
# These maps are making names consistent where they have varied but the email
# address has never changed. In most cases, the full name is in the
# Signed-off-by of a commit with a matching author.
Ahelenia Ziemiańska <nabijaczleweli@gmail.com>
Ahelenia Ziemiańska <nabijaczleweli@nabijaczleweli.xyz>
Alex John <alex@stty.io>
Andreas Dilger <adilger@dilger.ca>
Andrew Walker <awalker@ixsystems.com>
Benedikt Neuffer <github@itfriend.de>
Chengfei Zhu <chengfeix.zhu@intel.com>
Chris Lindee <chris.lindee+github@gmail.com>
Colm Buckley <colm@tuatha.org>
Crag Wang <crag0715@gmail.com>
Damian Szuberski <szuberskidamian@gmail.com>
Daniel Kolesa <daniel@octaforge.org>
Debabrata Banerjee <dbavatar@gmail.com>
Finix Yan <yanchongwen@hotmail.com>
Gaurav Kumar <gauravk.18@gmail.com>
Gionatan Danti <g.danti@assyoma.it>
Glenn Washburn <development@efficientek.com>
Gordan Bobic <gordan.bobic@gmail.com>
Gregory Bartholomew <gregory.lee.bartholomew@gmail.com>
hedong zhang <h_d_zhang@163.com>
InsanePrawn <Insane.Prawny@gmail.com>
Jason Cohen <jwittlincohen@gmail.com>
Jason Harmening <jason.harmening@gmail.com>
Jeremy Faulkner <gldisater@gmail.com>
Jinshan Xiong <jinshan.xiong@gmail.com>
John Poduska <jpoduska@datto.com>
Justin Scholz <git@justinscholz.de>
Ka Ho Ng <khng300@gmail.com>
Kash Pande <github@tripleback.net>
Kay Pedersen <christianpe96@gmail.com>
KernelOfTruth <kerneloftruth@gmail.com>
Liu Hua <liu.hua130@zte.com.cn>
Liu Qing <winglq@gmail.com>
loli10K <ezomori.nozomu@gmail.com>
Matthias Blankertz <matthias@blankertz.org>
Michael Gmelin <grembo@FreeBSD.org>
Olivier Mazouffre <olivier.mazouffre@ims-bordeaux.fr>
Piotr Kubaj <pkubaj@anongoth.pl>
Quentin Zdanis <zdanisq@gmail.com>
Roberto Ricci <ricci@disroot.org>
Rob Norris <robn@despairlabs.com>
Rob Norris <rob.norris@klarasystems.com>
Sam Lunt <samuel.j.lunt@gmail.com>
Sanjeev Bagewadi <sanjeev.bagewadi@gmail.com>
Stoiko Ivanov <github@nomore.at>
Tamas TEVESZ <ice@extreme.hu>
WHR <msl0000023508@gmail.com>
Yanping Gao <yanping.gao@xtaotech.com>
Youzhong Yang <youzhong@gmail.com>
# Commits from strange places, long ago
Brian Behlendorf <behlendorf1@llnl.gov> <behlendo@7e1ea52c-4ff2-0310-8f11-9dd32ca42a1c>
Brian Behlendorf <behlendorf1@llnl.gov> <behlendo@fedora-17-amd64.(none)>
Brian Behlendorf <behlendorf1@llnl.gov> <behlendo@myhost.(none)>
Brian Behlendorf <behlendorf1@llnl.gov> <ubuntu@ip-172-31-16-145.us-west-1.compute.internal>
Brian Behlendorf <behlendorf1@llnl.gov> <ubuntu@ip-172-31-20-6.us-west-1.compute.internal>
Herb Wartens <wartens2@llnl.gov> <wartens2@7e1ea52c-4ff2-0310-8f11-9dd32ca42a1c>
Ned Bass <bass6@llnl.gov> <bass6@zeno1.(none)>
Tulsi Jain <tulsi.jain@delphix.com> <tulsi.jain@Tulsi-Jains-MacBook-Pro.local>
# Mappings from Github no-reply addresses
ajs124 <git@ajs124.de> <ajs124@users.noreply.github.com>
Alek Pinchuk <apinchuk@axcient.com> <alek-p@users.noreply.github.com>
Alexander Lobakin <alobakin@pm.me> <solbjorn@users.noreply.github.com>
Alexey Smirnoff <fling@member.fsf.org> <fling-@users.noreply.github.com>
Allen Holl <allen.m.holl@gmail.com> <65494904+allen-4@users.noreply.github.com>
Ameer Hamza <ahamza@ixsystems.com> <106930537+ixhamza@users.noreply.github.com>
Andrew J. Hesford <ajh@sideband.org> <48421688+ahesford@users.noreply.github.com>
Andrew Sun <me@andrewsun.com> <as-com@users.noreply.github.com>
Aron Xu <happyaron.xu@gmail.com> <happyaron@users.noreply.github.com>
Arun KV <arun.kv@datacore.com> <65647132+arun-kv@users.noreply.github.com>
Ben Wolsieffer <benwolsieffer@gmail.com> <lopsided98@users.noreply.github.com>
bernie1995 <bernie.pikes@gmail.com> <42413912+bernie1995@users.noreply.github.com>
Boris Protopopov <boris.protopopov@actifio.com> <bprotopopov@users.noreply.github.com>
Brad Forschinger <github@bnjf.id.au> <bnjf@users.noreply.github.com>
Brandon Thetford <brandon@dodecatec.com> <dodexahedron@users.noreply.github.com>
buzzingwires <buzzingwires@outlook.com> <131118055+buzzingwires@users.noreply.github.com>
Cedric Maunoury <cedric.maunoury@gmail.com> <38213715+cedricmaunoury@users.noreply.github.com>
Charles Suh <charles.suh@gmail.com> <charlessuh@users.noreply.github.com>
Dacian Reece-Stremtan <dacianstremtan@gmail.com> <35844628+dacianstremtan@users.noreply.github.com>
Damian Szuberski <szuberskidamian@gmail.com> <30863496+szubersk@users.noreply.github.com>
Daniel Hiepler <d-git@coderdu.de> <32984777+heeplr@users.noreply.github.com>
Daniel Kobras <d.kobras@science-computing.de> <sckobras@users.noreply.github.com>
Daniel Reichelt <hacking@nachtgeist.net> <nachtgeist@users.noreply.github.com>
David Quigley <david.quigley@intel.com> <dpquigl@users.noreply.github.com>
DHE <git@dehacked.net> <DeHackEd@users.noreply.github.com>
Dmitri John Ledkov <dimitri.ledkov@canonical.com> <19779+xnox@users.noreply.github.com>
Dries Michiels <driesm.michiels@gmail.com> <32487486+driesmp@users.noreply.github.com>
Edmund Nadolski <edmund.nadolski@ixsystems.com> <137826107+ednadolski-ix@users.noreply.github.com>
Érico Nogueira <erico.erc@gmail.com> <34201958+ericonr@users.noreply.github.com>
Fedor Uporov <fuporov.vstack@gmail.com> <60701163+fuporovvStack@users.noreply.github.com>
Felix Dörre <felix@dogcraft.de> <felixdoerre@users.noreply.github.com>
Felix Neumärker <xdch47@posteo.de> <34678034+xdch47@users.noreply.github.com>
Finix Yan <yancw@info2soft.com> <Finix1979@users.noreply.github.com>
Gaurav Kumar <gauravk.18@gmail.com> <gaurkuma@users.noreply.github.com>
George Gaydarov <git@gg7.io> <gg7@users.noreply.github.com>
Georgy Yakovlev <gyakovlev@gentoo.org> <168902+gyakovlev@users.noreply.github.com>
Gerardwx <gerardw@alum.mit.edu> <Gerardwx@users.noreply.github.com>
Gian-Carlo DeFazio <defazio1@llnl.gov> <defaziogiancarlo@users.noreply.github.com>
Giuseppe Di Natale <dinatale2@llnl.gov> <dinatale2@users.noreply.github.com>
Hajo Möller <dasjoe@gmail.com> <dasjoe@users.noreply.github.com>
Harry Mallon <hjmallon@gmail.com> <1816667+hjmallon@users.noreply.github.com>
Hiếu Lê <leorize+oss@disroot.org> <alaviss@users.noreply.github.com>
Jake Howard <git@theorangeone.net> <RealOrangeOne@users.noreply.github.com>
James Cowgill <james.cowgill@mips.com> <jcowgill@users.noreply.github.com>
Jason King <jason.king@joyent.com> <jasonbking@users.noreply.github.com>
Jeff Dike <jdike@akamai.com> <52420226+jdike@users.noreply.github.com>
Jitendra Patidar <jitendra.patidar@nutanix.com> <53164267+jsai20@users.noreply.github.com>
João Carlos Mendes Luís <jonny@jonny.eng.br> <dioni21@users.noreply.github.com>
John Eismeier <john.eismeier@gmail.com> <32205350+jeis2497052@users.noreply.github.com>
John L. Hammond <john.hammond@intel.com> <35266395+jhammond-intel@users.noreply.github.com>
John-Mark Gurney <jmg@funkthat.com> <jmgurney@users.noreply.github.com>
John Ramsden <johnramsden@riseup.net> <johnramsden@users.noreply.github.com>
Jonathon Fernyhough <jonathon@m2x.dev> <559369+jonathonf@users.noreply.github.com>
Justin Hibbits <chmeeedalf@gmail.com> <chmeeedalf@users.noreply.github.com>
Kevin Jin <lostking2008@hotmail.com> <33590050+jxdking@users.noreply.github.com>
Kevin P. Fleming <kevin@km6g.us> <kpfleming@users.noreply.github.com>
Krzysztof Piecuch <piecuch@kpiecuch.pl> <3964215+pikrzysztof@users.noreply.github.com>
Kyle Evans <kevans@FreeBSD.org> <kevans91@users.noreply.github.com>
Laurențiu Nicola <lnicola@dend.ro> <lnicola@users.noreply.github.com>
loli10K <ezomori.nozomu@gmail.com> <loli10K@users.noreply.github.com>
Lorenz Hüdepohl <dev@stellardeath.org> <lhuedepohl@users.noreply.github.com>
Luís Henriques <henrix@camandro.org> <73643340+lumigch@users.noreply.github.com>
Marcin Skarbek <git@skarbek.name> <mskarbek@users.noreply.github.com>
Matt Fiddaman <github@m.fiddaman.uk> <81489167+matt-fidd@users.noreply.github.com>
Max Zettlmeißl <max@zettlmeissl.de> <6818198+maxz@users.noreply.github.com>
Michael Niewöhner <foss@mniewoehner.de> <c0d3z3r0@users.noreply.github.com>
Michael Zhivich <mzhivich@akamai.com> <33133421+mzhivich@users.noreply.github.com>
Mo Zhou <cdluminate@gmail.com> <5723047+cdluminate@users.noreply.github.com>
Nick Mattis <nickm970@gmail.com> <nmattis@users.noreply.github.com>
omni <omni+vagant@hack.org> <79493359+omnivagant@users.noreply.github.com>
Pablo Correa Gómez <ablocorrea@hotmail.com> <32678034+pablofsf@users.noreply.github.com>
Paul Zuchowski <pzuchowski@datto.com> <31706010+PaulZ-98@users.noreply.github.com>
Peter Ashford <ashford@accs.com> <pashford@users.noreply.github.com>
Peter Dave Hello <hsu@peterdavehello.org> <PeterDaveHello@users.noreply.github.com>
Peter Wirdemo <peter.wirdemo@gmail.com> <4224155+pewo@users.noreply.github.com>
Petros Koutoupis <petros@petroskoutoupis.com> <pkoutoupis@users.noreply.github.com>
Ping Huang <huangping@smartx.com> <101400146+hpingfs@users.noreply.github.com>
Piotr P. Stefaniak <pstef@freebsd.org> <pstef@users.noreply.github.com>
Richard Allen <belperite@gmail.com> <33836503+belperite@users.noreply.github.com>
Rich Ercolani <rincebrain@gmail.com> <214141+rincebrain@users.noreply.github.com>
Rob Wing <rob.wing@klarasystems.com> <98866084+rob-wing@users.noreply.github.com>
Roman Strashkin <roman.strashkin@nexenta.com> <Ramzec@users.noreply.github.com>
Ryan Hirasaki <ryanhirasaki@gmail.com> <4690732+RyanHir@users.noreply.github.com>
Samuel Wycliffe J <samwyc@hpe.com> <115969550+samwyc@users.noreply.github.com>
Samuel Wycliffe <samuelwycliffe@gmail.com> <50765275+npc203@users.noreply.github.com>
Savyasachee Jha <hi@savyasacheejha.com> <savyajha@users.noreply.github.com>
Scott Colby <scott@scolby.com> <scolby33@users.noreply.github.com>
Sean Eric Fagan <kithrup@mac.com> <kithrup@users.noreply.github.com>
Spencer Kinny <spencerkinny1995@gmail.com> <30333052+Spencer-Kinny@users.noreply.github.com>
Srikanth N S <srikanth.nagasubbaraoseetharaman@hpe.com> <75025422+nssrikanth@users.noreply.github.com>
Thomas Geppert <geppi@digitx.de> <geppi@users.noreply.github.com>
Tim Crawford <tcrawford@datto.com> <crawfxrd@users.noreply.github.com>
Tom Matthews <tom@axiom-partners.com> <tomtastic@users.noreply.github.com>
Tony Perkins <tperkins@datto.com> <62951051+tony-zfs@users.noreply.github.com>
Torsten Wörtwein <twoertwein@gmail.com> <twoertwein@users.noreply.github.com>
Tulsi Jain <tulsi.jain@delphix.com> <TulsiJain@users.noreply.github.com>
Václav Skála <skala@vshosting.cz> <33496485+vaclavskala@users.noreply.github.com>
Violet Purcell <vimproved@inventati.org> <66446404+vimproved@users.noreply.github.com>
Vipin Kumar Verma <vipin.verma@hpe.com> <75025470+vermavipinkumar@users.noreply.github.com>
Wolfgang Bumiller <w.bumiller@proxmox.com> <Blub@users.noreply.github.com>
xtouqh <xtouqh@hotmail.com> <72357159+xtouqh@users.noreply.github.com>
Yuri Pankov <yuripv@FreeBSD.org> <113725409+yuripv@users.noreply.github.com>
Yuri Pankov <yuripv@FreeBSD.org> <82001006+yuripv@users.noreply.github.com>

365
AUTHORS
View File

@@ -10,228 +10,450 @@ PAST MAINTAINERS:
CONTRIBUTORS: CONTRIBUTORS:
Aaron Fineman <abyxcos@gmail.com> Aaron Fineman <abyxcos@gmail.com>
Adam D. Moss <c@yotes.com>
Adam Leventhal <ahl@delphix.com> Adam Leventhal <ahl@delphix.com>
Adam Stevko <adam.stevko@gmail.com> Adam Stevko <adam.stevko@gmail.com>
adisbladis <adis@blad.is>
Adrian Chadd <adrian@freebsd.org>
Ahelenia Ziemiańska <nabijaczleweli@nabijaczleweli.xyz>
Ahmed G <ahmedg@delphix.com> Ahmed G <ahmedg@delphix.com>
Aidan Harris <me@aidanharr.is>
AJ Jordan <alex@strugee.net>
ajs124 <git@ajs124.de>
Akash Ayare <aayare@delphix.com> Akash Ayare <aayare@delphix.com>
Akash B <akash-b@hpe.com>
Alan Somers <asomers@gmail.com> Alan Somers <asomers@gmail.com>
Alar Aun <spamtoaun@gmail.com> Alar Aun <spamtoaun@gmail.com>
Albert Lee <trisk@nexenta.com> Albert Lee <trisk@nexenta.com>
Alec Salazar <alec.j.salazar@gmail.com> Alec Salazar <alec.j.salazar@gmail.com>
Alejandro Colomar <Colomar.6.4.3@GMail.com>
Alejandro R. Sedeño <asedeno@mit.edu> Alejandro R. Sedeño <asedeno@mit.edu>
Alek Pinchuk <alek@nexenta.com> Alek Pinchuk <alek@nexenta.com>
Aleksa Sarai <cyphar@cyphar.com> Aleksa Sarai <cyphar@cyphar.com>
Alexander Eremin <a.eremin@nexenta.com>
Alexander Lobakin <alobakin@pm.me>
Alexander Motin <mav@freebsd.org>
Alexander Pyhalov <apyhalov@gmail.com>
Alexander Richardson <Alexander.Richardson@cl.cam.ac.uk>
Alexander Stetsenko <ams@nexenta.com>
Alex Braunegg <alex.braunegg@gmail.com> Alex Braunegg <alex.braunegg@gmail.com>
Alexey Shvetsov <alexxy@gentoo.org>
Alexey Smirnoff <fling@member.fsf.org>
Alex John <alex@stty.io>
Alex McWhirter <alexmcwhirter@triadic.us> Alex McWhirter <alexmcwhirter@triadic.us>
Alex Reece <alex@delphix.com> Alex Reece <alex@delphix.com>
Alex Wilson <alex.wilson@joyent.com> Alex Wilson <alex.wilson@joyent.com>
Alex Zhuravlev <alexey.zhuravlev@intel.com> Alex Zhuravlev <alexey.zhuravlev@intel.com>
Alexander Eremin <a.eremin@nexenta.com>
Alexander Motin <mav@freebsd.org>
Alexander Pyhalov <apyhalov@gmail.com>
Alexander Stetsenko <ams@nexenta.com>
Alexey Shvetsov <alexxy@gentoo.org>
Alexey Smirnoff <fling@member.fsf.org>
Allan Jude <allanjude@freebsd.org> Allan Jude <allanjude@freebsd.org>
Allen Holl <allen.m.holl@gmail.com>
alteriks <alteriks@gmail.com>
Alyssa Ross <hi@alyssa.is>
Ameer Hamza <ahamza@ixsystems.com>
Anatoly Borodin <anatoly.borodin@gmail.com>
AndCycle <andcycle@andcycle.idv.tw> AndCycle <andcycle@andcycle.idv.tw>
Andrea Gelmini <andrea.gelmini@gelma.net>
Andrea Righi <andrea.righi@canonical.com>
Andreas Buschmann <andreas.buschmann@tech.net.de> Andreas Buschmann <andreas.buschmann@tech.net.de>
Andreas Dilger <adilger@intel.com> Andreas Dilger <adilger@intel.com>
Andreas Vögele <andreas@andreasvoegele.com>
Andrew Barnes <barnes333@gmail.com> Andrew Barnes <barnes333@gmail.com>
Andrew Hamilton <ahamilto@tjhsst.edu> Andrew Hamilton <ahamilto@tjhsst.edu>
Andrew Innes <andrew.c12@gmail.com>
Andrew J. Hesford <ajh@sideband.org>
Andrew Reid <ColdCanuck@nailedtotheperch.com> Andrew Reid <ColdCanuck@nailedtotheperch.com>
Andrew Stormont <andrew.stormont@nexenta.com> Andrew Stormont <andrew.stormont@nexenta.com>
Andrew Sun <me@andrewsun.com>
Andrew Tselischev <andrewtselischev@gmail.com> Andrew Tselischev <andrewtselischev@gmail.com>
Andrew Turner <andrew@fubar.geek.nz>
Andrew Walker <awalker@ixsystems.com>
Andrey Prokopenko <job@terem.fr>
Andrey Vesnovaty <andrey.vesnovaty@gmail.com> Andrey Vesnovaty <andrey.vesnovaty@gmail.com>
Andriy Gapon <avg@freebsd.org> Andriy Gapon <avg@freebsd.org>
Andy Bakun <github@thwartedefforts.org> Andy Bakun <github@thwartedefforts.org>
Andy Fiddaman <omnios@citrus-it.co.uk>
Aniruddha Shankar <k@191a.net> Aniruddha Shankar <k@191a.net>
Anton Gubarkov <anton.gubarkov@gmail.com>
Antonio Russo <antonio.e.russo@gmail.com> Antonio Russo <antonio.e.russo@gmail.com>
Arkadiusz Bubała <arkadiusz.bubala@open-e.com> Arkadiusz Bubała <arkadiusz.bubala@open-e.com>
Armin Wehrfritz <dkxls23@gmail.com>
Arne Jansen <arne@die-jansens.de> Arne Jansen <arne@die-jansens.de>
Aron Xu <happyaron.xu@gmail.com> Aron Xu <happyaron.xu@gmail.com>
Arshad Hussain <arshad.hussain@aeoncomputing.com>
Arun KV <arun.kv@datacore.com>
Arvind Sankar <nivedita@alum.mit.edu>
Attila Fülöp <attila@fueloep.org>
Avatat <kontakt@avatat.pl>
Bart Coddens <bart.coddens@gmail.com> Bart Coddens <bart.coddens@gmail.com>
Basil Crow <basil.crow@delphix.com> Basil Crow <basil.crow@delphix.com>
Huang Liu <liu.huang@zte.com.cn> Bassu <bassu@phi9.com>
Ben Allen <bsallen@alcf.anl.gov> Ben Allen <bsallen@alcf.anl.gov>
Ben Rubson <ben.rubson@gmail.com> Ben Cordero <bencord0@condi.me>
Benedikt Neuffer <github@itfriend.de>
Benjamin Albrecht <git@albrecht.io> Benjamin Albrecht <git@albrecht.io>
Benjamin Gentil <benjgentil.pro@gmail.com>
Ben McGough <bmcgough@fredhutch.org>
Ben Rubson <ben.rubson@gmail.com>
Ben Wolsieffer <benwolsieffer@gmail.com>
bernie1995 <bernie.pikes@gmail.com>
Bill McGonigle <bill-github.com-public1@bfccomputing.com> Bill McGonigle <bill-github.com-public1@bfccomputing.com>
Bill Pijewski <wdp@joyent.com> Bill Pijewski <wdp@joyent.com>
Boris Protopopov <boris.protopopov@nexenta.com> Boris Protopopov <boris.protopopov@nexenta.com>
Brad Forschinger <github@bnjf.id.au>
Brad Lewis <brad.lewis@delphix.com> Brad Lewis <brad.lewis@delphix.com>
Brandon Thetford <brandon@dodecatec.com>
Brian Atkinson <bwa@g.clemson.edu>
Brian Behlendorf <behlendorf1@llnl.gov> Brian Behlendorf <behlendorf1@llnl.gov>
Brian J. Murrell <brian@sun.com> Brian J. Murrell <brian@sun.com>
Brooks Davis <brooks@one-eyed-alien.net>
BtbN <btbn@btbn.de>
bunder2015 <omfgbunder@gmail.com>
buzzingwires <buzzingwires@outlook.com>
bzzz77 <bzzz.tomas@gmail.com>
cable2999 <cable2999@users.noreply.github.com>
Caleb James DeLisle <calebdelisle@lavabit.com> Caleb James DeLisle <calebdelisle@lavabit.com>
Cao Xuewen <cao.xuewen@zte.com.cn> Cao Xuewen <cao.xuewen@zte.com.cn>
Carlo Landmeter <clandmeter@gmail.com> Carlo Landmeter <clandmeter@gmail.com>
Carlos Alberto Lopez Perez <clopez@igalia.com> Carlos Alberto Lopez Perez <clopez@igalia.com>
Cedric Maunoury <cedric.maunoury@gmail.com>
Chaoyu Zhang <zhang.chaoyu@zte.com.cn> Chaoyu Zhang <zhang.chaoyu@zte.com.cn>
Charles Suh <charles.suh@gmail.com>
Chen Can <chen.can2@zte.com.cn> Chen Can <chen.can2@zte.com.cn>
Chengfei Zhu <chengfeix.zhu@intel.com>
Chen Haiquan <oc@yunify.com> Chen Haiquan <oc@yunify.com>
Chip Parker <aparker@enthought.com> Chip Parker <aparker@enthought.com>
Chris Burroughs <chris.burroughs@gmail.com> Chris Burroughs <chris.burroughs@gmail.com>
Chris Dunlap <cdunlap@llnl.gov> Chris Dunlap <cdunlap@llnl.gov>
Chris Dunlop <chris@onthe.net.au> Chris Dunlop <chris@onthe.net.au>
Chris Lindee <chris.lindee+github@gmail.com>
Chris McDonough <chrism@plope.com>
Chris Siden <chris.siden@delphix.com> Chris Siden <chris.siden@delphix.com>
Chris Wedgwood <cw@f00f.org> Chris Siebenmann <cks.github@cs.toronto.edu>
Chris Williamson <chris.williamson@delphix.com>
Chris Zubrzycki <github@mid-earth.net>
Christ Schlacta <aarcane@aarcane.info>
Christer Ekholm <che@chrekh.se> Christer Ekholm <che@chrekh.se>
Christian Kohlschütter <christian@kohlschutter.com> Christian Kohlschütter <christian@kohlschutter.com>
Christian Neukirchen <chneukirchen@gmail.com> Christian Neukirchen <chneukirchen@gmail.com>
Christian Schwarz <me@cschwarz.com> Christian Schwarz <me@cschwarz.com>
Christopher Voltz <cjunk@voltz.ws> Christopher Voltz <cjunk@voltz.ws>
Christ Schlacta <aarcane@aarcane.info>
Chris Wedgwood <cw@f00f.org>
Chris Williamson <chris.williamson@delphix.com>
Chris Zubrzycki <github@mid-earth.net>
Chuck Tuffli <ctuffli@gmail.com>
Chunwei Chen <david.chen@nutanix.com> Chunwei Chen <david.chen@nutanix.com>
Clemens Fruhwirth <clemens@endorphin.org> Clemens Fruhwirth <clemens@endorphin.org>
Clemens Lang <cl@clang.name>
Clint Armstrong <clint@clintarmstrong.net>
Coleman Kane <ckane@colemankane.org> Coleman Kane <ckane@colemankane.org>
Colin Ian King <colin.king@canonical.com> Colin Ian King <colin.king@canonical.com>
Colm Buckley <colm@tuatha.org>
Crag Wang <crag0715@gmail.com>
Craig Loomis <cloomis@astro.princeton.edu> Craig Loomis <cloomis@astro.princeton.edu>
Craig Sanders <github@taz.net.au> Craig Sanders <github@taz.net.au>
Cyril Plisko <cyril.plisko@infinidat.com> Cyril Plisko <cyril.plisko@infinidat.com>
DHE <git@dehacked.net> Cy Schubert <cy@FreeBSD.org>
Cédric Berger <cedric@precidata.com>
Dacian Reece-Stremtan <dacianstremtan@gmail.com>
Dag-Erling Smørgrav <des@FreeBSD.org>
Damiano Albani <damiano.albani@gmail.com>
Damian Szuberski <szuberskidamian@gmail.com>
Damian Wojsław <damian@wojslaw.pl> Damian Wojsław <damian@wojslaw.pl>
Daniel Hiepler <d-git@coderdu.de>
Daniel Hoffman <dj.hoffman@delphix.com>
Daniel Kobras <d.kobras@science-computing.de>
Daniel Kolesa <daniel@octaforge.org>
Daniel Reichelt <hacking@nachtgeist.net>
Daniel Stevenson <bot@dstev.net>
Daniel Verite <daniel@verite.pro>
Daniil Lunev <d.lunev.mail@gmail.com>
Dan Kimmel <dan.kimmel@delphix.com> Dan Kimmel <dan.kimmel@delphix.com>
Dan McDonald <danmcd@nexenta.com> Dan McDonald <danmcd@nexenta.com>
Dan Swartzendruber <dswartz@druber.com> Dan Swartzendruber <dswartz@druber.com>
Dan Vatca <dan.vatca@gmail.com> Dan Vatca <dan.vatca@gmail.com>
Daniel Hoffman <dj.hoffman@delphix.com>
Daniel Verite <daniel@verite.pro>
Daniil Lunev <d.lunev.mail@gmail.com>
Darik Horn <dajhorn@vanadac.com> Darik Horn <dajhorn@vanadac.com>
Dave Eddy <dave@daveeddy.com> Dave Eddy <dave@daveeddy.com>
David Hedberg <david@qzx.se>
David Lamparter <equinox@diac24.net> David Lamparter <equinox@diac24.net>
David Qian <david.qian@intel.com> David Qian <david.qian@intel.com>
David Quigley <david.quigley@intel.com> David Quigley <david.quigley@intel.com>
Debabrata Banerjee <dbanerje@akamai.com> Debabrata Banerjee <dbanerje@akamai.com>
D. Ebdrup <debdrup@freebsd.org>
Denys Rtveliashvili <denys@rtveliashvili.name> Denys Rtveliashvili <denys@rtveliashvili.name>
Derek Dai <daiderek@gmail.com> Derek Dai <daiderek@gmail.com>
DHE <git@dehacked.net>
Didier Roche <didrocks@ubuntu.com>
Dimitri John Ledkov <xnox@ubuntu.com> Dimitri John Ledkov <xnox@ubuntu.com>
Dimitry Andric <dimitry@andric.com>
Dirkjan Bussink <d.bussink@gmail.com>
Dmitry Khasanov <pik4ez@gmail.com> Dmitry Khasanov <pik4ez@gmail.com>
Dominic Pearson <dsp@technoanimal.net>
Dominik Hassler <hadfl@omniosce.org> Dominik Hassler <hadfl@omniosce.org>
Dominik Honnef <dominikh@fork-bomb.org> Dominik Honnef <dominikh@fork-bomb.org>
Don Brady <don.brady@delphix.com> Don Brady <don.brady@delphix.com>
Doug Rabson <dfr@rabson.org>
Dr. András Korn <korn-github.com@elan.rulez.org> Dr. András Korn <korn-github.com@elan.rulez.org>
Dries Michiels <driesm.michiels@gmail.com>
Edmund Nadolski <edmund.nadolski@ixsystems.com>
Eitan Adler <lists@eitanadler.com>
Eli Rosenthal <eli.rosenthal@delphix.com> Eli Rosenthal <eli.rosenthal@delphix.com>
Eli Schwartz <eschwartz93@gmail.com>
Eric Desrochers <eric.desrochers@canonical.com> Eric Desrochers <eric.desrochers@canonical.com>
Eric Dillmann <eric@jave.fr> Eric Dillmann <eric@jave.fr>
Eric Schrock <Eric.Schrock@delphix.com> Eric Schrock <Eric.Schrock@delphix.com>
Ethan Coe-Renner <coerenner1@llnl.gov>
Etienne Dechamps <etienne@edechamps.fr> Etienne Dechamps <etienne@edechamps.fr>
Evan Allrich <eallrich@gmail.com>
Evan Harris <eharris@puremagic.com>
Evan Susarret <evansus@gmail.com> Evan Susarret <evansus@gmail.com>
Fabian Grünbichler <f.gruenbichler@proxmox.com> Fabian Grünbichler <f.gruenbichler@proxmox.com>
Fabio Buso <dev.siroibaf@gmail.com>
Fabio Scaccabarozzi <fsvm88@gmail.com>
Fajar A. Nugraha <github@fajar.net> Fajar A. Nugraha <github@fajar.net>
Fan Yong <fan.yong@intel.com> Fan Yong <fan.yong@intel.com>
fbynite <fbynite@users.noreply.github.com>
Fedor Uporov <fuporov.vstack@gmail.com>
Felix Dörre <felix@dogcraft.de>
Felix Neumärker <xdch47@posteo.de>
Feng Sun <loyou85@gmail.com> Feng Sun <loyou85@gmail.com>
Finix Yan <yancw@info2soft.com>
Francesco Mazzoli <f@mazzo.li>
Frederik Wessels <wessels147@gmail.com> Frederik Wessels <wessels147@gmail.com>
Frédéric Vanniere <f.vanniere@planet-work.com> Frédéric Vanniere <f.vanniere@planet-work.com>
Gabriel A. Devenyi <gdevenyi@gmail.com>
Garrett D'Amore <garrett@nexenta.com> Garrett D'Amore <garrett@nexenta.com>
Garrett Fields <ghfields@gmail.com>
Garrison Jensen <garrison.jensen@gmail.com> Garrison Jensen <garrison.jensen@gmail.com>
Gary Mills <gary_mills@fastmail.fm> Gary Mills <gary_mills@fastmail.fm>
Gaurav Kumar <gauravk.18@gmail.com> Gaurav Kumar <gauravk.18@gmail.com>
GeLiXin <ge.lixin@zte.com.cn> GeLiXin <ge.lixin@zte.com.cn>
George Amanakis <g_amanakis@yahoo.com> George Amanakis <g_amanakis@yahoo.com>
George Diamantopoulos <georgediam@gmail.com>
George Gaydarov <git@gg7.io>
George Melikov <mail@gmelikov.ru> George Melikov <mail@gmelikov.ru>
George Wilson <gwilson@delphix.com> George Wilson <gwilson@delphix.com>
Georgy Yakovlev <ya@sysdump.net> Georgy Yakovlev <ya@sysdump.net>
Gerardwx <gerardw@alum.mit.edu>
Gian-Carlo DeFazio <defazio1@llnl.gov>
Gionatan Danti <g.danti@assyoma.it>
Giuseppe Di Natale <guss80@gmail.com> Giuseppe Di Natale <guss80@gmail.com>
Glenn Washburn <development@efficientek.com>
Gordan Bobic <gordan@redsleeve.org> Gordan Bobic <gordan@redsleeve.org>
Gordon Bergling <gbergling@googlemail.com>
Gordon Ross <gwr@nexenta.com> Gordon Ross <gwr@nexenta.com>
Graham Christensen <graham@grahamc.com>
Graham Perrin <grahamperrin@gmail.com>
Gregor Kopka <gregor@kopka.net> Gregor Kopka <gregor@kopka.net>
Gregory Bartholomew <gregory.lee.bartholomew@gmail.com>
grembo <freebsd@grem.de>
Grischa Zengel <github.zfsonlinux@zengel.info> Grischa Zengel <github.zfsonlinux@zengel.info>
grodik <pat@litke.dev>
Gunnar Beutner <gunnar@beutner.name> Gunnar Beutner <gunnar@beutner.name>
Gvozden Neskovic <neskovic@gmail.com> Gvozden Neskovic <neskovic@gmail.com>
Hajo Möller <dasjoe@gmail.com> Hajo Möller <dasjoe@gmail.com>
Han Gao <rabenda.cn@gmail.com>
Hans Rosenfeld <hans.rosenfeld@nexenta.com> Hans Rosenfeld <hans.rosenfeld@nexenta.com>
Harald van Dijk <harald@gigawatt.nl>
Harry Mallon <hjmallon@gmail.com>
Harry Sintonen <github-piru@kyber.fi>
HC <mmttdebbcc@yahoo.com>
hedong zhang <h_d_zhang@163.com>
Heitor Alves de Siqueira <halves@canonical.com>
Henrik Riomar <henrik.riomar@gmail.com>
Herb Wartens <wartens2@llnl.gov>
Hiếu Lê <leorize+oss@disroot.org>
Huang Liu <liu.huang@zte.com.cn>
Håkan Johansson <f96hajo@chalmers.se> Håkan Johansson <f96hajo@chalmers.se>
Igor K <igor@dilos.org>
Igor Kozhukhov <ikozhukhov@gmail.com> Igor Kozhukhov <ikozhukhov@gmail.com>
Igor Lvovsky <ilvovsky@gmail.com> Igor Lvovsky <ilvovsky@gmail.com>
ilbsmart <wgqimut@gmail.com>
illiliti <illiliti@protonmail.com>
ilovezfs <ilovezfs@icloud.com>
InsanePrawn <Insane.Prawny@gmail.com>
Isaac Huang <he.huang@intel.com> Isaac Huang <he.huang@intel.com>
JK Dingwall <james@dingwall.me.uk>
Jacek Fefliński <feflik@gmail.com> Jacek Fefliński <feflik@gmail.com>
Jacob Adams <tookmund@gmail.com>
Jake Howard <git@theorangeone.net>
James Cowgill <james.cowgill@mips.com> James Cowgill <james.cowgill@mips.com>
James H <james@kagisoft.co.uk>
James Lee <jlee@thestaticvoid.com> James Lee <jlee@thestaticvoid.com>
James Pan <jiaming.pan@yahoo.com> James Pan <jiaming.pan@yahoo.com>
James Wah <james@laird-wah.net>
Jan Engelhardt <jengelh@inai.de> Jan Engelhardt <jengelh@inai.de>
Jan Kryl <jan.kryl@nexenta.com> Jan Kryl <jan.kryl@nexenta.com>
Jan Sanislo <oystr@cs.washington.edu> Jan Sanislo <oystr@cs.washington.edu>
Jason Cohen <jwittlincohen@gmail.com>
Jason Harmening <jason.harmening@gmail.com>
Jason King <jason.brian.king@gmail.com> Jason King <jason.brian.king@gmail.com>
Jason Zaman <jasonzaman@gmail.com> Jason Zaman <jasonzaman@gmail.com>
Javen Wu <wu.javen@gmail.com> Javen Wu <wu.javen@gmail.com>
Jean-Baptiste Lallement <jean-baptiste@ubuntu.com>
Jeff Dike <jdike@akamai.com>
Jeremy Faulkner <gldisater@gmail.com>
Jeremy Gill <jgill@parallax-innovations.com> Jeremy Gill <jgill@parallax-innovations.com>
Jeremy Jones <jeremy@delphix.com> Jeremy Jones <jeremy@delphix.com>
Jeremy Visser <jeremy.visser@gmail.com>
Jerry Jelinek <jerry.jelinek@joyent.com> Jerry Jelinek <jerry.jelinek@joyent.com>
Jessica Clarke <jrtc27@jrtc27.com>
Jinshan Xiong <jinshan.xiong@intel.com> Jinshan Xiong <jinshan.xiong@intel.com>
Jitendra Patidar <jitendra.patidar@nutanix.com>
JK Dingwall <james@dingwall.me.uk>
Joe Stein <joe.stein@delphix.com> Joe Stein <joe.stein@delphix.com>
John-Mark Gurney <jmg@funkthat.com>
John Albietz <inthecloud247@gmail.com> John Albietz <inthecloud247@gmail.com>
John Eismeier <john.eismeier@gmail.com> John Eismeier <john.eismeier@gmail.com>
John L. Hammond <john.hammond@intel.com> John Gallagher <john.gallagher@delphix.com>
John Layman <jlayman@sagecloud.com> John Layman <jlayman@sagecloud.com>
John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de> John L. Hammond <john.hammond@intel.com>
John Wren Kennedy <john.kennedy@delphix.com> John M. Layman <jml@frijid.net>
Johnny Stenback <github@jstenback.com> Johnny Stenback <github@jstenback.com>
John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
John Poduska <jpoduska@datto.com>
John Ramsden <johnramsden@riseup.net>
John Wren Kennedy <john.kennedy@delphix.com>
jokersus <lolivampireslave@gmail.com>
Jonathon Fernyhough <jonathon@m2x.dev>
Jorgen Lundman <lundman@lundman.net> Jorgen Lundman <lundman@lundman.net>
Josef 'Jeff' Sipek <josef.sipek@nexenta.com> Josef 'Jeff' Sipek <josef.sipek@nexenta.com>
Josh Soref <jsoref@users.noreply.github.com>
Joshua M. Clulow <josh@sysmgr.org> Joshua M. Clulow <josh@sysmgr.org>
José Luis Salvador Rufo <salvador.joseluis@gmail.com>
João Carlos Mendes Luís <jonny@jonny.eng.br>
Julian Brunner <julian.brunner@gmail.com>
Julian Heuking <JulianH@beckhoff.com>
jumbi77 <jumbi77@users.noreply.github.com>
Justin Bedő <cu@cua0.org> Justin Bedő <cu@cua0.org>
Justin Gottula <justin@jgottula.com>
Justin Hibbits <chmeeedalf@gmail.com>
Justin Keogh <github.com@v6y.net>
Justin Lecher <jlec@gentoo.org> Justin Lecher <jlec@gentoo.org>
Justin Scholz <git@justinscholz.de>
Justin T. Gibbs <gibbs@FreeBSD.org> Justin T. Gibbs <gibbs@FreeBSD.org>
jyxent <jordanp@gmail.com>
Jörg Thalheim <joerg@higgsboson.tk> Jörg Thalheim <joerg@higgsboson.tk>
KORN Andras <korn@elan.rulez.org> ka7 <ka7@la-evento.com>
Ka Ho Ng <khng@FreeBSD.org>
Kamil Domański <kamil@domanski.co> Kamil Domański <kamil@domanski.co>
Karsten Kretschmer <kkretschmer@gmail.com> Karsten Kretschmer <kkretschmer@gmail.com>
Kash Pande <kash@tripleback.net> Kash Pande <kash@tripleback.net>
Kay Pedersen <christianpe96@gmail.com>
Keith M Wesolowski <wesolows@foobazco.org> Keith M Wesolowski <wesolows@foobazco.org>
KernelOfTruth <kerneloftruth@gmail.com>
Kevin Bowling <kevin.bowling@kev009.com>
Kevin Jin <lostking2008@hotmail.com>
Kevin P. Fleming <kevin@km6g.us>
Kevin Tanguy <kevin.tanguy@ovh.net> Kevin Tanguy <kevin.tanguy@ovh.net>
KireinaHoro <i@jsteward.moe> KireinaHoro <i@jsteward.moe>
Kjeld Schouten-Lebbing <kjeld@schouten-lebbing.nl> Kjeld Schouten-Lebbing <kjeld@schouten-lebbing.nl>
Kleber Tarcísio <klebertarcisio@yahoo.com.br>
Kody A Kantor <kody.kantor@gmail.com>
Kohsuke Kawaguchi <kk@kohsuke.org> Kohsuke Kawaguchi <kk@kohsuke.org>
Konstantin Khorenko <khorenko@virtuozzo.com>
KORN Andras <korn@elan.rulez.org>
Kristof Provost <github@sigsegv.be>
Krzysztof Piecuch <piecuch@kpiecuch.pl>
Kyle Blatter <kyleblatter@llnl.gov> Kyle Blatter <kyleblatter@llnl.gov>
Kyle Evans <kevans@FreeBSD.org>
Kyle Fuller <inbox@kylefuller.co.uk> Kyle Fuller <inbox@kylefuller.co.uk>
Loli <ezomori.nozomu@gmail.com> Laevos <Laevos@users.noreply.github.com>
Lalufu <Lalufu@users.noreply.github.com>
Lars Johannsen <laj@it.dk> Lars Johannsen <laj@it.dk>
Laura Hild <lsh@jlab.org>
Laurențiu Nicola <lnicola@dend.ro>
Lauri Tirkkonen <lauri@hacktheplanet.fi>
liaoyuxiangqin <guo.yong33@zte.com.cn>
Li Dongyang <dongyang.li@anu.edu.au> Li Dongyang <dongyang.li@anu.edu.au>
Liu Hua <liu.hua130@zte.com.cn>
Liu Qing <winglq@gmail.com>
Li Wei <W.Li@Sun.COM> Li Wei <W.Li@Sun.COM>
Loli <ezomori.nozomu@gmail.com>
lorddoskias <lorddoskias@gmail.com>
Lorenz Brun <lorenz@dolansoft.org>
Lorenz Hüdepohl <dev@stellardeath.org>
louwrentius <louwrentius@gmail.com>
Lukas Wunner <lukas@wunner.de> Lukas Wunner <lukas@wunner.de>
luozhengzheng <luo.zhengzheng@zte.com.cn>
Luís Henriques <henrix@camandro.org>
Madhav Suresh <madhav.suresh@delphix.com> Madhav Suresh <madhav.suresh@delphix.com>
manfromafar <jonsonb10@gmail.com>
Manoj Joseph <manoj.joseph@delphix.com> Manoj Joseph <manoj.joseph@delphix.com>
Manuel Amador (Rudd-O) <rudd-o@rudd-o.com> Manuel Amador (Rudd-O) <rudd-o@rudd-o.com>
Marcel Huber <marcelhuberfoo@gmail.com> Marcel Huber <marcelhuberfoo@gmail.com>
Marcel Menzel <mail@mcl.gg>
Marcel Schilling <marcel.schilling@uni-luebeck.de>
Marcel Telka <marcel.telka@nexenta.com> Marcel Telka <marcel.telka@nexenta.com>
Marcel Wysocki <maci.stgn@gmail.com> Marcel Wysocki <maci.stgn@gmail.com>
Marcin Skarbek <git@skarbek.name>
Mariusz Zaborski <mariusz.zaborski@klarasystems.com>
Mark Johnston <markj@FreeBSD.org>
Mark Maybee <mark.maybee@delphix.com>
Mark Roper <markroper@gmail.com>
Mark Shellenbaum <Mark.Shellenbaum@Oracle.COM> Mark Shellenbaum <Mark.Shellenbaum@Oracle.COM>
marku89 <mar42@kola.li>
Mark Wright <markwright@internode.on.net> Mark Wright <markwright@internode.on.net>
Martin Matuska <mm@FreeBSD.org> Martin Matuska <mm@FreeBSD.org>
Martin Rüegg <martin.rueegg@metaworx.ch>
Massimo Maggi <me@massimo-maggi.eu> Massimo Maggi <me@massimo-maggi.eu>
Matt Johnston <matt@fugro-fsi.com.au> Mateusz Guzik <mjguzik@gmail.com>
Matt Kemp <matt@mattikus.com> Mateusz Piotrowski <0mp@FreeBSD.org>
Mathieu Velten <matmaul@gmail.com>
Matt Fiddaman <github@m.fiddaman.uk>
Matthew Ahrens <matt@delphix.com> Matthew Ahrens <matt@delphix.com>
Matthew Thode <mthode@mthode.org> Matthew Thode <mthode@mthode.org>
Matthias Blankertz <matthias@blankertz.org>
Matt Johnston <matt@fugro-fsi.com.au>
Matt Kemp <matt@mattikus.com>
Matt Macy <mmacy@freebsd.org>
Matus Kral <matuskral@me.com> Matus Kral <matuskral@me.com>
Mauricio Faria de Oliveira <mfo@canonical.com>
Max Grossman <max.grossman@delphix.com> Max Grossman <max.grossman@delphix.com>
Maximilian Mehnert <maximilian.mehnert@gmx.de> Maximilian Mehnert <maximilian.mehnert@gmx.de>
Max Zettlmeißl <max@zettlmeissl.de>
Md Islam <mdnahian@outlook.com>
megari <megari@iki.fi>
Michael D Labriola <michael.d.labriola@gmail.com>
Michael Franzl <michael@franzl.name>
Michael Gebetsroither <michael@mgeb.org> Michael Gebetsroither <michael@mgeb.org>
Michael Kjorling <michael@kjorling.se> Michael Kjorling <michael@kjorling.se>
Michael Martin <mgmartin.mgm@gmail.com> Michael Martin <mgmartin.mgm@gmail.com>
Michael Niewöhner <foss@mniewoehner.de> Michael Niewöhner <foss@mniewoehner.de>
Michael Zhivich <mzhivich@akamai.com>
Michal Vasilek <michal@vasilek.cz>
Mike Gerdts <mike.gerdts@joyent.com> Mike Gerdts <mike.gerdts@joyent.com>
Mike Harsch <mike@harschsystems.com> Mike Harsch <mike@harschsystems.com>
Mike Leddy <mike.leddy@gmail.com> Mike Leddy <mike.leddy@gmail.com>
Mike Swanson <mikeonthecomputer@gmail.com> Mike Swanson <mikeonthecomputer@gmail.com>
Milan Jurik <milan.jurik@xylab.cz> Milan Jurik <milan.jurik@xylab.cz>
Minsoo Choo <minsoochoo0122@proton.me>
Mohamed Tawfik <m_tawfik@aucegypt.edu>
Morgan Jones <mjones@rice.edu> Morgan Jones <mjones@rice.edu>
Moritz Maxeiner <moritz@ucworks.org> Moritz Maxeiner <moritz@ucworks.org>
Mo Zhou <cdluminate@gmail.com>
naivekun <naivekun@outlook.com>
nathancheek <myself@nathancheek.com>
Nathaniel Clark <Nathaniel.Clark@misrule.us> Nathaniel Clark <Nathaniel.Clark@misrule.us>
Nathaniel Wesley Filardo <nwf@cs.jhu.edu> Nathaniel Wesley Filardo <nwf@cs.jhu.edu>
Nathan Lewis <linux.robotdude@gmail.com>
Nav Ravindranath <nav@delphix.com> Nav Ravindranath <nav@delphix.com>
Neal Gompa (ニール・ゴンパ) <ngompa13@gmail.com> Neal Gompa (ニール・ゴンパ) <ngompa13@gmail.com>
Ned Bass <bass6@llnl.gov> Ned Bass <bass6@llnl.gov>
Neependra Khare <neependra@kqinfotech.com> Neependra Khare <neependra@kqinfotech.com>
Neil Stockbridge <neil@dist.ro> Neil Stockbridge <neil@dist.ro>
Nick Black <dank@qemfd.net>
Nick Garvey <garvey.nick@gmail.com> Nick Garvey <garvey.nick@gmail.com>
Nick Mattis <nickm970@gmail.com>
Nick Terrell <terrelln@fb.com>
Niklas Haas <github-c6e1c8@haasn.xyz>
Nikolay Borisov <n.borisov.lkml@gmail.com> Nikolay Borisov <n.borisov.lkml@gmail.com>
nordaux <nordaux@gmail.com>
ofthesun9 <olivier@ofthesun.net>
Olaf Faaland <faaland1@llnl.gov> Olaf Faaland <faaland1@llnl.gov>
Oleg Drokin <green@linuxhacker.ru> Oleg Drokin <green@linuxhacker.ru>
Oleg Stepura <oleg@stepura.com> Oleg Stepura <oleg@stepura.com>
Olivier Mazouffre <olivier.mazouffre@ims-bordeaux.fr>
omni <omni+vagant@hack.org>
Orivej Desh <orivej@gmx.fr>
Pablo Correa Gómez <ablocorrea@hotmail.com>
Palash Gandhi <pbg4930@rit.edu>
Patrick Mooney <pmooney@pfmooney.com>
Patrik Greco <sikevux@sikevux.se> Patrik Greco <sikevux@sikevux.se>
Paul B. Henson <henson@acm.org> Paul B. Henson <henson@acm.org>
Paul Dagnelie <pcd@delphix.com> Paul Dagnelie <pcd@delphix.com>
@ -243,69 +465,160 @@ CONTRIBUTORS:
Pedro Giffuni <pfg@freebsd.org> Pedro Giffuni <pfg@freebsd.org>
Peng <peng.hse@xtaotech.com> Peng <peng.hse@xtaotech.com>
Peter Ashford <ashford@accs.com> Peter Ashford <ashford@accs.com>
Peter Dave Hello <hsu@peterdavehello.org>
Peter Levine <plevine457@gmail.com>
Peter Wirdemo <peter.wirdemo@gmail.com>
Petros Koutoupis <petros@petroskoutoupis.com>
Philip Pokorny <ppokorny@penguincomputing.com>
Philipp Riederer <pt@philipptoelke.de>
Phil Kauffman <philip@kauffman.me>
Ping Huang <huangping@smartx.com>
Piotr Kubaj <pkubaj@anongoth.pl>
Piotr P. Stefaniak <pstef@freebsd.org>
Prakash Surya <prakash.surya@delphix.com> Prakash Surya <prakash.surya@delphix.com>
Prasad Joshi <prasadjoshi124@gmail.com> Prasad Joshi <prasadjoshi124@gmail.com>
privb0x23 <privb0x23@users.noreply.github.com>
P.SCH <p88@yahoo.com>
Quentin Zdanis <zdanisq@gmail.com>
Rafael Kitover <rkitover@gmail.com>
RageLtMan <sempervictus@users.noreply.github.com>
Ralf Ertzinger <ralf@skytale.net> Ralf Ertzinger <ralf@skytale.net>
Randall Mason <ClashTheBunny@gmail.com> Randall Mason <ClashTheBunny@gmail.com>
Remy Blank <remy.blank@pobox.com> Remy Blank <remy.blank@pobox.com>
renelson <bnelson@nelsonbe.com>
Reno Reckling <e-github@wthack.de>
Ricardo M. Correia <ricardo.correia@oracle.com> Ricardo M. Correia <ricardo.correia@oracle.com>
Rich Ercolani <rincebrain@gmail.com> Riccardo Schirone <rschirone91@gmail.com>
Richard Allen <belperite@gmail.com>
Richard Elling <Richard.Elling@RichardElling.com> Richard Elling <Richard.Elling@RichardElling.com>
Richard Laager <rlaager@wiktel.com> Richard Laager <rlaager@wiktel.com>
Richard Lowe <richlowe@richlowe.net> Richard Lowe <richlowe@richlowe.net>
Richard Sharpe <rsharpe@samba.org> Richard Sharpe <rsharpe@samba.org>
Richard Yao <ryao@gentoo.org> Richard Yao <ryao@gentoo.org>
Rich Ercolani <rincebrain@gmail.com>
Robert Novak <sailnfool@gmail.com>
Roberto Ricci <ricci@disroot.org>
Rob Norris <robn@despairlabs.com>
Rob Wing <rew@FreeBSD.org>
Rohan Puri <rohan.puri15@gmail.com> Rohan Puri <rohan.puri15@gmail.com>
Romain Dolbeau <romain.dolbeau@atos.net> Romain Dolbeau <romain.dolbeau@atos.net>
Roman Strashkin <roman.strashkin@nexenta.com> Roman Strashkin <roman.strashkin@nexenta.com>
Ross Williams <ross@ross-williams.net>
Ruben Kerkhof <ruben@rubenkerkhof.com> Ruben Kerkhof <ruben@rubenkerkhof.com>
Ryan Hirasaki <ryanhirasaki@gmail.com>
Ryan Lahfa <masterancpp@gmail.com>
Ryan Libby <rlibby@FreeBSD.org>
Ryan Moeller <freqlabs@FreeBSD.org>
Sam Hathaway <github.com@munkynet.org>
Sam Lunt <samuel.j.lunt@gmail.com>
Samuel VERSCHELDE <stormi-github@ylix.fr>
Samuel Wycliffe <samuelwycliffe@gmail.com>
Samuel Wycliffe J <samwyc@hpe.com>
Sanjeev Bagewadi <sanjeev.bagewadi@gmail.com>
Sara Hartse <sara.hartse@delphix.com>
Saso Kiselkov <saso.kiselkov@nexenta.com> Saso Kiselkov <saso.kiselkov@nexenta.com>
Satadru Pramanik <satadru@gmail.com>
Savyasachee Jha <genghizkhan91@hawkradius.com>
Scott Colby <scott@scolby.com>
Scot W. Stevenson <scot.stevenson@gmail.com> Scot W. Stevenson <scot.stevenson@gmail.com>
Sean Eric Fagan <sef@ixsystems.com> Sean Eric Fagan <sef@ixsystems.com>
Sebastian Gottschall <s.gottschall@dd-wrt.com> Sebastian Gottschall <s.gottschall@dd-wrt.com>
Sebastien Roy <seb@delphix.com>
Sen Haerens <sen@senhaerens.be> Sen Haerens <sen@senhaerens.be>
Serapheim Dimitropoulos <serapheim@delphix.com> Serapheim Dimitropoulos <serapheim@delphix.com>
Seth Forshee <seth.forshee@canonical.com> Seth Forshee <seth.forshee@canonical.com>
Shaan Nobee <sniper111@gmail.com>
Shampavman <sham.pavman@nexenta.com> Shampavman <sham.pavman@nexenta.com>
Shaun Tancheff <shaun@aeonazure.com>
Shen Yan <shenyanxxxy@qq.com> Shen Yan <shenyanxxxy@qq.com>
Simon Guest <simon.guest@tesujimath.org> Simon Guest <simon.guest@tesujimath.org>
Simon Klinkert <simon.klinkert@gmail.com> Simon Klinkert <simon.klinkert@gmail.com>
Sowrabha Gopal <sowrabha.gopal@delphix.com> Sowrabha Gopal <sowrabha.gopal@delphix.com>
Spencer Kinny <spencerkinny1995@gmail.com>
Srikanth N S <srikanth.nagasubbaraoseetharaman@hpe.com>
Stanislav Seletskiy <s.seletskiy@gmail.com> Stanislav Seletskiy <s.seletskiy@gmail.com>
Steffen Müthing <steffen.muething@iwr.uni-heidelberg.de> Steffen Müthing <steffen.muething@iwr.uni-heidelberg.de>
Stephen Blinick <stephen.blinick@delphix.com> Stephen Blinick <stephen.blinick@delphix.com>
sterlingjensen <sterlingjensen@users.noreply.github.com>
Steve Dougherty <sdougherty@barracuda.com> Steve Dougherty <sdougherty@barracuda.com>
Steve Mokris <smokris@softpixel.com>
Steven Burgess <sburgess@dattobackup.com> Steven Burgess <sburgess@dattobackup.com>
Steven Hartland <smh@freebsd.org> Steven Hartland <smh@freebsd.org>
Steven Johnson <sjohnson@sakuraindustries.com> Steven Johnson <sjohnson@sakuraindustries.com>
Steven Noonan <steven@uplinklabs.net>
stf <s@ctrlc.hu>
Stian Ellingsen <stian@plaimi.net> Stian Ellingsen <stian@plaimi.net>
Stoiko Ivanov <github@nomore.at>
Stéphane Lesimple <speed47_github@speed47.net>
Suman Chakravartula <schakrava@gmail.com> Suman Chakravartula <schakrava@gmail.com>
Sydney Vanda <sydney.m.vanda@intel.com> Sydney Vanda <sydney.m.vanda@intel.com>
Sören Tempel <soeren+git@soeren-tempel.net> Sören Tempel <soeren+git@soeren-tempel.net>
Tamas TEVESZ <ice@extreme.hu>
Teodor Spæren <teodor_spaeren@riseup.net>
TerraTech <TerraTech@users.noreply.github.com>
Thijs Cramer <thijs.cramer@gmail.com> Thijs Cramer <thijs.cramer@gmail.com>
Thomas Geppert <geppi@digitx.de>
Thomas Lamprecht <guggentom@hotmail.de>
Till Maas <opensource@till.name>
Tim Chase <tim@chase2k.com> Tim Chase <tim@chase2k.com>
Tim Connors <tconnors@rather.puzzling.org> Tim Connors <tconnors@rather.puzzling.org>
Tim Crawford <tcrawford@datto.com> Tim Crawford <tcrawford@datto.com>
Tim Haley <Tim.Haley@Sun.COM> Tim Haley <Tim.Haley@Sun.COM>
timor <timor.dd@googlemail.com>
Timothy Day <tday141@gmail.com>
Tim Schumacher <timschumi@gmx.de>
Tino Reichardt <milky-zfs@mcmilk.de> Tino Reichardt <milky-zfs@mcmilk.de>
Tobin Harding <me@tobin.cc> Tobin Harding <me@tobin.cc>
Tom Caputi <tcaputi@datto.com> Tom Caputi <tcaputi@datto.com>
Tom Matthews <tom@axiom-partners.com> Tom Matthews <tom@axiom-partners.com>
Tom Prince <tom.prince@ualberta.net>
Tomohiro Kusumi <kusumi.tomohiro@gmail.com> Tomohiro Kusumi <kusumi.tomohiro@gmail.com>
Tom Prince <tom.prince@ualberta.net>
Tony Hutter <hutter2@llnl.gov> Tony Hutter <hutter2@llnl.gov>
Tony Nguyen <tony.nguyen@delphix.com>
Tony Perkins <tperkins@datto.com>
Toomas Soome <tsoome@me.com> Toomas Soome <tsoome@me.com>
Torsten Wörtwein <twoertwein@gmail.com>
Toyam Cox <aviator45003@gmail.com>
Trevor Bautista <trevrb@trevrb.net>
Trey Dockendorf <treydock@gmail.com> Trey Dockendorf <treydock@gmail.com>
Troels Nørgaard <tnn@tradeshift.com>
Tulsi Jain <tulsi.jain@delphix.com>
Turbo Fredriksson <turbo@bayour.com> Turbo Fredriksson <turbo@bayour.com>
Tyler J. Stachecki <stachecki.tyler@gmail.com> Tyler J. Stachecki <stachecki.tyler@gmail.com>
Umer Saleem <usaleem@ixsystems.com>
Valmiky Arquissandas <kayvlim@gmail.com>
Val Packett <val@packett.cool>
Vince van Oosten <techhazard@codeforyouand.me>
Violet Purcell <vimproved@inventati.org>
Vipin Kumar Verma <vipin.verma@hpe.com>
Vitaut Bajaryn <vitaut.bayaryn@gmail.com> Vitaut Bajaryn <vitaut.bayaryn@gmail.com>
Volker Mauel <volkermauel@gmail.com>
Václav Skála <skala@vshosting.cz>
Walter Huf <hufman@gmail.com>
Warner Losh <imp@bsdimp.com>
Weigang Li <weigang.li@intel.com> Weigang Li <weigang.li@intel.com>
WHR <msl0000023508@gmail.com>
Will Andrews <will@freebsd.org> Will Andrews <will@freebsd.org>
Will Rouesnel <w.rouesnel@gmail.com> Will Rouesnel <w.rouesnel@gmail.com>
Windel Bouwman <windel@windel.nl>
Wojciech Małota-Wójcik <outofforest@users.noreply.github.com>
Wolfgang Bumiller <w.bumiller@proxmox.com> Wolfgang Bumiller <w.bumiller@proxmox.com>
Xin Li <delphij@FreeBSD.org> Xin Li <delphij@FreeBSD.org>
Xinliang Liu <xinliang.liu@linaro.org>
xtouqh <xtouqh@hotmail.com>
Yann Collet <cyan@fb.com>
Yanping Gao <yanping.gao@xtaotech.com>
Ying Zhu <casualfisher@gmail.com> Ying Zhu <casualfisher@gmail.com>
Youzhong Yang <youzhong@gmail.com>
yparitcher <y@paritcher.com>
yuina822 <ayuichi@club.kyutech.ac.jp>
YunQiang Su <syq@debian.org> YunQiang Su <syq@debian.org>
Yuri Pankov <yuri.pankov@gmail.com> Yuri Pankov <yuri.pankov@gmail.com>
Yuxuan Shui <yshuiv7@gmail.com> Yuxuan Shui <yshuiv7@gmail.com>
Zachary Bedell <zac@thebedells.org> Zachary Bedell <zac@thebedells.org>
Zach Dykstra <dykstra.zachary@gmail.com>
zgock <zgock@nuc.base.zgock-lab.net>
Zhu Chuang <chuang@melty.land>
Érico Nogueira <erico.erc@gmail.com>
Đoàn Trần Công Danh <congdanhqx@gmail.com>
韩朴宇 <w12101111@gmail.com>

6
META
View File

@ -1,10 +1,10 @@
Meta: 1 Meta: 1
Name: zfs Name: zfs
Branch: 1.0 Branch: 1.0
Version: 2.2.0 Version: 2.2.1
Release: rc4 Release: 1
Release-Tags: relext Release-Tags: relext
License: CDDL License: CDDL
Author: OpenZFS Author: OpenZFS
Linux-Maximum: 6.5 Linux-Maximum: 6.6
Linux-Minimum: 3.10 Linux-Minimum: 3.10

View File

@ -711,7 +711,7 @@ def section_archits(kstats_dict):
pd_total = int(arc_stats['prefetch_data_hits']) +\ pd_total = int(arc_stats['prefetch_data_hits']) +\
int(arc_stats['prefetch_data_iohits']) +\ int(arc_stats['prefetch_data_iohits']) +\
int(arc_stats['prefetch_data_misses']) int(arc_stats['prefetch_data_misses'])
prt_2('ARC prefetch metadata accesses:', f_perc(pd_total, all_accesses), prt_2('ARC prefetch data accesses:', f_perc(pd_total, all_accesses),
f_hits(pd_total)) f_hits(pd_total))
pd_todo = (('Prefetch data hits:', arc_stats['prefetch_data_hits']), pd_todo = (('Prefetch data hits:', arc_stats['prefetch_data_hits']),
('Prefetch data I/O hits:', arc_stats['prefetch_data_iohits']), ('Prefetch data I/O hits:', arc_stats['prefetch_data_iohits']),

View File

@ -5179,7 +5179,7 @@ dump_label(const char *dev)
if (nvlist_size(config, &size, NV_ENCODE_XDR) != 0) if (nvlist_size(config, &size, NV_ENCODE_XDR) != 0)
size = buflen; size = buflen;
/* If the device is a cache device clear the header. */ /* If the device is a cache device read the header. */
if (!read_l2arc_header) { if (!read_l2arc_header) {
if (nvlist_lookup_uint64(config, if (nvlist_lookup_uint64(config,
ZPOOL_CONFIG_POOL_STATE, &l2cache) == 0 && ZPOOL_CONFIG_POOL_STATE, &l2cache) == 0 &&

View File

@ -24,6 +24,7 @@
* Copyright 2014 Nexenta Systems, Inc. All rights reserved. * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2016, 2017, Intel Corporation. * Copyright (c) 2016, 2017, Intel Corporation.
* Copyright (c) 2017 Open-E, Inc. All Rights Reserved. * Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
* Copyright (c) 2023, Klara Inc.
*/ */
/* /*
@ -146,6 +147,17 @@ zfs_unavail_pool(zpool_handle_t *zhp, void *data)
return (0); return (0);
} }
/*
* Write an array of strings to the zed log
*/
static void lines_to_zed_log_msg(char **lines, int lines_cnt)
{
int i;
for (i = 0; i < lines_cnt; i++) {
zed_log_msg(LOG_INFO, "%s", lines[i]);
}
}
/* /*
* Two stage replace on Linux * Two stage replace on Linux
* since we get disk notifications * since we get disk notifications
@ -193,14 +205,21 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
uint64_t is_spare = 0; uint64_t is_spare = 0;
const char *physpath = NULL, *new_devid = NULL, *enc_sysfs_path = NULL; const char *physpath = NULL, *new_devid = NULL, *enc_sysfs_path = NULL;
char rawpath[PATH_MAX], fullpath[PATH_MAX]; char rawpath[PATH_MAX], fullpath[PATH_MAX];
char devpath[PATH_MAX]; char pathbuf[PATH_MAX];
int ret; int ret;
int online_flag = ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE; int online_flag = ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE;
boolean_t is_sd = B_FALSE; boolean_t is_sd = B_FALSE;
boolean_t is_mpath_wholedisk = B_FALSE; boolean_t is_mpath_wholedisk = B_FALSE;
uint_t c; uint_t c;
vdev_stat_t *vs; vdev_stat_t *vs;
char **lines = NULL;
int lines_cnt = 0;
/*
* Get the persistent path, typically under the '/dev/disk/by-id' or
* '/dev/disk/by-vdev' directories. Note that this path can change
* when a vdev is replaced with a new disk.
*/
if (nvlist_lookup_string(vdev, ZPOOL_CONFIG_PATH, &path) != 0) if (nvlist_lookup_string(vdev, ZPOOL_CONFIG_PATH, &path) != 0)
return; return;
@ -359,15 +378,17 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
(void) snprintf(rawpath, sizeof (rawpath), "%s%s", (void) snprintf(rawpath, sizeof (rawpath), "%s%s",
is_sd ? DEV_BYVDEV_PATH : DEV_BYPATH_PATH, physpath); is_sd ? DEV_BYVDEV_PATH : DEV_BYPATH_PATH, physpath);
if (realpath(rawpath, devpath) == NULL && !is_mpath_wholedisk) { if (realpath(rawpath, pathbuf) == NULL && !is_mpath_wholedisk) {
zed_log_msg(LOG_INFO, " realpath: %s failed (%s)", zed_log_msg(LOG_INFO, " realpath: %s failed (%s)",
rawpath, strerror(errno)); rawpath, strerror(errno));
(void) zpool_vdev_online(zhp, fullpath, ZFS_ONLINE_FORCEFAULT, int err = zpool_vdev_online(zhp, fullpath,
&newstate); ZFS_ONLINE_FORCEFAULT, &newstate);
zed_log_msg(LOG_INFO, " zpool_vdev_online: %s FORCEFAULT (%s)", zed_log_msg(LOG_INFO, " zpool_vdev_online: %s FORCEFAULT (%s) "
fullpath, libzfs_error_description(g_zfshdl)); "err %d, new state %d",
fullpath, libzfs_error_description(g_zfshdl), err,
err ? (int)newstate : 0);
return; return;
} }
@ -385,6 +406,22 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
if (is_mpath_wholedisk) { if (is_mpath_wholedisk) {
/* Don't label device mapper or multipath disks. */ /* Don't label device mapper or multipath disks. */
zed_log_msg(LOG_INFO,
" it's a multipath wholedisk, don't label");
if (zpool_prepare_disk(zhp, vdev, "autoreplace", &lines,
&lines_cnt) != 0) {
zed_log_msg(LOG_INFO,
" zpool_prepare_disk: could not "
"prepare '%s' (%s)", fullpath,
libzfs_error_description(g_zfshdl));
if (lines_cnt > 0) {
zed_log_msg(LOG_INFO,
" zfs_prepare_disk output:");
lines_to_zed_log_msg(lines, lines_cnt);
}
libzfs_free_str_array(lines, lines_cnt);
return;
}
} else if (!labeled) { } else if (!labeled) {
/* /*
* we're auto-replacing a raw disk, so label it first * we're auto-replacing a raw disk, so label it first
@ -401,16 +438,24 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
* to trigger a ZFS fault for the device (and any hot spare * to trigger a ZFS fault for the device (and any hot spare
* replacement). * replacement).
*/ */
leafname = strrchr(devpath, '/') + 1; leafname = strrchr(pathbuf, '/') + 1;
/* /*
* If this is a request to label a whole disk, then attempt to * If this is a request to label a whole disk, then attempt to
* write out the label. * write out the label.
*/ */
if (zpool_label_disk(g_zfshdl, zhp, leafname) != 0) { if (zpool_prepare_and_label_disk(g_zfshdl, zhp, leafname,
zed_log_msg(LOG_INFO, " zpool_label_disk: could not " vdev, "autoreplace", &lines, &lines_cnt) != 0) {
zed_log_msg(LOG_WARNING,
" zpool_prepare_and_label_disk: could not "
"label '%s' (%s)", leafname, "label '%s' (%s)", leafname,
libzfs_error_description(g_zfshdl)); libzfs_error_description(g_zfshdl));
if (lines_cnt > 0) {
zed_log_msg(LOG_INFO,
" zfs_prepare_disk output:");
lines_to_zed_log_msg(lines, lines_cnt);
}
libzfs_free_str_array(lines, lines_cnt);
(void) zpool_vdev_online(zhp, fullpath, (void) zpool_vdev_online(zhp, fullpath,
ZFS_ONLINE_FORCEFAULT, &newstate); ZFS_ONLINE_FORCEFAULT, &newstate);
@ -433,7 +478,7 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
sizeof (device->pd_physpath)); sizeof (device->pd_physpath));
list_insert_tail(&g_device_list, device); list_insert_tail(&g_device_list, device);
zed_log_msg(LOG_INFO, " zpool_label_disk: async '%s' (%llu)", zed_log_msg(LOG_NOTICE, " zpool_label_disk: async '%s' (%llu)",
leafname, (u_longlong_t)guid); leafname, (u_longlong_t)guid);
return; /* resumes at EC_DEV_ADD.ESC_DISK for partition */ return; /* resumes at EC_DEV_ADD.ESC_DISK for partition */
@ -456,8 +501,8 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
} }
if (!found) { if (!found) {
/* unexpected partition slice encountered */ /* unexpected partition slice encountered */
zed_log_msg(LOG_INFO, "labeled disk %s unexpected here", zed_log_msg(LOG_WARNING, "labeled disk %s was "
fullpath); "unexpected here", fullpath);
(void) zpool_vdev_online(zhp, fullpath, (void) zpool_vdev_online(zhp, fullpath,
ZFS_ONLINE_FORCEFAULT, &newstate); ZFS_ONLINE_FORCEFAULT, &newstate);
return; return;
@ -466,10 +511,21 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
zed_log_msg(LOG_INFO, " zpool_label_disk: resume '%s' (%llu)", zed_log_msg(LOG_INFO, " zpool_label_disk: resume '%s' (%llu)",
physpath, (u_longlong_t)guid); physpath, (u_longlong_t)guid);
(void) snprintf(devpath, sizeof (devpath), "%s%s", /*
DEV_BYID_PATH, new_devid); * Paths that begin with '/dev/disk/by-id/' will change and so
* they must be updated before calling zpool_vdev_attach().
*/
if (strncmp(path, DEV_BYID_PATH, strlen(DEV_BYID_PATH)) == 0) {
(void) snprintf(pathbuf, sizeof (pathbuf), "%s%s",
DEV_BYID_PATH, new_devid);
zed_log_msg(LOG_INFO, " zpool_label_disk: path '%s' "
"replaced by '%s'", path, pathbuf);
path = pathbuf;
}
} }
libzfs_free_str_array(lines, lines_cnt);
/* /*
* Construct the root vdev to pass to zpool_vdev_attach(). While adding * Construct the root vdev to pass to zpool_vdev_attach(). While adding
* the entire vdev structure is harmless, we construct a reduced set of * the entire vdev structure is harmless, we construct a reduced set of
@ -508,9 +564,11 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
* Wait for udev to verify the links exist, then auto-replace * Wait for udev to verify the links exist, then auto-replace
* the leaf disk at same physical location. * the leaf disk at same physical location.
*/ */
if (zpool_label_disk_wait(path, 3000) != 0) { if (zpool_label_disk_wait(path, DISK_LABEL_WAIT) != 0) {
zed_log_msg(LOG_WARNING, "zfs_mod: expected replacement " zed_log_msg(LOG_WARNING, "zfs_mod: pool '%s', after labeling "
"disk %s is missing", path); "replacement disk, the expected disk partition link '%s' "
"is missing after waiting %u ms",
zpool_get_name(zhp), path, DISK_LABEL_WAIT);
nvlist_free(nvroot); nvlist_free(nvroot);
return; return;
} }
@ -525,7 +583,7 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
B_TRUE, B_FALSE); B_TRUE, B_FALSE);
} }
zed_log_msg(LOG_INFO, " zpool_vdev_replace: %s with %s (%s)", zed_log_msg(LOG_WARNING, " zpool_vdev_replace: %s with %s (%s)",
fullpath, path, (ret == 0) ? "no errors" : fullpath, path, (ret == 0) ? "no errors" :
libzfs_error_description(g_zfshdl)); libzfs_error_description(g_zfshdl));
@ -623,7 +681,7 @@ zfs_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *data)
dp->dd_prop, path); dp->dd_prop, path);
dp->dd_found = B_TRUE; dp->dd_found = B_TRUE;
/* pass the new devid for use by replacing code */ /* pass the new devid for use by auto-replacing code */
if (dp->dd_new_devid != NULL) { if (dp->dd_new_devid != NULL) {
(void) nvlist_add_string(nvl, "new_devid", (void) nvlist_add_string(nvl, "new_devid",
dp->dd_new_devid); dp->dd_new_devid);

View File

@ -6,7 +6,6 @@ edonr
embedded_data embedded_data
empty_bpobj empty_bpobj
enabled_txg enabled_txg
encryption
extensible_dataset extensible_dataset
filesystem_limits filesystem_limits
hole_birth hole_birth

View File

@ -443,37 +443,22 @@ vdev_run_cmd(vdev_cmd_data_t *data, char *cmd)
{ {
int rc; int rc;
char *argv[2] = {cmd}; char *argv[2] = {cmd};
char *env[5] = {(char *)"PATH=/bin:/sbin:/usr/bin:/usr/sbin"}; char **env;
char **lines = NULL; char **lines = NULL;
int lines_cnt = 0; int lines_cnt = 0;
int i; int i;
/* Setup our custom environment variables */ env = zpool_vdev_script_alloc_env(data->pool, data->path, data->upath,
rc = asprintf(&env[1], "VDEV_PATH=%s", data->vdev_enc_sysfs_path, NULL, NULL);
data->path ? data->path : ""); if (env == NULL)
if (rc == -1) {
env[1] = NULL;
goto out; goto out;
}
rc = asprintf(&env[2], "VDEV_UPATH=%s",
data->upath ? data->upath : "");
if (rc == -1) {
env[2] = NULL;
goto out;
}
rc = asprintf(&env[3], "VDEV_ENC_SYSFS_PATH=%s",
data->vdev_enc_sysfs_path ?
data->vdev_enc_sysfs_path : "");
if (rc == -1) {
env[3] = NULL;
goto out;
}
/* Run the command */ /* Run the command */
rc = libzfs_run_process_get_stdout_nopath(cmd, argv, env, &lines, rc = libzfs_run_process_get_stdout_nopath(cmd, argv, env, &lines,
&lines_cnt); &lines_cnt);
zpool_vdev_script_free_env(env);
if (rc != 0) if (rc != 0)
goto out; goto out;
@ -485,10 +470,6 @@ vdev_run_cmd(vdev_cmd_data_t *data, char *cmd)
out: out:
if (lines != NULL) if (lines != NULL)
libzfs_free_str_array(lines, lines_cnt); libzfs_free_str_array(lines, lines_cnt);
/* Start with i = 1 since env[0] was statically allocated */
for (i = 1; i < ARRAY_SIZE(env); i++)
free(env[i]);
} }
/* /*

View File

@ -3122,12 +3122,21 @@ zfs_force_import_required(nvlist_t *config)
nvlist_t *nvinfo; nvlist_t *nvinfo;
state = fnvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE); state = fnvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE);
(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, &hostid); nvinfo = fnvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO);
/*
* The hostid on LOAD_INFO comes from the MOS label via
* spa_tryimport(). If its not there then we're likely talking to an
* older kernel, so use the top one, which will be from the label
* discovered in zpool_find_import(), or if a cachefile is in use, the
* local hostid.
*/
if (nvlist_lookup_uint64(nvinfo, ZPOOL_CONFIG_HOSTID, &hostid) != 0)
nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, &hostid);
if (state != POOL_STATE_EXPORTED && hostid != get_system_hostid()) if (state != POOL_STATE_EXPORTED && hostid != get_system_hostid())
return (B_TRUE); return (B_TRUE);
nvinfo = fnvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO);
if (nvlist_exists(nvinfo, ZPOOL_CONFIG_MMP_STATE)) { if (nvlist_exists(nvinfo, ZPOOL_CONFIG_MMP_STATE)) {
mmp_state_t mmp_state = fnvlist_lookup_uint64(nvinfo, mmp_state_t mmp_state = fnvlist_lookup_uint64(nvinfo,
ZPOOL_CONFIG_MMP_STATE); ZPOOL_CONFIG_MMP_STATE);
@ -3198,7 +3207,10 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts,
time_t timestamp = 0; time_t timestamp = 0;
uint64_t hostid = 0; uint64_t hostid = 0;
if (nvlist_exists(config, ZPOOL_CONFIG_HOSTNAME)) if (nvlist_exists(nvinfo, ZPOOL_CONFIG_HOSTNAME))
hostname = fnvlist_lookup_string(nvinfo,
ZPOOL_CONFIG_HOSTNAME);
else if (nvlist_exists(config, ZPOOL_CONFIG_HOSTNAME))
hostname = fnvlist_lookup_string(config, hostname = fnvlist_lookup_string(config,
ZPOOL_CONFIG_HOSTNAME); ZPOOL_CONFIG_HOSTNAME);
@ -3206,7 +3218,10 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts,
timestamp = fnvlist_lookup_uint64(config, timestamp = fnvlist_lookup_uint64(config,
ZPOOL_CONFIG_TIMESTAMP); ZPOOL_CONFIG_TIMESTAMP);
if (nvlist_exists(config, ZPOOL_CONFIG_HOSTID)) if (nvlist_exists(nvinfo, ZPOOL_CONFIG_HOSTID))
hostid = fnvlist_lookup_uint64(nvinfo,
ZPOOL_CONFIG_HOSTID);
else if (nvlist_exists(config, ZPOOL_CONFIG_HOSTID))
hostid = fnvlist_lookup_uint64(config, hostid = fnvlist_lookup_uint64(config,
ZPOOL_CONFIG_HOSTID); ZPOOL_CONFIG_HOSTID);

View File

@ -126,6 +126,10 @@ vdev_cmd_data_list_t *all_pools_for_each_vdev_run(int argc, char **argv,
void free_vdev_cmd_data_list(vdev_cmd_data_list_t *vcdl); void free_vdev_cmd_data_list(vdev_cmd_data_list_t *vcdl);
void free_vdev_cmd_data(vdev_cmd_data_t *data);
int vdev_run_cmd_simple(char *path, char *cmd);
int check_device(const char *path, boolean_t force, int check_device(const char *path, boolean_t force,
boolean_t isspare, boolean_t iswholedisk); boolean_t isspare, boolean_t iswholedisk);
boolean_t check_sector_size_database(char *path, int *sector_size); boolean_t check_sector_size_database(char *path, int *sector_size);

View File

@ -936,6 +936,15 @@ zero_label(const char *path)
return (0); return (0);
} }
static void
lines_to_stderr(char *lines[], int lines_cnt)
{
int i;
for (i = 0; i < lines_cnt; i++) {
fprintf(stderr, "%s\n", lines[i]);
}
}
/* /*
* Go through and find any whole disks in the vdev specification, labelling them * Go through and find any whole disks in the vdev specification, labelling them
* as appropriate. When constructing the vdev spec, we were unable to open this * as appropriate. When constructing the vdev spec, we were unable to open this
@ -947,7 +956,7 @@ zero_label(const char *path)
* need to get the devid after we label the disk. * need to get the devid after we label the disk.
*/ */
static int static int
make_disks(zpool_handle_t *zhp, nvlist_t *nv) make_disks(zpool_handle_t *zhp, nvlist_t *nv, boolean_t replacing)
{ {
nvlist_t **child; nvlist_t **child;
uint_t c, children; uint_t c, children;
@ -1036,6 +1045,8 @@ make_disks(zpool_handle_t *zhp, nvlist_t *nv)
*/ */
if (!is_exclusive && !is_spare(NULL, udevpath)) { if (!is_exclusive && !is_spare(NULL, udevpath)) {
char *devnode = strrchr(devpath, '/') + 1; char *devnode = strrchr(devpath, '/') + 1;
char **lines = NULL;
int lines_cnt = 0;
ret = strncmp(udevpath, UDISK_ROOT, strlen(UDISK_ROOT)); ret = strncmp(udevpath, UDISK_ROOT, strlen(UDISK_ROOT));
if (ret == 0) { if (ret == 0) {
@ -1047,9 +1058,27 @@ make_disks(zpool_handle_t *zhp, nvlist_t *nv)
/* /*
* When labeling a pool the raw device node name * When labeling a pool the raw device node name
* is provided as it appears under /dev/. * is provided as it appears under /dev/.
*
* Note that 'zhp' will be NULL when we're creating a
* pool.
*/ */
if (zpool_label_disk(g_zfs, zhp, devnode) == -1) if (zpool_prepare_and_label_disk(g_zfs, zhp, devnode,
nv, zhp == NULL ? "create" :
replacing ? "replace" : "add", &lines,
&lines_cnt) != 0) {
(void) fprintf(stderr,
gettext(
"Error preparing/labeling disk.\n"));
if (lines_cnt > 0) {
(void) fprintf(stderr,
gettext("zfs_prepare_disk output:\n"));
lines_to_stderr(lines, lines_cnt);
}
libzfs_free_str_array(lines, lines_cnt);
return (-1); return (-1);
}
libzfs_free_str_array(lines, lines_cnt);
/* /*
* Wait for udev to signal the device is available * Wait for udev to signal the device is available
@ -1086,19 +1115,19 @@ make_disks(zpool_handle_t *zhp, nvlist_t *nv)
} }
for (c = 0; c < children; c++) for (c = 0; c < children; c++)
if ((ret = make_disks(zhp, child[c])) != 0) if ((ret = make_disks(zhp, child[c], replacing)) != 0)
return (ret); return (ret);
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES, if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
&child, &children) == 0) &child, &children) == 0)
for (c = 0; c < children; c++) for (c = 0; c < children; c++)
if ((ret = make_disks(zhp, child[c])) != 0) if ((ret = make_disks(zhp, child[c], replacing)) != 0)
return (ret); return (ret);
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE, if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
&child, &children) == 0) &child, &children) == 0)
for (c = 0; c < children; c++) for (c = 0; c < children; c++)
if ((ret = make_disks(zhp, child[c])) != 0) if ((ret = make_disks(zhp, child[c], replacing)) != 0)
return (ret); return (ret);
return (0); return (0);
@ -1758,7 +1787,7 @@ split_mirror_vdev(zpool_handle_t *zhp, char *newname, nvlist_t *props,
return (NULL); return (NULL);
} }
if (!flags.dryrun && make_disks(zhp, newroot) != 0) { if (!flags.dryrun && make_disks(zhp, newroot, B_FALSE) != 0) {
nvlist_free(newroot); nvlist_free(newroot);
return (NULL); return (NULL);
} }
@ -1879,7 +1908,7 @@ make_root_vdev(zpool_handle_t *zhp, nvlist_t *props, int force, int check_rep,
/* /*
* Run through the vdev specification and label any whole disks found. * Run through the vdev specification and label any whole disks found.
*/ */
if (!dryrun && make_disks(zhp, newroot) != 0) { if (!dryrun && make_disks(zhp, newroot, replacing) != 0) {
nvlist_free(newroot); nvlist_free(newroot);
return (NULL); return (NULL);
} }

View File

@ -42,6 +42,7 @@ AM_CPPFLAGS += -D_REENTRANT
AM_CPPFLAGS += -D_FILE_OFFSET_BITS=64 AM_CPPFLAGS += -D_FILE_OFFSET_BITS=64
AM_CPPFLAGS += -D_LARGEFILE64_SOURCE AM_CPPFLAGS += -D_LARGEFILE64_SOURCE
AM_CPPFLAGS += -DLIBEXECDIR=\"$(libexecdir)\" AM_CPPFLAGS += -DLIBEXECDIR=\"$(libexecdir)\"
AM_CPPFLAGS += -DZFSEXECDIR=\"$(zfsexecdir)\"
AM_CPPFLAGS += -DRUNSTATEDIR=\"$(runstatedir)\" AM_CPPFLAGS += -DRUNSTATEDIR=\"$(runstatedir)\"
AM_CPPFLAGS += -DSBINDIR=\"$(sbindir)\" AM_CPPFLAGS += -DSBINDIR=\"$(sbindir)\"
AM_CPPFLAGS += -DSYSCONFDIR=\"$(sysconfdir)\" AM_CPPFLAGS += -DSYSCONFDIR=\"$(sysconfdir)\"

View File

@ -0,0 +1,36 @@
dnl #
dnl # 6.6 API change,
dnl # fsync_bdev was removed in favor of sync_blockdev
dnl #
AC_DEFUN([ZFS_AC_KERNEL_SRC_SYNC_BDEV], [
ZFS_LINUX_TEST_SRC([fsync_bdev], [
#include <linux/blkdev.h>
],[
fsync_bdev(NULL);
])
ZFS_LINUX_TEST_SRC([sync_blockdev], [
#include <linux/blkdev.h>
],[
sync_blockdev(NULL);
])
])
AC_DEFUN([ZFS_AC_KERNEL_SYNC_BDEV], [
AC_MSG_CHECKING([whether fsync_bdev() exists])
ZFS_LINUX_TEST_RESULT([fsync_bdev], [
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_FSYNC_BDEV, 1,
[fsync_bdev() is declared in include/blkdev.h])
],[
AC_MSG_CHECKING([whether sync_blockdev() exists])
ZFS_LINUX_TEST_RESULT([sync_blockdev], [
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_SYNC_BLOCKDEV, 1,
[sync_blockdev() is declared in include/blkdev.h])
],[
ZFS_LINUX_TEST_ERROR(
[neither fsync_bdev() nor sync_blockdev() exist])
])
])
])

View File

@ -7,6 +7,10 @@ dnl #
dnl # 6.3 API dnl # 6.3 API
dnl # generic_fillattr() now takes struct mnt_idmap* as the first argument dnl # generic_fillattr() now takes struct mnt_idmap* as the first argument
dnl # dnl #
dnl # 6.6 API
dnl # generic_fillattr() now takes u32 as second argument, representing a
dnl # request_mask for statx
dnl #
AC_DEFUN([ZFS_AC_KERNEL_SRC_GENERIC_FILLATTR], [ AC_DEFUN([ZFS_AC_KERNEL_SRC_GENERIC_FILLATTR], [
ZFS_LINUX_TEST_SRC([generic_fillattr_userns], [ ZFS_LINUX_TEST_SRC([generic_fillattr_userns], [
#include <linux/fs.h> #include <linux/fs.h>
@ -25,22 +29,39 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_GENERIC_FILLATTR], [
struct kstat *k = NULL; struct kstat *k = NULL;
generic_fillattr(idmap, in, k); generic_fillattr(idmap, in, k);
]) ])
ZFS_LINUX_TEST_SRC([generic_fillattr_mnt_idmap_reqmask], [
#include <linux/fs.h>
],[
struct mnt_idmap *idmap = NULL;
struct inode *in = NULL;
struct kstat *k = NULL;
generic_fillattr(idmap, 0, in, k);
])
]) ])
AC_DEFUN([ZFS_AC_KERNEL_GENERIC_FILLATTR], [ AC_DEFUN([ZFS_AC_KERNEL_GENERIC_FILLATTR], [
AC_MSG_CHECKING([whether generic_fillattr requires struct mnt_idmap*]) AC_MSG_CHECKING(
ZFS_LINUX_TEST_RESULT([generic_fillattr_mnt_idmap], [ [whether generic_fillattr requires struct mnt_idmap* and request_mask])
ZFS_LINUX_TEST_RESULT([generic_fillattr_mnt_idmap_reqmask], [
AC_MSG_RESULT([yes]) AC_MSG_RESULT([yes])
AC_DEFINE(HAVE_GENERIC_FILLATTR_IDMAP, 1, AC_DEFINE(HAVE_GENERIC_FILLATTR_IDMAP_REQMASK, 1,
[generic_fillattr requires struct mnt_idmap*]) [generic_fillattr requires struct mnt_idmap* and u32 request_mask])
],[ ],[
AC_MSG_CHECKING([whether generic_fillattr requires struct user_namespace*]) AC_MSG_CHECKING([whether generic_fillattr requires struct mnt_idmap*])
ZFS_LINUX_TEST_RESULT([generic_fillattr_userns], [ ZFS_LINUX_TEST_RESULT([generic_fillattr_mnt_idmap], [
AC_MSG_RESULT([yes]) AC_MSG_RESULT([yes])
AC_DEFINE(HAVE_GENERIC_FILLATTR_USERNS, 1, AC_DEFINE(HAVE_GENERIC_FILLATTR_IDMAP, 1,
[generic_fillattr requires struct user_namespace*]) [generic_fillattr requires struct mnt_idmap*])
],[ ],[
AC_MSG_RESULT([no]) AC_MSG_CHECKING([whether generic_fillattr requires struct user_namespace*])
ZFS_LINUX_TEST_RESULT([generic_fillattr_userns], [
AC_MSG_RESULT([yes])
AC_DEFINE(HAVE_GENERIC_FILLATTR_USERNS, 1,
[generic_fillattr requires struct user_namespace*])
],[
AC_MSG_RESULT([no])
])
]) ])
]) ])
]) ])

View File

@ -27,6 +27,31 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_TIMES], [
memset(&ip, 0, sizeof(ip)); memset(&ip, 0, sizeof(ip));
ts = ip.i_mtime; ts = ip.i_mtime;
]) ])
dnl #
dnl # 6.6 API change
dnl # i_ctime no longer directly accessible, must use
dnl # inode_get_ctime(ip), inode_set_ctime*(ip) to
dnl # read/write.
dnl #
ZFS_LINUX_TEST_SRC([inode_get_ctime], [
#include <linux/fs.h>
],[
struct inode ip;
memset(&ip, 0, sizeof(ip));
inode_get_ctime(&ip);
])
ZFS_LINUX_TEST_SRC([inode_set_ctime_to_ts], [
#include <linux/fs.h>
],[
struct inode ip;
struct timespec64 ts;
memset(&ip, 0, sizeof(ip));
inode_set_ctime_to_ts(&ip, ts);
])
]) ])
AC_DEFUN([ZFS_AC_KERNEL_INODE_TIMES], [ AC_DEFUN([ZFS_AC_KERNEL_INODE_TIMES], [
@ -47,4 +72,22 @@ AC_DEFUN([ZFS_AC_KERNEL_INODE_TIMES], [
AC_DEFINE(HAVE_INODE_TIMESPEC64_TIMES, 1, AC_DEFINE(HAVE_INODE_TIMESPEC64_TIMES, 1,
[inode->i_*time's are timespec64]) [inode->i_*time's are timespec64])
]) ])
AC_MSG_CHECKING([whether inode_get_ctime() exists])
ZFS_LINUX_TEST_RESULT([inode_get_ctime], [
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_INODE_GET_CTIME, 1,
[inode_get_ctime() exists in linux/fs.h])
],[
AC_MSG_RESULT(no)
])
AC_MSG_CHECKING([whether inode_set_ctime_to_ts() exists])
ZFS_LINUX_TEST_RESULT([inode_set_ctime_to_ts], [
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_INODE_SET_CTIME_TO_TS, 1,
[inode_set_ctime_to_ts() exists in linux/fs.h])
],[
AC_MSG_RESULT(no)
])
]) ])

View File

@ -162,6 +162,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
ZFS_AC_KERNEL_SRC_RECLAIMED ZFS_AC_KERNEL_SRC_RECLAIMED
ZFS_AC_KERNEL_SRC_REGISTER_SYSCTL_TABLE ZFS_AC_KERNEL_SRC_REGISTER_SYSCTL_TABLE
ZFS_AC_KERNEL_SRC_COPY_SPLICE_READ ZFS_AC_KERNEL_SRC_COPY_SPLICE_READ
ZFS_AC_KERNEL_SRC_SYNC_BDEV
case "$host_cpu" in case "$host_cpu" in
powerpc*) powerpc*)
ZFS_AC_KERNEL_SRC_CPU_HAS_FEATURE ZFS_AC_KERNEL_SRC_CPU_HAS_FEATURE
@ -303,6 +304,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
ZFS_AC_KERNEL_RECLAIMED ZFS_AC_KERNEL_RECLAIMED
ZFS_AC_KERNEL_REGISTER_SYSCTL_TABLE ZFS_AC_KERNEL_REGISTER_SYSCTL_TABLE
ZFS_AC_KERNEL_COPY_SPLICE_READ ZFS_AC_KERNEL_COPY_SPLICE_READ
ZFS_AC_KERNEL_SYNC_BDEV
case "$host_cpu" in case "$host_cpu" in
powerpc*) powerpc*)
ZFS_AC_KERNEL_CPU_HAS_FEATURE ZFS_AC_KERNEL_CPU_HAS_FEATURE

View File

@ -358,6 +358,9 @@ AC_DEFUN([ZFS_AC_RPM], [
AS_IF([test -n "$udevruledir" ], [ AS_IF([test -n "$udevruledir" ], [
RPM_DEFINE_UTIL=${RPM_DEFINE_UTIL}' --define "_udevruledir $(udevruledir)"' RPM_DEFINE_UTIL=${RPM_DEFINE_UTIL}' --define "_udevruledir $(udevruledir)"'
]) ])
AS_IF([test -n "$bashcompletiondir" ], [
RPM_DEFINE_UTIL=${RPM_DEFINE_UTIL}' --define "_bashcompletiondir $(bashcompletiondir)"'
])
RPM_DEFINE_UTIL=${RPM_DEFINE_UTIL}' $(DEFINE_SYSTEMD)' RPM_DEFINE_UTIL=${RPM_DEFINE_UTIL}' $(DEFINE_SYSTEMD)'
RPM_DEFINE_UTIL=${RPM_DEFINE_UTIL}' $(DEFINE_PYZFS)' RPM_DEFINE_UTIL=${RPM_DEFINE_UTIL}' $(DEFINE_PYZFS)'
RPM_DEFINE_UTIL=${RPM_DEFINE_UTIL}' $(DEFINE_PAM)' RPM_DEFINE_UTIL=${RPM_DEFINE_UTIL}' $(DEFINE_PAM)'
@ -617,6 +620,18 @@ AC_DEFUN([ZFS_AC_DEFAULT_PACKAGE], [
AC_MSG_RESULT([no]) AC_MSG_RESULT([no])
fi fi
AC_SUBST(RPM_DEFINE_INITRAMFS) AC_SUBST(RPM_DEFINE_INITRAMFS)
AC_MSG_CHECKING([default bash completion directory])
case "$VENDOR" in
ubuntu) bashcompletiondir=/usr/share/bash-completion/completions ;;
debian) bashcompletiondir=/usr/share/bash-completion/completions ;;
freebsd) bashcompletiondir=$sysconfdir/bash_completion.d;;
gentoo) bashcompletiondir=/usr/share/bash-completion/completions ;;
*) bashcompletiondir=/etc/bash_completion.d ;;
esac
AC_MSG_RESULT([$bashcompletiondir])
AC_SUBST(bashcompletiondir)
]) ])
dnl # dnl #

View File

@ -1,5 +1,3 @@
bashcompletiondir = $(sysconfdir)/bash_completion.d
nodist_bashcompletion_DATA = %D%/zfs nodist_bashcompletion_DATA = %D%/zfs
SUBSTFILES += $(nodist_bashcompletion_DATA) SUBSTFILES += $(nodist_bashcompletion_DATA)

View File

@ -1,7 +1,6 @@
etc/default/zfs etc/default/zfs
etc/zfs/zfs-functions etc/zfs/zfs-functions
etc/zfs/zpool.d/ etc/zfs/zpool.d/
etc/bash_completion.d/zfs
lib/systemd/system-generators/ lib/systemd/system-generators/
lib/systemd/system-preset/ lib/systemd/system-preset/
lib/systemd/system/zfs-import-cache.service lib/systemd/system/zfs-import-cache.service
@ -35,6 +34,7 @@ usr/bin/zvol_wait
usr/lib/modules-load.d/ lib/ usr/lib/modules-load.d/ lib/
usr/lib/zfs-linux/zpool.d/ usr/lib/zfs-linux/zpool.d/
usr/lib/zfs-linux/zpool_influxdb usr/lib/zfs-linux/zpool_influxdb
usr/lib/zfs-linux/zfs_prepare_disk
usr/sbin/arc_summary usr/sbin/arc_summary
usr/sbin/arcstat usr/sbin/arcstat
usr/sbin/dbufstat usr/sbin/dbufstat
@ -88,6 +88,7 @@ usr/share/man/man8/zfs-wait.8
usr/share/man/man8/zfs-zone.8 usr/share/man/man8/zfs-zone.8
usr/share/man/man8/zfs.8 usr/share/man/man8/zfs.8
usr/share/man/man8/zfs_ids_to_path.8 usr/share/man/man8/zfs_ids_to_path.8
usr/share/man/man8/zfs_prepare_disk.8
usr/share/man/man7/zfsconcepts.7 usr/share/man/man7/zfsconcepts.7
usr/share/man/man7/zfsprops.7 usr/share/man/man7/zfsprops.7
usr/share/man/man8/zgenhostid.8 usr/share/man/man8/zgenhostid.8

View File

@ -71,10 +71,6 @@ override_dh_auto_install:
@# Install the utilities. @# Install the utilities.
$(MAKE) install DESTDIR='$(CURDIR)/debian/tmp' $(MAKE) install DESTDIR='$(CURDIR)/debian/tmp'
# Use upstream's bash completion
install -D -t '$(CURDIR)/debian/tmp/usr/share/bash-completion/completions/' \
'$(CURDIR)/contrib/bash_completion.d/zfs'
# Move from bin_dir to /usr/sbin # Move from bin_dir to /usr/sbin
# Remove suffix (.py) as per policy 10.4 - Scripts # Remove suffix (.py) as per policy 10.4 - Scripts
# https://www.debian.org/doc/debian-policy/ch-files.html#s-scripts # https://www.debian.org/doc/debian-policy/ch-files.html#s-scripts
@ -136,7 +132,6 @@ override_dh_auto_install:
chmod a-x '$(CURDIR)/debian/tmp/etc/zfs/zfs-functions' chmod a-x '$(CURDIR)/debian/tmp/etc/zfs/zfs-functions'
chmod a-x '$(CURDIR)/debian/tmp/etc/default/zfs' chmod a-x '$(CURDIR)/debian/tmp/etc/default/zfs'
chmod a-x '$(CURDIR)/debian/tmp/usr/share/bash-completion/completions/zfs'
override_dh_python3: override_dh_python3:
dh_python3 -p openzfs-python3-pyzfs dh_python3 -p openzfs-python3-pyzfs

View File

@ -326,6 +326,15 @@ _LIBZFS_H nvlist_t *zpool_find_vdev_by_physpath(zpool_handle_t *, const char *,
boolean_t *, boolean_t *, boolean_t *); boolean_t *, boolean_t *, boolean_t *);
_LIBZFS_H int zpool_label_disk(libzfs_handle_t *, zpool_handle_t *, _LIBZFS_H int zpool_label_disk(libzfs_handle_t *, zpool_handle_t *,
const char *); const char *);
_LIBZFS_H int zpool_prepare_disk(zpool_handle_t *zhp, nvlist_t *vdev_nv,
const char *prepare_str, char **lines[], int *lines_cnt);
_LIBZFS_H int zpool_prepare_and_label_disk(libzfs_handle_t *hdl,
zpool_handle_t *, const char *, nvlist_t *vdev_nv, const char *prepare_str,
char **lines[], int *lines_cnt);
_LIBZFS_H char ** zpool_vdev_script_alloc_env(const char *pool_name,
const char *vdev_path, const char *vdev_upath,
const char *vdev_enc_sysfs_path, const char *opt_key, const char *opt_val);
_LIBZFS_H void zpool_vdev_script_free_env(char **env);
_LIBZFS_H uint64_t zpool_vdev_path_to_guid(zpool_handle_t *zhp, _LIBZFS_H uint64_t zpool_vdev_path_to_guid(zpool_handle_t *zhp,
const char *path); const char *path);

View File

@ -34,7 +34,7 @@ extern "C" {
#endif #endif
/* /*
* Default wait time for a device name to be created. * Default wait time in milliseconds for a device name to be created.
*/ */
#define DISK_LABEL_WAIT (30 * 1000) /* 30 seconds */ #define DISK_LABEL_WAIT (30 * 1000) /* 30 seconds */

View File

@ -167,7 +167,7 @@ atomic_dec_64_nv(volatile uint64_t *target)
return (atomic_add_64_nv(target, -1)); return (atomic_add_64_nv(target, -1));
} }
#if !defined(COMPAT_32BIT) && defined(__LP64__) #ifdef __LP64__
static __inline void * static __inline void *
atomic_cas_ptr(volatile void *target, void *cmp, void *newval) atomic_cas_ptr(volatile void *target, void *cmp, void *newval)
{ {
@ -181,7 +181,7 @@ atomic_cas_ptr(volatile void *target, void *cmp, void *newval)
return ((void *)atomic_cas_32((volatile uint32_t *)target, return ((void *)atomic_cas_32((volatile uint32_t *)target,
(uint32_t)cmp, (uint32_t)newval)); (uint32_t)cmp, (uint32_t)newval));
} }
#endif /* !defined(COMPAT_32BIT) && defined(__LP64__) */ #endif /* __LP64__ */
#else /* _STANDALONE */ #else /* _STANDALONE */
/* /*
@ -190,6 +190,8 @@ atomic_cas_ptr(volatile void *target, void *cmp, void *newval)
*/ */
#undef atomic_add_64 #undef atomic_add_64
#define atomic_add_64(ptr, val) *(ptr) += val #define atomic_add_64(ptr, val) *(ptr) += val
#undef atomic_sub_64
#define atomic_sub_64(ptr, val) *(ptr) -= val
#endif /* !_STANDALONE */ #endif /* !_STANDALONE */
#endif /* !_OPENSOLARIS_SYS_ATOMIC_H_ */ #endif /* !_OPENSOLARIS_SYS_ATOMIC_H_ */

View File

@ -1,5 +1,5 @@
/* /*
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD * SPDX-License-Identifier: BSD-2-Clause
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions * modification, are permitted provided that the following conditions

View File

@ -39,5 +39,6 @@
((C) >= 0x3A && (C) <= 0x40) || \ ((C) >= 0x3A && (C) <= 0x40) || \
((C) >= 0x5B && (C) <= 0x60) || \ ((C) >= 0x5B && (C) <= 0x60) || \
((C) >= 0x7B && (C) <= 0x7E)) ((C) >= 0x7B && (C) <= 0x7E))
#define isspace(C) ((C) == 0x20 || ((C) >= 0x9 && (C) <= 0xD))
#endif #endif

View File

@ -64,6 +64,7 @@ typedef enum {
} while (0) } while (0)
#define mutex_destroy(lock) sx_destroy(lock) #define mutex_destroy(lock) sx_destroy(lock)
#define mutex_enter(lock) sx_xlock(lock) #define mutex_enter(lock) sx_xlock(lock)
#define mutex_enter_interruptible(lock) sx_xlock_sig(lock)
#define mutex_enter_nested(lock, type) sx_xlock(lock) #define mutex_enter_nested(lock, type) sx_xlock(lock)
#define mutex_tryenter(lock) sx_try_xlock(lock) #define mutex_tryenter(lock) sx_try_xlock(lock)
#define mutex_exit(lock) sx_xunlock(lock) #define mutex_exit(lock) sx_xunlock(lock)

View File

@ -1,5 +1,5 @@
/* /*
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD * SPDX-License-Identifier: BSD-2-Clause
* *
* Copyright (c) 2000 Jake Burkholder <jake@freebsd.org>. * Copyright (c) 2000 Jake Burkholder <jake@freebsd.org>.
* All rights reserved. * All rights reserved.

View File

@ -30,9 +30,9 @@
#include <sys/types.h> #include <sys/types.h>
#include <sys/proc.h> #include <sys/proc.h>
#include <sys/queue.h>
#include <sys/taskqueue.h> #include <sys/taskqueue.h>
#include <sys/thread.h> #include <sys/thread.h>
#include <sys/ck.h>
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
@ -48,16 +48,16 @@ typedef uintptr_t taskqid_t;
typedef void (task_func_t)(void *); typedef void (task_func_t)(void *);
typedef struct taskq_ent { typedef struct taskq_ent {
struct task tqent_task; union {
struct timeout_task tqent_timeout_task; struct task tqent_task;
struct timeout_task tqent_timeout_task;
};
task_func_t *tqent_func; task_func_t *tqent_func;
void *tqent_arg; void *tqent_arg;
taskqid_t tqent_id; taskqid_t tqent_id;
CK_LIST_ENTRY(taskq_ent) tqent_hash; LIST_ENTRY(taskq_ent) tqent_hash;
uint8_t tqent_type; uint_t tqent_type;
uint8_t tqent_registered; volatile uint_t tqent_rc;
uint8_t tqent_cancelled;
volatile uint32_t tqent_rc;
} taskq_ent_t; } taskq_ent_t;
/* /*

View File

@ -1,5 +1,5 @@
/* /*
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD * SPDX-License-Identifier: BSD-2-Clause
* *
* Copyright (c) 2022 Martin Matuska * Copyright (c) 2022 Martin Matuska
* *

View File

@ -1,5 +1,5 @@
/* /*
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD * SPDX-License-Identifier: BSD-2-Clause
* *
* Copyright (c) 2022 Rob Wing * Copyright (c) 2022 Rob Wing
* *

View File

@ -78,7 +78,7 @@ extern int hz;
extern int tick; extern int tick;
typedef int fstrans_cookie_t; typedef int fstrans_cookie_t;
#define spl_fstrans_mark() (0) #define spl_fstrans_mark() (0)
#define spl_fstrans_unmark(x) (x = 0) #define spl_fstrans_unmark(x) ((void)x)
#define signal_pending(x) SIGPENDING(x) #define signal_pending(x) SIGPENDING(x)
#define current curthread #define current curthread
#define thread_join(x) #define thread_join(x)

View File

@ -286,6 +286,7 @@ typedef struct zfid_long {
extern uint_t zfs_fsyncer_key; extern uint_t zfs_fsyncer_key;
extern int zfs_super_owner; extern int zfs_super_owner;
extern int zfs_bclone_enabled;
extern void zfs_init(void); extern void zfs_init(void);
extern void zfs_fini(void); extern void zfs_fini(void);

View File

@ -461,10 +461,16 @@ zpl_is_32bit_api(void)
* 6.3 API change * 6.3 API change
* generic_fillattr() first arg is changed to struct mnt_idmap * * generic_fillattr() first arg is changed to struct mnt_idmap *
* *
* 6.6 API change
* generic_fillattr() gets new second arg request_mask, a u32 type
*
*/ */
#ifdef HAVE_GENERIC_FILLATTR_IDMAP #ifdef HAVE_GENERIC_FILLATTR_IDMAP
#define zpl_generic_fillattr(idmap, ip, sp) \ #define zpl_generic_fillattr(idmap, ip, sp) \
generic_fillattr(idmap, ip, sp) generic_fillattr(idmap, ip, sp)
#elif defined(HAVE_GENERIC_FILLATTR_IDMAP_REQMASK)
#define zpl_generic_fillattr(idmap, rqm, ip, sp) \
generic_fillattr(idmap, rqm, ip, sp)
#elif defined(HAVE_GENERIC_FILLATTR_USERNS) #elif defined(HAVE_GENERIC_FILLATTR_USERNS)
#define zpl_generic_fillattr(user_ns, ip, sp) \ #define zpl_generic_fillattr(user_ns, ip, sp) \
generic_fillattr(user_ns, ip, sp) generic_fillattr(user_ns, ip, sp)

View File

@ -108,7 +108,7 @@ typedef struct spl_kmem_magazine {
uint32_t skm_refill; /* Batch refill size */ uint32_t skm_refill; /* Batch refill size */
struct spl_kmem_cache *skm_cache; /* Owned by cache */ struct spl_kmem_cache *skm_cache; /* Owned by cache */
unsigned int skm_cpu; /* Owned by cpu */ unsigned int skm_cpu; /* Owned by cpu */
void *skm_objs[0]; /* Object pointers */ void *skm_objs[]; /* Object pointers */
} spl_kmem_magazine_t; } spl_kmem_magazine_t;
typedef struct spl_kmem_obj { typedef struct spl_kmem_obj {

View File

@ -128,7 +128,6 @@ spl_mutex_lockdep_on_maybe(kmutex_t *mp) \
#define NESTED_SINGLE 1 #define NESTED_SINGLE 1
#ifdef CONFIG_DEBUG_LOCK_ALLOC
#define mutex_enter_nested(mp, subclass) \ #define mutex_enter_nested(mp, subclass) \
{ \ { \
ASSERT3P(mutex_owner(mp), !=, current); \ ASSERT3P(mutex_owner(mp), !=, current); \
@ -137,16 +136,22 @@ spl_mutex_lockdep_on_maybe(kmutex_t *mp) \
spl_mutex_lockdep_on_maybe(mp); \ spl_mutex_lockdep_on_maybe(mp); \
spl_mutex_set_owner(mp); \ spl_mutex_set_owner(mp); \
} }
#else /* CONFIG_DEBUG_LOCK_ALLOC */
#define mutex_enter_nested(mp, subclass) \ #define mutex_enter_interruptible(mp) \
{ \ /* CSTYLED */ \
({ \
int _rc_; \
\
ASSERT3P(mutex_owner(mp), !=, current); \ ASSERT3P(mutex_owner(mp), !=, current); \
spl_mutex_lockdep_off_maybe(mp); \ spl_mutex_lockdep_off_maybe(mp); \
mutex_lock(MUTEX(mp)); \ _rc_ = mutex_lock_interruptible(MUTEX(mp)); \
spl_mutex_lockdep_on_maybe(mp); \ spl_mutex_lockdep_on_maybe(mp); \
spl_mutex_set_owner(mp); \ if (!_rc_) { \
} spl_mutex_set_owner(mp); \
#endif /* CONFIG_DEBUG_LOCK_ALLOC */ } \
\
_rc_; \
})
#define mutex_enter(mp) mutex_enter_nested((mp), 0) #define mutex_enter(mp) mutex_enter_nested((mp), 0)

View File

@ -73,13 +73,6 @@ typedef struct zfs_uio {
size_t uio_skip; size_t uio_skip;
struct request *rq; struct request *rq;
/*
* Used for saving rq_for_each_segment() state between calls
* to zfs_uiomove_bvec_rq().
*/
struct req_iterator iter;
struct bio_vec bv;
} zfs_uio_t; } zfs_uio_t;
@ -138,7 +131,6 @@ zfs_uio_bvec_init(zfs_uio_t *uio, struct bio *bio, struct request *rq)
} else { } else {
uio->uio_bvec = NULL; uio->uio_bvec = NULL;
uio->uio_iovcnt = 0; uio->uio_iovcnt = 0;
memset(&uio->iter, 0, sizeof (uio->iter));
} }
uio->uio_loffset = io_offset(bio, rq); uio->uio_loffset = io_offset(bio, rq);

View File

@ -51,7 +51,6 @@ DECLARE_EVENT_CLASS(zfs_arc_buf_hdr_class,
__array(uint64_t, hdr_dva_word, 2) __array(uint64_t, hdr_dva_word, 2)
__field(uint64_t, hdr_birth) __field(uint64_t, hdr_birth)
__field(uint32_t, hdr_flags) __field(uint32_t, hdr_flags)
__field(uint32_t, hdr_bufcnt)
__field(arc_buf_contents_t, hdr_type) __field(arc_buf_contents_t, hdr_type)
__field(uint16_t, hdr_psize) __field(uint16_t, hdr_psize)
__field(uint16_t, hdr_lsize) __field(uint16_t, hdr_lsize)
@ -70,7 +69,6 @@ DECLARE_EVENT_CLASS(zfs_arc_buf_hdr_class,
__entry->hdr_dva_word[1] = ab->b_dva.dva_word[1]; __entry->hdr_dva_word[1] = ab->b_dva.dva_word[1];
__entry->hdr_birth = ab->b_birth; __entry->hdr_birth = ab->b_birth;
__entry->hdr_flags = ab->b_flags; __entry->hdr_flags = ab->b_flags;
__entry->hdr_bufcnt = ab->b_l1hdr.b_bufcnt;
__entry->hdr_psize = ab->b_psize; __entry->hdr_psize = ab->b_psize;
__entry->hdr_lsize = ab->b_lsize; __entry->hdr_lsize = ab->b_lsize;
__entry->hdr_spa = ab->b_spa; __entry->hdr_spa = ab->b_spa;
@ -84,12 +82,12 @@ DECLARE_EVENT_CLASS(zfs_arc_buf_hdr_class,
__entry->hdr_refcount = ab->b_l1hdr.b_refcnt.rc_count; __entry->hdr_refcount = ab->b_l1hdr.b_refcnt.rc_count;
), ),
TP_printk("hdr { dva 0x%llx:0x%llx birth %llu " TP_printk("hdr { dva 0x%llx:0x%llx birth %llu "
"flags 0x%x bufcnt %u type %u psize %u lsize %u spa %llu " "flags 0x%x type %u psize %u lsize %u spa %llu "
"state_type %u access %lu mru_hits %u mru_ghost_hits %u " "state_type %u access %lu mru_hits %u mru_ghost_hits %u "
"mfu_hits %u mfu_ghost_hits %u l2_hits %u refcount %lli }", "mfu_hits %u mfu_ghost_hits %u l2_hits %u refcount %lli }",
__entry->hdr_dva_word[0], __entry->hdr_dva_word[1], __entry->hdr_dva_word[0], __entry->hdr_dva_word[1],
__entry->hdr_birth, __entry->hdr_flags, __entry->hdr_birth, __entry->hdr_flags,
__entry->hdr_bufcnt, __entry->hdr_type, __entry->hdr_psize, __entry->hdr_type, __entry->hdr_psize,
__entry->hdr_lsize, __entry->hdr_spa, __entry->hdr_state_type, __entry->hdr_lsize, __entry->hdr_spa, __entry->hdr_state_type,
__entry->hdr_access, __entry->hdr_mru_hits, __entry->hdr_access, __entry->hdr_mru_hits,
__entry->hdr_mru_ghost_hits, __entry->hdr_mfu_hits, __entry->hdr_mru_ghost_hits, __entry->hdr_mfu_hits,
@ -192,7 +190,6 @@ DECLARE_EVENT_CLASS(zfs_arc_miss_class,
__array(uint64_t, hdr_dva_word, 2) __array(uint64_t, hdr_dva_word, 2)
__field(uint64_t, hdr_birth) __field(uint64_t, hdr_birth)
__field(uint32_t, hdr_flags) __field(uint32_t, hdr_flags)
__field(uint32_t, hdr_bufcnt)
__field(arc_buf_contents_t, hdr_type) __field(arc_buf_contents_t, hdr_type)
__field(uint16_t, hdr_psize) __field(uint16_t, hdr_psize)
__field(uint16_t, hdr_lsize) __field(uint16_t, hdr_lsize)
@ -223,7 +220,6 @@ DECLARE_EVENT_CLASS(zfs_arc_miss_class,
__entry->hdr_dva_word[1] = hdr->b_dva.dva_word[1]; __entry->hdr_dva_word[1] = hdr->b_dva.dva_word[1];
__entry->hdr_birth = hdr->b_birth; __entry->hdr_birth = hdr->b_birth;
__entry->hdr_flags = hdr->b_flags; __entry->hdr_flags = hdr->b_flags;
__entry->hdr_bufcnt = hdr->b_l1hdr.b_bufcnt;
__entry->hdr_psize = hdr->b_psize; __entry->hdr_psize = hdr->b_psize;
__entry->hdr_lsize = hdr->b_lsize; __entry->hdr_lsize = hdr->b_lsize;
__entry->hdr_spa = hdr->b_spa; __entry->hdr_spa = hdr->b_spa;
@ -255,7 +251,7 @@ DECLARE_EVENT_CLASS(zfs_arc_miss_class,
__entry->zb_blkid = zb->zb_blkid; __entry->zb_blkid = zb->zb_blkid;
), ),
TP_printk("hdr { dva 0x%llx:0x%llx birth %llu " TP_printk("hdr { dva 0x%llx:0x%llx birth %llu "
"flags 0x%x bufcnt %u psize %u lsize %u spa %llu state_type %u " "flags 0x%x psize %u lsize %u spa %llu state_type %u "
"access %lu mru_hits %u mru_ghost_hits %u mfu_hits %u " "access %lu mru_hits %u mru_ghost_hits %u mfu_hits %u "
"mfu_ghost_hits %u l2_hits %u refcount %lli } " "mfu_ghost_hits %u l2_hits %u refcount %lli } "
"bp { dva0 0x%llx:0x%llx dva1 0x%llx:0x%llx dva2 " "bp { dva0 0x%llx:0x%llx dva1 0x%llx:0x%llx dva2 "
@ -264,7 +260,7 @@ DECLARE_EVENT_CLASS(zfs_arc_miss_class,
"blkid %llu }", "blkid %llu }",
__entry->hdr_dva_word[0], __entry->hdr_dva_word[1], __entry->hdr_dva_word[0], __entry->hdr_dva_word[1],
__entry->hdr_birth, __entry->hdr_flags, __entry->hdr_birth, __entry->hdr_flags,
__entry->hdr_bufcnt, __entry->hdr_psize, __entry->hdr_lsize, __entry->hdr_psize, __entry->hdr_lsize,
__entry->hdr_spa, __entry->hdr_state_type, __entry->hdr_access, __entry->hdr_spa, __entry->hdr_state_type, __entry->hdr_access,
__entry->hdr_mru_hits, __entry->hdr_mru_ghost_hits, __entry->hdr_mru_hits, __entry->hdr_mru_ghost_hits,
__entry->hdr_mfu_hits, __entry->hdr_mfu_ghost_hits, __entry->hdr_mfu_hits, __entry->hdr_mfu_ghost_hits,

View File

@ -60,8 +60,12 @@
#define DBUF_TP_FAST_ASSIGN \ #define DBUF_TP_FAST_ASSIGN \
if (db != NULL) { \ if (db != NULL) { \
__assign_str(os_spa, \ if (POINTER_IS_VALID(DB_DNODE(db)->dn_objset)) { \
spa_name(DB_DNODE(db)->dn_objset->os_spa)); \ __assign_str(os_spa, \
spa_name(DB_DNODE(db)->dn_objset->os_spa)); \
} else { \
__assign_str(os_spa, "NULL"); \
} \
\ \
__entry->ds_object = db->db_objset->os_dsl_dataset ? \ __entry->ds_object = db->db_objset->os_dsl_dataset ? \
db->db_objset->os_dsl_dataset->ds_object : 0; \ db->db_objset->os_dsl_dataset->ds_object : 0; \

View File

@ -45,6 +45,8 @@ extern "C" {
typedef struct zfsvfs zfsvfs_t; typedef struct zfsvfs zfsvfs_t;
struct znode; struct znode;
extern int zfs_bclone_enabled;
/* /*
* This structure emulates the vfs_t from other platforms. It's purpose * This structure emulates the vfs_t from other platforms. It's purpose
* is to facilitate the handling of mount options and minimize structural * is to facilitate the handling of mount options and minimize structural

View File

@ -56,7 +56,12 @@ extern int zfs_mkdir(znode_t *dzp, char *dirname, vattr_t *vap,
extern int zfs_rmdir(znode_t *dzp, char *name, znode_t *cwd, extern int zfs_rmdir(znode_t *dzp, char *name, znode_t *cwd,
cred_t *cr, int flags); cred_t *cr, int flags);
extern int zfs_readdir(struct inode *ip, zpl_dir_context_t *ctx, cred_t *cr); extern int zfs_readdir(struct inode *ip, zpl_dir_context_t *ctx, cred_t *cr);
#ifdef HAVE_GENERIC_FILLATTR_IDMAP_REQMASK
extern int zfs_getattr_fast(zidmap_t *, u32 request_mask, struct inode *ip,
struct kstat *sp);
#else
extern int zfs_getattr_fast(zidmap_t *, struct inode *ip, struct kstat *sp); extern int zfs_getattr_fast(zidmap_t *, struct inode *ip, struct kstat *sp);
#endif
extern int zfs_setattr(znode_t *zp, vattr_t *vap, int flag, cred_t *cr, extern int zfs_setattr(znode_t *zp, vattr_t *vap, int flag, cred_t *cr,
zidmap_t *mnt_ns); zidmap_t *mnt_ns);
extern int zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, extern int zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp,

View File

@ -60,7 +60,7 @@ extern const struct file_operations zpl_file_operations;
extern const struct file_operations zpl_dir_file_operations; extern const struct file_operations zpl_dir_file_operations;
/* zpl_super.c */ /* zpl_super.c */
extern void zpl_prune_sb(int64_t nr_to_scan, void *arg); extern void zpl_prune_sb(uint64_t nr_to_scan, void *arg);
extern const struct super_operations zpl_super_operations; extern const struct super_operations zpl_super_operations;
extern const struct export_operations zpl_export_operations; extern const struct export_operations zpl_export_operations;
@ -272,4 +272,15 @@ extern long zpl_ioctl_fideduperange(struct file *filp, void *arg);
#define zpl_setattr_prepare(ns, dentry, ia) setattr_prepare(dentry, ia) #define zpl_setattr_prepare(ns, dentry, ia) setattr_prepare(dentry, ia)
#endif #endif
#ifdef HAVE_INODE_GET_CTIME
#define zpl_inode_get_ctime(ip) inode_get_ctime(ip)
#else
#define zpl_inode_get_ctime(ip) (ip->i_ctime)
#endif
#ifdef HAVE_INODE_SET_CTIME_TO_TS
#define zpl_inode_set_ctime_to_ts(ip, ts) inode_set_ctime_to_ts(ip, ts)
#else
#define zpl_inode_set_ctime_to_ts(ip, ts) (ip->i_ctime = ts)
#endif
#endif /* _SYS_ZPL_H */ #endif /* _SYS_ZPL_H */

View File

@ -81,7 +81,7 @@ typedef struct arc_prune arc_prune_t;
typedef void arc_read_done_func_t(zio_t *zio, const zbookmark_phys_t *zb, typedef void arc_read_done_func_t(zio_t *zio, const zbookmark_phys_t *zb,
const blkptr_t *bp, arc_buf_t *buf, void *priv); const blkptr_t *bp, arc_buf_t *buf, void *priv);
typedef void arc_write_done_func_t(zio_t *zio, arc_buf_t *buf, void *priv); typedef void arc_write_done_func_t(zio_t *zio, arc_buf_t *buf, void *priv);
typedef void arc_prune_func_t(int64_t bytes, void *priv); typedef void arc_prune_func_t(uint64_t bytes, void *priv);
/* Shared module parameters */ /* Shared module parameters */
extern uint_t zfs_arc_average_blocksize; extern uint_t zfs_arc_average_blocksize;

View File

@ -159,10 +159,6 @@ struct arc_write_callback {
* these two allocation states. * these two allocation states.
*/ */
typedef struct l1arc_buf_hdr { typedef struct l1arc_buf_hdr {
/* for waiting on reads to complete */
kcondvar_t b_cv;
uint8_t b_byteswap;
/* protected by arc state mutex */ /* protected by arc state mutex */
arc_state_t *b_state; arc_state_t *b_state;
multilist_node_t b_arc_node; multilist_node_t b_arc_node;
@ -173,7 +169,7 @@ typedef struct l1arc_buf_hdr {
uint32_t b_mru_ghost_hits; uint32_t b_mru_ghost_hits;
uint32_t b_mfu_hits; uint32_t b_mfu_hits;
uint32_t b_mfu_ghost_hits; uint32_t b_mfu_ghost_hits;
uint32_t b_bufcnt; uint8_t b_byteswap;
arc_buf_t *b_buf; arc_buf_t *b_buf;
/* self protecting */ /* self protecting */
@ -436,12 +432,12 @@ typedef struct l2arc_dev {
*/ */
typedef struct arc_buf_hdr_crypt { typedef struct arc_buf_hdr_crypt {
abd_t *b_rabd; /* raw encrypted data */ abd_t *b_rabd; /* raw encrypted data */
dmu_object_type_t b_ot; /* object type */
uint32_t b_ebufcnt; /* count of encrypted buffers */
/* dsobj for looking up encryption key for l2arc encryption */ /* dsobj for looking up encryption key for l2arc encryption */
uint64_t b_dsobj; uint64_t b_dsobj;
dmu_object_type_t b_ot; /* object type */
/* encryption parameters */ /* encryption parameters */
uint8_t b_salt[ZIO_DATA_SALT_LEN]; uint8_t b_salt[ZIO_DATA_SALT_LEN];
uint8_t b_iv[ZIO_DATA_IV_LEN]; uint8_t b_iv[ZIO_DATA_IV_LEN];
@ -1069,7 +1065,6 @@ extern void arc_wait_for_eviction(uint64_t, boolean_t);
extern void arc_lowmem_init(void); extern void arc_lowmem_init(void);
extern void arc_lowmem_fini(void); extern void arc_lowmem_fini(void);
extern void arc_prune_async(uint64_t);
extern int arc_memory_throttle(spa_t *spa, uint64_t reserve, uint64_t txg); extern int arc_memory_throttle(spa_t *spa, uint64_t reserve, uint64_t txg);
extern uint64_t arc_free_memory(void); extern uint64_t arc_free_memory(void);
extern int64_t arc_available_memory(void); extern int64_t arc_available_memory(void);

View File

@ -250,7 +250,6 @@ struct metaslab_group {
int64_t mg_activation_count; int64_t mg_activation_count;
metaslab_class_t *mg_class; metaslab_class_t *mg_class;
vdev_t *mg_vd; vdev_t *mg_vd;
taskq_t *mg_taskq;
metaslab_group_t *mg_prev; metaslab_group_t *mg_prev;
metaslab_group_t *mg_next; metaslab_group_t *mg_next;

View File

@ -837,7 +837,7 @@ extern kmutex_t spa_namespace_lock;
extern void spa_write_cachefile(spa_t *, boolean_t, boolean_t, boolean_t); extern void spa_write_cachefile(spa_t *, boolean_t, boolean_t, boolean_t);
extern void spa_config_load(void); extern void spa_config_load(void);
extern nvlist_t *spa_all_configs(uint64_t *); extern int spa_all_configs(uint64_t *generation, nvlist_t **pools);
extern void spa_config_set(spa_t *spa, nvlist_t *config); extern void spa_config_set(spa_t *spa, nvlist_t *config);
extern nvlist_t *spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, extern nvlist_t *spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg,
int getstats); int getstats);

View File

@ -423,7 +423,9 @@ struct spa {
hrtime_t spa_ccw_fail_time; /* Conf cache write fail time */ hrtime_t spa_ccw_fail_time; /* Conf cache write fail time */
taskq_t *spa_zvol_taskq; /* Taskq for minor management */ taskq_t *spa_zvol_taskq; /* Taskq for minor management */
taskq_t *spa_metaslab_taskq; /* Taskq for metaslab preload */
taskq_t *spa_prefetch_taskq; /* Taskq for prefetch threads */ taskq_t *spa_prefetch_taskq; /* Taskq for prefetch threads */
taskq_t *spa_upgrade_taskq; /* Taskq for upgrade jobs */
uint64_t spa_multihost; /* multihost aware (mmp) */ uint64_t spa_multihost; /* multihost aware (mmp) */
mmp_thread_t spa_mmp; /* multihost mmp thread */ mmp_thread_t spa_mmp; /* multihost mmp thread */
list_t spa_leaf_list; /* list of leaf vdevs */ list_t spa_leaf_list; /* list of leaf vdevs */
@ -447,8 +449,6 @@ struct spa {
*/ */
spa_config_lock_t spa_config_lock[SCL_LOCKS]; /* config changes */ spa_config_lock_t spa_config_lock[SCL_LOCKS]; /* config changes */
zfs_refcount_t spa_refcount; /* number of opens */ zfs_refcount_t spa_refcount; /* number of opens */
taskq_t *spa_upgrade_taskq; /* taskq for upgrade jobs */
}; };
extern char *spa_config_path; extern char *spa_config_path;

View File

@ -73,8 +73,7 @@ struct tx_cpu {
kcondvar_t tc_cv[TXG_SIZE]; kcondvar_t tc_cv[TXG_SIZE];
uint64_t tc_count[TXG_SIZE]; /* tx hold count on each txg */ uint64_t tc_count[TXG_SIZE]; /* tx hold count on each txg */
list_t tc_callbacks[TXG_SIZE]; /* commit cb list */ list_t tc_callbacks[TXG_SIZE]; /* commit cb list */
char tc_pad[8]; /* pad to fill 3 cache lines */ } ____cacheline_aligned;
};
/* /*
* The tx_state structure maintains the state information about the different * The tx_state structure maintains the state information about the different

View File

@ -131,7 +131,10 @@ typedef const struct vdev_ops {
* Virtual device properties * Virtual device properties
*/ */
typedef union vdev_queue_class { typedef union vdev_queue_class {
list_t vqc_list; struct {
ulong_t vqc_list_numnodes;
list_t vqc_list;
};
avl_tree_t vqc_tree; avl_tree_t vqc_tree;
} vdev_queue_class_t; } vdev_queue_class_t;

View File

@ -130,7 +130,7 @@ typedef struct raidz_row {
uint64_t rr_offset; /* Logical offset for *_io_verify() */ uint64_t rr_offset; /* Logical offset for *_io_verify() */
uint64_t rr_size; /* Physical size for *_io_verify() */ uint64_t rr_size; /* Physical size for *_io_verify() */
#endif #endif
raidz_col_t rr_col[0]; /* Flexible array of I/O columns */ raidz_col_t rr_col[]; /* Flexible array of I/O columns */
} raidz_row_t; } raidz_row_t;
typedef struct raidz_map { typedef struct raidz_map {
@ -139,7 +139,7 @@ typedef struct raidz_map {
int rm_nskip; /* RAIDZ sectors skipped for padding */ int rm_nskip; /* RAIDZ sectors skipped for padding */
int rm_skipstart; /* Column index of padding start */ int rm_skipstart; /* Column index of padding start */
const raidz_impl_ops_t *rm_ops; /* RAIDZ math operations */ const raidz_impl_ops_t *rm_ops; /* RAIDZ math operations */
raidz_row_t *rm_row[0]; /* flexible array of rows */ raidz_row_t *rm_row[]; /* flexible array of rows */
} raidz_map_t; } raidz_map_t;

View File

@ -274,11 +274,13 @@ typedef struct kmutex {
extern void mutex_init(kmutex_t *mp, char *name, int type, void *cookie); extern void mutex_init(kmutex_t *mp, char *name, int type, void *cookie);
extern void mutex_destroy(kmutex_t *mp); extern void mutex_destroy(kmutex_t *mp);
extern void mutex_enter(kmutex_t *mp); extern void mutex_enter(kmutex_t *mp);
extern int mutex_enter_check_return(kmutex_t *mp);
extern void mutex_exit(kmutex_t *mp); extern void mutex_exit(kmutex_t *mp);
extern int mutex_tryenter(kmutex_t *mp); extern int mutex_tryenter(kmutex_t *mp);
#define NESTED_SINGLE 1 #define NESTED_SINGLE 1
#define mutex_enter_nested(mp, class) mutex_enter(mp) #define mutex_enter_nested(mp, class) mutex_enter(mp)
#define mutex_enter_interruptible(mp) mutex_enter_check_return(mp)
/* /*
* RW locks * RW locks
*/ */

View File

@ -515,6 +515,8 @@
<elf-symbol name='zpool_open' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='zpool_open' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_open_canfail' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='zpool_open_canfail' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_pool_state_to_name' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='zpool_pool_state_to_name' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_prepare_and_label_disk' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_prepare_disk' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_print_unsup_feat' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='zpool_print_unsup_feat' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_prop_align_right' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='zpool_prop_align_right' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_prop_column_name' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='zpool_prop_column_name' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
@ -562,6 +564,8 @@
<elf-symbol name='zpool_vdev_remove' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='zpool_vdev_remove' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_vdev_remove_cancel' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='zpool_vdev_remove_cancel' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_vdev_remove_wanted' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='zpool_vdev_remove_wanted' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_vdev_script_alloc_env' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_vdev_script_free_env' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_vdev_split' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='zpool_vdev_split' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_wait' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='zpool_wait' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_wait_status' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='zpool_wait_status' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>

View File

@ -2071,3 +2071,196 @@ printf_color(const char *color, const char *format, ...)
return (rc); return (rc);
} }
/* PATH + 5 env vars + a NULL entry = 7 */
#define ZPOOL_VDEV_SCRIPT_ENV_COUNT 7
/*
* There's a few places where ZFS will call external scripts (like the script
* in zpool.d/ and `zfs_prepare_disk`). These scripts are called with a
* reduced $PATH, and some vdev specific environment vars set. This function
* will allocate an populate the environment variable array that is passed to
* these scripts. The user must free the arrays with zpool_vdev_free_env() when
* they are done.
*
* The following env vars will be set (but value could be blank):
*
* POOL_NAME
* VDEV_PATH
* VDEV_UPATH
* VDEV_ENC_SYSFS_PATH
*
* In addition, you can set an optional environment variable named 'opt_key'
* to 'opt_val' if you want.
*
* Returns allocated env[] array on success, NULL otherwise.
*/
char **
zpool_vdev_script_alloc_env(const char *pool_name,
const char *vdev_path, const char *vdev_upath,
const char *vdev_enc_sysfs_path, const char *opt_key, const char *opt_val)
{
char **env = NULL;
int rc;
env = calloc(ZPOOL_VDEV_SCRIPT_ENV_COUNT, sizeof (*env));
if (!env)
return (NULL);
env[0] = strdup("PATH=/bin:/sbin:/usr/bin:/usr/sbin");
if (!env[0])
goto error;
/* Setup our custom environment variables */
rc = asprintf(&env[1], "POOL_NAME=%s", pool_name ? pool_name : "");
if (rc == -1) {
env[1] = NULL;
goto error;
}
rc = asprintf(&env[2], "VDEV_PATH=%s", vdev_path ? vdev_path : "");
if (rc == -1) {
env[2] = NULL;
goto error;
}
rc = asprintf(&env[3], "VDEV_UPATH=%s", vdev_upath ? vdev_upath : "");
if (rc == -1) {
env[3] = NULL;
goto error;
}
rc = asprintf(&env[4], "VDEV_ENC_SYSFS_PATH=%s",
vdev_enc_sysfs_path ? vdev_enc_sysfs_path : "");
if (rc == -1) {
env[4] = NULL;
goto error;
}
if (opt_key != NULL) {
rc = asprintf(&env[5], "%s=%s", opt_key,
opt_val ? opt_val : "");
if (rc == -1) {
env[5] = NULL;
goto error;
}
}
return (env);
error:
for (int i = 0; i < ZPOOL_VDEV_SCRIPT_ENV_COUNT; i++)
free(env[i]);
free(env);
return (NULL);
}
/*
* Free the env[] array that was allocated by zpool_vdev_script_alloc_env().
*/
void
zpool_vdev_script_free_env(char **env)
{
for (int i = 0; i < ZPOOL_VDEV_SCRIPT_ENV_COUNT; i++)
free(env[i]);
free(env);
}
/*
* Prepare a disk by (optionally) running a program before labeling the disk.
* This can be useful for installing disk firmware or doing some pre-flight
* checks on the disk before it becomes part of the pool. The program run is
* located at ZFSEXECDIR/zfs_prepare_disk
* (E.x: /usr/local/libexec/zfs/zfs_prepare_disk).
*
* Return 0 on success, non-zero on failure.
*/
int
zpool_prepare_disk(zpool_handle_t *zhp, nvlist_t *vdev_nv,
const char *prepare_str, char **lines[], int *lines_cnt)
{
const char *script_path = ZFSEXECDIR "/zfs_prepare_disk";
const char *pool_name;
int rc = 0;
/* Path to script and a NULL entry */
char *argv[2] = {(char *)script_path};
char **env = NULL;
const char *path = NULL, *enc_sysfs_path = NULL;
char *upath;
*lines_cnt = 0;
if (access(script_path, X_OK) != 0) {
/* No script, nothing to do */
return (0);
}
(void) nvlist_lookup_string(vdev_nv, ZPOOL_CONFIG_PATH, &path);
(void) nvlist_lookup_string(vdev_nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH,
&enc_sysfs_path);
upath = zfs_get_underlying_path(path);
pool_name = zhp ? zpool_get_name(zhp) : NULL;
env = zpool_vdev_script_alloc_env(pool_name, path, upath,
enc_sysfs_path, "VDEV_PREPARE", prepare_str);
free(upath);
if (env == NULL) {
return (ENOMEM);
}
rc = libzfs_run_process_get_stdout(script_path, argv, env, lines,
lines_cnt);
zpool_vdev_script_free_env(env);
return (rc);
}
/*
* Optionally run a script and then label a disk. The script can be used to
* prepare a disk for inclusion into the pool. For example, it might update
* the disk's firmware or check its health.
*
* The 'name' provided is the short name, stripped of any leading
* /dev path, and is passed to zpool_label_disk. vdev_nv is the nvlist for
* the vdev. prepare_str is a string that gets passed as the VDEV_PREPARE
* env variable to the script.
*
* The following env vars are passed to the script:
*
* POOL_NAME: The pool name (blank during zpool create)
* VDEV_PREPARE: Reason why the disk is being prepared for inclusion:
* "create", "add", "replace", or "autoreplace"
* VDEV_PATH: Path to the disk
* VDEV_UPATH: One of the 'underlying paths' to the disk. This is
* useful for DM devices.
* VDEV_ENC_SYSFS_PATH: Path to the disk's enclosure sysfs path, if available.
*
* Note, some of these values can be blank.
*
* Return 0 on success, non-zero otherwise.
*/
int
zpool_prepare_and_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp,
const char *name, nvlist_t *vdev_nv, const char *prepare_str,
char **lines[], int *lines_cnt)
{
int rc;
char vdev_path[MAXPATHLEN];
(void) snprintf(vdev_path, sizeof (vdev_path), "%s/%s", DISK_ROOT,
name);
/* zhp will be NULL when creating a pool */
rc = zpool_prepare_disk(zhp, vdev_nv, prepare_str, lines, lines_cnt);
if (rc != 0)
return (rc);
rc = zpool_label_disk(hdl, zhp, name);
return (rc);
}

View File

@ -205,6 +205,15 @@ mutex_enter(kmutex_t *mp)
mp->m_owner = pthread_self(); mp->m_owner = pthread_self();
} }
int
mutex_enter_check_return(kmutex_t *mp)
{
int error = pthread_mutex_lock(&mp->m_lock);
if (error == 0)
mp->m_owner = pthread_self();
return (error);
}
int int
mutex_tryenter(kmutex_t *mp) mutex_tryenter(kmutex_t *mp)
{ {

View File

@ -582,9 +582,8 @@ zfs_device_get_physical(struct udev_device *dev, char *bufptr, size_t buflen)
* Wait up to timeout_ms for udev to set up the device node. The device is * Wait up to timeout_ms for udev to set up the device node. The device is
* considered ready when libudev determines it has been initialized, all of * considered ready when libudev determines it has been initialized, all of
* the device links have been verified to exist, and it has been allowed to * the device links have been verified to exist, and it has been allowed to
* settle. At this point the device the device can be accessed reliably. * settle. At this point the device can be accessed reliably. Depending on
* Depending on the complexity of the udev rules this process could take * the complexity of the udev rules this process could take several seconds.
* several seconds.
*/ */
int int
zpool_label_disk_wait(const char *path, int timeout_ms) zpool_label_disk_wait(const char *path, int timeout_ms)

View File

@ -62,6 +62,7 @@ dist_man_MANS = \
%D%/man8/zfs-userspace.8 \ %D%/man8/zfs-userspace.8 \
%D%/man8/zfs-wait.8 \ %D%/man8/zfs-wait.8 \
%D%/man8/zfs_ids_to_path.8 \ %D%/man8/zfs_ids_to_path.8 \
%D%/man8/zfs_prepare_disk.8 \
%D%/man8/zgenhostid.8 \ %D%/man8/zgenhostid.8 \
%D%/man8/zinject.8 \ %D%/man8/zinject.8 \
%D%/man8/zpool.8 \ %D%/man8/zpool.8 \

View File

@ -402,6 +402,12 @@ Practical upper limit of total metaslabs per top-level vdev.
.It Sy metaslab_preload_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int .It Sy metaslab_preload_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int
Enable metaslab group preloading. Enable metaslab group preloading.
. .
.It Sy metaslab_preload_limit Ns = Ns Sy 10 Pq uint
Maximum number of metaslabs per group to preload
.
.It Sy metaslab_preload_pct Ns = Ns Sy 50 Pq uint
Percentage of CPUs to run a metaslab preload taskq
.
.It Sy metaslab_lba_weighting_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int .It Sy metaslab_lba_weighting_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int
Give more weight to metaslabs with lower LBAs, Give more weight to metaslabs with lower LBAs,
assuming they have greater bandwidth, assuming they have greater bandwidth,
@ -1131,6 +1137,11 @@ Selecting any option other than
results in vector instructions results in vector instructions
from the respective CPU instruction set being used. from the respective CPU instruction set being used.
. .
.It Sy zfs_bclone_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int
Enable the experimental block cloning feature.
If this setting is 0, then even if feature@block_cloning is enabled,
attempts to clone blocks will act as though the feature is disabled.
.
.It Sy zfs_blake3_impl Ns = Ns Sy fastest Pq string .It Sy zfs_blake3_impl Ns = Ns Sy fastest Pq string
Select a BLAKE3 implementation. Select a BLAKE3 implementation.
.Pp .Pp
@ -2144,6 +2155,11 @@ On very fragmented pools, lowering this
.Pq typically to Sy 36 KiB .Pq typically to Sy 36 KiB
can improve performance. can improve performance.
. .
.It Sy zil_maxcopied Ns = Ns Sy 7680 Ns B Po 7.5 KiB Pc Pq uint
This sets the maximum number of write bytes logged via WR_COPIED.
It tunes a tradeoff between additional memory copy and possibly worse log
space efficiency vs additional range lock/unlock.
.
.It Sy zil_min_commit_timeout Ns = Ns Sy 5000 Pq u64 .It Sy zil_min_commit_timeout Ns = Ns Sy 5000 Pq u64
This sets the minimum delay in nanoseconds ZIL care to delay block commit, This sets the minimum delay in nanoseconds ZIL care to delay block commit,
waiting for more records. waiting for more records.
@ -2161,7 +2177,7 @@ if a volatile out-of-order write cache is enabled.
Disable intent logging replay. Disable intent logging replay.
Can be disabled for recovery from corrupted ZIL. Can be disabled for recovery from corrupted ZIL.
. .
.It Sy zil_slog_bulk Ns = Ns Sy 786432 Ns B Po 768 KiB Pc Pq u64 .It Sy zil_slog_bulk Ns = Ns Sy 67108864 Ns B Po 64 MiB Pc Pq u64
Limit SLOG write size per commit executed with synchronous priority. Limit SLOG write size per commit executed with synchronous priority.
Any writes above that will be executed with lower (asynchronous) priority Any writes above that will be executed with lower (asynchronous) priority
to limit potential SLOG device abuse by single active ZIL writer. to limit potential SLOG device abuse by single active ZIL writer.

View File

@ -28,8 +28,9 @@
.\" Copyright 2019 Richard Laager. All rights reserved. .\" Copyright 2019 Richard Laager. All rights reserved.
.\" Copyright 2018 Nexenta Systems, Inc. .\" Copyright 2018 Nexenta Systems, Inc.
.\" Copyright 2019 Joyent, Inc. .\" Copyright 2019 Joyent, Inc.
.\" Copyright 2023 Klara, Inc.
.\" .\"
.Dd June 30, 2019 .Dd October 6, 2023
.Dt ZFSCONCEPTS 7 .Dt ZFSCONCEPTS 7
.Os .Os
. .
@ -205,3 +206,40 @@ practices, such as regular backups.
Consider using the Consider using the
.Sy compression .Sy compression
property as a less resource-intensive alternative. property as a less resource-intensive alternative.
.Ss Block cloning
Block cloning is a facility that allows a file (or parts of a file) to be
.Qq cloned ,
that is, a shallow copy made where the existing data blocks are referenced
rather than copied.
Later modifications to the data will cause a copy of the data block to be taken
and that copy modified.
This facility is used to implement
.Qq reflinks
or
.Qq file-level copy-on-write .
.Pp
Cloned blocks are tracked in a special on-disk structure called the Block
Reference Table
.Po BRT
.Pc .
Unlike deduplication, this table has minimal overhead, so can be enabled at all
times.
.Pp
Also unlike deduplication, cloning must be requested by a user program.
Many common file copying programs, including newer versions of
.Nm /bin/cp ,
will try to create clones automatically.
Look for
.Qq clone ,
.Qq dedupe
or
.Qq reflink
in the documentation for more information.
.Pp
There are some limitations to block cloning.
Only whole blocks can be cloned, and blocks can not be cloned if they are not
yet written to disk, or if they are encrypted, or the source and destination
.Sy recordsize
properties differ.
The OS may add additional restrictions;
for example, most versions of Linux will not allow clones across datasets.

View File

@ -219,8 +219,11 @@ to the end of the line is ignored.
.Bd -literal -compact -offset 4n .Bd -literal -compact -offset 4n
.No example# Nm cat Pa /usr/share/zfs/compatibility.d/grub2 .No example# Nm cat Pa /usr/share/zfs/compatibility.d/grub2
# Features which are supported by GRUB2 # Features which are supported by GRUB2
allocation_classes
async_destroy async_destroy
block_cloning
bookmarks bookmarks
device_rebuild
embedded_data embedded_data
empty_bpobj empty_bpobj
enabled_txg enabled_txg
@ -229,8 +232,14 @@ filesystem_limits
hole_birth hole_birth
large_blocks large_blocks
livelist livelist
log_spacemap
lz4_compress lz4_compress
project_quota
resilver_defer
spacemap_histogram spacemap_histogram
spacemap_v2
userobj_accounting
zilsaxattr
zpool_checkpoint zpool_checkpoint
.No example# Nm zpool Cm create Fl o Sy compatibility Ns = Ns Ar grub2 Ar bootpool Ar vdev .No example# Nm zpool Cm create Fl o Sy compatibility Ns = Ns Ar grub2 Ar bootpool Ar vdev

1
man/man8/.gitignore vendored
View File

@ -1,2 +1,3 @@
/zed.8 /zed.8
/zfs-mount-generator.8 /zfs-mount-generator.8
/zfs_prepare_disk.8

View File

@ -0,0 +1,70 @@
.\"
.\" Developed at Lawrence Livermore National Laboratory (LLNL-CODE-403049).
.\" Copyright (C) 2023 Lawrence Livermore National Security, LLC.
.\" Refer to the OpenZFS git commit log for authoritative copyright attribution.
.\"
.\" The contents of this file are subject to the terms of the
.\" Common Development and Distribution License Version 1.0 (CDDL-1.0).
.\" You can obtain a copy of the license from the top-level file
.\" "OPENSOLARIS.LICENSE" or at <http://opensource.org/licenses/CDDL-1.0>.
.\" You may not use this file except in compliance with the license.
.\"
.\" Developed at Lawrence Livermore National Laboratory (LLNL-CODE-403049)
.\"
.Dd August 30, 2023
.Dt ZFS_PREPARE_DISK 8
.Os
.
.Sh NAME
.Nm zfs_prepare_disk
.Nd special script that gets run before bringing a disk into a pool
.Sh DESCRIPTION
.Nm
is an optional script that gets called by libzfs before bringing a disk into a
pool.
It can be modified by the user to run whatever commands are necessary to prepare
a disk for inclusion into the pool.
For example, users can add lines to
.Nm zfs_prepare_disk
to do things like update the drive's firmware or check the drive's health.
.Nm zfs_prepare_disk
is optional and can be removed if not needed.
libzfs will look for the script at @zfsexecdir@/zfs_prepare_disk.
.
.Ss Properties
.Nm zfs_prepare_disk
will be passed the following environment variables:
.sp
.Bl -tag -compact -width "VDEV_ENC_SYSFS_PATH"
.
.It Nm POOL_NAME
.No Name of the pool
.It Nm VDEV_PATH
.No Path to the disk (like /dev/sda)
.It Nm VDEV_PREPARE
.No Reason why the disk is being prepared for inclusion
('create', 'add', 'replace', or 'autoreplace').
This can be useful if you only want the script to be run under certain actions.
.It Nm VDEV_UPATH
.No Path to one of the underlying devices for the
disk.
For multipath this would return one of the /dev/sd* paths to the disk.
If the device is not a device mapper device, then
.Nm VDEV_UPATH
just returns the same value as
.Nm VDEV_PATH
.It Nm VDEV_ENC_SYSFS_PATH
.No Path to the disk's enclosure sysfs path, if available
.El
.Pp
Note that some of these variables may have a blank value.
.Nm POOL_NAME
is blank at pool creation time, for example.
.Sh ENVIRONMENT
.Nm zfs_prepare_disk
runs with a limited $PATH.
.Sh EXIT STATUS
.Nm zfs_prepare_disk
should return 0 on success, non-zero otherwise.
If non-zero is returned, the disk will not be included in the pool.
.

View File

@ -489,6 +489,10 @@ zfs-$(CONFIG_ARM64) += $(addprefix zfs/,$(ZFS_OBJS_ARM64))
zfs-$(CONFIG_PPC) += $(addprefix zfs/,$(ZFS_OBJS_PPC_PPC64)) zfs-$(CONFIG_PPC) += $(addprefix zfs/,$(ZFS_OBJS_PPC_PPC64))
zfs-$(CONFIG_PPC64) += $(addprefix zfs/,$(ZFS_OBJS_PPC_PPC64)) zfs-$(CONFIG_PPC64) += $(addprefix zfs/,$(ZFS_OBJS_PPC_PPC64))
UBSAN_SANITIZE_zap_leaf.o := n
UBSAN_SANITIZE_zap_micro.o := n
UBSAN_SANITIZE_sa.o := n
# Suppress incorrect warnings from versions of objtool which are not # Suppress incorrect warnings from versions of objtool which are not
# aware of x86 EVEX prefix instructions used for AVX512. # aware of x86 EVEX prefix instructions used for AVX512.
OBJECT_FILES_NON_STANDARD_vdev_raidz_math_avx512bw.o := y OBJECT_FILES_NON_STANDARD_vdev_raidz_math_avx512bw.o := y

View File

@ -49,6 +49,7 @@
.type zfs_sha256_block_armv7,%function .type zfs_sha256_block_armv7,%function
.align 6 .align 6
zfs_sha256_block_armv7: zfs_sha256_block_armv7:
hint #34 // bti c
stp x29,x30,[sp,#-128]! stp x29,x30,[sp,#-128]!
add x29,sp,#0 add x29,sp,#0
@ -1015,6 +1016,7 @@ zfs_sha256_block_armv7:
.type zfs_sha256_block_armv8,%function .type zfs_sha256_block_armv8,%function
.align 6 .align 6
zfs_sha256_block_armv8: zfs_sha256_block_armv8:
hint #34 // bti c
.Lv8_entry: .Lv8_entry:
stp x29,x30,[sp,#-16]! stp x29,x30,[sp,#-16]!
add x29,sp,#0 add x29,sp,#0
@ -1155,6 +1157,7 @@ zfs_sha256_block_armv8:
.type zfs_sha256_block_neon,%function .type zfs_sha256_block_neon,%function
.align 4 .align 4
zfs_sha256_block_neon: zfs_sha256_block_neon:
hint #34 // bti c
.Lneon_entry: .Lneon_entry:
stp x29, x30, [sp, #-16]! stp x29, x30, [sp, #-16]!
mov x29, sp mov x29, sp

View File

@ -73,6 +73,7 @@
.type zfs_sha512_block_armv7,%function .type zfs_sha512_block_armv7,%function
.align 6 .align 6
zfs_sha512_block_armv7: zfs_sha512_block_armv7:
hint #34 // bti c
stp x29,x30,[sp,#-128]! stp x29,x30,[sp,#-128]!
add x29,sp,#0 add x29,sp,#0
@ -1040,6 +1041,7 @@ zfs_sha512_block_armv7:
.type zfs_sha512_block_armv8,%function .type zfs_sha512_block_armv8,%function
.align 6 .align 6
zfs_sha512_block_armv8: zfs_sha512_block_armv8:
hint #34 // bti c
.Lv8_entry: .Lv8_entry:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later
stp x29,x30,[sp,#-16]! stp x29,x30,[sp,#-16]!

View File

@ -30,8 +30,6 @@
__FBSDID("$FreeBSD$"); __FBSDID("$FreeBSD$");
#include <sys/param.h> #include <sys/param.h>
#include <sys/ck.h>
#include <sys/epoch.h>
#include <sys/kernel.h> #include <sys/kernel.h>
#include <sys/kmem.h> #include <sys/kmem.h>
#include <sys/lock.h> #include <sys/lock.h>
@ -66,11 +64,9 @@ taskq_t *dynamic_taskq = NULL;
proc_t *system_proc; proc_t *system_proc;
extern int uma_align_cache;
static MALLOC_DEFINE(M_TASKQ, "taskq", "taskq structures"); static MALLOC_DEFINE(M_TASKQ, "taskq", "taskq structures");
static CK_LIST_HEAD(tqenthashhead, taskq_ent) *tqenthashtbl; static LIST_HEAD(tqenthashhead, taskq_ent) *tqenthashtbl;
static unsigned long tqenthash; static unsigned long tqenthash;
static unsigned long tqenthashlock; static unsigned long tqenthashlock;
static struct sx *tqenthashtbl_lock; static struct sx *tqenthashtbl_lock;
@ -80,8 +76,8 @@ static taskqid_t tqidnext;
#define TQIDHASH(tqid) (&tqenthashtbl[(tqid) & tqenthash]) #define TQIDHASH(tqid) (&tqenthashtbl[(tqid) & tqenthash])
#define TQIDHASHLOCK(tqid) (&tqenthashtbl_lock[((tqid) & tqenthashlock)]) #define TQIDHASHLOCK(tqid) (&tqenthashtbl_lock[((tqid) & tqenthashlock)])
#define NORMAL_TASK 0
#define TIMEOUT_TASK 1 #define TIMEOUT_TASK 1
#define NORMAL_TASK 2
static void static void
system_taskq_init(void *arg) system_taskq_init(void *arg)
@ -121,7 +117,7 @@ system_taskq_fini(void *arg)
for (i = 0; i < tqenthashlock + 1; i++) for (i = 0; i < tqenthashlock + 1; i++)
sx_destroy(&tqenthashtbl_lock[i]); sx_destroy(&tqenthashtbl_lock[i]);
for (i = 0; i < tqenthash + 1; i++) for (i = 0; i < tqenthash + 1; i++)
VERIFY(CK_LIST_EMPTY(&tqenthashtbl[i])); VERIFY(LIST_EMPTY(&tqenthashtbl[i]));
free(tqenthashtbl_lock, M_TASKQ); free(tqenthashtbl_lock, M_TASKQ);
free(tqenthashtbl, M_TASKQ); free(tqenthashtbl, M_TASKQ);
} }
@ -162,27 +158,27 @@ taskq_lookup(taskqid_t tqid)
{ {
taskq_ent_t *ent = NULL; taskq_ent_t *ent = NULL;
sx_xlock(TQIDHASHLOCK(tqid)); if (tqid == 0)
CK_LIST_FOREACH(ent, TQIDHASH(tqid), tqent_hash) { return (NULL);
sx_slock(TQIDHASHLOCK(tqid));
LIST_FOREACH(ent, TQIDHASH(tqid), tqent_hash) {
if (ent->tqent_id == tqid) if (ent->tqent_id == tqid)
break; break;
} }
if (ent != NULL) if (ent != NULL)
refcount_acquire(&ent->tqent_rc); refcount_acquire(&ent->tqent_rc);
sx_xunlock(TQIDHASHLOCK(tqid)); sx_sunlock(TQIDHASHLOCK(tqid));
return (ent); return (ent);
} }
static taskqid_t static taskqid_t
taskq_insert(taskq_ent_t *ent) taskq_insert(taskq_ent_t *ent)
{ {
taskqid_t tqid; taskqid_t tqid = __taskq_genid();
tqid = __taskq_genid();
ent->tqent_id = tqid; ent->tqent_id = tqid;
ent->tqent_registered = B_TRUE;
sx_xlock(TQIDHASHLOCK(tqid)); sx_xlock(TQIDHASHLOCK(tqid));
CK_LIST_INSERT_HEAD(TQIDHASH(tqid), ent, tqent_hash); LIST_INSERT_HEAD(TQIDHASH(tqid), ent, tqent_hash);
sx_xunlock(TQIDHASHLOCK(tqid)); sx_xunlock(TQIDHASHLOCK(tqid));
return (tqid); return (tqid);
} }
@ -192,13 +188,14 @@ taskq_remove(taskq_ent_t *ent)
{ {
taskqid_t tqid = ent->tqent_id; taskqid_t tqid = ent->tqent_id;
if (!ent->tqent_registered) if (tqid == 0)
return; return;
sx_xlock(TQIDHASHLOCK(tqid)); sx_xlock(TQIDHASHLOCK(tqid));
CK_LIST_REMOVE(ent, tqent_hash); if (ent->tqent_id != 0) {
LIST_REMOVE(ent, tqent_hash);
ent->tqent_id = 0;
}
sx_xunlock(TQIDHASHLOCK(tqid)); sx_xunlock(TQIDHASHLOCK(tqid));
ent->tqent_registered = B_FALSE;
} }
static void static void
@ -285,21 +282,22 @@ taskq_cancel_id(taskq_t *tq, taskqid_t tid)
int rc; int rc;
taskq_ent_t *ent; taskq_ent_t *ent;
if (tid == 0)
return (0);
if ((ent = taskq_lookup(tid)) == NULL) if ((ent = taskq_lookup(tid)) == NULL)
return (0); return (0);
ent->tqent_cancelled = B_TRUE; if (ent->tqent_type == NORMAL_TASK) {
if (ent->tqent_type == TIMEOUT_TASK) { rc = taskqueue_cancel(tq->tq_queue, &ent->tqent_task, &pend);
if (rc == EBUSY)
taskqueue_drain(tq->tq_queue, &ent->tqent_task);
} else {
rc = taskqueue_cancel_timeout(tq->tq_queue, rc = taskqueue_cancel_timeout(tq->tq_queue,
&ent->tqent_timeout_task, &pend); &ent->tqent_timeout_task, &pend);
} else if (rc == EBUSY) {
rc = taskqueue_cancel(tq->tq_queue, &ent->tqent_task, &pend); taskqueue_drain_timeout(tq->tq_queue,
if (rc == EBUSY) { &ent->tqent_timeout_task);
taskqueue_drain(tq->tq_queue, &ent->tqent_task); }
} else if (pend) { }
if (pend) {
/* /*
* Tasks normally free themselves when run, but here the task * Tasks normally free themselves when run, but here the task
* was cancelled so it did not free itself. * was cancelled so it did not free itself.
@ -312,12 +310,13 @@ taskq_cancel_id(taskq_t *tq, taskqid_t tid)
} }
static void static void
taskq_run(void *arg, int pending __unused) taskq_run(void *arg, int pending)
{ {
taskq_ent_t *task = arg; taskq_ent_t *task = arg;
if (!task->tqent_cancelled) if (pending == 0)
task->tqent_func(task->tqent_arg); return;
task->tqent_func(task->tqent_arg);
taskq_free(task); taskq_free(task);
} }
@ -345,7 +344,6 @@ taskq_dispatch_delay(taskq_t *tq, task_func_t func, void *arg,
task->tqent_func = func; task->tqent_func = func;
task->tqent_arg = arg; task->tqent_arg = arg;
task->tqent_type = TIMEOUT_TASK; task->tqent_type = TIMEOUT_TASK;
task->tqent_cancelled = B_FALSE;
refcount_init(&task->tqent_rc, 1); refcount_init(&task->tqent_rc, 1);
tqid = taskq_insert(task); tqid = taskq_insert(task);
TIMEOUT_TASK_INIT(tq->tq_queue, &task->tqent_timeout_task, 0, TIMEOUT_TASK_INIT(tq->tq_queue, &task->tqent_timeout_task, 0,
@ -379,7 +377,6 @@ taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t flags)
refcount_init(&task->tqent_rc, 1); refcount_init(&task->tqent_rc, 1);
task->tqent_func = func; task->tqent_func = func;
task->tqent_arg = arg; task->tqent_arg = arg;
task->tqent_cancelled = B_FALSE;
task->tqent_type = NORMAL_TASK; task->tqent_type = NORMAL_TASK;
tqid = taskq_insert(task); tqid = taskq_insert(task);
TASK_INIT(&task->tqent_task, prio, taskq_run, task); TASK_INIT(&task->tqent_task, prio, taskq_run, task);
@ -388,10 +385,12 @@ taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t flags)
} }
static void static void
taskq_run_ent(void *arg, int pending __unused) taskq_run_ent(void *arg, int pending)
{ {
taskq_ent_t *task = arg; taskq_ent_t *task = arg;
if (pending == 0)
return;
task->tqent_func(task->tqent_arg); task->tqent_func(task->tqent_arg);
} }
@ -406,8 +405,6 @@ taskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint32_t flags,
* can go at the front of the queue. * can go at the front of the queue.
*/ */
prio = !!(flags & TQ_FRONT); prio = !!(flags & TQ_FRONT);
task->tqent_cancelled = B_FALSE;
task->tqent_registered = B_FALSE;
task->tqent_id = 0; task->tqent_id = 0;
task->tqent_func = func; task->tqent_func = func;
task->tqent_arg = arg; task->tqent_arg = arg;
@ -427,12 +424,13 @@ taskq_wait_id(taskq_t *tq, taskqid_t tid)
{ {
taskq_ent_t *ent; taskq_ent_t *ent;
if (tid == 0)
return;
if ((ent = taskq_lookup(tid)) == NULL) if ((ent = taskq_lookup(tid)) == NULL)
return; return;
taskqueue_drain(tq->tq_queue, &ent->tqent_task); if (ent->tqent_type == NORMAL_TASK)
taskqueue_drain(tq->tq_queue, &ent->tqent_task);
else
taskqueue_drain_timeout(tq->tq_queue, &ent->tqent_timeout_task);
taskq_free(ent); taskq_free(ent);
} }

View File

@ -52,11 +52,6 @@
#include <sys/vm.h> #include <sys/vm.h>
#include <sys/vmmeter.h> #include <sys/vmmeter.h>
#if __FreeBSD_version >= 1300139
static struct sx arc_vnlru_lock;
static struct vnode *arc_vnlru_marker;
#endif
extern struct vfsops zfs_vfsops; extern struct vfsops zfs_vfsops;
uint_t zfs_arc_free_target = 0; uint_t zfs_arc_free_target = 0;
@ -131,53 +126,6 @@ arc_default_max(uint64_t min, uint64_t allmem)
return (MAX(allmem * 5 / 8, size)); return (MAX(allmem * 5 / 8, size));
} }
/*
* Helper function for arc_prune_async() it is responsible for safely
* handling the execution of a registered arc_prune_func_t.
*/
static void
arc_prune_task(void *arg)
{
uint64_t nr_scan = (uintptr_t)arg;
#ifndef __ILP32__
if (nr_scan > INT_MAX)
nr_scan = INT_MAX;
#endif
#if __FreeBSD_version >= 1300139
sx_xlock(&arc_vnlru_lock);
vnlru_free_vfsops(nr_scan, &zfs_vfsops, arc_vnlru_marker);
sx_xunlock(&arc_vnlru_lock);
#else
vnlru_free(nr_scan, &zfs_vfsops);
#endif
}
/*
* Notify registered consumers they must drop holds on a portion of the ARC
* buffered they reference. This provides a mechanism to ensure the ARC can
* honor the metadata limit and reclaim otherwise pinned ARC buffers. This
* is analogous to dnlc_reduce_cache() but more generic.
*
* This operation is performed asynchronously so it may be safely called
* in the context of the arc_reclaim_thread(). A reference is taken here
* for each registered arc_prune_t and the arc_prune_task() is responsible
* for releasing it once the registered arc_prune_func_t has completed.
*/
void
arc_prune_async(uint64_t adjust)
{
#ifndef __LP64__
if (adjust > UINTPTR_MAX)
adjust = UINTPTR_MAX;
#endif
taskq_dispatch(arc_prune_taskq, arc_prune_task,
(void *)(intptr_t)adjust, TQ_SLEEP);
ARCSTAT_BUMP(arcstat_prune);
}
uint64_t uint64_t
arc_all_memory(void) arc_all_memory(void)
{ {
@ -228,10 +176,6 @@ arc_lowmem_init(void)
{ {
arc_event_lowmem = EVENTHANDLER_REGISTER(vm_lowmem, arc_lowmem, NULL, arc_event_lowmem = EVENTHANDLER_REGISTER(vm_lowmem, arc_lowmem, NULL,
EVENTHANDLER_PRI_FIRST); EVENTHANDLER_PRI_FIRST);
#if __FreeBSD_version >= 1300139
arc_vnlru_marker = vnlru_alloc_marker();
sx_init(&arc_vnlru_lock, "arc vnlru lock");
#endif
} }
void void
@ -239,12 +183,6 @@ arc_lowmem_fini(void)
{ {
if (arc_event_lowmem != NULL) if (arc_event_lowmem != NULL)
EVENTHANDLER_DEREGISTER(vm_lowmem, arc_event_lowmem); EVENTHANDLER_DEREGISTER(vm_lowmem, arc_event_lowmem);
#if __FreeBSD_version >= 1300139
if (arc_vnlru_marker != NULL) {
vnlru_free_marker(arc_vnlru_marker);
sx_destroy(&arc_vnlru_lock);
}
#endif
} }
void void

View File

@ -1,5 +1,5 @@
/* /*
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD * SPDX-License-Identifier: BSD-2-Clause
* *
* Copyright (c) 2022 Rob Wing * Copyright (c) 2022 Rob Wing
* *

View File

@ -141,7 +141,7 @@ zfsdev_ioctl(struct cdev *dev, ulong_t zcmd, caddr_t arg, int flag,
if (len != sizeof (zfs_iocparm_t)) if (len != sizeof (zfs_iocparm_t))
return (EINVAL); return (EINVAL);
uaddr = (void *)zp->zfs_cmd; uaddr = (void *)(uintptr_t)zp->zfs_cmd;
zc = vmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP); zc = vmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
#ifdef ZFS_LEGACY_SUPPORT #ifdef ZFS_LEGACY_SUPPORT
/* /*

View File

@ -596,28 +596,6 @@ SYSCTL_UINT(_vfs_zfs_metaslab, OID_AUTO, df_free_pct,
" space map to continue allocations in a first-fit fashion"); " space map to continue allocations in a first-fit fashion");
/* END CSTYLED */ /* END CSTYLED */
/*
* Percentage of all cpus that can be used by the metaslab taskq.
*/
extern int metaslab_load_pct;
/* BEGIN CSTYLED */
SYSCTL_INT(_vfs_zfs_metaslab, OID_AUTO, load_pct,
CTLFLAG_RWTUN, &metaslab_load_pct, 0,
"Percentage of cpus that can be used by the metaslab taskq");
/* END CSTYLED */
/*
* Max number of metaslabs per group to preload.
*/
extern uint_t metaslab_preload_limit;
/* BEGIN CSTYLED */
SYSCTL_UINT(_vfs_zfs_metaslab, OID_AUTO, preload_limit,
CTLFLAG_RWTUN, &metaslab_preload_limit, 0,
"Max number of metaslabs per group to preload");
/* END CSTYLED */
/* mmp.c */ /* mmp.c */
int int

View File

@ -89,6 +89,10 @@ int zfs_debug_level;
SYSCTL_INT(_vfs_zfs, OID_AUTO, debug, CTLFLAG_RWTUN, &zfs_debug_level, 0, SYSCTL_INT(_vfs_zfs, OID_AUTO, debug, CTLFLAG_RWTUN, &zfs_debug_level, 0,
"Debug level"); "Debug level");
int zfs_bclone_enabled = 0;
SYSCTL_INT(_vfs_zfs, OID_AUTO, bclone_enabled, CTLFLAG_RWTUN,
&zfs_bclone_enabled, 0, "Enable block cloning");
struct zfs_jailparam { struct zfs_jailparam {
int mount_snapshot; int mount_snapshot;
}; };
@ -2070,6 +2074,26 @@ zfs_vnodes_adjust_back(void)
#endif #endif
} }
#if __FreeBSD_version >= 1300139
static struct sx zfs_vnlru_lock;
static struct vnode *zfs_vnlru_marker;
#endif
static arc_prune_t *zfs_prune;
static void
zfs_prune_task(uint64_t nr_to_scan, void *arg __unused)
{
if (nr_to_scan > INT_MAX)
nr_to_scan = INT_MAX;
#if __FreeBSD_version >= 1300139
sx_xlock(&zfs_vnlru_lock);
vnlru_free_vfsops(nr_to_scan, &zfs_vfsops, zfs_vnlru_marker);
sx_xunlock(&zfs_vnlru_lock);
#else
vnlru_free(nr_to_scan, &zfs_vfsops);
#endif
}
void void
zfs_init(void) zfs_init(void)
{ {
@ -2096,11 +2120,23 @@ zfs_init(void)
dmu_objset_register_type(DMU_OST_ZFS, zpl_get_file_info); dmu_objset_register_type(DMU_OST_ZFS, zpl_get_file_info);
zfsvfs_taskq = taskq_create("zfsvfs", 1, minclsyspri, 0, 0, 0); zfsvfs_taskq = taskq_create("zfsvfs", 1, minclsyspri, 0, 0, 0);
#if __FreeBSD_version >= 1300139
zfs_vnlru_marker = vnlru_alloc_marker();
sx_init(&zfs_vnlru_lock, "zfs vnlru lock");
#endif
zfs_prune = arc_add_prune_callback(zfs_prune_task, NULL);
} }
void void
zfs_fini(void) zfs_fini(void)
{ {
arc_remove_prune_callback(zfs_prune);
#if __FreeBSD_version >= 1300139
vnlru_free_marker(zfs_vnlru_marker);
sx_destroy(&zfs_vnlru_lock);
#endif
taskq_destroy(zfsvfs_taskq); taskq_destroy(zfsvfs_taskq);
zfsctl_fini(); zfsctl_fini();
zfs_znode_fini(); zfs_znode_fini();

View File

@ -6243,6 +6243,11 @@ zfs_freebsd_copy_file_range(struct vop_copy_file_range_args *ap)
int error; int error;
uint64_t len = *ap->a_lenp; uint64_t len = *ap->a_lenp;
if (!zfs_bclone_enabled) {
mp = NULL;
goto bad_write_fallback;
}
/* /*
* TODO: If offset/length is not aligned to recordsize, use * TODO: If offset/length is not aligned to recordsize, use
* vn_generic_copy_file_range() on this fragment. * vn_generic_copy_file_range() on this fragment.

View File

@ -489,56 +489,5 @@ arc_unregister_hotplug(void)
} }
#endif /* _KERNEL */ #endif /* _KERNEL */
/*
* Helper function for arc_prune_async() it is responsible for safely
* handling the execution of a registered arc_prune_func_t.
*/
static void
arc_prune_task(void *ptr)
{
arc_prune_t *ap = (arc_prune_t *)ptr;
arc_prune_func_t *func = ap->p_pfunc;
if (func != NULL)
func(ap->p_adjust, ap->p_private);
zfs_refcount_remove(&ap->p_refcnt, func);
}
/*
* Notify registered consumers they must drop holds on a portion of the ARC
* buffered they reference. This provides a mechanism to ensure the ARC can
* honor the metadata limit and reclaim otherwise pinned ARC buffers. This
* is analogous to dnlc_reduce_cache() but more generic.
*
* This operation is performed asynchronously so it may be safely called
* in the context of the arc_reclaim_thread(). A reference is taken here
* for each registered arc_prune_t and the arc_prune_task() is responsible
* for releasing it once the registered arc_prune_func_t has completed.
*/
void
arc_prune_async(uint64_t adjust)
{
arc_prune_t *ap;
mutex_enter(&arc_prune_mtx);
for (ap = list_head(&arc_prune_list); ap != NULL;
ap = list_next(&arc_prune_list, ap)) {
if (zfs_refcount_count(&ap->p_refcnt) >= 2)
continue;
zfs_refcount_add(&ap->p_refcnt, ap->p_pfunc);
ap->p_adjust = adjust;
if (taskq_dispatch(arc_prune_taskq, arc_prune_task,
ap, TQ_SLEEP) == TASKQID_INVALID) {
zfs_refcount_remove(&ap->p_refcnt, ap->p_pfunc);
continue;
}
ARCSTAT_BUMP(arcstat_prune);
}
mutex_exit(&arc_prune_mtx);
}
ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, shrinker_limit, INT, ZMOD_RW, ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, shrinker_limit, INT, ZMOD_RW,
"Limit on number of pages that ARC shrinker can reclaim at once"); "Limit on number of pages that ARC shrinker can reclaim at once");

View File

@ -522,7 +522,7 @@ zfsctl_inode_alloc(zfsvfs_t *zfsvfs, uint64_t id,
ip->i_blkbits = SPA_MINBLOCKSHIFT; ip->i_blkbits = SPA_MINBLOCKSHIFT;
ip->i_atime = now; ip->i_atime = now;
ip->i_mtime = now; ip->i_mtime = now;
ip->i_ctime = now; zpl_inode_set_ctime_to_ts(ip, now);
ip->i_fop = fops; ip->i_fop = fops;
ip->i_op = ops; ip->i_op = ops;
#if defined(IOP_XATTR) #if defined(IOP_XATTR)

View File

@ -204,22 +204,6 @@ zfs_uiomove_bvec_rq(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
this_seg_start = orig_loffset; this_seg_start = orig_loffset;
rq_for_each_segment(bv, rq, iter) { rq_for_each_segment(bv, rq, iter) {
if (uio->iter.bio) {
/*
* If uio->iter.bio is present, then we know we've saved
* uio->iter from a previous call to this function, and
* we can skip ahead in this rq_for_each_segment() loop
* to where we last left off. That way, we don't need
* to iterate over tons of segments we've already
* processed - we can just restore the "saved state".
*/
iter = uio->iter;
bv = uio->bv;
this_seg_start = uio->uio_loffset;
memset(&uio->iter, 0, sizeof (uio->iter));
continue;
}
/* /*
* Lookup what the logical offset of the last byte of this * Lookup what the logical offset of the last byte of this
* segment is. * segment is.
@ -260,19 +244,6 @@ zfs_uiomove_bvec_rq(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
copied = 1; /* We copied some data */ copied = 1; /* We copied some data */
} }
if (n == 0) {
/*
* All done copying. Save our 'iter' value to the uio.
* This allows us to "save our state" and skip ahead in
* the rq_for_each_segment() loop the next time we call
* call zfs_uiomove_bvec_rq() on this uio (which we
* will be doing for any remaining data in the uio).
*/
uio->iter = iter; /* make a copy of the struct data */
uio->bv = bv;
return (0);
}
this_seg_start = this_seg_end + 1; this_seg_start = this_seg_end + 1;
} }

View File

@ -1506,7 +1506,7 @@ zfs_domount(struct super_block *sb, zfs_mnt_t *zm, int silent)
* read-only flag, pretend it was set, as done for snapshots. * read-only flag, pretend it was set, as done for snapshots.
*/ */
if (!canwrite) if (!canwrite)
vfs->vfs_readonly = true; vfs->vfs_readonly = B_TRUE;
error = zfsvfs_create(osname, vfs->vfs_readonly, &zfsvfs); error = zfsvfs_create(osname, vfs->vfs_readonly, &zfsvfs);
if (error) { if (error) {

View File

@ -1684,7 +1684,12 @@ out:
* RETURN: 0 (always succeeds) * RETURN: 0 (always succeeds)
*/ */
int int
#ifdef HAVE_GENERIC_FILLATTR_IDMAP_REQMASK
zfs_getattr_fast(zidmap_t *user_ns, u32 request_mask, struct inode *ip,
struct kstat *sp)
#else
zfs_getattr_fast(zidmap_t *user_ns, struct inode *ip, struct kstat *sp) zfs_getattr_fast(zidmap_t *user_ns, struct inode *ip, struct kstat *sp)
#endif
{ {
znode_t *zp = ITOZ(ip); znode_t *zp = ITOZ(ip);
zfsvfs_t *zfsvfs = ITOZSB(ip); zfsvfs_t *zfsvfs = ITOZSB(ip);
@ -1697,7 +1702,11 @@ zfs_getattr_fast(zidmap_t *user_ns, struct inode *ip, struct kstat *sp)
mutex_enter(&zp->z_lock); mutex_enter(&zp->z_lock);
#ifdef HAVE_GENERIC_FILLATTR_IDMAP_REQMASK
zpl_generic_fillattr(user_ns, request_mask, ip, sp);
#else
zpl_generic_fillattr(user_ns, ip, sp); zpl_generic_fillattr(user_ns, ip, sp);
#endif
/* /*
* +1 link count for root inode with visible '.zfs' directory. * +1 link count for root inode with visible '.zfs' directory.
*/ */
@ -2471,8 +2480,8 @@ top:
if (mask & (ATTR_CTIME | ATTR_SIZE)) { if (mask & (ATTR_CTIME | ATTR_SIZE)) {
ZFS_TIME_ENCODE(&vap->va_ctime, ctime); ZFS_TIME_ENCODE(&vap->va_ctime, ctime);
ZTOI(zp)->i_ctime = zpl_inode_timestamp_truncate(vap->va_ctime, zpl_inode_set_ctime_to_ts(ZTOI(zp),
ZTOI(zp)); zpl_inode_timestamp_truncate(vap->va_ctime, ZTOI(zp)));
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
ctime, sizeof (ctime)); ctime, sizeof (ctime));
} }
@ -3677,6 +3686,7 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc,
caddr_t va; caddr_t va;
int err = 0; int err = 0;
uint64_t mtime[2], ctime[2]; uint64_t mtime[2], ctime[2];
inode_timespec_t tmp_ctime;
sa_bulk_attr_t bulk[3]; sa_bulk_attr_t bulk[3];
int cnt = 0; int cnt = 0;
struct address_space *mapping; struct address_space *mapping;
@ -3841,7 +3851,8 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc,
/* Preserve the mtime and ctime provided by the inode */ /* Preserve the mtime and ctime provided by the inode */
ZFS_TIME_ENCODE(&ip->i_mtime, mtime); ZFS_TIME_ENCODE(&ip->i_mtime, mtime);
ZFS_TIME_ENCODE(&ip->i_ctime, ctime); tmp_ctime = zpl_inode_get_ctime(ip);
ZFS_TIME_ENCODE(&tmp_ctime, ctime);
zp->z_atime_dirty = B_FALSE; zp->z_atime_dirty = B_FALSE;
zp->z_seq++; zp->z_seq++;
@ -3891,6 +3902,7 @@ zfs_dirty_inode(struct inode *ip, int flags)
zfsvfs_t *zfsvfs = ITOZSB(ip); zfsvfs_t *zfsvfs = ITOZSB(ip);
dmu_tx_t *tx; dmu_tx_t *tx;
uint64_t mode, atime[2], mtime[2], ctime[2]; uint64_t mode, atime[2], mtime[2], ctime[2];
inode_timespec_t tmp_ctime;
sa_bulk_attr_t bulk[4]; sa_bulk_attr_t bulk[4];
int error = 0; int error = 0;
int cnt = 0; int cnt = 0;
@ -3937,7 +3949,8 @@ zfs_dirty_inode(struct inode *ip, int flags)
/* Preserve the mode, mtime and ctime provided by the inode */ /* Preserve the mode, mtime and ctime provided by the inode */
ZFS_TIME_ENCODE(&ip->i_atime, atime); ZFS_TIME_ENCODE(&ip->i_atime, atime);
ZFS_TIME_ENCODE(&ip->i_mtime, mtime); ZFS_TIME_ENCODE(&ip->i_mtime, mtime);
ZFS_TIME_ENCODE(&ip->i_ctime, ctime); tmp_ctime = zpl_inode_get_ctime(ip);
ZFS_TIME_ENCODE(&tmp_ctime, ctime);
mode = ip->i_mode; mode = ip->i_mode;
zp->z_mode = mode; zp->z_mode = mode;
@ -4087,8 +4100,8 @@ zfs_map(struct inode *ip, offset_t off, caddr_t *addrp, size_t len,
if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0) if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
return (error); return (error);
if ((vm_flags & VM_WRITE) && (zp->z_pflags & if ((vm_flags & VM_WRITE) && (vm_flags & VM_SHARED) &&
(ZFS_IMMUTABLE | ZFS_READONLY | ZFS_APPENDONLY))) { (zp->z_pflags & (ZFS_IMMUTABLE | ZFS_READONLY | ZFS_APPENDONLY))) {
zfs_exit(zfsvfs, FTAG); zfs_exit(zfsvfs, FTAG);
return (SET_ERROR(EPERM)); return (SET_ERROR(EPERM));
} }
@ -4258,4 +4271,8 @@ EXPORT_SYMBOL(zfs_map);
module_param(zfs_delete_blocks, ulong, 0644); module_param(zfs_delete_blocks, ulong, 0644);
MODULE_PARM_DESC(zfs_delete_blocks, "Delete files larger than N blocks async"); MODULE_PARM_DESC(zfs_delete_blocks, "Delete files larger than N blocks async");
/* CSTYLED */
module_param(zfs_bclone_enabled, uint, 0644);
MODULE_PARM_DESC(zfs_bclone_enabled, "Enable block cloning");
#endif #endif

View File

@ -542,6 +542,7 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
uint64_t links; uint64_t links;
uint64_t z_uid, z_gid; uint64_t z_uid, z_gid;
uint64_t atime[2], mtime[2], ctime[2], btime[2]; uint64_t atime[2], mtime[2], ctime[2], btime[2];
inode_timespec_t tmp_ctime;
uint64_t projid = ZFS_DEFAULT_PROJID; uint64_t projid = ZFS_DEFAULT_PROJID;
sa_bulk_attr_t bulk[12]; sa_bulk_attr_t bulk[12];
int count = 0; int count = 0;
@ -615,7 +616,8 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
ZFS_TIME_DECODE(&ip->i_atime, atime); ZFS_TIME_DECODE(&ip->i_atime, atime);
ZFS_TIME_DECODE(&ip->i_mtime, mtime); ZFS_TIME_DECODE(&ip->i_mtime, mtime);
ZFS_TIME_DECODE(&ip->i_ctime, ctime); ZFS_TIME_DECODE(&tmp_ctime, ctime);
zpl_inode_set_ctime_to_ts(ip, tmp_ctime);
ZFS_TIME_DECODE(&zp->z_btime, btime); ZFS_TIME_DECODE(&zp->z_btime, btime);
ip->i_ino = zp->z_id; ip->i_ino = zp->z_id;
@ -1195,6 +1197,7 @@ zfs_rezget(znode_t *zp)
uint64_t gen; uint64_t gen;
uint64_t z_uid, z_gid; uint64_t z_uid, z_gid;
uint64_t atime[2], mtime[2], ctime[2], btime[2]; uint64_t atime[2], mtime[2], ctime[2], btime[2];
inode_timespec_t tmp_ctime;
uint64_t projid = ZFS_DEFAULT_PROJID; uint64_t projid = ZFS_DEFAULT_PROJID;
znode_hold_t *zh; znode_hold_t *zh;
@ -1289,7 +1292,8 @@ zfs_rezget(znode_t *zp)
ZFS_TIME_DECODE(&ZTOI(zp)->i_atime, atime); ZFS_TIME_DECODE(&ZTOI(zp)->i_atime, atime);
ZFS_TIME_DECODE(&ZTOI(zp)->i_mtime, mtime); ZFS_TIME_DECODE(&ZTOI(zp)->i_mtime, mtime);
ZFS_TIME_DECODE(&ZTOI(zp)->i_ctime, ctime); ZFS_TIME_DECODE(&tmp_ctime, ctime);
zpl_inode_set_ctime_to_ts(ZTOI(zp), tmp_ctime);
ZFS_TIME_DECODE(&zp->z_btime, btime); ZFS_TIME_DECODE(&zp->z_btime, btime);
if ((uint32_t)gen != ZTOI(zp)->i_generation) { if ((uint32_t)gen != ZTOI(zp)->i_generation) {
@ -1397,7 +1401,7 @@ zfs_zinactive(znode_t *zp)
boolean_t boolean_t
zfs_relatime_need_update(const struct inode *ip) zfs_relatime_need_update(const struct inode *ip)
{ {
inode_timespec_t now; inode_timespec_t now, tmp_ctime;
gethrestime(&now); gethrestime(&now);
/* /*
@ -1408,7 +1412,8 @@ zfs_relatime_need_update(const struct inode *ip)
if (zfs_compare_timespec(&ip->i_mtime, &ip->i_atime) >= 0) if (zfs_compare_timespec(&ip->i_mtime, &ip->i_atime) >= 0)
return (B_TRUE); return (B_TRUE);
if (zfs_compare_timespec(&ip->i_ctime, &ip->i_atime) >= 0) tmp_ctime = zpl_inode_get_ctime(ip);
if (zfs_compare_timespec(&tmp_ctime, &ip->i_atime) >= 0)
return (B_TRUE); return (B_TRUE);
if ((hrtime_t)now.tv_sec - (hrtime_t)ip->i_atime.tv_sec >= 24*60*60) if ((hrtime_t)now.tv_sec - (hrtime_t)ip->i_atime.tv_sec >= 24*60*60)
@ -1434,7 +1439,7 @@ void
zfs_tstamp_update_setup(znode_t *zp, uint_t flag, uint64_t mtime[2], zfs_tstamp_update_setup(znode_t *zp, uint_t flag, uint64_t mtime[2],
uint64_t ctime[2]) uint64_t ctime[2])
{ {
inode_timespec_t now; inode_timespec_t now, tmp_ctime;
gethrestime(&now); gethrestime(&now);
@ -1451,7 +1456,8 @@ zfs_tstamp_update_setup(znode_t *zp, uint_t flag, uint64_t mtime[2],
if (flag & ATTR_CTIME) { if (flag & ATTR_CTIME) {
ZFS_TIME_ENCODE(&now, ctime); ZFS_TIME_ENCODE(&now, ctime);
ZFS_TIME_DECODE(&(ZTOI(zp)->i_ctime), ctime); ZFS_TIME_DECODE(&tmp_ctime, ctime);
zpl_inode_set_ctime_to_ts(ZTOI(zp), tmp_ctime);
if (ZTOZSB(zp)->z_use_fuids) if (ZTOZSB(zp)->z_use_fuids)
zp->z_pflags |= ZFS_ARCHIVE; zp->z_pflags |= ZFS_ARCHIVE;
} }

View File

@ -124,6 +124,8 @@ zpl_root_getattr_impl(const struct path *path, struct kstat *stat,
generic_fillattr(user_ns, ip, stat); generic_fillattr(user_ns, ip, stat);
#elif defined(HAVE_GENERIC_FILLATTR_IDMAP) #elif defined(HAVE_GENERIC_FILLATTR_IDMAP)
generic_fillattr(user_ns, ip, stat); generic_fillattr(user_ns, ip, stat);
#elif defined(HAVE_GENERIC_FILLATTR_IDMAP_REQMASK)
generic_fillattr(user_ns, request_mask, ip, stat);
#else #else
(void) user_ns; (void) user_ns;
#endif #endif
@ -435,6 +437,8 @@ zpl_snapdir_getattr_impl(const struct path *path, struct kstat *stat,
generic_fillattr(user_ns, ip, stat); generic_fillattr(user_ns, ip, stat);
#elif defined(HAVE_GENERIC_FILLATTR_IDMAP) #elif defined(HAVE_GENERIC_FILLATTR_IDMAP)
generic_fillattr(user_ns, ip, stat); generic_fillattr(user_ns, ip, stat);
#elif defined(HAVE_GENERIC_FILLATTR_IDMAP_REQMASK)
generic_fillattr(user_ns, request_mask, ip, stat);
#else #else
(void) user_ns; (void) user_ns;
#endif #endif
@ -609,6 +613,8 @@ zpl_shares_getattr_impl(const struct path *path, struct kstat *stat,
generic_fillattr(user_ns, path->dentry->d_inode, stat); generic_fillattr(user_ns, path->dentry->d_inode, stat);
#elif defined(HAVE_GENERIC_FILLATTR_IDMAP) #elif defined(HAVE_GENERIC_FILLATTR_IDMAP)
generic_fillattr(user_ns, path->dentry->d_inode, stat); generic_fillattr(user_ns, path->dentry->d_inode, stat);
#elif defined(HAVE_GENERIC_FILLATTR_IDMAP_REQMASK)
generic_fillattr(user_ns, request_mask, ip, stat);
#else #else
(void) user_ns; (void) user_ns;
#endif #endif
@ -623,7 +629,10 @@ zpl_shares_getattr_impl(const struct path *path, struct kstat *stat,
error = -zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp); error = -zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp);
if (error == 0) { if (error == 0) {
#if (defined(HAVE_USERNS_IOPS_GETATTR) || defined(HAVE_IDMAP_IOPS_GETATTR)) #ifdef HAVE_GENERIC_FILLATTR_IDMAP_REQMASK
error = -zfs_getattr_fast(user_ns, request_mask, ZTOI(dzp),
stat);
#elif (defined(HAVE_USERNS_IOPS_GETATTR) || defined(HAVE_IDMAP_IOPS_GETATTR))
error = -zfs_getattr_fast(user_ns, ZTOI(dzp), stat); error = -zfs_getattr_fast(user_ns, ZTOI(dzp), stat);
#else #else
error = -zfs_getattr_fast(kcred->user_ns, ZTOI(dzp), stat); error = -zfs_getattr_fast(kcred->user_ns, ZTOI(dzp), stat);

View File

@ -31,6 +31,8 @@
#include <sys/zfs_vnops.h> #include <sys/zfs_vnops.h>
#include <sys/zfeature.h> #include <sys/zfeature.h>
int zfs_bclone_enabled = 0;
/* /*
* Clone part of a file via block cloning. * Clone part of a file via block cloning.
* *
@ -50,6 +52,9 @@ __zpl_clone_file_range(struct file *src_file, loff_t src_off,
fstrans_cookie_t cookie; fstrans_cookie_t cookie;
int err; int err;
if (!zfs_bclone_enabled)
return (-EOPNOTSUPP);
if (!spa_feature_is_enabled( if (!spa_feature_is_enabled(
dmu_objset_spa(ITOZSB(dst_i)->z_os), SPA_FEATURE_BLOCK_CLONING)) dmu_objset_spa(ITOZSB(dst_i)->z_os), SPA_FEATURE_BLOCK_CLONING))
return (-EOPNOTSUPP); return (-EOPNOTSUPP);
@ -202,8 +207,10 @@ zpl_ioctl_ficlone(struct file *dst_file, void *arg)
if (src_file == NULL) if (src_file == NULL)
return (-EBADF); return (-EBADF);
if (dst_file->f_op != src_file->f_op) if (dst_file->f_op != src_file->f_op) {
fput(src_file);
return (-EXDEV); return (-EXDEV);
}
size_t len = i_size_read(file_inode(src_file)); size_t len = i_size_read(file_inode(src_file));
@ -237,8 +244,10 @@ zpl_ioctl_ficlonerange(struct file *dst_file, void __user *arg)
if (src_file == NULL) if (src_file == NULL)
return (-EBADF); return (-EBADF);
if (dst_file->f_op != src_file->f_op) if (dst_file->f_op != src_file->f_op) {
fput(src_file);
return (-EXDEV); return (-EXDEV);
}
size_t len = fcr.fcr_src_length; size_t len = fcr.fcr_src_length;
if (len == 0) if (len == 0)

View File

@ -435,7 +435,9 @@ zpl_getattr_impl(const struct path *path, struct kstat *stat, u32 request_mask,
* XXX query_flags currently ignored. * XXX query_flags currently ignored.
*/ */
#if (defined(HAVE_USERNS_IOPS_GETATTR) || defined(HAVE_IDMAP_IOPS_GETATTR)) #ifdef HAVE_GENERIC_FILLATTR_IDMAP_REQMASK
error = -zfs_getattr_fast(user_ns, request_mask, ip, stat);
#elif (defined(HAVE_USERNS_IOPS_GETATTR) || defined(HAVE_IDMAP_IOPS_GETATTR))
error = -zfs_getattr_fast(user_ns, ip, stat); error = -zfs_getattr_fast(user_ns, ip, stat);
#else #else
error = -zfs_getattr_fast(kcred->user_ns, ip, stat); error = -zfs_getattr_fast(kcred->user_ns, ip, stat);
@ -774,7 +776,7 @@ zpl_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
return (-EMLINK); return (-EMLINK);
crhold(cr); crhold(cr);
ip->i_ctime = current_time(ip); zpl_inode_set_ctime_to_ts(ip, current_time(ip));
/* Must have an existing ref, so igrab() cannot return NULL */ /* Must have an existing ref, so igrab() cannot return NULL */
VERIFY3P(igrab(ip), !=, NULL); VERIFY3P(igrab(ip), !=, NULL);

View File

@ -378,7 +378,7 @@ zpl_kill_sb(struct super_block *sb)
} }
void void
zpl_prune_sb(int64_t nr_to_scan, void *arg) zpl_prune_sb(uint64_t nr_to_scan, void *arg)
{ {
struct super_block *sb = (struct super_block *)arg; struct super_block *sb = (struct super_block *)arg;
int objects = 0; int objects = 0;

View File

@ -544,7 +544,7 @@ zpl_xattr_set_dir(struct inode *ip, const char *name, const void *value,
error = -zfs_write_simple(xzp, value, size, pos, NULL); error = -zfs_write_simple(xzp, value, size, pos, NULL);
out: out:
if (error == 0) { if (error == 0) {
ip->i_ctime = current_time(ip); zpl_inode_set_ctime_to_ts(ip, current_time(ip));
zfs_mark_inode_dirty(ip); zfs_mark_inode_dirty(ip);
} }
@ -1042,7 +1042,8 @@ zpl_set_acl_impl(struct inode *ip, struct posix_acl *acl, int type)
*/ */
if (ip->i_mode != mode) { if (ip->i_mode != mode) {
ip->i_mode = ITOZ(ip)->z_mode = mode; ip->i_mode = ITOZ(ip)->z_mode = mode;
ip->i_ctime = current_time(ip); zpl_inode_set_ctime_to_ts(ip,
current_time(ip));
zfs_mark_inode_dirty(ip); zfs_mark_inode_dirty(ip);
} }
@ -1201,7 +1202,7 @@ zpl_init_acl(struct inode *ip, struct inode *dir)
return (PTR_ERR(acl)); return (PTR_ERR(acl));
if (!acl) { if (!acl) {
ITOZ(ip)->z_mode = (ip->i_mode &= ~current_umask()); ITOZ(ip)->z_mode = (ip->i_mode &= ~current_umask());
ip->i_ctime = current_time(ip); zpl_inode_set_ctime_to_ts(ip, current_time(ip));
zfs_mark_inode_dirty(ip); zfs_mark_inode_dirty(ip);
return (0); return (0);
} }

View File

@ -873,7 +873,13 @@ zvol_ioctl(struct block_device *bdev, fmode_t mode,
switch (cmd) { switch (cmd) {
case BLKFLSBUF: case BLKFLSBUF:
#ifdef HAVE_FSYNC_BDEV
fsync_bdev(bdev); fsync_bdev(bdev);
#elif defined(HAVE_SYNC_BLOCKDEV)
sync_blockdev(bdev);
#else
#error "Neither fsync_bdev() nor sync_blockdev() found"
#endif
invalidate_bdev(bdev); invalidate_bdev(bdev);
rw_enter(&zv->zv_suspend_lock, RW_READER); rw_enter(&zv->zv_suspend_lock, RW_READER);

View File

@ -748,8 +748,7 @@ taskq_t *arc_prune_taskq;
* Other sizes * Other sizes
*/ */
#define HDR_FULL_CRYPT_SIZE ((int64_t)sizeof (arc_buf_hdr_t)) #define HDR_FULL_SIZE ((int64_t)sizeof (arc_buf_hdr_t))
#define HDR_FULL_SIZE ((int64_t)offsetof(arc_buf_hdr_t, b_crypt_hdr))
#define HDR_L2ONLY_SIZE ((int64_t)offsetof(arc_buf_hdr_t, b_l1hdr)) #define HDR_L2ONLY_SIZE ((int64_t)offsetof(arc_buf_hdr_t, b_l1hdr))
/* /*
@ -887,6 +886,8 @@ static void l2arc_do_free_on_write(void);
static void l2arc_hdr_arcstats_update(arc_buf_hdr_t *hdr, boolean_t incr, static void l2arc_hdr_arcstats_update(arc_buf_hdr_t *hdr, boolean_t incr,
boolean_t state_only); boolean_t state_only);
static void arc_prune_async(uint64_t adjust);
#define l2arc_hdr_arcstats_increment(hdr) \ #define l2arc_hdr_arcstats_increment(hdr) \
l2arc_hdr_arcstats_update((hdr), B_TRUE, B_FALSE) l2arc_hdr_arcstats_update((hdr), B_TRUE, B_FALSE)
#define l2arc_hdr_arcstats_decrement(hdr) \ #define l2arc_hdr_arcstats_decrement(hdr) \
@ -1113,7 +1114,6 @@ buf_hash_remove(arc_buf_hdr_t *hdr)
*/ */
static kmem_cache_t *hdr_full_cache; static kmem_cache_t *hdr_full_cache;
static kmem_cache_t *hdr_full_crypt_cache;
static kmem_cache_t *hdr_l2only_cache; static kmem_cache_t *hdr_l2only_cache;
static kmem_cache_t *buf_cache; static kmem_cache_t *buf_cache;
@ -1134,7 +1134,6 @@ buf_fini(void)
for (int i = 0; i < BUF_LOCKS; i++) for (int i = 0; i < BUF_LOCKS; i++)
mutex_destroy(BUF_HASH_LOCK(i)); mutex_destroy(BUF_HASH_LOCK(i));
kmem_cache_destroy(hdr_full_cache); kmem_cache_destroy(hdr_full_cache);
kmem_cache_destroy(hdr_full_crypt_cache);
kmem_cache_destroy(hdr_l2only_cache); kmem_cache_destroy(hdr_l2only_cache);
kmem_cache_destroy(buf_cache); kmem_cache_destroy(buf_cache);
} }
@ -1151,7 +1150,6 @@ hdr_full_cons(void *vbuf, void *unused, int kmflag)
memset(hdr, 0, HDR_FULL_SIZE); memset(hdr, 0, HDR_FULL_SIZE);
hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS; hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS;
cv_init(&hdr->b_l1hdr.b_cv, NULL, CV_DEFAULT, NULL);
zfs_refcount_create(&hdr->b_l1hdr.b_refcnt); zfs_refcount_create(&hdr->b_l1hdr.b_refcnt);
#ifdef ZFS_DEBUG #ifdef ZFS_DEBUG
mutex_init(&hdr->b_l1hdr.b_freeze_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&hdr->b_l1hdr.b_freeze_lock, NULL, MUTEX_DEFAULT, NULL);
@ -1163,19 +1161,6 @@ hdr_full_cons(void *vbuf, void *unused, int kmflag)
return (0); return (0);
} }
static int
hdr_full_crypt_cons(void *vbuf, void *unused, int kmflag)
{
(void) unused;
arc_buf_hdr_t *hdr = vbuf;
hdr_full_cons(vbuf, unused, kmflag);
memset(&hdr->b_crypt_hdr, 0, sizeof (hdr->b_crypt_hdr));
arc_space_consume(sizeof (hdr->b_crypt_hdr), ARC_SPACE_HDRS);
return (0);
}
static int static int
hdr_l2only_cons(void *vbuf, void *unused, int kmflag) hdr_l2only_cons(void *vbuf, void *unused, int kmflag)
{ {
@ -1211,7 +1196,6 @@ hdr_full_dest(void *vbuf, void *unused)
arc_buf_hdr_t *hdr = vbuf; arc_buf_hdr_t *hdr = vbuf;
ASSERT(HDR_EMPTY(hdr)); ASSERT(HDR_EMPTY(hdr));
cv_destroy(&hdr->b_l1hdr.b_cv);
zfs_refcount_destroy(&hdr->b_l1hdr.b_refcnt); zfs_refcount_destroy(&hdr->b_l1hdr.b_refcnt);
#ifdef ZFS_DEBUG #ifdef ZFS_DEBUG
mutex_destroy(&hdr->b_l1hdr.b_freeze_lock); mutex_destroy(&hdr->b_l1hdr.b_freeze_lock);
@ -1220,16 +1204,6 @@ hdr_full_dest(void *vbuf, void *unused)
arc_space_return(HDR_FULL_SIZE, ARC_SPACE_HDRS); arc_space_return(HDR_FULL_SIZE, ARC_SPACE_HDRS);
} }
static void
hdr_full_crypt_dest(void *vbuf, void *unused)
{
(void) vbuf, (void) unused;
hdr_full_dest(vbuf, unused);
arc_space_return(sizeof (((arc_buf_hdr_t *)NULL)->b_crypt_hdr),
ARC_SPACE_HDRS);
}
static void static void
hdr_l2only_dest(void *vbuf, void *unused) hdr_l2only_dest(void *vbuf, void *unused)
{ {
@ -1285,9 +1259,6 @@ retry:
hdr_full_cache = kmem_cache_create("arc_buf_hdr_t_full", HDR_FULL_SIZE, hdr_full_cache = kmem_cache_create("arc_buf_hdr_t_full", HDR_FULL_SIZE,
0, hdr_full_cons, hdr_full_dest, NULL, NULL, NULL, 0); 0, hdr_full_cons, hdr_full_dest, NULL, NULL, NULL, 0);
hdr_full_crypt_cache = kmem_cache_create("arc_buf_hdr_t_full_crypt",
HDR_FULL_CRYPT_SIZE, 0, hdr_full_crypt_cons, hdr_full_crypt_dest,
NULL, NULL, NULL, 0);
hdr_l2only_cache = kmem_cache_create("arc_buf_hdr_t_l2only", hdr_l2only_cache = kmem_cache_create("arc_buf_hdr_t_l2only",
HDR_L2ONLY_SIZE, 0, hdr_l2only_cons, hdr_l2only_dest, NULL, HDR_L2ONLY_SIZE, 0, hdr_l2only_cons, hdr_l2only_dest, NULL,
NULL, NULL, 0); NULL, NULL, 0);
@ -1395,7 +1366,7 @@ arc_buf_is_shared(arc_buf_t *buf)
abd_is_linear(buf->b_hdr->b_l1hdr.b_pabd) && abd_is_linear(buf->b_hdr->b_l1hdr.b_pabd) &&
buf->b_data == abd_to_buf(buf->b_hdr->b_l1hdr.b_pabd)); buf->b_data == abd_to_buf(buf->b_hdr->b_l1hdr.b_pabd));
IMPLY(shared, HDR_SHARED_DATA(buf->b_hdr)); IMPLY(shared, HDR_SHARED_DATA(buf->b_hdr));
IMPLY(shared, ARC_BUF_SHARED(buf)); EQUIV(shared, ARC_BUF_SHARED(buf));
IMPLY(shared, ARC_BUF_COMPRESSED(buf) || ARC_BUF_LAST(buf)); IMPLY(shared, ARC_BUF_COMPRESSED(buf) || ARC_BUF_LAST(buf));
/* /*
@ -2006,7 +1977,6 @@ arc_buf_untransform_in_place(arc_buf_t *buf)
arc_buf_size(buf)); arc_buf_size(buf));
buf->b_flags &= ~ARC_BUF_FLAG_ENCRYPTED; buf->b_flags &= ~ARC_BUF_FLAG_ENCRYPTED;
buf->b_flags &= ~ARC_BUF_FLAG_COMPRESSED; buf->b_flags &= ~ARC_BUF_FLAG_COMPRESSED;
hdr->b_crypt_hdr.b_ebufcnt -= 1;
} }
/* /*
@ -2041,7 +2011,7 @@ arc_buf_fill(arc_buf_t *buf, spa_t *spa, const zbookmark_phys_t *zb,
IMPLY(encrypted, HDR_ENCRYPTED(hdr)); IMPLY(encrypted, HDR_ENCRYPTED(hdr));
IMPLY(encrypted, ARC_BUF_ENCRYPTED(buf)); IMPLY(encrypted, ARC_BUF_ENCRYPTED(buf));
IMPLY(encrypted, ARC_BUF_COMPRESSED(buf)); IMPLY(encrypted, ARC_BUF_COMPRESSED(buf));
IMPLY(encrypted, !ARC_BUF_SHARED(buf)); IMPLY(encrypted, !arc_buf_is_shared(buf));
/* /*
* If the caller wanted encrypted data we just need to copy it from * If the caller wanted encrypted data we just need to copy it from
@ -2109,7 +2079,9 @@ arc_buf_fill(arc_buf_t *buf, spa_t *spa, const zbookmark_phys_t *zb,
} }
if (hdr_compressed == compressed) { if (hdr_compressed == compressed) {
if (!arc_buf_is_shared(buf)) { if (ARC_BUF_SHARED(buf)) {
ASSERT(arc_buf_is_shared(buf));
} else {
abd_copy_to_buf(buf->b_data, hdr->b_l1hdr.b_pabd, abd_copy_to_buf(buf->b_data, hdr->b_l1hdr.b_pabd,
arc_buf_size(buf)); arc_buf_size(buf));
} }
@ -2121,7 +2093,7 @@ arc_buf_fill(arc_buf_t *buf, spa_t *spa, const zbookmark_phys_t *zb,
* If the buf is sharing its data with the hdr, unlink it and * If the buf is sharing its data with the hdr, unlink it and
* allocate a new data buffer for the buf. * allocate a new data buffer for the buf.
*/ */
if (arc_buf_is_shared(buf)) { if (ARC_BUF_SHARED(buf)) {
ASSERT(ARC_BUF_COMPRESSED(buf)); ASSERT(ARC_BUF_COMPRESSED(buf));
/* We need to give the buf its own b_data */ /* We need to give the buf its own b_data */
@ -2133,6 +2105,8 @@ arc_buf_fill(arc_buf_t *buf, spa_t *spa, const zbookmark_phys_t *zb,
/* Previously overhead was 0; just add new overhead */ /* Previously overhead was 0; just add new overhead */
ARCSTAT_INCR(arcstat_overhead_size, HDR_GET_LSIZE(hdr)); ARCSTAT_INCR(arcstat_overhead_size, HDR_GET_LSIZE(hdr));
} else if (ARC_BUF_COMPRESSED(buf)) { } else if (ARC_BUF_COMPRESSED(buf)) {
ASSERT(!arc_buf_is_shared(buf));
/* We need to reallocate the buf's b_data */ /* We need to reallocate the buf's b_data */
arc_free_data_buf(hdr, buf->b_data, HDR_GET_PSIZE(hdr), arc_free_data_buf(hdr, buf->b_data, HDR_GET_PSIZE(hdr),
buf); buf);
@ -2241,7 +2215,6 @@ arc_evictable_space_increment(arc_buf_hdr_t *hdr, arc_state_t *state)
ASSERT(HDR_HAS_L1HDR(hdr)); ASSERT(HDR_HAS_L1HDR(hdr));
if (GHOST_STATE(state)) { if (GHOST_STATE(state)) {
ASSERT0(hdr->b_l1hdr.b_bufcnt);
ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL); ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL);
ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
ASSERT(!HDR_HAS_RABD(hdr)); ASSERT(!HDR_HAS_RABD(hdr));
@ -2261,7 +2234,7 @@ arc_evictable_space_increment(arc_buf_hdr_t *hdr, arc_state_t *state)
for (arc_buf_t *buf = hdr->b_l1hdr.b_buf; buf != NULL; for (arc_buf_t *buf = hdr->b_l1hdr.b_buf; buf != NULL;
buf = buf->b_next) { buf = buf->b_next) {
if (arc_buf_is_shared(buf)) if (ARC_BUF_SHARED(buf))
continue; continue;
(void) zfs_refcount_add_many(&state->arcs_esize[type], (void) zfs_refcount_add_many(&state->arcs_esize[type],
arc_buf_size(buf), buf); arc_buf_size(buf), buf);
@ -2281,7 +2254,6 @@ arc_evictable_space_decrement(arc_buf_hdr_t *hdr, arc_state_t *state)
ASSERT(HDR_HAS_L1HDR(hdr)); ASSERT(HDR_HAS_L1HDR(hdr));
if (GHOST_STATE(state)) { if (GHOST_STATE(state)) {
ASSERT0(hdr->b_l1hdr.b_bufcnt);
ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL); ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL);
ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
ASSERT(!HDR_HAS_RABD(hdr)); ASSERT(!HDR_HAS_RABD(hdr));
@ -2301,7 +2273,7 @@ arc_evictable_space_decrement(arc_buf_hdr_t *hdr, arc_state_t *state)
for (arc_buf_t *buf = hdr->b_l1hdr.b_buf; buf != NULL; for (arc_buf_t *buf = hdr->b_l1hdr.b_buf; buf != NULL;
buf = buf->b_next) { buf = buf->b_next) {
if (arc_buf_is_shared(buf)) if (ARC_BUF_SHARED(buf))
continue; continue;
(void) zfs_refcount_remove_many(&state->arcs_esize[type], (void) zfs_refcount_remove_many(&state->arcs_esize[type],
arc_buf_size(buf), buf); arc_buf_size(buf), buf);
@ -2397,7 +2369,9 @@ arc_buf_info(arc_buf_t *ab, arc_buf_info_t *abi, int state_index)
l2hdr = &hdr->b_l2hdr; l2hdr = &hdr->b_l2hdr;
if (l1hdr) { if (l1hdr) {
abi->abi_bufcnt = l1hdr->b_bufcnt; abi->abi_bufcnt = 0;
for (arc_buf_t *buf = l1hdr->b_buf; buf; buf = buf->b_next)
abi->abi_bufcnt++;
abi->abi_access = l1hdr->b_arc_access; abi->abi_access = l1hdr->b_arc_access;
abi->abi_mru_hits = l1hdr->b_mru_hits; abi->abi_mru_hits = l1hdr->b_mru_hits;
abi->abi_mru_ghost_hits = l1hdr->b_mru_ghost_hits; abi->abi_mru_ghost_hits = l1hdr->b_mru_ghost_hits;
@ -2425,7 +2399,6 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr)
{ {
arc_state_t *old_state; arc_state_t *old_state;
int64_t refcnt; int64_t refcnt;
uint32_t bufcnt;
boolean_t update_old, update_new; boolean_t update_old, update_new;
arc_buf_contents_t type = arc_buf_type(hdr); arc_buf_contents_t type = arc_buf_type(hdr);
@ -2439,19 +2412,16 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr)
if (HDR_HAS_L1HDR(hdr)) { if (HDR_HAS_L1HDR(hdr)) {
old_state = hdr->b_l1hdr.b_state; old_state = hdr->b_l1hdr.b_state;
refcnt = zfs_refcount_count(&hdr->b_l1hdr.b_refcnt); refcnt = zfs_refcount_count(&hdr->b_l1hdr.b_refcnt);
bufcnt = hdr->b_l1hdr.b_bufcnt; update_old = (hdr->b_l1hdr.b_buf != NULL ||
update_old = (bufcnt > 0 || hdr->b_l1hdr.b_pabd != NULL || hdr->b_l1hdr.b_pabd != NULL || HDR_HAS_RABD(hdr));
HDR_HAS_RABD(hdr));
IMPLY(GHOST_STATE(old_state), bufcnt == 0);
IMPLY(GHOST_STATE(new_state), bufcnt == 0);
IMPLY(GHOST_STATE(old_state), hdr->b_l1hdr.b_buf == NULL); IMPLY(GHOST_STATE(old_state), hdr->b_l1hdr.b_buf == NULL);
IMPLY(GHOST_STATE(new_state), hdr->b_l1hdr.b_buf == NULL); IMPLY(GHOST_STATE(new_state), hdr->b_l1hdr.b_buf == NULL);
IMPLY(old_state == arc_anon, bufcnt <= 1); IMPLY(old_state == arc_anon, hdr->b_l1hdr.b_buf == NULL ||
ARC_BUF_LAST(hdr->b_l1hdr.b_buf));
} else { } else {
old_state = arc_l2c_only; old_state = arc_l2c_only;
refcnt = 0; refcnt = 0;
bufcnt = 0;
update_old = B_FALSE; update_old = B_FALSE;
} }
update_new = update_old; update_new = update_old;
@ -2499,14 +2469,12 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr)
if (update_new && new_state != arc_l2c_only) { if (update_new && new_state != arc_l2c_only) {
ASSERT(HDR_HAS_L1HDR(hdr)); ASSERT(HDR_HAS_L1HDR(hdr));
if (GHOST_STATE(new_state)) { if (GHOST_STATE(new_state)) {
ASSERT0(bufcnt);
/* /*
* When moving a header to a ghost state, we first * When moving a header to a ghost state, we first
* remove all arc buffers. Thus, we'll have a * remove all arc buffers. Thus, we'll have no arc
* bufcnt of zero, and no arc buffer to use for * buffer to use for the reference. As a result, we
* the reference. As a result, we use the arc * use the arc header pointer for the reference.
* header pointer for the reference.
*/ */
(void) zfs_refcount_add_many( (void) zfs_refcount_add_many(
&new_state->arcs_size[type], &new_state->arcs_size[type],
@ -2514,7 +2482,6 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr)
ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
ASSERT(!HDR_HAS_RABD(hdr)); ASSERT(!HDR_HAS_RABD(hdr));
} else { } else {
uint32_t buffers = 0;
/* /*
* Each individual buffer holds a unique reference, * Each individual buffer holds a unique reference,
@ -2523,8 +2490,6 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr)
*/ */
for (arc_buf_t *buf = hdr->b_l1hdr.b_buf; buf != NULL; for (arc_buf_t *buf = hdr->b_l1hdr.b_buf; buf != NULL;
buf = buf->b_next) { buf = buf->b_next) {
ASSERT3U(bufcnt, !=, 0);
buffers++;
/* /*
* When the arc_buf_t is sharing the data * When the arc_buf_t is sharing the data
@ -2533,14 +2498,13 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr)
* add to the refcount if the arc_buf_t is * add to the refcount if the arc_buf_t is
* not shared. * not shared.
*/ */
if (arc_buf_is_shared(buf)) if (ARC_BUF_SHARED(buf))
continue; continue;
(void) zfs_refcount_add_many( (void) zfs_refcount_add_many(
&new_state->arcs_size[type], &new_state->arcs_size[type],
arc_buf_size(buf), buf); arc_buf_size(buf), buf);
} }
ASSERT3U(bufcnt, ==, buffers);
if (hdr->b_l1hdr.b_pabd != NULL) { if (hdr->b_l1hdr.b_pabd != NULL) {
(void) zfs_refcount_add_many( (void) zfs_refcount_add_many(
@ -2559,7 +2523,6 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr)
if (update_old && old_state != arc_l2c_only) { if (update_old && old_state != arc_l2c_only) {
ASSERT(HDR_HAS_L1HDR(hdr)); ASSERT(HDR_HAS_L1HDR(hdr));
if (GHOST_STATE(old_state)) { if (GHOST_STATE(old_state)) {
ASSERT0(bufcnt);
ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
ASSERT(!HDR_HAS_RABD(hdr)); ASSERT(!HDR_HAS_RABD(hdr));
@ -2575,7 +2538,6 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr)
&old_state->arcs_size[type], &old_state->arcs_size[type],
HDR_GET_LSIZE(hdr), hdr); HDR_GET_LSIZE(hdr), hdr);
} else { } else {
uint32_t buffers = 0;
/* /*
* Each individual buffer holds a unique reference, * Each individual buffer holds a unique reference,
@ -2584,8 +2546,6 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr)
*/ */
for (arc_buf_t *buf = hdr->b_l1hdr.b_buf; buf != NULL; for (arc_buf_t *buf = hdr->b_l1hdr.b_buf; buf != NULL;
buf = buf->b_next) { buf = buf->b_next) {
ASSERT3U(bufcnt, !=, 0);
buffers++;
/* /*
* When the arc_buf_t is sharing the data * When the arc_buf_t is sharing the data
@ -2594,14 +2554,13 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr)
* add to the refcount if the arc_buf_t is * add to the refcount if the arc_buf_t is
* not shared. * not shared.
*/ */
if (arc_buf_is_shared(buf)) if (ARC_BUF_SHARED(buf))
continue; continue;
(void) zfs_refcount_remove_many( (void) zfs_refcount_remove_many(
&old_state->arcs_size[type], &old_state->arcs_size[type],
arc_buf_size(buf), buf); arc_buf_size(buf), buf);
} }
ASSERT3U(bufcnt, ==, buffers);
ASSERT(hdr->b_l1hdr.b_pabd != NULL || ASSERT(hdr->b_l1hdr.b_pabd != NULL ||
HDR_HAS_RABD(hdr)); HDR_HAS_RABD(hdr));
@ -2849,9 +2808,6 @@ arc_buf_alloc_impl(arc_buf_hdr_t *hdr, spa_t *spa, const zbookmark_phys_t *zb,
VERIFY3P(buf->b_data, !=, NULL); VERIFY3P(buf->b_data, !=, NULL);
hdr->b_l1hdr.b_buf = buf; hdr->b_l1hdr.b_buf = buf;
hdr->b_l1hdr.b_bufcnt += 1;
if (encrypted)
hdr->b_crypt_hdr.b_ebufcnt += 1;
/* /*
* If the user wants the data from the hdr, we need to either copy or * If the user wants the data from the hdr, we need to either copy or
@ -3093,8 +3049,6 @@ arc_buf_remove(arc_buf_hdr_t *hdr, arc_buf_t *buf)
} }
buf->b_next = NULL; buf->b_next = NULL;
ASSERT3P(lastbuf, !=, buf); ASSERT3P(lastbuf, !=, buf);
IMPLY(hdr->b_l1hdr.b_bufcnt > 0, lastbuf != NULL);
IMPLY(hdr->b_l1hdr.b_bufcnt > 0, hdr->b_l1hdr.b_buf != NULL);
IMPLY(lastbuf != NULL, ARC_BUF_LAST(lastbuf)); IMPLY(lastbuf != NULL, ARC_BUF_LAST(lastbuf));
return (lastbuf); return (lastbuf);
@ -3124,31 +3078,30 @@ arc_buf_destroy_impl(arc_buf_t *buf)
arc_cksum_verify(buf); arc_cksum_verify(buf);
arc_buf_unwatch(buf); arc_buf_unwatch(buf);
if (arc_buf_is_shared(buf)) { if (ARC_BUF_SHARED(buf)) {
arc_hdr_clear_flags(hdr, ARC_FLAG_SHARED_DATA); arc_hdr_clear_flags(hdr, ARC_FLAG_SHARED_DATA);
} else { } else {
ASSERT(!arc_buf_is_shared(buf));
uint64_t size = arc_buf_size(buf); uint64_t size = arc_buf_size(buf);
arc_free_data_buf(hdr, buf->b_data, size, buf); arc_free_data_buf(hdr, buf->b_data, size, buf);
ARCSTAT_INCR(arcstat_overhead_size, -size); ARCSTAT_INCR(arcstat_overhead_size, -size);
} }
buf->b_data = NULL; buf->b_data = NULL;
ASSERT(hdr->b_l1hdr.b_bufcnt > 0); /*
hdr->b_l1hdr.b_bufcnt -= 1; * If we have no more encrypted buffers and we've already
* gotten a copy of the decrypted data we can free b_rabd
if (ARC_BUF_ENCRYPTED(buf)) { * to save some space.
hdr->b_crypt_hdr.b_ebufcnt -= 1; */
if (ARC_BUF_ENCRYPTED(buf) && HDR_HAS_RABD(hdr) &&
/* hdr->b_l1hdr.b_pabd != NULL && !HDR_IO_IN_PROGRESS(hdr)) {
* If we have no more encrypted buffers and we've arc_buf_t *b;
* already gotten a copy of the decrypted data we can for (b = hdr->b_l1hdr.b_buf; b; b = b->b_next) {
* free b_rabd to save some space. if (b != buf && ARC_BUF_ENCRYPTED(b))
*/ break;
if (hdr->b_crypt_hdr.b_ebufcnt == 0 &&
HDR_HAS_RABD(hdr) && hdr->b_l1hdr.b_pabd != NULL &&
!HDR_IO_IN_PROGRESS(hdr)) {
arc_hdr_free_abd(hdr, B_TRUE);
} }
if (b == NULL)
arc_hdr_free_abd(hdr, B_TRUE);
} }
} }
@ -3169,9 +3122,9 @@ arc_buf_destroy_impl(arc_buf_t *buf)
*/ */
if (lastbuf != NULL && !ARC_BUF_ENCRYPTED(lastbuf)) { if (lastbuf != NULL && !ARC_BUF_ENCRYPTED(lastbuf)) {
/* Only one buf can be shared at once */ /* Only one buf can be shared at once */
VERIFY(!arc_buf_is_shared(lastbuf)); ASSERT(!arc_buf_is_shared(lastbuf));
/* hdr is uncompressed so can't have compressed buf */ /* hdr is uncompressed so can't have compressed buf */
VERIFY(!ARC_BUF_COMPRESSED(lastbuf)); ASSERT(!ARC_BUF_COMPRESSED(lastbuf));
ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL);
arc_hdr_free_abd(hdr, B_FALSE); arc_hdr_free_abd(hdr, B_FALSE);
@ -3309,11 +3262,7 @@ arc_hdr_alloc(uint64_t spa, int32_t psize, int32_t lsize,
arc_buf_hdr_t *hdr; arc_buf_hdr_t *hdr;
VERIFY(type == ARC_BUFC_DATA || type == ARC_BUFC_METADATA); VERIFY(type == ARC_BUFC_DATA || type == ARC_BUFC_METADATA);
if (protected) { hdr = kmem_cache_alloc(hdr_full_cache, KM_PUSHPAGE);
hdr = kmem_cache_alloc(hdr_full_crypt_cache, KM_PUSHPAGE);
} else {
hdr = kmem_cache_alloc(hdr_full_cache, KM_PUSHPAGE);
}
ASSERT(HDR_EMPTY(hdr)); ASSERT(HDR_EMPTY(hdr));
#ifdef ZFS_DEBUG #ifdef ZFS_DEBUG
@ -3336,7 +3285,6 @@ arc_hdr_alloc(uint64_t spa, int32_t psize, int32_t lsize,
hdr->b_l1hdr.b_mru_ghost_hits = 0; hdr->b_l1hdr.b_mru_ghost_hits = 0;
hdr->b_l1hdr.b_mfu_hits = 0; hdr->b_l1hdr.b_mfu_hits = 0;
hdr->b_l1hdr.b_mfu_ghost_hits = 0; hdr->b_l1hdr.b_mfu_ghost_hits = 0;
hdr->b_l1hdr.b_bufcnt = 0;
hdr->b_l1hdr.b_buf = NULL; hdr->b_l1hdr.b_buf = NULL;
ASSERT(zfs_refcount_is_zero(&hdr->b_l1hdr.b_refcnt)); ASSERT(zfs_refcount_is_zero(&hdr->b_l1hdr.b_refcnt));
@ -3362,16 +3310,6 @@ arc_hdr_realloc(arc_buf_hdr_t *hdr, kmem_cache_t *old, kmem_cache_t *new)
ASSERT((old == hdr_full_cache && new == hdr_l2only_cache) || ASSERT((old == hdr_full_cache && new == hdr_l2only_cache) ||
(old == hdr_l2only_cache && new == hdr_full_cache)); (old == hdr_l2only_cache && new == hdr_full_cache));
/*
* if the caller wanted a new full header and the header is to be
* encrypted we will actually allocate the header from the full crypt
* cache instead. The same applies to freeing from the old cache.
*/
if (HDR_PROTECTED(hdr) && new == hdr_full_cache)
new = hdr_full_crypt_cache;
if (HDR_PROTECTED(hdr) && old == hdr_full_cache)
old = hdr_full_crypt_cache;
nhdr = kmem_cache_alloc(new, KM_PUSHPAGE); nhdr = kmem_cache_alloc(new, KM_PUSHPAGE);
ASSERT(MUTEX_HELD(HDR_LOCK(hdr))); ASSERT(MUTEX_HELD(HDR_LOCK(hdr)));
@ -3379,7 +3317,7 @@ arc_hdr_realloc(arc_buf_hdr_t *hdr, kmem_cache_t *old, kmem_cache_t *new)
memcpy(nhdr, hdr, HDR_L2ONLY_SIZE); memcpy(nhdr, hdr, HDR_L2ONLY_SIZE);
if (new == hdr_full_cache || new == hdr_full_crypt_cache) { if (new == hdr_full_cache) {
arc_hdr_set_flags(nhdr, ARC_FLAG_HAS_L1HDR); arc_hdr_set_flags(nhdr, ARC_FLAG_HAS_L1HDR);
/* /*
* arc_access and arc_change_state need to be aware that a * arc_access and arc_change_state need to be aware that a
@ -3393,7 +3331,6 @@ arc_hdr_realloc(arc_buf_hdr_t *hdr, kmem_cache_t *old, kmem_cache_t *new)
ASSERT(!HDR_HAS_RABD(hdr)); ASSERT(!HDR_HAS_RABD(hdr));
} else { } else {
ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL); ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL);
ASSERT0(hdr->b_l1hdr.b_bufcnt);
#ifdef ZFS_DEBUG #ifdef ZFS_DEBUG
ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL); ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL);
#endif #endif
@ -3459,126 +3396,6 @@ arc_hdr_realloc(arc_buf_hdr_t *hdr, kmem_cache_t *old, kmem_cache_t *new)
return (nhdr); return (nhdr);
} }
/*
* This function allows an L1 header to be reallocated as a crypt
* header and vice versa. If we are going to a crypt header, the
* new fields will be zeroed out.
*/
static arc_buf_hdr_t *
arc_hdr_realloc_crypt(arc_buf_hdr_t *hdr, boolean_t need_crypt)
{
arc_buf_hdr_t *nhdr;
arc_buf_t *buf;
kmem_cache_t *ncache, *ocache;
/*
* This function requires that hdr is in the arc_anon state.
* Therefore it won't have any L2ARC data for us to worry
* about copying.
*/
ASSERT(HDR_HAS_L1HDR(hdr));
ASSERT(!HDR_HAS_L2HDR(hdr));
ASSERT3U(!!HDR_PROTECTED(hdr), !=, need_crypt);
ASSERT3P(hdr->b_l1hdr.b_state, ==, arc_anon);
ASSERT(!multilist_link_active(&hdr->b_l1hdr.b_arc_node));
ASSERT(!list_link_active(&hdr->b_l2hdr.b_l2node));
ASSERT3P(hdr->b_hash_next, ==, NULL);
if (need_crypt) {
ncache = hdr_full_crypt_cache;
ocache = hdr_full_cache;
} else {
ncache = hdr_full_cache;
ocache = hdr_full_crypt_cache;
}
nhdr = kmem_cache_alloc(ncache, KM_PUSHPAGE);
/*
* Copy all members that aren't locks or condvars to the new header.
* No lists are pointing to us (as we asserted above), so we don't
* need to worry about the list nodes.
*/
nhdr->b_dva = hdr->b_dva;
nhdr->b_birth = hdr->b_birth;
nhdr->b_type = hdr->b_type;
nhdr->b_flags = hdr->b_flags;
nhdr->b_psize = hdr->b_psize;
nhdr->b_lsize = hdr->b_lsize;
nhdr->b_spa = hdr->b_spa;
#ifdef ZFS_DEBUG
nhdr->b_l1hdr.b_freeze_cksum = hdr->b_l1hdr.b_freeze_cksum;
#endif
nhdr->b_l1hdr.b_bufcnt = hdr->b_l1hdr.b_bufcnt;
nhdr->b_l1hdr.b_byteswap = hdr->b_l1hdr.b_byteswap;
nhdr->b_l1hdr.b_state = hdr->b_l1hdr.b_state;
nhdr->b_l1hdr.b_arc_access = hdr->b_l1hdr.b_arc_access;
nhdr->b_l1hdr.b_mru_hits = hdr->b_l1hdr.b_mru_hits;
nhdr->b_l1hdr.b_mru_ghost_hits = hdr->b_l1hdr.b_mru_ghost_hits;
nhdr->b_l1hdr.b_mfu_hits = hdr->b_l1hdr.b_mfu_hits;
nhdr->b_l1hdr.b_mfu_ghost_hits = hdr->b_l1hdr.b_mfu_ghost_hits;
nhdr->b_l1hdr.b_acb = hdr->b_l1hdr.b_acb;
nhdr->b_l1hdr.b_pabd = hdr->b_l1hdr.b_pabd;
/*
* This zfs_refcount_add() exists only to ensure that the individual
* arc buffers always point to a header that is referenced, avoiding
* a small race condition that could trigger ASSERTs.
*/
(void) zfs_refcount_add(&nhdr->b_l1hdr.b_refcnt, FTAG);
nhdr->b_l1hdr.b_buf = hdr->b_l1hdr.b_buf;
for (buf = nhdr->b_l1hdr.b_buf; buf != NULL; buf = buf->b_next)
buf->b_hdr = nhdr;
zfs_refcount_transfer(&nhdr->b_l1hdr.b_refcnt, &hdr->b_l1hdr.b_refcnt);
(void) zfs_refcount_remove(&nhdr->b_l1hdr.b_refcnt, FTAG);
ASSERT0(zfs_refcount_count(&hdr->b_l1hdr.b_refcnt));
if (need_crypt) {
arc_hdr_set_flags(nhdr, ARC_FLAG_PROTECTED);
} else {
arc_hdr_clear_flags(nhdr, ARC_FLAG_PROTECTED);
}
/* unset all members of the original hdr */
memset(&hdr->b_dva, 0, sizeof (dva_t));
hdr->b_birth = 0;
hdr->b_type = 0;
hdr->b_flags = 0;
hdr->b_psize = 0;
hdr->b_lsize = 0;
hdr->b_spa = 0;
#ifdef ZFS_DEBUG
hdr->b_l1hdr.b_freeze_cksum = NULL;
#endif
hdr->b_l1hdr.b_buf = NULL;
hdr->b_l1hdr.b_bufcnt = 0;
hdr->b_l1hdr.b_byteswap = 0;
hdr->b_l1hdr.b_state = NULL;
hdr->b_l1hdr.b_arc_access = 0;
hdr->b_l1hdr.b_mru_hits = 0;
hdr->b_l1hdr.b_mru_ghost_hits = 0;
hdr->b_l1hdr.b_mfu_hits = 0;
hdr->b_l1hdr.b_mfu_ghost_hits = 0;
hdr->b_l1hdr.b_acb = NULL;
hdr->b_l1hdr.b_pabd = NULL;
if (ocache == hdr_full_crypt_cache) {
ASSERT(!HDR_HAS_RABD(hdr));
hdr->b_crypt_hdr.b_ot = DMU_OT_NONE;
hdr->b_crypt_hdr.b_ebufcnt = 0;
hdr->b_crypt_hdr.b_dsobj = 0;
memset(hdr->b_crypt_hdr.b_salt, 0, ZIO_DATA_SALT_LEN);
memset(hdr->b_crypt_hdr.b_iv, 0, ZIO_DATA_IV_LEN);
memset(hdr->b_crypt_hdr.b_mac, 0, ZIO_DATA_MAC_LEN);
}
buf_discard_identity(hdr);
kmem_cache_free(ocache, hdr);
return (nhdr);
}
/* /*
* This function is used by the send / receive code to convert a newly * This function is used by the send / receive code to convert a newly
* allocated arc_buf_t to one that is suitable for a raw encrypted write. It * allocated arc_buf_t to one that is suitable for a raw encrypted write. It
@ -3598,8 +3415,7 @@ arc_convert_to_raw(arc_buf_t *buf, uint64_t dsobj, boolean_t byteorder,
ASSERT3P(hdr->b_l1hdr.b_state, ==, arc_anon); ASSERT3P(hdr->b_l1hdr.b_state, ==, arc_anon);
buf->b_flags |= (ARC_BUF_FLAG_COMPRESSED | ARC_BUF_FLAG_ENCRYPTED); buf->b_flags |= (ARC_BUF_FLAG_COMPRESSED | ARC_BUF_FLAG_ENCRYPTED);
if (!HDR_PROTECTED(hdr)) arc_hdr_set_flags(hdr, ARC_FLAG_PROTECTED);
hdr = arc_hdr_realloc_crypt(hdr, B_TRUE);
hdr->b_crypt_hdr.b_dsobj = dsobj; hdr->b_crypt_hdr.b_dsobj = dsobj;
hdr->b_crypt_hdr.b_ot = ot; hdr->b_crypt_hdr.b_ot = ot;
hdr->b_l1hdr.b_byteswap = (byteorder == ZFS_HOST_BYTEORDER) ? hdr->b_l1hdr.b_byteswap = (byteorder == ZFS_HOST_BYTEORDER) ?
@ -3800,8 +3616,6 @@ static void
arc_hdr_destroy(arc_buf_hdr_t *hdr) arc_hdr_destroy(arc_buf_hdr_t *hdr)
{ {
if (HDR_HAS_L1HDR(hdr)) { if (HDR_HAS_L1HDR(hdr)) {
ASSERT(hdr->b_l1hdr.b_buf == NULL ||
hdr->b_l1hdr.b_bufcnt > 0);
ASSERT(zfs_refcount_is_zero(&hdr->b_l1hdr.b_refcnt)); ASSERT(zfs_refcount_is_zero(&hdr->b_l1hdr.b_refcnt));
ASSERT3P(hdr->b_l1hdr.b_state, ==, arc_anon); ASSERT3P(hdr->b_l1hdr.b_state, ==, arc_anon);
} }
@ -3865,12 +3679,7 @@ arc_hdr_destroy(arc_buf_hdr_t *hdr)
#ifdef ZFS_DEBUG #ifdef ZFS_DEBUG
ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL); ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL);
#endif #endif
kmem_cache_free(hdr_full_cache, hdr);
if (!HDR_PROTECTED(hdr)) {
kmem_cache_free(hdr_full_cache, hdr);
} else {
kmem_cache_free(hdr_full_crypt_cache, hdr);
}
} else { } else {
kmem_cache_free(hdr_l2only_cache, hdr); kmem_cache_free(hdr_l2only_cache, hdr);
} }
@ -3882,7 +3691,8 @@ arc_buf_destroy(arc_buf_t *buf, const void *tag)
arc_buf_hdr_t *hdr = buf->b_hdr; arc_buf_hdr_t *hdr = buf->b_hdr;
if (hdr->b_l1hdr.b_state == arc_anon) { if (hdr->b_l1hdr.b_state == arc_anon) {
ASSERT3U(hdr->b_l1hdr.b_bufcnt, ==, 1); ASSERT3P(hdr->b_l1hdr.b_buf, ==, buf);
ASSERT(ARC_BUF_LAST(buf));
ASSERT(!HDR_IO_IN_PROGRESS(hdr)); ASSERT(!HDR_IO_IN_PROGRESS(hdr));
VERIFY0(remove_reference(hdr, tag)); VERIFY0(remove_reference(hdr, tag));
return; return;
@ -3892,7 +3702,7 @@ arc_buf_destroy(arc_buf_t *buf, const void *tag)
mutex_enter(hash_lock); mutex_enter(hash_lock);
ASSERT3P(hdr, ==, buf->b_hdr); ASSERT3P(hdr, ==, buf->b_hdr);
ASSERT(hdr->b_l1hdr.b_bufcnt > 0); ASSERT3P(hdr->b_l1hdr.b_buf, !=, NULL);
ASSERT3P(hash_lock, ==, HDR_LOCK(hdr)); ASSERT3P(hash_lock, ==, HDR_LOCK(hdr));
ASSERT3P(hdr->b_l1hdr.b_state, !=, arc_anon); ASSERT3P(hdr->b_l1hdr.b_state, !=, arc_anon);
ASSERT3P(buf->b_data, !=, NULL); ASSERT3P(buf->b_data, !=, NULL);
@ -3935,7 +3745,6 @@ arc_evict_hdr(arc_buf_hdr_t *hdr, uint64_t *real_evicted)
ASSERT(MUTEX_HELD(HDR_LOCK(hdr))); ASSERT(MUTEX_HELD(HDR_LOCK(hdr)));
ASSERT(HDR_HAS_L1HDR(hdr)); ASSERT(HDR_HAS_L1HDR(hdr));
ASSERT(!HDR_IO_IN_PROGRESS(hdr)); ASSERT(!HDR_IO_IN_PROGRESS(hdr));
ASSERT0(hdr->b_l1hdr.b_bufcnt);
ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL); ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL);
ASSERT0(zfs_refcount_count(&hdr->b_l1hdr.b_refcnt)); ASSERT0(zfs_refcount_count(&hdr->b_l1hdr.b_refcnt));
@ -5597,13 +5406,6 @@ arc_read_done(zio_t *zio)
buf_hash_remove(hdr); buf_hash_remove(hdr);
} }
/*
* Broadcast before we drop the hash_lock to avoid the possibility
* that the hdr (and hence the cv) might be freed before we get to
* the cv_broadcast().
*/
cv_broadcast(&hdr->b_l1hdr.b_cv);
arc_hdr_clear_flags(hdr, ARC_FLAG_IO_IN_PROGRESS); arc_hdr_clear_flags(hdr, ARC_FLAG_IO_IN_PROGRESS);
(void) remove_reference(hdr, hdr); (void) remove_reference(hdr, hdr);
@ -5798,8 +5600,7 @@ top:
} }
acb->acb_zio_head = head_zio; acb->acb_zio_head = head_zio;
acb->acb_next = hdr->b_l1hdr.b_acb; acb->acb_next = hdr->b_l1hdr.b_acb;
if (hdr->b_l1hdr.b_acb) hdr->b_l1hdr.b_acb->acb_prev = acb;
hdr->b_l1hdr.b_acb->acb_prev = acb;
hdr->b_l1hdr.b_acb = acb; hdr->b_l1hdr.b_acb = acb;
} }
mutex_exit(hash_lock); mutex_exit(hash_lock);
@ -5939,8 +5740,28 @@ top:
* and so the performance impact shouldn't * and so the performance impact shouldn't
* matter. * matter.
*/ */
cv_wait(&hdr->b_l1hdr.b_cv, hash_lock); arc_callback_t *acb = kmem_zalloc(
sizeof (arc_callback_t), KM_SLEEP);
acb->acb_wait = B_TRUE;
mutex_init(&acb->acb_wait_lock, NULL,
MUTEX_DEFAULT, NULL);
cv_init(&acb->acb_wait_cv, NULL, CV_DEFAULT,
NULL);
acb->acb_zio_head =
hdr->b_l1hdr.b_acb->acb_zio_head;
acb->acb_next = hdr->b_l1hdr.b_acb;
hdr->b_l1hdr.b_acb->acb_prev = acb;
hdr->b_l1hdr.b_acb = acb;
mutex_exit(hash_lock); mutex_exit(hash_lock);
mutex_enter(&acb->acb_wait_lock);
while (acb->acb_wait) {
cv_wait(&acb->acb_wait_cv,
&acb->acb_wait_lock);
}
mutex_exit(&acb->acb_wait_lock);
mutex_destroy(&acb->acb_wait_lock);
cv_destroy(&acb->acb_wait_cv);
kmem_free(acb, sizeof (arc_callback_t));
goto top; goto top;
} }
} }
@ -6060,12 +5881,9 @@ top:
* 3. This buffer isn't currently writing to the L2ARC. * 3. This buffer isn't currently writing to the L2ARC.
* 4. The L2ARC entry wasn't evicted, which may * 4. The L2ARC entry wasn't evicted, which may
* also have invalidated the vdev. * also have invalidated the vdev.
* 5. This isn't prefetch or l2arc_noprefetch is 0.
*/ */
if (HDR_HAS_L2HDR(hdr) && if (HDR_HAS_L2HDR(hdr) &&
!HDR_L2_WRITING(hdr) && !HDR_L2_EVICTED(hdr) && !HDR_L2_WRITING(hdr) && !HDR_L2_EVICTED(hdr)) {
!(l2arc_noprefetch &&
(*arc_flags & ARC_FLAG_PREFETCH))) {
l2arc_read_callback_t *cb; l2arc_read_callback_t *cb;
abd_t *abd; abd_t *abd;
uint64_t asize; uint64_t asize;
@ -6245,6 +6063,56 @@ arc_remove_prune_callback(arc_prune_t *p)
kmem_free(p, sizeof (*p)); kmem_free(p, sizeof (*p));
} }
/*
* Helper function for arc_prune_async() it is responsible for safely
* handling the execution of a registered arc_prune_func_t.
*/
static void
arc_prune_task(void *ptr)
{
arc_prune_t *ap = (arc_prune_t *)ptr;
arc_prune_func_t *func = ap->p_pfunc;
if (func != NULL)
func(ap->p_adjust, ap->p_private);
zfs_refcount_remove(&ap->p_refcnt, func);
}
/*
* Notify registered consumers they must drop holds on a portion of the ARC
* buffers they reference. This provides a mechanism to ensure the ARC can
* honor the metadata limit and reclaim otherwise pinned ARC buffers.
*
* This operation is performed asynchronously so it may be safely called
* in the context of the arc_reclaim_thread(). A reference is taken here
* for each registered arc_prune_t and the arc_prune_task() is responsible
* for releasing it once the registered arc_prune_func_t has completed.
*/
static void
arc_prune_async(uint64_t adjust)
{
arc_prune_t *ap;
mutex_enter(&arc_prune_mtx);
for (ap = list_head(&arc_prune_list); ap != NULL;
ap = list_next(&arc_prune_list, ap)) {
if (zfs_refcount_count(&ap->p_refcnt) >= 2)
continue;
zfs_refcount_add(&ap->p_refcnt, ap->p_pfunc);
ap->p_adjust = adjust;
if (taskq_dispatch(arc_prune_taskq, arc_prune_task,
ap, TQ_SLEEP) == TASKQID_INVALID) {
zfs_refcount_remove(&ap->p_refcnt, ap->p_pfunc);
continue;
}
ARCSTAT_BUMP(arcstat_prune);
}
mutex_exit(&arc_prune_mtx);
}
/* /*
* Notify the arc that a block was freed, and thus will never be used again. * Notify the arc that a block was freed, and thus will never be used again.
*/ */
@ -6321,7 +6189,8 @@ arc_release(arc_buf_t *buf, const void *tag)
ASSERT(!HDR_IN_HASH_TABLE(hdr)); ASSERT(!HDR_IN_HASH_TABLE(hdr));
ASSERT(!HDR_HAS_L2HDR(hdr)); ASSERT(!HDR_HAS_L2HDR(hdr));
ASSERT3U(hdr->b_l1hdr.b_bufcnt, ==, 1); ASSERT3P(hdr->b_l1hdr.b_buf, ==, buf);
ASSERT(ARC_BUF_LAST(buf));
ASSERT3S(zfs_refcount_count(&hdr->b_l1hdr.b_refcnt), ==, 1); ASSERT3S(zfs_refcount_count(&hdr->b_l1hdr.b_refcnt), ==, 1);
ASSERT(!multilist_link_active(&hdr->b_l1hdr.b_arc_node)); ASSERT(!multilist_link_active(&hdr->b_l1hdr.b_arc_node));
@ -6372,7 +6241,7 @@ arc_release(arc_buf_t *buf, const void *tag)
/* /*
* Do we have more than one buf? * Do we have more than one buf?
*/ */
if (hdr->b_l1hdr.b_bufcnt > 1) { if (hdr->b_l1hdr.b_buf != buf || !ARC_BUF_LAST(buf)) {
arc_buf_hdr_t *nhdr; arc_buf_hdr_t *nhdr;
uint64_t spa = hdr->b_spa; uint64_t spa = hdr->b_spa;
uint64_t psize = HDR_GET_PSIZE(hdr); uint64_t psize = HDR_GET_PSIZE(hdr);
@ -6385,7 +6254,7 @@ arc_release(arc_buf_t *buf, const void *tag)
ASSERT(hdr->b_l1hdr.b_buf != buf || buf->b_next != NULL); ASSERT(hdr->b_l1hdr.b_buf != buf || buf->b_next != NULL);
VERIFY3S(remove_reference(hdr, tag), >, 0); VERIFY3S(remove_reference(hdr, tag), >, 0);
if (arc_buf_is_shared(buf) && !ARC_BUF_COMPRESSED(buf)) { if (ARC_BUF_SHARED(buf) && !ARC_BUF_COMPRESSED(buf)) {
ASSERT3P(hdr->b_l1hdr.b_buf, !=, buf); ASSERT3P(hdr->b_l1hdr.b_buf, !=, buf);
ASSERT(ARC_BUF_LAST(buf)); ASSERT(ARC_BUF_LAST(buf));
} }
@ -6402,9 +6271,9 @@ arc_release(arc_buf_t *buf, const void *tag)
* If the current arc_buf_t and the hdr are sharing their data * If the current arc_buf_t and the hdr are sharing their data
* buffer, then we must stop sharing that block. * buffer, then we must stop sharing that block.
*/ */
if (arc_buf_is_shared(buf)) { if (ARC_BUF_SHARED(buf)) {
ASSERT3P(hdr->b_l1hdr.b_buf, !=, buf); ASSERT3P(hdr->b_l1hdr.b_buf, !=, buf);
VERIFY(!arc_buf_is_shared(lastbuf)); ASSERT(!arc_buf_is_shared(lastbuf));
/* /*
* First, sever the block sharing relationship between * First, sever the block sharing relationship between
@ -6437,7 +6306,7 @@ arc_release(arc_buf_t *buf, const void *tag)
*/ */
ASSERT(arc_buf_is_shared(lastbuf) || ASSERT(arc_buf_is_shared(lastbuf) ||
arc_hdr_get_compress(hdr) != ZIO_COMPRESS_OFF); arc_hdr_get_compress(hdr) != ZIO_COMPRESS_OFF);
ASSERT(!ARC_BUF_SHARED(buf)); ASSERT(!arc_buf_is_shared(buf));
} }
ASSERT(hdr->b_l1hdr.b_pabd != NULL || HDR_HAS_RABD(hdr)); ASSERT(hdr->b_l1hdr.b_pabd != NULL || HDR_HAS_RABD(hdr));
@ -6453,10 +6322,6 @@ arc_release(arc_buf_t *buf, const void *tag)
arc_buf_size(buf), buf); arc_buf_size(buf), buf);
} }
hdr->b_l1hdr.b_bufcnt -= 1;
if (ARC_BUF_ENCRYPTED(buf))
hdr->b_crypt_hdr.b_ebufcnt -= 1;
arc_cksum_verify(buf); arc_cksum_verify(buf);
arc_buf_unwatch(buf); arc_buf_unwatch(buf);
@ -6469,15 +6334,11 @@ arc_release(arc_buf_t *buf, const void *tag)
nhdr = arc_hdr_alloc(spa, psize, lsize, protected, nhdr = arc_hdr_alloc(spa, psize, lsize, protected,
compress, hdr->b_complevel, type); compress, hdr->b_complevel, type);
ASSERT3P(nhdr->b_l1hdr.b_buf, ==, NULL); ASSERT3P(nhdr->b_l1hdr.b_buf, ==, NULL);
ASSERT0(nhdr->b_l1hdr.b_bufcnt);
ASSERT0(zfs_refcount_count(&nhdr->b_l1hdr.b_refcnt)); ASSERT0(zfs_refcount_count(&nhdr->b_l1hdr.b_refcnt));
VERIFY3U(nhdr->b_type, ==, type); VERIFY3U(nhdr->b_type, ==, type);
ASSERT(!HDR_SHARED_DATA(nhdr)); ASSERT(!HDR_SHARED_DATA(nhdr));
nhdr->b_l1hdr.b_buf = buf; nhdr->b_l1hdr.b_buf = buf;
nhdr->b_l1hdr.b_bufcnt = 1;
if (ARC_BUF_ENCRYPTED(buf))
nhdr->b_crypt_hdr.b_ebufcnt = 1;
(void) zfs_refcount_add(&nhdr->b_l1hdr.b_refcnt, tag); (void) zfs_refcount_add(&nhdr->b_l1hdr.b_refcnt, tag);
buf->b_hdr = nhdr; buf->b_hdr = nhdr;
@ -6528,7 +6389,7 @@ arc_write_ready(zio_t *zio)
ASSERT(HDR_HAS_L1HDR(hdr)); ASSERT(HDR_HAS_L1HDR(hdr));
ASSERT(!zfs_refcount_is_zero(&buf->b_hdr->b_l1hdr.b_refcnt)); ASSERT(!zfs_refcount_is_zero(&buf->b_hdr->b_l1hdr.b_refcnt));
ASSERT(hdr->b_l1hdr.b_bufcnt > 0); ASSERT3P(hdr->b_l1hdr.b_buf, !=, NULL);
/* /*
* If we're reexecuting this zio because the pool suspended, then * If we're reexecuting this zio because the pool suspended, then
@ -6539,9 +6400,10 @@ arc_write_ready(zio_t *zio)
arc_cksum_free(hdr); arc_cksum_free(hdr);
arc_buf_unwatch(buf); arc_buf_unwatch(buf);
if (hdr->b_l1hdr.b_pabd != NULL) { if (hdr->b_l1hdr.b_pabd != NULL) {
if (arc_buf_is_shared(buf)) { if (ARC_BUF_SHARED(buf)) {
arc_unshare_buf(hdr, buf); arc_unshare_buf(hdr, buf);
} else { } else {
ASSERT(!arc_buf_is_shared(buf));
arc_hdr_free_abd(hdr, B_FALSE); arc_hdr_free_abd(hdr, B_FALSE);
} }
} }
@ -6563,13 +6425,9 @@ arc_write_ready(zio_t *zio)
add_reference(hdr, hdr); /* For IO_IN_PROGRESS. */ add_reference(hdr, hdr); /* For IO_IN_PROGRESS. */
} }
if (BP_IS_PROTECTED(bp) != !!HDR_PROTECTED(hdr))
hdr = arc_hdr_realloc_crypt(hdr, BP_IS_PROTECTED(bp));
if (BP_IS_PROTECTED(bp)) { if (BP_IS_PROTECTED(bp)) {
/* ZIL blocks are written through zio_rewrite */ /* ZIL blocks are written through zio_rewrite */
ASSERT3U(BP_GET_TYPE(bp), !=, DMU_OT_INTENT_LOG); ASSERT3U(BP_GET_TYPE(bp), !=, DMU_OT_INTENT_LOG);
ASSERT(HDR_PROTECTED(hdr));
if (BP_SHOULD_BYTESWAP(bp)) { if (BP_SHOULD_BYTESWAP(bp)) {
if (BP_GET_LEVEL(bp) > 0) { if (BP_GET_LEVEL(bp) > 0) {
@ -6582,11 +6440,14 @@ arc_write_ready(zio_t *zio)
hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS; hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS;
} }
arc_hdr_set_flags(hdr, ARC_FLAG_PROTECTED);
hdr->b_crypt_hdr.b_ot = BP_GET_TYPE(bp); hdr->b_crypt_hdr.b_ot = BP_GET_TYPE(bp);
hdr->b_crypt_hdr.b_dsobj = zio->io_bookmark.zb_objset; hdr->b_crypt_hdr.b_dsobj = zio->io_bookmark.zb_objset;
zio_crypt_decode_params_bp(bp, hdr->b_crypt_hdr.b_salt, zio_crypt_decode_params_bp(bp, hdr->b_crypt_hdr.b_salt,
hdr->b_crypt_hdr.b_iv); hdr->b_crypt_hdr.b_iv);
zio_crypt_decode_mac_bp(bp, hdr->b_crypt_hdr.b_mac); zio_crypt_decode_mac_bp(bp, hdr->b_crypt_hdr.b_mac);
} else {
arc_hdr_clear_flags(hdr, ARC_FLAG_PROTECTED);
} }
/* /*
@ -6667,7 +6528,8 @@ arc_write_ready(zio_t *zio)
} else { } else {
ASSERT3P(buf->b_data, ==, abd_to_buf(zio->io_orig_abd)); ASSERT3P(buf->b_data, ==, abd_to_buf(zio->io_orig_abd));
ASSERT3U(zio->io_orig_size, ==, arc_buf_size(buf)); ASSERT3U(zio->io_orig_size, ==, arc_buf_size(buf));
ASSERT3U(hdr->b_l1hdr.b_bufcnt, ==, 1); ASSERT3P(hdr->b_l1hdr.b_buf, ==, buf);
ASSERT(ARC_BUF_LAST(buf));
arc_share_buf(hdr, buf); arc_share_buf(hdr, buf);
} }
@ -6748,7 +6610,8 @@ arc_write_done(zio_t *zio)
(void *)hdr, (void *)exists); (void *)hdr, (void *)exists);
} else { } else {
/* Dedup */ /* Dedup */
ASSERT(hdr->b_l1hdr.b_bufcnt == 1); ASSERT3P(hdr->b_l1hdr.b_buf, !=, NULL);
ASSERT(ARC_BUF_LAST(hdr->b_l1hdr.b_buf));
ASSERT(hdr->b_l1hdr.b_state == arc_anon); ASSERT(hdr->b_l1hdr.b_state == arc_anon);
ASSERT(BP_GET_DEDUP(zio->io_bp)); ASSERT(BP_GET_DEDUP(zio->io_bp));
ASSERT(BP_GET_LEVEL(zio->io_bp) == 0); ASSERT(BP_GET_LEVEL(zio->io_bp) == 0);
@ -6789,7 +6652,7 @@ arc_write(zio_t *pio, spa_t *spa, uint64_t txg,
ASSERT(!HDR_IO_ERROR(hdr)); ASSERT(!HDR_IO_ERROR(hdr));
ASSERT(!HDR_IO_IN_PROGRESS(hdr)); ASSERT(!HDR_IO_IN_PROGRESS(hdr));
ASSERT3P(hdr->b_l1hdr.b_acb, ==, NULL); ASSERT3P(hdr->b_l1hdr.b_acb, ==, NULL);
ASSERT3U(hdr->b_l1hdr.b_bufcnt, >, 0); ASSERT3P(hdr->b_l1hdr.b_buf, !=, NULL);
if (uncached) if (uncached)
arc_hdr_set_flags(hdr, ARC_FLAG_UNCACHED); arc_hdr_set_flags(hdr, ARC_FLAG_UNCACHED);
else if (l2arc) else if (l2arc)
@ -6839,9 +6702,10 @@ arc_write(zio_t *pio, spa_t *spa, uint64_t txg,
* The hdr will remain with a NULL data pointer and the * The hdr will remain with a NULL data pointer and the
* buf will take sole ownership of the block. * buf will take sole ownership of the block.
*/ */
if (arc_buf_is_shared(buf)) { if (ARC_BUF_SHARED(buf)) {
arc_unshare_buf(hdr, buf); arc_unshare_buf(hdr, buf);
} else { } else {
ASSERT(!arc_buf_is_shared(buf));
arc_hdr_free_abd(hdr, B_FALSE); arc_hdr_free_abd(hdr, B_FALSE);
} }
VERIFY3P(buf->b_data, !=, NULL); VERIFY3P(buf->b_data, !=, NULL);

View File

@ -210,10 +210,12 @@ dmu_tx_check_ioerr(zio_t *zio, dnode_t *dn, int level, uint64_t blkid)
dmu_buf_impl_t *db; dmu_buf_impl_t *db;
rw_enter(&dn->dn_struct_rwlock, RW_READER); rw_enter(&dn->dn_struct_rwlock, RW_READER);
db = dbuf_hold_level(dn, level, blkid, FTAG); err = dbuf_hold_impl(dn, level, blkid, TRUE, FALSE, FTAG, &db);
rw_exit(&dn->dn_struct_rwlock); rw_exit(&dn->dn_struct_rwlock);
if (db == NULL) if (err == ENOENT)
return (SET_ERROR(EIO)); return (0);
if (err != 0)
return (err);
/* /*
* PARTIAL_FIRST allows caching for uncacheable blocks. It will * PARTIAL_FIRST allows caching for uncacheable blocks. It will
* be cleared after dmu_buf_will_dirty() call dbuf_read() again. * be cleared after dmu_buf_will_dirty() call dbuf_read() again.

View File

@ -965,18 +965,18 @@ dsl_pool_need_dirty_delay(dsl_pool_t *dp)
uint64_t delay_min_bytes = uint64_t delay_min_bytes =
zfs_dirty_data_max * zfs_delay_min_dirty_percent / 100; zfs_dirty_data_max * zfs_delay_min_dirty_percent / 100;
mutex_enter(&dp->dp_lock); /*
uint64_t dirty = dp->dp_dirty_total; * We are not taking the dp_lock here and few other places, since torn
mutex_exit(&dp->dp_lock); * reads are unlikely: on 64-bit systems due to register size and on
* 32-bit due to memory constraints. Pool-wide locks in hot path may
return (dirty > delay_min_bytes); * be too expensive, while we do not need a precise result here.
*/
return (dp->dp_dirty_total > delay_min_bytes);
} }
static boolean_t static boolean_t
dsl_pool_need_dirty_sync(dsl_pool_t *dp, uint64_t txg) dsl_pool_need_dirty_sync(dsl_pool_t *dp, uint64_t txg)
{ {
ASSERT(MUTEX_HELD(&dp->dp_lock));
uint64_t dirty_min_bytes = uint64_t dirty_min_bytes =
zfs_dirty_data_max * zfs_dirty_data_sync_percent / 100; zfs_dirty_data_max * zfs_dirty_data_sync_percent / 100;
uint64_t dirty = dp->dp_dirty_pertxg[txg & TXG_MASK]; uint64_t dirty = dp->dp_dirty_pertxg[txg & TXG_MASK];

View File

@ -207,11 +207,6 @@ static const uint32_t metaslab_min_search_count = 100;
*/ */
static int metaslab_df_use_largest_segment = B_FALSE; static int metaslab_df_use_largest_segment = B_FALSE;
/*
* Percentage of all cpus that can be used by the metaslab taskq.
*/
int metaslab_load_pct = 50;
/* /*
* These tunables control how long a metaslab will remain loaded after the * These tunables control how long a metaslab will remain loaded after the
* last allocation from it. A metaslab can't be unloaded until at least * last allocation from it. A metaslab can't be unloaded until at least
@ -856,9 +851,6 @@ metaslab_group_create(metaslab_class_t *mc, vdev_t *vd, int allocators)
zfs_refcount_create_tracked(&mga->mga_alloc_queue_depth); zfs_refcount_create_tracked(&mga->mga_alloc_queue_depth);
} }
mg->mg_taskq = taskq_create("metaslab_group_taskq", metaslab_load_pct,
maxclsyspri, 10, INT_MAX, TASKQ_THREADS_CPU_PCT | TASKQ_DYNAMIC);
return (mg); return (mg);
} }
@ -874,7 +866,6 @@ metaslab_group_destroy(metaslab_group_t *mg)
*/ */
ASSERT(mg->mg_activation_count <= 0); ASSERT(mg->mg_activation_count <= 0);
taskq_destroy(mg->mg_taskq);
avl_destroy(&mg->mg_metaslab_tree); avl_destroy(&mg->mg_metaslab_tree);
mutex_destroy(&mg->mg_lock); mutex_destroy(&mg->mg_lock);
mutex_destroy(&mg->mg_ms_disabled_lock); mutex_destroy(&mg->mg_ms_disabled_lock);
@ -965,7 +956,7 @@ metaslab_group_passivate(metaslab_group_t *mg)
* allocations from taking place and any changes to the vdev tree. * allocations from taking place and any changes to the vdev tree.
*/ */
spa_config_exit(spa, locks & ~(SCL_ZIO - 1), spa); spa_config_exit(spa, locks & ~(SCL_ZIO - 1), spa);
taskq_wait_outstanding(mg->mg_taskq, 0); taskq_wait_outstanding(spa->spa_metaslab_taskq, 0);
spa_config_enter(spa, locks & ~(SCL_ZIO - 1), spa, RW_WRITER); spa_config_enter(spa, locks & ~(SCL_ZIO - 1), spa, RW_WRITER);
metaslab_group_alloc_update(mg); metaslab_group_alloc_update(mg);
for (int i = 0; i < mg->mg_allocators; i++) { for (int i = 0; i < mg->mg_allocators; i++) {
@ -3529,10 +3520,8 @@ metaslab_group_preload(metaslab_group_t *mg)
avl_tree_t *t = &mg->mg_metaslab_tree; avl_tree_t *t = &mg->mg_metaslab_tree;
int m = 0; int m = 0;
if (spa_shutting_down(spa) || !metaslab_preload_enabled) { if (spa_shutting_down(spa) || !metaslab_preload_enabled)
taskq_wait_outstanding(mg->mg_taskq, 0);
return; return;
}
mutex_enter(&mg->mg_lock); mutex_enter(&mg->mg_lock);
@ -3552,8 +3541,9 @@ metaslab_group_preload(metaslab_group_t *mg)
continue; continue;
} }
VERIFY(taskq_dispatch(mg->mg_taskq, metaslab_preload, VERIFY(taskq_dispatch(spa->spa_metaslab_taskq, metaslab_preload,
msp, TQ_SLEEP) != TASKQID_INVALID); msp, TQ_SLEEP | (m <= mg->mg_allocators ? TQ_FRONT : 0))
!= TASKQID_INVALID);
} }
mutex_exit(&mg->mg_lock); mutex_exit(&mg->mg_lock);
} }
@ -6182,6 +6172,9 @@ ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, debug_unload, INT, ZMOD_RW,
ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, preload_enabled, INT, ZMOD_RW, ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, preload_enabled, INT, ZMOD_RW,
"Preload potential metaslabs during reassessment"); "Preload potential metaslabs during reassessment");
ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, preload_limit, UINT, ZMOD_RW,
"Max number of metaslabs per group to preload");
ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, unload_delay, UINT, ZMOD_RW, ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, unload_delay, UINT, ZMOD_RW,
"Delay in txgs after metaslab was last used before unloading"); "Delay in txgs after metaslab was last used before unloading");

View File

@ -169,6 +169,11 @@ static int spa_load_impl(spa_t *spa, spa_import_type_t type,
const char **ereport); const char **ereport);
static void spa_vdev_resilver_done(spa_t *spa); static void spa_vdev_resilver_done(spa_t *spa);
/*
* Percentage of all CPUs that can be used by the metaslab preload taskq.
*/
static uint_t metaslab_preload_pct = 50;
static uint_t zio_taskq_batch_pct = 80; /* 1 thread per cpu in pset */ static uint_t zio_taskq_batch_pct = 80; /* 1 thread per cpu in pset */
static uint_t zio_taskq_batch_tpq; /* threads per taskq */ static uint_t zio_taskq_batch_tpq; /* threads per taskq */
static const boolean_t zio_taskq_sysdc = B_TRUE; /* use SDC scheduling class */ static const boolean_t zio_taskq_sysdc = B_TRUE; /* use SDC scheduling class */
@ -1397,6 +1402,13 @@ spa_activate(spa_t *spa, spa_mode_t mode)
spa->spa_zvol_taskq = taskq_create("z_zvol", 1, defclsyspri, spa->spa_zvol_taskq = taskq_create("z_zvol", 1, defclsyspri,
1, INT_MAX, 0); 1, INT_MAX, 0);
/*
* The taskq to preload metaslabs.
*/
spa->spa_metaslab_taskq = taskq_create("z_metaslab",
metaslab_preload_pct, maxclsyspri, 1, INT_MAX,
TASKQ_DYNAMIC | TASKQ_THREADS_CPU_PCT);
/* /*
* Taskq dedicated to prefetcher threads: this is used to prevent the * Taskq dedicated to prefetcher threads: this is used to prevent the
* pool traverse code from monopolizing the global (and limited) * pool traverse code from monopolizing the global (and limited)
@ -1432,6 +1444,11 @@ spa_deactivate(spa_t *spa)
spa->spa_zvol_taskq = NULL; spa->spa_zvol_taskq = NULL;
} }
if (spa->spa_metaslab_taskq) {
taskq_destroy(spa->spa_metaslab_taskq);
spa->spa_metaslab_taskq = NULL;
}
if (spa->spa_prefetch_taskq) { if (spa->spa_prefetch_taskq) {
taskq_destroy(spa->spa_prefetch_taskq); taskq_destroy(spa->spa_prefetch_taskq);
spa->spa_prefetch_taskq = NULL; spa->spa_prefetch_taskq = NULL;
@ -1704,13 +1721,7 @@ spa_unload(spa_t *spa)
* This ensures that there is no async metaslab prefetching * This ensures that there is no async metaslab prefetching
* while we attempt to unload the spa. * while we attempt to unload the spa.
*/ */
if (spa->spa_root_vdev != NULL) { taskq_wait(spa->spa_metaslab_taskq);
for (int c = 0; c < spa->spa_root_vdev->vdev_children; c++) {
vdev_t *vc = spa->spa_root_vdev->vdev_child[c];
if (vc->vdev_mg != NULL)
taskq_wait(vc->vdev_mg->mg_taskq);
}
}
if (spa->spa_mmp.mmp_thread) if (spa->spa_mmp.mmp_thread)
mmp_thread_stop(spa); mmp_thread_stop(spa);
@ -3920,6 +3931,24 @@ spa_ld_trusted_config(spa_t *spa, spa_import_type_t type,
rvd = mrvd; rvd = mrvd;
spa_config_exit(spa, SCL_ALL, FTAG); spa_config_exit(spa, SCL_ALL, FTAG);
/*
* If 'zpool import' used a cached config, then the on-disk hostid and
* hostname may be different to the cached config in ways that should
* prevent import. Userspace can't discover this without a scan, but
* we know, so we add these values to LOAD_INFO so the caller can know
* the difference.
*
* Note that we have to do this before the config is regenerated,
* because the new config will have the hostid and hostname for this
* host, in readiness for import.
*/
if (nvlist_exists(mos_config, ZPOOL_CONFIG_HOSTID))
fnvlist_add_uint64(spa->spa_load_info, ZPOOL_CONFIG_HOSTID,
fnvlist_lookup_uint64(mos_config, ZPOOL_CONFIG_HOSTID));
if (nvlist_exists(mos_config, ZPOOL_CONFIG_HOSTNAME))
fnvlist_add_string(spa->spa_load_info, ZPOOL_CONFIG_HOSTNAME,
fnvlist_lookup_string(mos_config, ZPOOL_CONFIG_HOSTNAME));
/* /*
* We will use spa_config if we decide to reload the spa or if spa_load * We will use spa_config if we decide to reload the spa or if spa_load
* fails and we rewind. We must thus regenerate the config using the * fails and we rewind. We must thus regenerate the config using the
@ -10132,6 +10161,9 @@ EXPORT_SYMBOL(spa_prop_clear_bootfs);
/* asynchronous event notification */ /* asynchronous event notification */
EXPORT_SYMBOL(spa_event_notify); EXPORT_SYMBOL(spa_event_notify);
ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, preload_pct, UINT, ZMOD_RW,
"Percentage of CPUs to run a metaslab preload taskq");
/* BEGIN CSTYLED */ /* BEGIN CSTYLED */
ZFS_MODULE_PARAM(zfs_spa, spa_, load_verify_shift, UINT, ZMOD_RW, ZFS_MODULE_PARAM(zfs_spa, spa_, load_verify_shift, UINT, ZMOD_RW,
"log2 fraction of arc that can be used by inflight I/Os when " "log2 fraction of arc that can be used by inflight I/Os when "

View File

@ -367,23 +367,24 @@ spa_write_cachefile(spa_t *target, boolean_t removing, boolean_t postsysevent,
* So we have to invent the ZFS_IOC_CONFIG ioctl to grab the configuration * So we have to invent the ZFS_IOC_CONFIG ioctl to grab the configuration
* information for all pool visible within the zone. * information for all pool visible within the zone.
*/ */
nvlist_t * int
spa_all_configs(uint64_t *generation) spa_all_configs(uint64_t *generation, nvlist_t **pools)
{ {
nvlist_t *pools;
spa_t *spa = NULL; spa_t *spa = NULL;
if (*generation == spa_config_generation) if (*generation == spa_config_generation)
return (NULL); return (SET_ERROR(EEXIST));
pools = fnvlist_alloc(); int error = mutex_enter_interruptible(&spa_namespace_lock);
if (error)
return (SET_ERROR(EINTR));
mutex_enter(&spa_namespace_lock); *pools = fnvlist_alloc();
while ((spa = spa_next(spa)) != NULL) { while ((spa = spa_next(spa)) != NULL) {
if (INGLOBALZONE(curproc) || if (INGLOBALZONE(curproc) ||
zone_dataset_visible(spa_name(spa), NULL)) { zone_dataset_visible(spa_name(spa), NULL)) {
mutex_enter(&spa->spa_props_lock); mutex_enter(&spa->spa_props_lock);
fnvlist_add_nvlist(pools, spa_name(spa), fnvlist_add_nvlist(*pools, spa_name(spa),
spa->spa_config); spa->spa_config);
mutex_exit(&spa->spa_props_lock); mutex_exit(&spa->spa_props_lock);
} }
@ -391,7 +392,7 @@ spa_all_configs(uint64_t *generation)
*generation = spa_config_generation; *generation = spa_config_generation;
mutex_exit(&spa_namespace_lock); mutex_exit(&spa_namespace_lock);
return (pools); return (0);
} }
void void

View File

@ -4215,6 +4215,7 @@ vdev_online(spa_t *spa, uint64_t guid, uint64_t flags, vdev_state_t *newstate)
/* XXX - L2ARC 1.0 does not support expansion */ /* XXX - L2ARC 1.0 does not support expansion */
if (vd->vdev_aux) if (vd->vdev_aux)
return (spa_vdev_state_exit(spa, vd, ENOTSUP)); return (spa_vdev_state_exit(spa, vd, ENOTSUP));
spa->spa_ccw_fail_time = 0;
spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE); spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE);
} }

View File

@ -1138,6 +1138,16 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason)
POOL_STATE_L2CACHE) == 0); POOL_STATE_L2CACHE) == 0);
VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_GUID, VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_GUID,
vd->vdev_guid) == 0); vd->vdev_guid) == 0);
/*
* This is merely to facilitate reporting the ashift of the
* cache device through zdb. The actual retrieval of the
* ashift (in vdev_alloc()) uses the nvlist
* spa->spa_l2cache->sav_config (populated in
* spa_ld_open_aux_vdevs()).
*/
VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_ASHIFT,
vd->vdev_ashift) == 0);
} else { } else {
uint64_t txg = 0ULL; uint64_t txg = 0ULL;

View File

@ -273,8 +273,10 @@ vdev_queue_class_add(vdev_queue_t *vq, zio_t *zio)
{ {
zio_priority_t p = zio->io_priority; zio_priority_t p = zio->io_priority;
vq->vq_cqueued |= 1U << p; vq->vq_cqueued |= 1U << p;
if (vdev_queue_class_fifo(p)) if (vdev_queue_class_fifo(p)) {
list_insert_tail(&vq->vq_class[p].vqc_list, zio); list_insert_tail(&vq->vq_class[p].vqc_list, zio);
vq->vq_class[p].vqc_list_numnodes++;
}
else else
avl_add(&vq->vq_class[p].vqc_tree, zio); avl_add(&vq->vq_class[p].vqc_tree, zio);
} }
@ -288,6 +290,7 @@ vdev_queue_class_remove(vdev_queue_t *vq, zio_t *zio)
list_t *list = &vq->vq_class[p].vqc_list; list_t *list = &vq->vq_class[p].vqc_list;
list_remove(list, zio); list_remove(list, zio);
empty = list_is_empty(list); empty = list_is_empty(list);
vq->vq_class[p].vqc_list_numnodes--;
} else { } else {
avl_tree_t *tree = &vq->vq_class[p].vqc_tree; avl_tree_t *tree = &vq->vq_class[p].vqc_tree;
avl_remove(tree, zio); avl_remove(tree, zio);
@ -1069,7 +1072,7 @@ vdev_queue_class_length(vdev_t *vd, zio_priority_t p)
{ {
vdev_queue_t *vq = &vd->vdev_queue; vdev_queue_t *vq = &vd->vdev_queue;
if (vdev_queue_class_fifo(p)) if (vdev_queue_class_fifo(p))
return (list_is_empty(&vq->vq_class[p].vqc_list) == 0); return (vq->vq_class[p].vqc_list_numnodes);
else else
return (avl_numnodes(&vq->vq_class[p].vqc_tree)); return (avl_numnodes(&vq->vq_class[p].vqc_tree));
} }

View File

@ -807,12 +807,12 @@ vdev_rebuild_thread(void *arg)
/* /*
* Calculate the max number of in-flight bytes for top-level * Calculate the max number of in-flight bytes for top-level
* vdev scanning operations (minimum 1MB, maximum 1/4 of * vdev scanning operations (minimum 1MB, maximum 1/2 of
* arc_c_max shared by all top-level vdevs). Limits for the * arc_c_max shared by all top-level vdevs). Limits for the
* issuing phase are done per top-level vdev and are handled * issuing phase are done per top-level vdev and are handled
* separately. * separately.
*/ */
uint64_t limit = (arc_c_max / 4) / MAX(rvd->vdev_children, 1); uint64_t limit = (arc_c_max / 2) / MAX(rvd->vdev_children, 1);
vr->vr_bytes_inflight_max = MIN(limit, MAX(1ULL << 20, vr->vr_bytes_inflight_max = MIN(limit, MAX(1ULL << 20,
zfs_rebuild_vdev_limit * vd->vdev_children)); zfs_rebuild_vdev_limit * vd->vdev_children));

View File

@ -23,6 +23,7 @@
* Copyright (c) 2016 by Delphix. All rights reserved. * Copyright (c) 2016 by Delphix. All rights reserved.
* Copyright (c) 2019 by Lawrence Livermore National Security, LLC. * Copyright (c) 2019 by Lawrence Livermore National Security, LLC.
* Copyright (c) 2021 Hewlett Packard Enterprise Development LP * Copyright (c) 2021 Hewlett Packard Enterprise Development LP
* Copyright 2023 RackTop Systems, Inc.
*/ */
#include <sys/spa.h> #include <sys/spa.h>
@ -591,6 +592,7 @@ vdev_trim_ranges(trim_args_t *ta)
uint64_t extent_bytes_max = ta->trim_extent_bytes_max; uint64_t extent_bytes_max = ta->trim_extent_bytes_max;
uint64_t extent_bytes_min = ta->trim_extent_bytes_min; uint64_t extent_bytes_min = ta->trim_extent_bytes_min;
spa_t *spa = vd->vdev_spa; spa_t *spa = vd->vdev_spa;
int error = 0;
ta->trim_start_time = gethrtime(); ta->trim_start_time = gethrtime();
ta->trim_bytes_done = 0; ta->trim_bytes_done = 0;
@ -610,19 +612,32 @@ vdev_trim_ranges(trim_args_t *ta)
uint64_t writes_required = ((size - 1) / extent_bytes_max) + 1; uint64_t writes_required = ((size - 1) / extent_bytes_max) + 1;
for (uint64_t w = 0; w < writes_required; w++) { for (uint64_t w = 0; w < writes_required; w++) {
int error;
error = vdev_trim_range(ta, VDEV_LABEL_START_SIZE + error = vdev_trim_range(ta, VDEV_LABEL_START_SIZE +
rs_get_start(rs, ta->trim_tree) + rs_get_start(rs, ta->trim_tree) +
(w *extent_bytes_max), MIN(size - (w *extent_bytes_max), MIN(size -
(w * extent_bytes_max), extent_bytes_max)); (w * extent_bytes_max), extent_bytes_max));
if (error != 0) { if (error != 0) {
return (error); goto done;
} }
} }
} }
return (0); done:
/*
* Make sure all TRIMs for this metaslab have completed before
* returning. TRIM zios have lower priority over regular or syncing
* zios, so all TRIM zios for this metaslab must complete before the
* metaslab is re-enabled. Otherwise it's possible write zios to
* this metaslab could cut ahead of still queued TRIM zios for this
* metaslab causing corruption if the ranges overlap.
*/
mutex_enter(&vd->vdev_trim_io_lock);
while (vd->vdev_trim_inflight[0] > 0) {
cv_wait(&vd->vdev_trim_io_cv, &vd->vdev_trim_io_lock);
}
mutex_exit(&vd->vdev_trim_io_lock);
return (error);
} }
static void static void
@ -941,11 +956,6 @@ vdev_trim_thread(void *arg)
} }
spa_config_exit(spa, SCL_CONFIG, FTAG); spa_config_exit(spa, SCL_CONFIG, FTAG);
mutex_enter(&vd->vdev_trim_io_lock);
while (vd->vdev_trim_inflight[0] > 0) {
cv_wait(&vd->vdev_trim_io_cv, &vd->vdev_trim_io_lock);
}
mutex_exit(&vd->vdev_trim_io_lock);
range_tree_destroy(ta.trim_tree); range_tree_destroy(ta.trim_tree);

View File

@ -1582,8 +1582,9 @@ zfs_ioc_pool_configs(zfs_cmd_t *zc)
nvlist_t *configs; nvlist_t *configs;
int error; int error;
if ((configs = spa_all_configs(&zc->zc_cookie)) == NULL) error = spa_all_configs(&zc->zc_cookie, &configs);
return (SET_ERROR(EEXIST)); if (error)
return (error);
error = put_nvlist(zc, configs); error = put_nvlist(zc, configs);

View File

@ -1094,6 +1094,15 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp,
ASSERT(!outzfsvfs->z_replay); ASSERT(!outzfsvfs->z_replay);
/*
* Block cloning from an unencrypted dataset into an encrypted
* dataset and vice versa is not supported.
*/
if (inos->os_encrypted != outos->os_encrypted) {
zfs_exit_two(inzfsvfs, outzfsvfs, FTAG);
return (SET_ERROR(EXDEV));
}
error = zfs_verify_zp(inzp); error = zfs_verify_zp(inzp);
if (error == 0) if (error == 0)
error = zfs_verify_zp(outzp); error = zfs_verify_zp(outzp);
@ -1206,6 +1215,19 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp,
goto unlock; goto unlock;
} }
/*
* If we are copying only one block and it is smaller than recordsize
* property, do not allow destination to grow beyond one block if it
* is not there yet. Otherwise the destination will get stuck with
* that block size forever, that can be as small as 512 bytes, no
* matter how big the destination grow later.
*/
if (len <= inblksz && inblksz < outzfsvfs->z_max_blksz &&
outzp->z_size <= inblksz && outoff + len > inblksz) {
error = SET_ERROR(EINVAL);
goto unlock;
}
error = zn_rlimit_fsize(outoff + len); error = zn_rlimit_fsize(outoff + len);
if (error != 0) { if (error != 0) {
goto unlock; goto unlock;

View File

@ -145,7 +145,7 @@ static int zil_nocacheflush = 0;
* Any writes above that will be executed with lower (asynchronous) priority * Any writes above that will be executed with lower (asynchronous) priority
* to limit potential SLOG device abuse by single active ZIL writer. * to limit potential SLOG device abuse by single active ZIL writer.
*/ */
static uint64_t zil_slog_bulk = 768 * 1024; static uint64_t zil_slog_bulk = 64 * 1024 * 1024;
static kmem_cache_t *zil_lwb_cache; static kmem_cache_t *zil_lwb_cache;
static kmem_cache_t *zil_zcw_cache; static kmem_cache_t *zil_zcw_cache;
@ -1958,26 +1958,28 @@ zil_max_log_data(zilog_t *zilog, size_t hdrsize)
/* /*
* Maximum amount of log space we agree to waste to reduce number of * Maximum amount of log space we agree to waste to reduce number of
* WR_NEED_COPY chunks to reduce zl_get_data() overhead (~12%). * WR_NEED_COPY chunks to reduce zl_get_data() overhead (~6%).
*/ */
static inline uint64_t static inline uint64_t
zil_max_waste_space(zilog_t *zilog) zil_max_waste_space(zilog_t *zilog)
{ {
return (zil_max_log_data(zilog, sizeof (lr_write_t)) / 8); return (zil_max_log_data(zilog, sizeof (lr_write_t)) / 16);
} }
/* /*
* Maximum amount of write data for WR_COPIED. For correctness, consumers * Maximum amount of write data for WR_COPIED. For correctness, consumers
* must fall back to WR_NEED_COPY if we can't fit the entire record into one * must fall back to WR_NEED_COPY if we can't fit the entire record into one
* maximum sized log block, because each WR_COPIED record must fit in a * maximum sized log block, because each WR_COPIED record must fit in a
* single log block. For space efficiency, we want to fit two records into a * single log block. Below that it is a tradeoff of additional memory copy
* max-sized log block. * and possibly worse log space efficiency vs additional range lock/unlock.
*/ */
static uint_t zil_maxcopied = 7680;
uint64_t uint64_t
zil_max_copied_data(zilog_t *zilog) zil_max_copied_data(zilog_t *zilog)
{ {
return ((zilog->zl_max_block_size - sizeof (zil_chain_t)) / 2 - uint64_t max_data = zil_max_log_data(zilog, sizeof (lr_write_t));
sizeof (lr_write_t)); return (MIN(max_data, zil_maxcopied));
} }
/* /*
@ -4226,3 +4228,6 @@ ZFS_MODULE_PARAM(zfs_zil, zil_, slog_bulk, U64, ZMOD_RW,
ZFS_MODULE_PARAM(zfs_zil, zil_, maxblocksize, UINT, ZMOD_RW, ZFS_MODULE_PARAM(zfs_zil, zil_, maxblocksize, UINT, ZMOD_RW,
"Limit in bytes of ZIL log block size"); "Limit in bytes of ZIL log block size");
ZFS_MODULE_PARAM(zfs_zil, zil_, maxcopied, UINT, ZMOD_RW,
"Limit in bytes WR_COPIED size");

View File

@ -158,23 +158,22 @@ zio_init(void)
zio_link_cache = kmem_cache_create("zio_link_cache", zio_link_cache = kmem_cache_create("zio_link_cache",
sizeof (zio_link_t), 0, NULL, NULL, NULL, NULL, NULL, 0); sizeof (zio_link_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
/*
* For small buffers, we want a cache for each multiple of
* SPA_MINBLOCKSIZE. For larger buffers, we want a cache
* for each quarter-power of 2.
*/
for (c = 0; c < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT; c++) { for (c = 0; c < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT; c++) {
size_t size = (c + 1) << SPA_MINBLOCKSHIFT; size_t size = (c + 1) << SPA_MINBLOCKSHIFT;
size_t align, cflags, data_cflags;
char name[32];
/*
* Create cache for each half-power of 2 size, starting from
* SPA_MINBLOCKSIZE. It should give us memory space efficiency
* of ~7/8, sufficient for transient allocations mostly using
* these caches.
*/
size_t p2 = size; size_t p2 = size;
size_t align = 0;
size_t data_cflags, cflags;
data_cflags = KMC_NODEBUG;
cflags = (zio_exclude_metadata || size > zio_buf_debug_limit) ?
KMC_NODEBUG : 0;
while (!ISP2(p2)) while (!ISP2(p2))
p2 &= p2 - 1; p2 &= p2 - 1;
if (!IS_P2ALIGNED(size, p2 / 2))
continue;
#ifndef _KERNEL #ifndef _KERNEL
/* /*
@ -185,47 +184,37 @@ zio_init(void)
*/ */
if (arc_watch && !IS_P2ALIGNED(size, PAGESIZE)) if (arc_watch && !IS_P2ALIGNED(size, PAGESIZE))
continue; continue;
/*
* Here's the problem - on 4K native devices in userland on
* Linux using O_DIRECT, buffers must be 4K aligned or I/O
* will fail with EINVAL, causing zdb (and others) to coredump.
* Since userland probably doesn't need optimized buffer caches,
* we just force 4K alignment on everything.
*/
align = 8 * SPA_MINBLOCKSIZE;
#else
if (size < PAGESIZE) {
align = SPA_MINBLOCKSIZE;
} else if (IS_P2ALIGNED(size, p2 >> 2)) {
align = PAGESIZE;
}
#endif #endif
if (align != 0) { if (IS_P2ALIGNED(size, PAGESIZE))
char name[36]; align = PAGESIZE;
if (cflags == data_cflags) { else
/* align = 1 << (highbit64(size ^ (size - 1)) - 1);
* Resulting kmem caches would be identical.
* Save memory by creating only one.
*/
(void) snprintf(name, sizeof (name),
"zio_buf_comb_%lu", (ulong_t)size);
zio_buf_cache[c] = kmem_cache_create(name,
size, align, NULL, NULL, NULL, NULL, NULL,
cflags);
zio_data_buf_cache[c] = zio_buf_cache[c];
continue;
}
(void) snprintf(name, sizeof (name), "zio_buf_%lu",
(ulong_t)size);
zio_buf_cache[c] = kmem_cache_create(name, size,
align, NULL, NULL, NULL, NULL, NULL, cflags);
(void) snprintf(name, sizeof (name), "zio_data_buf_%lu", cflags = (zio_exclude_metadata || size > zio_buf_debug_limit) ?
(ulong_t)size); KMC_NODEBUG : 0;
zio_data_buf_cache[c] = kmem_cache_create(name, size, data_cflags = KMC_NODEBUG;
align, NULL, NULL, NULL, NULL, NULL, data_cflags); if (cflags == data_cflags) {
/*
* Resulting kmem caches would be identical.
* Save memory by creating only one.
*/
(void) snprintf(name, sizeof (name),
"zio_buf_comb_%lu", (ulong_t)size);
zio_buf_cache[c] = kmem_cache_create(name, size, align,
NULL, NULL, NULL, NULL, NULL, cflags);
zio_data_buf_cache[c] = zio_buf_cache[c];
continue;
} }
(void) snprintf(name, sizeof (name), "zio_buf_%lu",
(ulong_t)size);
zio_buf_cache[c] = kmem_cache_create(name, size, align,
NULL, NULL, NULL, NULL, NULL, cflags);
(void) snprintf(name, sizeof (name), "zio_data_buf_%lu",
(ulong_t)size);
zio_data_buf_cache[c] = kmem_cache_create(name, size, align,
NULL, NULL, NULL, NULL, NULL, data_cflags);
} }
while (--c != 0) { while (--c != 0) {

View File

@ -24,6 +24,7 @@ BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n)
BuildArch: noarch BuildArch: noarch
Requires: dkms >= 2.2.0.3 Requires: dkms >= 2.2.0.3
Requires(pre): dkms >= 2.2.0.3
Requires(post): dkms >= 2.2.0.3 Requires(post): dkms >= 2.2.0.3
Requires(preun): dkms >= 2.2.0.3 Requires(preun): dkms >= 2.2.0.3
Requires: gcc, make, perl, diffutils Requires: gcc, make, perl, diffutils
@ -68,9 +69,92 @@ fi
%defattr(-,root,root) %defattr(-,root,root)
/usr/src/%{module}-%{version} /usr/src/%{module}-%{version}
%pre
echo "Running pre installation script: $0. Parameters: $*"
# We don't want any other versions lingering around in dkms.
# Tests with 'dnf' showed that in case of reinstall, or upgrade
# the preun scriptlet removed the version we are trying to install.
# Because of this, find all zfs dkms sources in /var/lib/dkms and
# remove them, if we find a matching version in dkms.
dkms_root=/var/lib/dkms
if [ -d ${dkms_root}/%{module} ]; then
cd ${dkms_root}/%{module}
for x in [[:digit:]]*; do
[ -d "$x" ] || continue
otherver="$x"
opath="${dkms_root}/%{module}/${otherver}"
if [ "$otherver" != %{version} ]; then
# This is a workaround for a broken 'dkms status', we caused in a previous version.
# One day it might be not needed anymore, but it does not hurt to keep it.
if dkms status -m %{module} -v "$otherver" 2>&1 | grep "${opath}/source/dkms.conf does not exist"
then
echo "ERROR: dkms status is broken!" >&2
if [ -L "${opath}/source" -a ! -d "${opath}/source" ]
then
echo "Trying to fix it by removing the symlink: ${opath}/source" >&2
echo "You should manually remove ${opath}" >&2
rm -f "${opath}/source" || echo "Removal failed!" >&2
fi
fi
if [ `dkms status -m %{module} -v "$otherver" | grep -c %{module}` -gt 0 ]; then
echo "Removing old %{module} dkms modules version $otherver from all kernels."
dkms remove -m %{module} -v "$otherver" --all ||:
fi
fi
done
fi
# Uninstall this version of zfs dkms modules before installation of the package.
if [ `dkms status -m %{module} -v %{version} | grep -c %{module}` -gt 0 ]; then
echo "Removing %{module} dkms modules version %{version} from all kernels."
dkms remove -m %{module} -v %{version} --all ||:
fi
%post
echo "Running post installation script: $0. Parameters: $*"
# Add the module to dkms, as reccommended in the dkms man page.
# This is generally rpm specfic.
# But this also may help, if we have a broken 'dkms status'.
# Because, if the sources are available and only the symlink pointing
# to them is missing, this will resolve the situation
echo "Adding %{module} dkms modules version %{version} to dkms."
dkms add -m %{module} -v %{version} %{!?not_rpm:--rpm_safe_upgrade} ||:
# After installing the package, dkms install this zfs version for the current kernel.
# Force the overwriting of old modules to avoid diff warnings in dkms status.
# Or in case of a downgrade to overwrite newer versions.
# Or if some other backed up versions have been restored before.
echo "Installing %{module} dkms modules version %{version} for the current kernel."
dkms install --force -m %{module} -v %{version} ||:
%preun %preun
dkms remove -m %{module} -v %{version} --all dkms_root="/var/lib/dkms/%{module}/%{version}"
echo "Running pre uninstall script: $0. Parameters: $*"
# In case of upgrade we do nothing. See above comment in pre hook.
if [ "$1" = "1" -o "$1" = "upgrade" ] ; then
echo "This is an upgrade. Skipping pre uninstall action."
exit 0
fi
%posttrans # Check if we uninstall the package. In that case remove the dkms modules.
/usr/lib/dkms/common.postinst %{module} %{version} # '0' is the value for the first parameter for rpm packages.
# 'remove' or 'purge' are the possible names for deb packages.
if [ "$1" = "0" -o "$1" = "remove" -o "$1" = "purge" ] ; then
if [ `dkms status -m %{module} -v %{version} | grep -c %{module}` -gt 0 ]; then
echo "Removing %{module} dkms modules version %{version} from all kernels."
dkms remove -m %{module} -v %{version} --all %{!?not_rpm:--rpm_safe_upgrade} && exit 0
fi
# If removing the modules failed, it might be because of the broken 'dkms status'.
if dkms status -m %{module} -v %{version} 2>&1 | grep "${dkms_root}/source/dkms.conf does not exist"
then
echo "ERROR: dkms status is broken!" >&2
echo "You should manually remove ${dkms_root}" >&2
echo "WARNING: installed modules in /lib/modules/`uname -r`/extra could not be removed automatically!" >&2
fi
else
echo "Script parameter $1 did not match any removal condition."
fi
exit 0

Some files were not shown because too many files have changed in this diff Show More