diff --git a/.editorconfig b/.editorconfig
new file mode 100644
index 0000000000..0f4b96a830
--- /dev/null
+++ b/.editorconfig
@@ -0,0 +1,10 @@
+root = true
+
+[*]
+end_of_line = lf
+insert_final_newline = true
+trim_trailing_whitespace = true
+
+[*.{c,h}]
+tab_width = 8
+indent_style = tab
diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md
index 004711ae78..f28a747e82 100644
--- a/.github/CONTRIBUTING.md
+++ b/.github/CONTRIBUTING.md
@@ -1,10 +1,12 @@
-# Contributing to ZFS on Linux
-

+# Contributing to OpenZFS
+
+
+
*First of all, thank you for taking the time to contribute!*
-By using the following guidelines, you can help us make ZFS on Linux even
-better.
+By using the following guidelines, you can help us make OpenZFS even better.
## Table Of Contents
[What should I know before I get
@@ -32,17 +34,17 @@ started?](#what-should-i-know-before-i-get-started)
Helpful resources
- * [ZFS on Linux wiki](https://github.com/zfsonlinux/zfs/wiki)
- * [OpenZFS Documentation](http://open-zfs.org/wiki/Developer_resources)
- * [Git and GitHub for beginners](https://github.com/zfsonlinux/zfs/wiki/Git-and-GitHub-for-beginners)
+ * [OpenZFS Documentation](https://openzfs.github.io/openzfs-docs/)
+ * [OpenZFS Developer Resources](http://open-zfs.org/wiki/Developer_resources)
+ * [Git and GitHub for beginners](https://openzfs.github.io/openzfs-docs/Developer%20Resources/Git%20and%20GitHub%20for%20beginners.html)
## What should I know before I get started?
### Get ZFS
You can build zfs packages by following [these
-instructions](https://github.com/zfsonlinux/zfs/wiki/Building-ZFS),
+instructions](https://openzfs.github.io/openzfs-docs/Developer%20Resources/Building%20ZFS.html),
or install stable packages from [your distribution's
-repository](https://github.com/zfsonlinux/zfs/wiki/Getting-Started).
+repository](https://openzfs.github.io/openzfs-docs/Getting%20Started/index.html).
### Debug ZFS
A variety of methods and tools are available to aid ZFS developers.
@@ -51,29 +53,30 @@ configure option should be set. This will enable additional correctness
checks and all the ASSERTs to help quickly catch potential issues.
In addition, there are numerous utilities and debugging files which
-provide visibility in to the inner workings of ZFS. The most useful
-of these tools are discussed in detail on the [debugging ZFS wiki
-page](https://github.com/zfsonlinux/zfs/wiki/Debugging).
+provide visibility into the inner workings of ZFS. The most useful
+of these tools are discussed in detail on the [Troubleshooting
+page](https://openzfs.github.io/openzfs-docs/Basic%20Concepts/Troubleshooting.html).
### Where can I ask for help?
-[The zfs-discuss mailing list or IRC](http://list.zfsonlinux.org)
-are the best places to ask for help. Please do not file support requests
-on the GitHub issue tracker.
+The [zfs-discuss mailing
+list](https://openzfs.github.io/openzfs-docs/Project%20and%20Community/Mailing%20Lists.html)
+or IRC are the best places to ask for help. Please do not file
+support requests on the GitHub issue tracker.
## How Can I Contribute?
### Reporting Bugs
*Please* contact us via the [zfs-discuss mailing
-list or IRC](http://list.zfsonlinux.org) if you aren't
-certain that you are experiencing a bug.
+list](https://openzfs.github.io/openzfs-docs/Project%20and%20Community/Mailing%20Lists.html)
+or IRC if you aren't certain that you are experiencing a bug.
If you run into an issue, please search our [issue
-tracker](https://github.com/zfsonlinux/zfs/issues) *first* to ensure the
+tracker](https://github.com/openzfs/zfs/issues) *first* to ensure the
issue hasn't been reported before. Open a new issue only if you haven't
found anything similar to your issue.
You can open a new issue and search existing issues using the public [issue
-tracker](https://github.com/zfsonlinux/zfs/issues).
+tracker](https://github.com/openzfs/zfs/issues).
#### When opening a new issue, please include the following information at the top of the issue:
* What distribution (with version) you are using.
@@ -105,13 +108,13 @@ information like:
* Stack traces which may be logged to `dmesg`.
### Suggesting Enhancements
-ZFS on Linux is a widely deployed production filesystem which is under
-active development. The team's primary focus is on fixing known issues,
-improving performance, and adding compelling new features.
+OpenZFS is a widely deployed production filesystem which is under active
+development. The team's primary focus is on fixing known issues, improving
+performance, and adding compelling new features.
You can view the list of proposed features
-by filtering the issue tracker by the ["Feature"
-label](https://github.com/zfsonlinux/zfs/issues?q=is%3Aopen+is%3Aissue+label%3AFeature).
+by filtering the issue tracker by the ["Type: Feature"
+label](https://github.com/openzfs/zfs/issues?q=is%3Aopen+is%3Aissue+label%3A%22Type%3A+Feature%22).
If you have an idea for a feature first check this list. If your idea already
appears then add a +1 to the top most comment, this helps us gauge interest
in that feature.
@@ -120,8 +123,11 @@ Otherwise, open a new issue and describe your proposed feature. Why is this
feature needed? What problem does it solve?
### Pull Requests
-* All pull requests must be based on the current master branch and apply
-without conflicts.
+
+#### General
+
+* All pull requests, except backports and releases, must be based on the current master branch
+and should apply without conflicts.
* Please attempt to limit pull requests to a single commit which resolves
one specific issue.
* Make sure your commit messages are in the correct format. See the
@@ -133,16 +139,28 @@ logically independent patches which build on each other. This makes large
changes easier to review and approve which speeds up the merging process.
* Try to keep pull requests simple. Simple code with comments is much easier
to review and approve.
+* All proposed changes must be approved by an OpenZFS organization member.
+* If you have an idea you'd like to discuss or which requires additional testing, consider opening it as a draft pull request.
+Once everything is in good shape and the details have been worked out you can remove its draft status.
+Any required reviews can then be finalized and the pull request merged.
+
+#### Tests and Benchmarks
+* Every pull request will be tested by the buildbot on multiple platforms by running the [zfs-tests.sh and zloop.sh](
+https://openzfs.github.io/openzfs-docs/Developer%20Resources/Building%20ZFS.html#running-zloop-sh-and-zfs-tests-sh) test suites.
+* To verify your changes conform to the [style guidelines](
+https://github.com/openzfs/zfs/blob/master/.github/CONTRIBUTING.md#style-guides
+), please run `make checkstyle` and resolve any warnings.
+* Static code analysis of each pull request is performed by the buildbot; run `make lint` to check your changes.
* Test cases should be provided when appropriate.
+This includes making sure new features have adequate code coverage.
* If your pull request improves performance, please include some benchmarks.
* The pull request must pass all required [ZFS
Buildbot](http://build.zfsonlinux.org/) builders before
being accepted. If you are experiencing intermittent TEST
builder failures, you may be experiencing a [test suite
-issue](https://github.com/zfsonlinux/zfs/issues?q=is%3Aissue+is%3Aopen+label%3A%22Test+Suite%22).
-There are also various [buildbot options](https://github.com/zfsonlinux/zfs/wiki/Buildbot-Options)
+issue](https://github.com/openzfs/zfs/issues?q=is%3Aissue+is%3Aopen+label%3A%22Type%3A+Test+Suite%22).
+There are also various [buildbot options](https://openzfs.github.io/openzfs-docs/Developer%20Resources/Buildbot%20Options.html)
to control how changes are tested.
-* All proposed changes must be approved by a ZFS on Linux organization member.
### Testing
All help is appreciated! If you're in a position to run the latest code
@@ -152,16 +170,41 @@ range of realistic workloads, configurations and architectures we're better
able quickly identify and resolve potential issues.
Users can also run the [ZFS Test
-Suite](https://github.com/zfsonlinux/zfs/tree/master/tests) on their systems
+Suite](https://github.com/openzfs/zfs/tree/master/tests) on their systems
to verify ZFS is behaving as intended.
## Style Guides
+### Repository Structure
+
+OpenZFS uses a standardised branching structure.
+- The "development and main branch", is the branch all development should be based on.
+- "Release branches" contain the latest released code for said version.
+- "Staging branches" contain selected commits prior to being released.
+
+**Branch Names:**
+- Development and Main branch: `master`
+- Release branches: `zfs-$VERSION-release`
+- Staging branches: `zfs-$VERSION-staging`
+
+`$VERSION` should be replaced with the `major.minor` version number.
+_(This is the version number without the `.patch` version at the end)_
+
### Coding Conventions
We currently use [C Style and Coding Standards for
SunOS](http://www.cis.upenn.edu/%7Elee/06cse480/data/cstyle.ms.pdf) as our
coding convention.
+This repository has an `.editorconfig` file. If your editor [supports
+editorconfig](https://editorconfig.org/#download), it will
+automatically respect most of this project's whitespace preferences.
+
+Additionally, Git can help warn on whitespace problems as well:
+
+```
+git config --local core.whitespace trailing-space,space-before-tab,indent-with-non-tab,-tab-in-indent
+```
+
### Commit Message Formats
#### New Changes
Commit messages for new changes must meet the following guidelines:
@@ -187,70 +230,6 @@ attempting to solve.
Signed-off-by: Contributor
```
-#### OpenZFS Patch Ports
-If you are porting OpenZFS patches, the commit message must meet
-the following guidelines:
-* The first line must be the summary line from the most important OpenZFS commit being ported.
-It must begin with `OpenZFS dddd, dddd - ` where `dddd` are OpenZFS issue numbers.
-* Provides a `Authored by:` line to attribute each patch for each original author.
-* Provides the `Reviewed by:` and `Approved by:` lines from each original
-OpenZFS commit.
-* Provides a `Ported-by:` line with the developer's name followed by
-their email for each OpenZFS commit.
-* Provides a `OpenZFS-issue:` line with link for each original illumos
-issue.
-* Provides a `OpenZFS-commit:` line with link for each original OpenZFS commit.
-* If necessary, provide some porting notes to describe any deviations from
-the original OpenZFS commits.
-
-An example OpenZFS patch port commit message for a single patch is provided
-below.
-```
-OpenZFS 1234 - Summary from the original OpenZFS commit
-
-Authored by: Original Author
-Reviewed by: Reviewer One
-Reviewed by: Reviewer Two
-Approved by: Approver One
-Ported-by: ZFS Contributor
-
-Provide some porting notes here if necessary.
-
-OpenZFS-issue: https://www.illumos.org/issues/1234
-OpenZFS-commit: https://github.com/openzfs/openzfs/commit/abcd1234
-```
-
-If necessary, multiple OpenZFS patches can be combined in a single port.
-This is useful when you are porting a new patch and its subsequent bug
-fixes. An example commit message is provided below.
-```
-OpenZFS 1234, 5678 - Summary of most important OpenZFS commit
-
-1234 Summary from original OpenZFS commit for 1234
-
-Authored by: Original Author
-Reviewed by: Reviewer Two
-Approved by: Approver One
-Ported-by: ZFS Contributor
-
-Provide some porting notes here for 1234 if necessary.
-
-OpenZFS-issue: https://www.illumos.org/issues/1234
-OpenZFS-commit: https://github.com/openzfs/openzfs/commit/abcd1234
-
-5678 Summary from original OpenZFS commit for 5678
-
-Authored by: Original Author2
-Reviewed by: Reviewer One
-Approved by: Approver Two
-Ported-by: ZFS Contributor
-
-Provide some porting notes here for 5678 if necessary.
-
-OpenZFS-issue: https://www.illumos.org/issues/5678
-OpenZFS-commit: https://github.com/openzfs/openzfs/commit/efgh5678
-```
-
#### Coverity Defect Fixes
If you are submitting a fix to a
[Coverity defect](https://scan.coverity.com/projects/zfsonlinux-zfs),
@@ -290,3 +269,13 @@ Git can append the `Signed-off-by` line to your commit messages. Simply
provide the `-s` or `--signoff` option when performing a `git commit`.
For more information about writing commit messages, visit [How to Write
a Git Commit Message](https://chris.beams.io/posts/git-commit/).
+
+#### Co-authored By
+If someone else had part in your pull request, please add the following to the commit:
+`Co-authored-by: Name <email@address.example>`
+This is useful if their authorship was lost during squashing, rebasing, etc.,
+but may be used in any situation where there are co-authors.
+
+The email address used here should be the same as on the GitHub profile of said user.
+If said user does not have their email address public, please use the following instead:
+`Co-authored-by: Name <[username]@users.noreply.github.com>`
diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md
deleted file mode 100644
index e77ab39f35..0000000000
--- a/.github/ISSUE_TEMPLATE.md
+++ /dev/null
@@ -1,48 +0,0 @@
-
-
-
-
-### System information
-
-Type | Version/Name
- --- | ---
-Distribution Name |
-Distribution Version |
-Linux Kernel |
-Architecture |
-ZFS Version |
-SPL Version |
-
-
-### Describe the problem you're observing
-
-### Describe how to reproduce the problem
-
-### Include any warning/errors/backtraces from the system logs
-
diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
new file mode 100644
index 0000000000..92d0e03a9b
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -0,0 +1,55 @@
+---
+name: Bug report
+about: Create a report to help us improve OpenZFS
+title: ''
+labels: 'Type: Defect'
+assignees: ''
+
+---
+
+
+
+
+
+### System information
+
+Type | Version/Name
+ --- | ---
+Distribution Name |
+Distribution Version |
+Kernel Version |
+Architecture |
+OpenZFS Version |
+
+
+### Describe the problem you're observing
+
+### Describe how to reproduce the problem
+
+### Include any warning/errors/backtraces from the system logs
+
+
diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml
new file mode 100644
index 0000000000..ecaaa18210
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/config.yml
@@ -0,0 +1,14 @@
+blank_issues_enabled: false
+contact_links:
+ - name: OpenZFS Questions
+ url: https://github.com/openzfs/zfs/discussions/new
+ about: Ask the community for help
+ - name: OpenZFS Community Support Mailing list (Linux)
+ url: https://zfsonlinux.topicbox.com/groups/zfs-discuss
+ about: Get community support for OpenZFS on Linux
+ - name: FreeBSD Community Support Mailing list
+ url: https://lists.freebsd.org/mailman/listinfo/freebsd-fs
+ about: Get community support for OpenZFS on FreeBSD
+ - name: OpenZFS on IRC
+ url: https://web.libera.chat/#openzfs
+ about: Use IRC to get community support for OpenZFS
diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md
new file mode 100644
index 0000000000..9b50a4a3d9
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/feature_request.md
@@ -0,0 +1,33 @@
+---
+name: Feature request
+about: Suggest a feature for OpenZFS
+title: ''
+labels: 'Type: Feature'
+assignees: ''
+
+---
+
+
+
+### Describe the feature you would like to see added to OpenZFS
+
+
+
+### How will this feature improve OpenZFS?
+
+
+
+### Additional context
+
+
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
index 699ca90780..465ee182c4 100644
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -4,7 +4,7 @@
### Motivation and Context
@@ -19,6 +19,7 @@ https://github.com/zfsonlinux/zfs/wiki/Buildbot-Options
+
### Types of changes
@@ -27,14 +28,15 @@ https://github.com/zfsonlinux/zfs/wiki/Buildbot-Options
- [ ] Performance enhancement (non-breaking change which improves efficiency)
- [ ] Code cleanup (non-breaking change which makes code smaller or more readable)
- [ ] Breaking change (fix or feature that would cause existing functionality to change)
+- [ ] Library ABI change (libzfs, libzfs\_core, libnvpair, libuutil and libzfsbootenv)
- [ ] Documentation (a change to man pages or other documentation)
### Checklist:
-- [ ] My code follows the ZFS on Linux [code style requirements](https://github.com/zfsonlinux/zfs/blob/master/.github/CONTRIBUTING.md#coding-conventions).
+- [ ] My code follows the OpenZFS [code style requirements](https://github.com/openzfs/zfs/blob/master/.github/CONTRIBUTING.md#coding-conventions).
- [ ] I have updated the documentation accordingly.
-- [ ] I have read the [**contributing** document](https://github.com/zfsonlinux/zfs/blob/master/.github/CONTRIBUTING.md).
-- [ ] I have added [tests](https://github.com/zfsonlinux/zfs/tree/master/tests) to cover my changes.
-- [ ] All new and existing tests passed.
-- [ ] All commit messages are properly formatted and contain [`Signed-off-by`](https://github.com/zfsonlinux/zfs/blob/master/.github/CONTRIBUTING.md#signed-off-by).
+- [ ] I have read the [**contributing** document](https://github.com/openzfs/zfs/blob/master/.github/CONTRIBUTING.md).
+- [ ] I have added [tests](https://github.com/openzfs/zfs/tree/master/tests) to cover my changes.
+- [ ] I have run the ZFS Test Suite with this change applied.
+- [ ] All commit messages are properly formatted and contain [`Signed-off-by`](https://github.com/openzfs/zfs/blob/master/.github/CONTRIBUTING.md#signed-off-by).
diff --git a/.github/codecov.yml b/.github/codecov.yml
index 9ae962639e..6d4932680e 100644
--- a/.github/codecov.yml
+++ b/.github/codecov.yml
@@ -4,7 +4,8 @@ codecov:
after_n_builds: 2 # user and kernel
coverage:
- precision: 2 # 2 digits of precision
+ precision: 0 # 0 decimals of precision
+ round: nearest # Round to nearest precision point
range: "50...90" # red -> yellow -> green
status:
@@ -20,3 +21,5 @@ comment:
layout: "reach, diff, flags, footer"
behavior: once # update if exists; post new; skip if deleted
require_changes: yes # only post when coverage changes
+
+# ignore: Please place any ignores in config/ax_code_coverage.m4 instead
diff --git a/.github/no-response.yml b/.github/no-response.yml
new file mode 100644
index 0000000000..ef2656ec96
--- /dev/null
+++ b/.github/no-response.yml
@@ -0,0 +1,13 @@
+# Configuration for probot-no-response - https://github.com/probot/no-response
+
+# Number of days of inactivity before an Issue is closed for lack of response
+daysUntilClose: 31
+# Label requiring a response
+responseRequiredLabel: "Status: Feedback requested"
+# Comment to post when closing an Issue for lack of response. Set to `false` to disable
+closeComment: >
+ This issue has been automatically closed because there has been no response
+ to our request for more information from the original author. With only the
+ information that is currently in the issue, we don't have enough information
+ to take action. Please reach out if you have or find the answers we need so
+ that we can investigate further.
diff --git a/.github/stale.yml b/.github/stale.yml
new file mode 100644
index 0000000000..895cc8e803
--- /dev/null
+++ b/.github/stale.yml
@@ -0,0 +1,26 @@
+# Number of days of inactivity before an issue becomes stale
+daysUntilStale: 365
+# Number of days of inactivity before a stale issue is closed
+daysUntilClose: 90
+# Limit to only `issues` or `pulls`
+only: issues
+# Issues with these labels will never be considered stale
+exemptLabels:
+ - "Type: Feature"
+ - "Bot: Not Stale"
+ - "Status: Work in Progress"
+# Set to true to ignore issues in a project (defaults to false)
+exemptProjects: true
+# Set to true to ignore issues in a milestone (defaults to false)
+exemptMilestones: true
+# Set to true to ignore issues with an assignee (defaults to false)
+exemptAssignees: true
+# Label to use when marking an issue as stale
+staleLabel: "Status: Stale"
+# Comment to post when marking an issue as stale. Set to `false` to disable
+markComment: >
+ This issue has been automatically marked as "stale" because it has not had
+ any activity for a while. It will be closed in 90 days if no further activity occurs.
+ Thank you for your contributions.
+# Limit the number of actions per hour, from 1-30. Default is 30
+limitPerRun: 6
diff --git a/.github/suppressions.txt b/.github/suppressions.txt
deleted file mode 100644
index f9508a24b4..0000000000
--- a/.github/suppressions.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-preprocessorErrorDirective:./module/zfs/vdev_raidz_math_avx512f.c:243
-preprocessorErrorDirective:./module/zfs/vdev_raidz_math_sse2.c:266
-
diff --git a/.github/workflows/checkstyle.yaml b/.github/workflows/checkstyle.yaml
new file mode 100644
index 0000000000..553d5df397
--- /dev/null
+++ b/.github/workflows/checkstyle.yaml
@@ -0,0 +1,50 @@
+name: checkstyle
+
+on:
+ push:
+ pull_request:
+
+jobs:
+ checkstyle:
+ runs-on: ubuntu-20.04
+ steps:
+ - uses: actions/checkout@v2
+ with:
+ ref: ${{ github.event.pull_request.head.sha }}
+ - name: Install dependencies
+ run: |
+ sudo apt-get update
+ sudo apt-get install --yes -qq build-essential autoconf libtool gawk alien fakeroot linux-headers-$(uname -r)
+ sudo apt-get install --yes -qq zlib1g-dev uuid-dev libattr1-dev libblkid-dev libselinux-dev libudev-dev libssl-dev python-dev python-setuptools python-cffi python3 python3-dev python3-setuptools python3-cffi
+ # packages for tests
+ sudo apt-get install --yes -qq parted lsscsi ksh attr acl nfs-kernel-server fio
+ sudo apt-get install --yes -qq mandoc cppcheck pax-utils devscripts
+ sudo -E pip --quiet install flake8
+ - name: Prepare
+ run: |
+ sh ./autogen.sh
+ ./configure
+ make -j$(nproc)
+ - name: Checkstyle
+ run: |
+ make checkstyle
+ - name: Lint
+ run: |
+ make lint
+ - name: CheckABI
+ id: CheckABI
+ run: |
+ sudo docker run -v $(pwd):/source ghcr.io/openzfs/libabigail make checkabi
+ - name: StoreABI
+ if: failure() && steps.CheckABI.outcome == 'failure'
+ run: |
+ sudo docker run -v $(pwd):/source ghcr.io/openzfs/libabigail make storeabi
+ - name: Prepare artifacts
+ if: failure() && steps.CheckABI.outcome == 'failure'
+ run: |
+ find -name *.abi | tar -cf abi_files.tar -T -
+ - uses: actions/upload-artifact@v2
+ if: failure() && steps.CheckABI.outcome == 'failure'
+ with:
+ name: New ABI files (use only if you're sure about interface changes)
+ path: abi_files.tar
diff --git a/.github/workflows/zfs-tests-functional.yml b/.github/workflows/zfs-tests-functional.yml
new file mode 100644
index 0000000000..aad3d552b2
--- /dev/null
+++ b/.github/workflows/zfs-tests-functional.yml
@@ -0,0 +1,82 @@
+name: zfs-tests-functional
+
+on:
+ push:
+ pull_request:
+
+jobs:
+ tests-functional-ubuntu:
+ strategy:
+ fail-fast: false
+ matrix:
+ os: [18.04, 20.04]
+ runs-on: ubuntu-${{ matrix.os }}
+ steps:
+ - uses: actions/checkout@v2
+ with:
+ ref: ${{ github.event.pull_request.head.sha }}
+ - name: Install dependencies
+ run: |
+ sudo apt-get update
+ sudo apt-get install --yes -qq build-essential autoconf libtool gdb lcov \
+ git alien fakeroot wget curl bc fio acl \
+ sysstat mdadm lsscsi parted gdebi attr dbench watchdog ksh \
+ nfs-kernel-server samba rng-tools xz-utils \
+ zlib1g-dev uuid-dev libblkid-dev libselinux-dev \
+ xfslibs-dev libattr1-dev libacl1-dev libudev-dev libdevmapper-dev \
+ libssl-dev libffi-dev libaio-dev libelf-dev libmount-dev \
+ libpam0g-dev pamtester python-dev python-setuptools python-cffi \
+ python-packaging python3 python3-dev python3-setuptools python3-cffi \
+ libcurl4-openssl-dev python3-packaging
+ - name: Autogen.sh
+ run: |
+ sh autogen.sh
+ - name: Configure
+ run: |
+ ./configure --enable-debug --enable-debuginfo
+ - name: Make
+ run: |
+ make --no-print-directory -s pkg-utils pkg-kmod
+ - name: Install
+ run: |
+ sudo dpkg -i *.deb
+ # Update order of directories to search for modules, otherwise
+ # Ubuntu will load kernel-shipped ones.
+ sudo sed -i.bak 's/updates/extra updates/' /etc/depmod.d/ubuntu.conf
+ sudo depmod
+ sudo modprobe zfs
+ # Workaround for cloud-init bug
+ # see https://github.com/openzfs/zfs/issues/12644
+ FILE=/lib/udev/rules.d/10-cloud-init-hook-hotplug.rules
+ if [ -r "${FILE}" ]; then
+ HASH=$(md5sum "${FILE}" | awk '{ print $1 }')
+ if [ "${HASH}" = "121ff0ef1936cd2ef65aec0458a35772" ]; then
+ # Just shove a zd* exclusion right above the hotplug hook...
+ sudo sed -i -e s/'LABEL="cloudinit_hook"'/'KERNEL=="zd*", GOTO="cloudinit_end"\n&'/ "${FILE}"
+ sudo udevadm control --reload-rules
+ fi
+ fi
+ # Workaround to provide additional free space for testing.
+ # https://github.com/actions/virtual-environments/issues/2840
+ sudo rm -rf /usr/share/dotnet
+ sudo rm -rf /opt/ghc
+ sudo rm -rf "/usr/local/share/boost"
+ sudo rm -rf "$AGENT_TOOLSDIRECTORY"
+ - name: Tests
+ run: |
+ /usr/share/zfs/zfs-tests.sh -v -s 3G
+ - name: Prepare artifacts
+ if: failure()
+ run: |
+ RESULTS_PATH=$(readlink -f /var/tmp/test_results/current)
+ sudo dmesg > $RESULTS_PATH/dmesg
+ sudo cp /var/log/syslog $RESULTS_PATH/
+ sudo chmod +r $RESULTS_PATH/*
+ # Replace ':' in dir names, actions/upload-artifact doesn't support it
+ for f in $(find $RESULTS_PATH -name '*:*'); do mv "$f" "${f//:/__}"; done
+ - uses: actions/upload-artifact@v2
+ if: failure()
+ with:
+ name: Test logs Ubuntu-${{ matrix.os }}
+ path: /var/tmp/test_results/20*/
+ if-no-files-found: ignore
diff --git a/.github/workflows/zfs-tests-sanity.yml b/.github/workflows/zfs-tests-sanity.yml
new file mode 100644
index 0000000000..4df49461ed
--- /dev/null
+++ b/.github/workflows/zfs-tests-sanity.yml
@@ -0,0 +1,78 @@
+name: zfs-tests-sanity
+
+on:
+ push:
+ pull_request:
+
+jobs:
+ tests:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v2
+ with:
+ ref: ${{ github.event.pull_request.head.sha }}
+ - name: Install dependencies
+ run: |
+ sudo apt-get update
+ sudo apt-get install --yes -qq build-essential autoconf libtool gdb lcov \
+ git alien fakeroot wget curl bc fio acl \
+ sysstat mdadm lsscsi parted gdebi attr dbench watchdog ksh \
+ nfs-kernel-server samba rng-tools xz-utils \
+ zlib1g-dev uuid-dev libblkid-dev libselinux-dev \
+ xfslibs-dev libattr1-dev libacl1-dev libudev-dev libdevmapper-dev \
+ libssl-dev libffi-dev libaio-dev libelf-dev libmount-dev \
+ libpam0g-dev pamtester python-dev python-setuptools python-cffi \
+ python-packaging python3 python3-dev python3-setuptools python3-cffi \
+ python3-packaging libcurl4-openssl-dev
+ - name: Autogen.sh
+ run: |
+ sh autogen.sh
+ - name: Configure
+ run: |
+ ./configure --enable-debug --enable-debuginfo
+ - name: Make
+ run: |
+ make --no-print-directory -s pkg-utils pkg-kmod
+ - name: Install
+ run: |
+ sudo dpkg -i *.deb
+ # Update order of directories to search for modules, otherwise
+ # Ubuntu will load kernel-shipped ones.
+ sudo sed -i.bak 's/updates/extra updates/' /etc/depmod.d/ubuntu.conf
+ sudo depmod
+ sudo modprobe zfs
+ # Workaround for cloud-init bug
+ # see https://github.com/openzfs/zfs/issues/12644
+ FILE=/lib/udev/rules.d/10-cloud-init-hook-hotplug.rules
+ if [ -r "${FILE}" ]; then
+ HASH=$(md5sum "${FILE}" | awk '{ print $1 }')
+ if [ "${HASH}" = "121ff0ef1936cd2ef65aec0458a35772" ]; then
+ # Just shove a zd* exclusion right above the hotplug hook...
+ sudo sed -i -e s/'LABEL="cloudinit_hook"'/'KERNEL=="zd*", GOTO="cloudinit_end"\n&'/ "${FILE}"
+ sudo udevadm control --reload-rules
+ fi
+ fi
+ # Workaround to provide additional free space for testing.
+ # https://github.com/actions/virtual-environments/issues/2840
+ sudo rm -rf /usr/share/dotnet
+ sudo rm -rf /opt/ghc
+ sudo rm -rf "/usr/local/share/boost"
+ sudo rm -rf "$AGENT_TOOLSDIRECTORY"
+ - name: Tests
+ run: |
+ /usr/share/zfs/zfs-tests.sh -v -s 3G -r sanity
+ - name: Prepare artifacts
+ if: failure()
+ run: |
+ RESULTS_PATH=$(readlink -f /var/tmp/test_results/current)
+ sudo dmesg > $RESULTS_PATH/dmesg
+ sudo cp /var/log/syslog $RESULTS_PATH/
+ sudo chmod +r $RESULTS_PATH/*
+ # Replace ':' in dir names, actions/upload-artifact doesn't support it
+ for f in $(find $RESULTS_PATH -name '*:*'); do mv "$f" "${f//:/__}"; done
+ - uses: actions/upload-artifact@v2
+ if: failure()
+ with:
+ name: Test logs
+ path: /var/tmp/test_results/20*/
+ if-no-files-found: ignore
diff --git a/.github/workflows/zloop.yml b/.github/workflows/zloop.yml
new file mode 100644
index 0000000000..cf81ad4bca
--- /dev/null
+++ b/.github/workflows/zloop.yml
@@ -0,0 +1,67 @@
+name: zloop
+
+on:
+ push:
+ pull_request:
+
+jobs:
+ tests:
+ runs-on: ubuntu-latest
+ env:
+ TEST_DIR: /var/tmp/zloop
+ steps:
+ - uses: actions/checkout@v2
+ with:
+ ref: ${{ github.event.pull_request.head.sha }}
+ - name: Install dependencies
+ run: |
+ sudo apt-get update
+ sudo apt-get install --yes -qq build-essential autoconf libtool gdb \
+ git alien fakeroot \
+ zlib1g-dev uuid-dev libblkid-dev libselinux-dev \
+ xfslibs-dev libattr1-dev libacl1-dev libudev-dev libdevmapper-dev \
+ libssl-dev libffi-dev libaio-dev libelf-dev libmount-dev \
+ libpam0g-dev \
+ python-dev python-setuptools python-cffi python-packaging \
+ python3 python3-dev python3-setuptools python3-cffi python3-packaging
+ - name: Autogen.sh
+ run: |
+ sh autogen.sh
+ - name: Configure
+ run: |
+ ./configure --enable-debug --enable-debuginfo
+ - name: Make
+ run: |
+ make --no-print-directory -s pkg-utils pkg-kmod
+ - name: Install
+ run: |
+ sudo dpkg -i *.deb
+ # Update order of directories to search for modules, otherwise
+ # Ubuntu will load kernel-shipped ones.
+ sudo sed -i.bak 's/updates/extra updates/' /etc/depmod.d/ubuntu.conf
+ sudo depmod
+ sudo modprobe zfs
+ - name: Tests
+ run: |
+ sudo mkdir -p $TEST_DIR
+ # run for 20 minutes to have a total runner time of 30 minutes
+ sudo /usr/share/zfs/zloop.sh -t 1200 -l -m1 -- -T 120 -P 60
+ - name: Prepare artifacts
+ if: failure()
+ run: |
+ sudo chmod +r -R $TEST_DIR/
+ - uses: actions/upload-artifact@v2
+ if: failure()
+ with:
+ name: Logs
+ path: |
+ /var/tmp/zloop/*/
+ !/var/tmp/zloop/*/vdev/
+ if-no-files-found: ignore
+ - uses: actions/upload-artifact@v2
+ if: failure()
+ with:
+ name: Pool files
+ path: |
+ /var/tmp/zloop/*/vdev/
+ if-no-files-found: ignore
diff --git a/.gitignore b/.gitignore
index 549fa59f38..056bbb8f08 100644
--- a/.gitignore
+++ b/.gitignore
@@ -36,6 +36,7 @@ Makefile.in
# Top level generated files specific to this top level dir
#
/bin
+/build
/configure
/config.log
/config.status
@@ -61,5 +62,9 @@ cscope.*
*.patch
*.orig
*.log
+*.tmp
venv
+*.so
+*.so.debug
+*.so.full
diff --git a/.gitmodules b/.gitmodules
index d400f10a7e..9eaa2b0495 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,3 +1,3 @@
[submodule "scripts/zfs-images"]
path = scripts/zfs-images
- url = https://github.com/zfsonlinux/zfs-images
+ url = https://github.com/openzfs/zfs-images
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index 620c0432e2..0000000000
--- a/.travis.yml
+++ /dev/null
@@ -1,38 +0,0 @@
-language: c
-sudo: required
-env:
- global:
- # Travis limits maximum log size, we have to cut tests output
- - ZFS_TEST_TRAVIS_LOG_MAX_LENGTH=800
- matrix:
- # tags are mainly in ascending order
- - ZFS_TEST_TAGS='acl,atime,bootfs,cachefile,casenorm,chattr,checksum,clean_mirror,compression,ctime,delegate,devices,events,exec,fault,features,grow_pool,zdb,zfs,zfs_bookmark,zfs_change-key,zfs_clone,zfs_copies,zfs_create,zfs_diff,zfs_get,zfs_inherit,zfs_load-key,zfs_rename'
- - ZFS_TEST_TAGS='cache,history,hkdf,inuse,zfs_property,zfs_receive,zfs_reservation,zfs_send,zfs_set,zfs_share,zfs_snapshot,zfs_unload-key,zfs_unmount,zfs_unshare,zfs_upgrade,zpool,zpool_add,zpool_attach,zpool_clear,zpool_create,zpool_destroy,zpool_detach'
- - ZFS_TEST_TAGS='grow_replicas,mv_files,cli_user,zfs_mount,zfs_promote,zfs_rollback,zpool_events,zpool_expand,zpool_export,zpool_get,zpool_history,zpool_import,zpool_labelclear,zpool_offline,zpool_online,zpool_remove,zpool_reopen,zpool_replace,zpool_scrub,zpool_set,zpool_status,zpool_sync,zpool_upgrade'
- - ZFS_TEST_TAGS='zfs_destroy,large_files,largest_pool,link_count,migration,mmap,mmp,mount,nestedfs,no_space,nopwrite,online_offline,pool_names,poolversion,privilege,quota,raidz,redundancy,rsend'
- - ZFS_TEST_TAGS='inheritance,refquota,refreserv,rename_dirs,replacement,reservation,rootpool,scrub_mirror,slog,snapshot,snapused,sparse,threadsappend,tmpfile,truncate,upgrade,userquota,vdev_zaps,write_dirs,xattr,zvol,libzfs'
-before_install:
- - sudo apt-get -qq update
- - sudo apt-get install --yes -qq build-essential autoconf libtool gawk alien fakeroot linux-headers-$(uname -r)
- - sudo apt-get install --yes -qq zlib1g-dev uuid-dev libattr1-dev libblkid-dev libselinux-dev libudev-dev libssl-dev
- # packages for tests
- - sudo apt-get install --yes -qq parted lsscsi ksh attr acl nfs-kernel-server fio
-install:
- - git clone --depth=1 https://github.com/zfsonlinux/spl
- - cd spl
- - git checkout master
- - sh autogen.sh
- - ./configure
- - make --no-print-directory -s pkg-utils pkg-kmod
- - sudo dpkg -i *.deb
- - cd ..
- - sh autogen.sh
- - ./configure
- - make --no-print-directory -s pkg-utils pkg-kmod
- - sudo dpkg -i *.deb
-script:
- - travis_wait 50 /usr/share/zfs/zfs-tests.sh -v -T $ZFS_TEST_TAGS
-after_failure:
- - find /var/tmp/test_results/current/log -type f -name '*' -printf "%f\n" -exec cut -c -$ZFS_TEST_TRAVIS_LOG_MAX_LENGTH {} \;
-after_success:
- - find /var/tmp/test_results/current/log -type f -name '*' -printf "%f\n" -exec cut -c -$ZFS_TEST_TRAVIS_LOG_MAX_LENGTH {} \;
diff --git a/AUTHORS b/AUTHORS
index 8314a1c214..aab8bf29c9 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -83,6 +83,7 @@ CONTRIBUTORS:
Christopher Voltz
Chunwei Chen
Clemens Fruhwirth
+ Coleman Kane
Colin Ian King
Craig Loomis
Craig Sanders
@@ -181,6 +182,7 @@ CONTRIBUTORS:
Keith M Wesolowski
Kevin Tanguy
KireinaHoro
+ Kjeld Schouten-Lebbing
Kohsuke Kawaguchi
Kyle Blatter
Kyle Fuller
@@ -209,6 +211,7 @@ CONTRIBUTORS:
Michael Gebetsroither
Michael Kjorling
Michael Martin
+ Michael Niewöhner
Mike Gerdts
Mike Harsch
Mike Leddy
@@ -257,6 +260,7 @@ CONTRIBUTORS:
Saso Kiselkov
Scot W. Stevenson
Sean Eric Fagan
+ Sebastian Gottschall
Sen Haerens
Serapheim Dimitropoulos
Seth Forshee
diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md
index d314a66b4e..2dcc251e55 100644
--- a/CODE_OF_CONDUCT.md
+++ b/CODE_OF_CONDUCT.md
@@ -1,2 +1,2 @@
The [OpenZFS Code of Conduct](http://www.open-zfs.org/wiki/Code_of_Conduct)
-applies to spaces associated with the ZFS on Linux project, including GitHub.
+applies to spaces associated with the OpenZFS project, including GitHub.
diff --git a/COPYRIGHT b/COPYRIGHT
index 54fbceade1..85556b542f 100644
--- a/COPYRIGHT
+++ b/COPYRIGHT
@@ -19,7 +19,11 @@ notable exceptions and their respective licenses include:
* AES Implementation: module/icp/asm-x86_64/aes/THIRDPARTYLICENSE.gladman
* AES Implementation: module/icp/asm-x86_64/aes/THIRDPARTYLICENSE.openssl
* PBKDF2 Implementation: lib/libzfs/THIRDPARTYLICENSE.openssl
- * SPL Implementation: module/spl/THIRDPARTYLICENSE.gplv2
+ * SPL Implementation: module/os/linux/spl/THIRDPARTYLICENSE.gplv2
+ * GCM Implementation: module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.cryptogams
+ * GCM Implementation: module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.openssl
+ * GHASH Implementation: module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.cryptogams
+ * GHASH Implementation: module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.openssl
This product includes software developed by the OpenSSL Project for use
in the OpenSSL Toolkit (http://www.openssl.org/)
diff --git a/META b/META
index a93750eebd..8dacb8082f 100644
--- a/META
+++ b/META
@@ -1,10 +1,10 @@
Meta: 1
Name: zfs
Branch: 1.0
-Version: 0.8.0
+Version: 2.1.99
Release: 1
Release-Tags: relext
License: CDDL
-Author: OpenZFS on Linux
-Linux-Maximum: 5.1
-Linux-Minimum: 2.6.32
+Author: OpenZFS
+Linux-Maximum: 5.14
+Linux-Minimum: 3.10
diff --git a/Makefile.am b/Makefile.am
index 1ec2514922..34fe16ce41 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -1,12 +1,17 @@
+include $(top_srcdir)/config/Shellcheck.am
+
ACLOCAL_AMFLAGS = -I config
-include config/rpm.am
-include config/deb.am
-include config/tgz.am
+SUBDIRS = include
+if BUILD_LINUX
+SUBDIRS += rpm
+endif
-SUBDIRS = include rpm
if CONFIG_USER
-SUBDIRS += udev etc man scripts lib tests cmd contrib
+SUBDIRS += man scripts lib tests cmd etc contrib
+if BUILD_LINUX
+SUBDIRS += udev
+endif
endif
if CONFIG_KERNEL
SUBDIRS += module
@@ -14,33 +19,51 @@ SUBDIRS += module
extradir = $(prefix)/src/zfs-$(VERSION)
extra_HEADERS = zfs.release.in zfs_config.h.in
+if BUILD_LINUX
kerneldir = $(prefix)/src/zfs-$(VERSION)/$(LINUX_VERSION)
nodist_kernel_HEADERS = zfs.release zfs_config.h module/$(LINUX_SYMBOLS)
endif
+endif
AUTOMAKE_OPTIONS = foreign
EXTRA_DIST = autogen.sh copy-builtin
EXTRA_DIST += config/config.awk config/rpm.am config/deb.am config/tgz.am
-EXTRA_DIST += META AUTHORS COPYRIGHT LICENSE NEWS NOTICE README.md
-EXTRA_DIST += CODE_OF_CONDUCT.md
+EXTRA_DIST += AUTHORS CODE_OF_CONDUCT.md COPYRIGHT LICENSE META NEWS NOTICE
+EXTRA_DIST += README.md RELEASES.md
+EXTRA_DIST += module/lua/README.zfs module/os/linux/spl/README.md
# Include all the extra licensing information for modules
-EXTRA_DIST += module/icp/algs/skein/THIRDPARTYLICENSE module/icp/algs/skein/THIRDPARTYLICENSE.descrip
-EXTRA_DIST += module/icp/asm-x86_64/aes/THIRDPARTYLICENSE.gladman module/icp/asm-x86_64/aes/THIRDPARTYLICENSE.gladman.descrip
-EXTRA_DIST += module/icp/asm-x86_64/aes/THIRDPARTYLICENSE.openssl module/icp/asm-x86_64/aes/THIRDPARTYLICENSE.openssl.descrip
-EXTRA_DIST += module/spl/THIRDPARTYLICENSE.gplv2 module/spl/THIRDPARTYLICENSE.gplv2.descrip
-EXTRA_DIST += module/zfs/THIRDPARTYLICENSE.cityhash module/zfs/THIRDPARTYLICENSE.cityhash.descrip
+EXTRA_DIST += module/icp/algs/skein/THIRDPARTYLICENSE
+EXTRA_DIST += module/icp/algs/skein/THIRDPARTYLICENSE.descrip
+EXTRA_DIST += module/icp/asm-x86_64/aes/THIRDPARTYLICENSE.gladman
+EXTRA_DIST += module/icp/asm-x86_64/aes/THIRDPARTYLICENSE.gladman.descrip
+EXTRA_DIST += module/icp/asm-x86_64/aes/THIRDPARTYLICENSE.openssl
+EXTRA_DIST += module/icp/asm-x86_64/aes/THIRDPARTYLICENSE.openssl.descrip
+EXTRA_DIST += module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.cryptogams
+EXTRA_DIST += module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.cryptogams.descrip
+EXTRA_DIST += module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.openssl
+EXTRA_DIST += module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.openssl.descrip
+EXTRA_DIST += module/os/linux/spl/THIRDPARTYLICENSE.gplv2
+EXTRA_DIST += module/os/linux/spl/THIRDPARTYLICENSE.gplv2.descrip
+EXTRA_DIST += module/zfs/THIRDPARTYLICENSE.cityhash
+EXTRA_DIST += module/zfs/THIRDPARTYLICENSE.cityhash.descrip
@CODE_COVERAGE_RULES@
-.PHONY: gitrev
-gitrev:
- -${top_srcdir}/scripts/make_gitrev.sh
+GITREV = include/zfs_gitrev.h
-BUILT_SOURCES = gitrev
+PHONY = gitrev
+gitrev:
+ $(AM_V_GEN)$(top_srcdir)/scripts/make_gitrev.sh $(GITREV)
+
+all: gitrev
+
+# Double-colon rules are allowed; there are multiple independent definitions.
+maintainer-clean-local::
+ -$(RM) $(GITREV)
distclean-local::
- -$(RM) -R autom4te*.cache
+ -$(RM) -R autom4te*.cache build
-find . \( -name SCCS -o -name BitKeeper -o -name .svn -o -name CVS \
-o -name .pc -o -name .hg -o -name .git \) -prune -o \
\( -name '*.orig' -o -name '*.rej' -o -name '*~' \
@@ -52,13 +75,15 @@ distclean-local::
-type f -print | xargs $(RM)
all-local:
- -${top_srcdir}/scripts/zfs-tests.sh -c
+ -[ -x ${top_builddir}/scripts/zfs-tests.sh ] && \
+ ${top_builddir}/scripts/zfs-tests.sh -c
-dist-hook: gitrev
- cp ${top_srcdir}/include/zfs_gitrev.h $(distdir)/include; \
- sed -i 's/Release:[[:print:]]*/Release: $(RELEASE)/' \
+dist-hook:
+ $(AM_V_GEN)$(top_srcdir)/scripts/make_gitrev.sh -D $(distdir) $(GITREV)
+ $(SED) ${ac_inplace} -e 's/Release:[[:print:]]*/Release: $(RELEASE)/' \
$(distdir)/META
+if BUILD_LINUX
# For compatibility, create a matching spl-x.y.z directly which contains
# symlinks to the updated header and object file locations. These
# compatibility links will be removed in the next major release.
@@ -75,75 +100,102 @@ install-data-hook:
ln -fs zfs_config.h spl_config.h && \
ln -fs zfs.release spl.release
endif
+endif
-codecheck: cstyle shellcheck flake8 mancheck testscheck vcscheck
+PHONY += codecheck
+codecheck: cstyle shellcheck checkbashisms flake8 mancheck testscheck vcscheck
+PHONY += checkstyle
checkstyle: codecheck commitcheck
+PHONY += commitcheck
commitcheck:
@if git rev-parse --git-dir > /dev/null 2>&1; then \
${top_srcdir}/scripts/commitcheck.sh; \
fi
+PHONY += cstyle
cstyle:
- @find ${top_srcdir} -name '*.[hc]' ! -name 'zfs_config.*' \
- ! -name '*.mod.c' -type f \
+ @find ${top_srcdir} -name build -prune \
+ -o -type f -name '*.[hc]' \
+ ! -name 'zfs_config.*' ! -name '*.mod.c' \
+ ! -name 'opt_global.h' ! -name '*_if*.h' \
+ ! -path './module/zstd/lib/*' \
-exec ${top_srcdir}/scripts/cstyle.pl -cpP {} \+
-shellcheck:
- @if type shellcheck > /dev/null 2>&1; then \
- shellcheck --exclude=SC1090 --format=gcc \
- $$(find ${top_srcdir}/scripts/*.sh -type f) \
- $$(find ${top_srcdir}/cmd/zed/zed.d/*.sh -type f) \
- $$(find ${top_srcdir}/cmd/zpool/zpool.d/* -executable); \
- else \
- echo "skipping shellcheck because shellcheck is not installed"; \
- fi
+filter_executable = -exec test -x '{}' \; -print
+SHELLCHECKDIRS = cmd contrib etc scripts tests
+SHELLCHECKSCRIPTS = autogen.sh
+
+PHONY += checkabi storeabi
+
+checklibabiversion:
+ libabiversion=`abidw -v | $(SED) 's/[^0-9]//g'`; \
+ if test $$libabiversion -lt "200"; then \
+ /bin/echo -e "\n" \
+ "*** Please use libabigail 2.0.0 version or newer;\n" \
+ "*** otherwise results are not consistent!\n" \
+ "(or see https://github.com/openzfs/libabigail-docker )\n"; \
+ exit 1; \
+ fi;
+
+checkabi: checklibabiversion lib
+ $(MAKE) -C lib checkabi
+
+storeabi: checklibabiversion lib
+ $(MAKE) -C lib storeabi
+
+PHONY += mancheck
mancheck:
- @if type mandoc > /dev/null 2>&1; then \
- find ${top_srcdir}/man/man8 -type f -name 'zfs.8' \
- -o -name 'zpool.8' -o -name 'zdb.8' \
- -o -name 'zgenhostid.8' | \
- xargs mandoc -Tlint -Werror; \
- else \
- echo "skipping mancheck because mandoc is not installed"; \
- fi
+ ${top_srcdir}/scripts/mancheck.sh ${top_srcdir}/man ${top_srcdir}/tests/test-runner/man
+if BUILD_LINUX
+stat_fmt = -c '%A %n'
+else
+stat_fmt = -f '%Sp %N'
+endif
+
+PHONY += testscheck
testscheck:
@find ${top_srcdir}/tests/zfs-tests -type f \
- \( -name '*.ksh' -not -executable \) -o \
- \( -name '*.kshlib' -executable \) -o \
- \( -name '*.shlib' -executable \) -o \
- \( -name '*.cfg' -executable \) | \
- xargs -r stat -c '%A %n' | \
+ \( -name '*.ksh' -not ${filter_executable} \) -o \
+ \( -name '*.kshlib' ${filter_executable} \) -o \
+ \( -name '*.shlib' ${filter_executable} \) -o \
+ \( -name '*.cfg' ${filter_executable} \) | \
+ xargs -r stat ${stat_fmt} | \
awk '{c++; print} END {if(c>0) exit 1}'
+PHONY += vcscheck
vcscheck:
@if git rev-parse --git-dir > /dev/null 2>&1; then \
git ls-files . --exclude-standard --others | \
awk '{c++; print} END {if(c>0) exit 1}' ; \
fi
+PHONY += lint
lint: cppcheck paxcheck
-cppcheck:
- @if type cppcheck > /dev/null 2>&1; then \
- cppcheck --quiet --force --error-exitcode=2 --inline-suppr \
- --suppressions-list=.github/suppressions.txt \
- -UHAVE_SSE2 -UHAVE_AVX512F -UHAVE_UIO_ZEROCOPY \
- ${top_srcdir}; \
+CPPCHECKDIRS = cmd lib module
+PHONY += cppcheck
+cppcheck: $(CPPCHECKDIRS)
+ @if test -n "$(CPPCHECK)"; then \
+ set -e ; for dir in $(CPPCHECKDIRS) ; do \
+ $(MAKE) -C $$dir cppcheck ; \
+ done \
else \
echo "skipping cppcheck because cppcheck is not installed"; \
fi
+PHONY += paxcheck
paxcheck:
@if type scanelf > /dev/null 2>&1; then \
- ${top_srcdir}/scripts/paxcheck.sh ${top_srcdir}; \
+ ${top_srcdir}/scripts/paxcheck.sh ${top_builddir}; \
else \
echo "skipping paxcheck because scanelf is not installed"; \
fi
+PHONY += flake8
flake8:
@if type flake8 > /dev/null 2>&1; then \
flake8 ${top_srcdir}; \
@@ -151,17 +203,34 @@ flake8:
echo "skipping flake8 because flake8 is not installed"; \
fi
+PHONY += ctags
ctags:
$(RM) tags
- find $(top_srcdir) -name .git -prune -o -name '*.[hc]' | xargs ctags
+ find $(top_srcdir) -name '.?*' -prune \
+ -o -type f -name '*.[hcS]' -print | xargs ctags -a
+PHONY += etags
etags:
$(RM) TAGS
- find $(top_srcdir) -name .pc -prune -o -name '*.[hc]' | xargs etags -a
+ find $(top_srcdir) -name '.?*' -prune \
+ -o -type f -name '*.[hcS]' -print | xargs etags -a
+PHONY += cscopelist
+cscopelist:
+ find $(top_srcdir) -name '.?*' -prune \
+ -o -type f -name '*.[hc]' -print >cscope.files
+
+PHONY += tags
tags: ctags etags
+PHONY += pkg pkg-dkms pkg-kmod pkg-utils
pkg: @DEFAULT_PACKAGE@
pkg-dkms: @DEFAULT_PACKAGE@-dkms
pkg-kmod: @DEFAULT_PACKAGE@-kmod
pkg-utils: @DEFAULT_PACKAGE@-utils
+
+include config/rpm.am
+include config/deb.am
+include config/tgz.am
+
+.PHONY: $(PHONY)
diff --git a/NEWS b/NEWS
index bbdc2b69bb..3907ce5326 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,3 @@
Descriptions of all releases can be found on github:
-https://github.com/zfsonlinux/zfs/releases
+https://github.com/openzfs/zfs/releases
diff --git a/README.md b/README.md
index 59d167f8ec..d666df7af3 100644
--- a/README.md
+++ b/README.md
@@ -1,31 +1,35 @@
-
+
-ZFS on Linux is an advanced file system and volume manager which was originally
+OpenZFS is an advanced file system and volume manager which was originally
developed for Solaris and is now maintained by the OpenZFS community.
+This repository contains the code for running OpenZFS on Linux and FreeBSD.
-[](https://codecov.io/gh/zfsonlinux/zfs)
-[](https://scan.coverity.com/projects/zfsonlinux-zfs)
+[](https://codecov.io/gh/openzfs/zfs)
+[](https://scan.coverity.com/projects/openzfs-zfs)
# Official Resources
- * [Site](http://zfsonlinux.org)
- * [Wiki](https://github.com/zfsonlinux/zfs/wiki)
- * [Mailing lists](https://github.com/zfsonlinux/zfs/wiki/Mailing-Lists)
- * [OpenZFS site](http://open-zfs.org/)
+ * [Documentation](https://openzfs.github.io/openzfs-docs/) - for using and developing this repo
+ * [ZoL Site](https://zfsonlinux.org) - Linux release info & links
+ * [Mailing lists](https://openzfs.github.io/openzfs-docs/Project%20and%20Community/Mailing%20Lists.html)
+ * [OpenZFS site](http://open-zfs.org/) - for conference videos and info on other platforms (illumos, OSX, Windows, etc)
# Installation
-Full documentation for installing ZoL on your favorite Linux distribution can
-be found at [our site](http://zfsonlinux.org/).
+Full documentation for installing OpenZFS on your favorite operating system can
+be found at the [Getting Started Page](https://openzfs.github.io/openzfs-docs/Getting%20Started/index.html).
# Contribute & Develop
We have a separate document with [contribution guidelines](./.github/CONTRIBUTING.md).
+We have a [Code of Conduct](./CODE_OF_CONDUCT.md).
+
# Release
-ZFS on Linux is released under a CDDL license.
+OpenZFS is released under a CDDL license.
For more details see the NOTICE, LICENSE and COPYRIGHT files; `UCRL-CODE-235197`
# Supported Kernels
- * The `META` file contains the officially recognized supported kernel versions.
+ * The `META` file contains the officially recognized supported Linux kernel versions.
+ * Supported FreeBSD versions are any supported branches and releases starting from 12.2-RELEASE.
diff --git a/RELEASES.md b/RELEASES.md
new file mode 100644
index 0000000000..55bfdb80ef
--- /dev/null
+++ b/RELEASES.md
@@ -0,0 +1,37 @@
+OpenZFS uses the MAJOR.MINOR.PATCH versioning scheme described here:
+
+ * MAJOR - Incremented at the discretion of the OpenZFS developers to indicate
+ a particularly noteworthy feature or change. An increase in MAJOR number
+ does not indicate any incompatible on-disk format change. The ability
+ to import a ZFS pool is controlled by the feature flags enabled on the
+ pool and the feature flags supported by the installed OpenZFS version.
+ Increasing the MAJOR version is expected to be an infrequent occurrence.
+
+ * MINOR - Incremented to indicate new functionality such as a new feature
+ flag, pool/dataset property, zfs/zpool sub-command, new user/kernel
+ interface, etc. MINOR releases may introduce incompatible changes to the
+ user space library APIs (libzfs.so). Existing user/kernel interfaces are
+ considered to be stable to maximize compatibility between OpenZFS releases.
+ Additions to the user/kernel interface are backwards compatible.
+
+ * PATCH - Incremented when applying documentation updates, important bug
+ fixes, minor performance improvements, and kernel compatibility patches.
+ The user space library APIs and user/kernel interface are considered to
+ be stable. PATCH releases for a MAJOR.MINOR are published as needed.
+
+Two release branches are maintained for OpenZFS; they are:
+
+ * OpenZFS LTS - A designated MAJOR.MINOR release with periodic PATCH
+ releases that incorporate important changes backported from newer OpenZFS
+ releases. This branch is intended for use in environments using an
+ LTS, enterprise, or similarly managed kernel (RHEL, Ubuntu LTS, Debian).
+ Minor changes to support these distribution kernels will be applied as
+ needed. New kernel versions released after the OpenZFS LTS release are
+ not supported. LTS releases will receive patches for at least 2 years.
+ The current LTS release is OpenZFS 2.1.
+
+ * OpenZFS current - Tracks the newest MAJOR.MINOR release. This branch
+   includes support for the latest OpenZFS features and recently released
+ kernels. When a new MINOR release is tagged the previous MINOR release
+ will no longer be maintained (unless it is an LTS release). New MINOR
+ releases are planned to occur roughly annually.
diff --git a/TEST b/TEST
index ebe6ef963f..376d6eb691 100644
--- a/TEST
+++ b/TEST
@@ -48,64 +48,3 @@
#TEST_ZFSSTRESS_VDEV="/var/tmp/vdev"
#TEST_ZFSSTRESS_DIR="/$TEST_ZFSSTRESS_POOL/$TEST_ZFSSTRESS_FS"
#TEST_ZFSSTRESS_OPTIONS=""
-
-### per-builder customization
-#
-# BB_NAME=builder-name
-# - distribution=Amazon,Debian,Fedora,RHEL,SUSE,Ubuntu
-# - version=x.y
-# - architecture=x86_64,i686,arm,aarch64
-# - type=build,test
-#
-case "$BB_NAME" in
-Amazon*)
- # ZFS enabled xfstests fails to build
- TEST_XFSTESTS_SKIP="yes"
- ;;
-CentOS-7*)
- # ZFS enabled xfstests fails to build
- TEST_XFSTESTS_SKIP="yes"
- ;;
-CentOS-6*)
- ;;
-Debian*)
- ;;
-Fedora*)
- ;;
-RHEL*)
- ;;
-SUSE*)
- ;;
-Ubuntu-16.04*)
- # ZFS enabled xfstests fails to build
- TEST_XFSTESTS_SKIP="yes"
- ;;
-Ubuntu*)
- ;;
-*)
- ;;
-esac
-
-###
-#
-# Run ztest longer on the "coverage" builders to gain more code coverage
-# data out of ztest, libzpool, etc.
-#
-case "$BB_NAME" in
-*coverage*)
- TEST_ZTEST_TIMEOUT=3600
- ;;
-*)
- TEST_ZTEST_TIMEOUT=900
- ;;
-esac
-
-###
-#
-# Disable the following test suites on 32-bit systems.
-#
-if [ $(getconf LONG_BIT) = "32" ]; then
- TEST_ZTEST_SKIP="yes"
- TEST_XFSTESTS_SKIP="yes"
- TEST_ZFSSTRESS_SKIP="yes"
-fi
diff --git a/cmd/Makefile.am b/cmd/Makefile.am
index 9dd7b8b4f0..5fc9e83971 100644
--- a/cmd/Makefile.am
+++ b/cmd/Makefile.am
@@ -1,3 +1,27 @@
-SUBDIRS = zfs zpool zdb zhack zinject zstreamdump ztest
-SUBDIRS += mount_zfs fsck_zfs zvol_id vdev_id arcstat dbufstat zed
-SUBDIRS += arc_summary raidz_test zgenhostid
+include $(top_srcdir)/config/Shellcheck.am
+
+SUBDIRS = zfs zpool zdb zhack zinject zstream ztest
+SUBDIRS += fsck_zfs vdev_id raidz_test zfs_ids_to_path
+SUBDIRS += zpool_influxdb
+
+CPPCHECKDIRS = zfs zpool zdb zhack zinject zstream ztest
+CPPCHECKDIRS += raidz_test zfs_ids_to_path zpool_influxdb
+
+# TODO: #12084: SHELLCHECKDIRS = fsck_zfs vdev_id zpool
+SHELLCHECKDIRS = fsck_zfs zpool
+
+if USING_PYTHON
+SUBDIRS += arcstat arc_summary dbufstat
+endif
+
+if BUILD_LINUX
+SUBDIRS += mount_zfs zed zgenhostid zvol_id zvol_wait
+CPPCHECKDIRS += mount_zfs zed zgenhostid zvol_id
+SHELLCHECKDIRS += zed
+endif
+
+PHONY = cppcheck
+cppcheck: $(CPPCHECKDIRS)
+ set -e ; for dir in $(CPPCHECKDIRS) ; do \
+ $(MAKE) -C $$dir cppcheck ; \
+ done
diff --git a/cmd/arc_summary/.gitignore b/cmd/arc_summary/.gitignore
new file mode 100644
index 0000000000..50ba15f034
--- /dev/null
+++ b/cmd/arc_summary/.gitignore
@@ -0,0 +1 @@
+arc_summary
diff --git a/cmd/arc_summary/Makefile.am b/cmd/arc_summary/Makefile.am
index a83edffadc..1a26c2c199 100644
--- a/cmd/arc_summary/Makefile.am
+++ b/cmd/arc_summary/Makefile.am
@@ -1,13 +1,13 @@
+bin_SCRIPTS = arc_summary
+
+CLEANFILES = arc_summary
EXTRA_DIST = arc_summary2 arc_summary3
if USING_PYTHON_2
-dist_bin_SCRIPTS = arc_summary2
-install-exec-hook:
- mv $(DESTDIR)$(bindir)/arc_summary2 $(DESTDIR)$(bindir)/arc_summary
+SCRIPT = arc_summary2
+else
+SCRIPT = arc_summary3
endif
-if USING_PYTHON_3
-dist_bin_SCRIPTS = arc_summary3
-install-exec-hook:
- mv $(DESTDIR)$(bindir)/arc_summary3 $(DESTDIR)$(bindir)/arc_summary
-endif
+arc_summary: $(SCRIPT)
+ cp $< $@
diff --git a/cmd/arc_summary/arc_summary2 b/cmd/arc_summary/arc_summary2
index ab4a3c574a..3302a802d1 100755
--- a/cmd/arc_summary/arc_summary2
+++ b/cmd/arc_summary/arc_summary2
@@ -1,4 +1,4 @@
-#!/usr/bin/python2
+#!/usr/bin/env python2
#
# $Id: arc_summary.pl,v 388:e27800740aa2 2011-07-08 02:53:29Z jhell $
#
@@ -42,7 +42,7 @@
Provides basic information on the ARC, its efficiency, the L2ARC (if present),
the Data Management Unit (DMU), Virtual Devices (VDEVs), and tunables. See the
in-source documentation and code at
-https://github.com/zfsonlinux/zfs/blob/master/module/zfs/arc.c for details.
+https://github.com/openzfs/zfs/blob/master/module/zfs/arc.c for details.
"""
import getopt
@@ -54,46 +54,64 @@ import errno
from subprocess import Popen, PIPE
from decimal import Decimal as D
+
+if sys.platform.startswith('freebsd'):
+ # Requires py27-sysctl on FreeBSD
+ import sysctl
+
+ def is_value(ctl):
+ return ctl.type != sysctl.CTLTYPE_NODE
+
+ def load_kstats(namespace):
+ """Collect information on a specific subsystem of the ARC"""
+
+ base = 'kstat.zfs.misc.%s.' % namespace
+ fmt = lambda kstat: (kstat.name, D(kstat.value))
+ kstats = sysctl.filter(base)
+ return [fmt(kstat) for kstat in kstats if is_value(kstat)]
+
+ def load_tunables():
+ ctls = sysctl.filter('vfs.zfs')
+ return dict((ctl.name, ctl.value) for ctl in ctls if is_value(ctl))
+
+elif sys.platform.startswith('linux'):
+
+ def load_kstats(namespace):
+ """Collect information on a specific subsystem of the ARC"""
+
+ kstat = 'kstat.zfs.misc.%s.%%s' % namespace
+ path = '/proc/spl/kstat/zfs/%s' % namespace
+ with open(path) as f:
+ entries = [line.strip().split() for line in f][2:] # Skip header
+ return [(kstat % name, D(value)) for name, _, value in entries]
+
+ def load_tunables():
+ basepath = '/sys/module/zfs/parameters'
+ tunables = {}
+ for name in os.listdir(basepath):
+ if not name:
+ continue
+ path = '%s/%s' % (basepath, name)
+ with open(path) as f:
+ value = f.read()
+ tunables[name] = value.strip()
+ return tunables
+
+
show_tunable_descriptions = False
alternate_tunable_layout = False
-def handle_Exception(ex_cls, ex, tb):
- if ex is IOError:
- if ex.errno == errno.EPIPE:
- sys.exit()
-
- if ex is KeyboardInterrupt:
- sys.exit()
-
-
-sys.excepthook = handle_Exception
-
-
def get_Kstat():
"""Collect information on the ZFS subsystem from the /proc virtual
file system. The name "kstat" is a holdover from the Solaris utility
of the same name.
"""
- def load_proc_kstats(fn, namespace):
- """Collect information on a specific subsystem of the ARC"""
-
- kstats = [line.strip() for line in open(fn)]
- del kstats[0:2]
- for kstat in kstats:
- kstat = kstat.strip()
- name, _, value = kstat.split()
- Kstat[namespace + name] = D(value)
-
Kstat = {}
- load_proc_kstats('/proc/spl/kstat/zfs/arcstats',
- 'kstat.zfs.misc.arcstats.')
- load_proc_kstats('/proc/spl/kstat/zfs/zfetchstats',
- 'kstat.zfs.misc.zfetchstats.')
- load_proc_kstats('/proc/spl/kstat/zfs/vdev_cache_stats',
- 'kstat.zfs.misc.vdev_cache_stats.')
-
+ Kstat.update(load_kstats('arcstats'))
+ Kstat.update(load_kstats('zfetchstats'))
+ Kstat.update(load_kstats('vdev_cache_stats'))
return Kstat
@@ -195,12 +213,30 @@ def get_arc_summary(Kstat):
deleted = Kstat["kstat.zfs.misc.arcstats.deleted"]
mutex_miss = Kstat["kstat.zfs.misc.arcstats.mutex_miss"]
evict_skip = Kstat["kstat.zfs.misc.arcstats.evict_skip"]
+ evict_l2_cached = Kstat["kstat.zfs.misc.arcstats.evict_l2_cached"]
+ evict_l2_eligible = Kstat["kstat.zfs.misc.arcstats.evict_l2_eligible"]
+ evict_l2_eligible_mfu = Kstat["kstat.zfs.misc.arcstats.evict_l2_eligible_mfu"]
+ evict_l2_eligible_mru = Kstat["kstat.zfs.misc.arcstats.evict_l2_eligible_mru"]
+ evict_l2_ineligible = Kstat["kstat.zfs.misc.arcstats.evict_l2_ineligible"]
+ evict_l2_skip = Kstat["kstat.zfs.misc.arcstats.evict_l2_skip"]
# ARC Misc.
output["arc_misc"] = {}
output["arc_misc"]["deleted"] = fHits(deleted)
- output["arc_misc"]['mutex_miss'] = fHits(mutex_miss)
- output["arc_misc"]['evict_skips'] = fHits(evict_skip)
+ output["arc_misc"]["mutex_miss"] = fHits(mutex_miss)
+ output["arc_misc"]["evict_skips"] = fHits(evict_skip)
+ output["arc_misc"]["evict_l2_skip"] = fHits(evict_l2_skip)
+ output["arc_misc"]["evict_l2_cached"] = fBytes(evict_l2_cached)
+ output["arc_misc"]["evict_l2_eligible"] = fBytes(evict_l2_eligible)
+ output["arc_misc"]["evict_l2_eligible_mfu"] = {
+ 'per': fPerc(evict_l2_eligible_mfu, evict_l2_eligible),
+ 'num': fBytes(evict_l2_eligible_mfu),
+ }
+ output["arc_misc"]["evict_l2_eligible_mru"] = {
+ 'per': fPerc(evict_l2_eligible_mru, evict_l2_eligible),
+ 'num': fBytes(evict_l2_eligible_mru),
+ }
+ output["arc_misc"]["evict_l2_ineligible"] = fBytes(evict_l2_ineligible)
# ARC Sizing
arc_size = Kstat["kstat.zfs.misc.arcstats.size"]
@@ -316,8 +352,26 @@ def _arc_summary(Kstat):
sys.stdout.write("\tDeleted:\t\t\t\t%s\n" % arc['arc_misc']['deleted'])
sys.stdout.write("\tMutex Misses:\t\t\t\t%s\n" %
arc['arc_misc']['mutex_miss'])
- sys.stdout.write("\tEvict Skips:\t\t\t\t%s\n" %
+ sys.stdout.write("\tEviction Skips:\t\t\t\t%s\n" %
arc['arc_misc']['evict_skips'])
+ sys.stdout.write("\tEviction Skips Due to L2 Writes:\t%s\n" %
+ arc['arc_misc']['evict_l2_skip'])
+ sys.stdout.write("\tL2 Cached Evictions:\t\t\t%s\n" %
+ arc['arc_misc']['evict_l2_cached'])
+ sys.stdout.write("\tL2 Eligible Evictions:\t\t\t%s\n" %
+ arc['arc_misc']['evict_l2_eligible'])
+ sys.stdout.write("\tL2 Eligible MFU Evictions:\t%s\t%s\n" % (
+ arc['arc_misc']['evict_l2_eligible_mfu']['per'],
+ arc['arc_misc']['evict_l2_eligible_mfu']['num'],
+ )
+ )
+ sys.stdout.write("\tL2 Eligible MRU Evictions:\t%s\t%s\n" % (
+ arc['arc_misc']['evict_l2_eligible_mru']['per'],
+ arc['arc_misc']['evict_l2_eligible_mru']['num'],
+ )
+ )
+ sys.stdout.write("\tL2 Ineligible Evictions:\t\t%s\n" %
+ arc['arc_misc']['evict_l2_ineligible'])
sys.stdout.write("\n")
# ARC Sizing
@@ -653,6 +707,11 @@ def get_l2arc_summary(Kstat):
l2_writes_done = Kstat["kstat.zfs.misc.arcstats.l2_writes_done"]
l2_writes_error = Kstat["kstat.zfs.misc.arcstats.l2_writes_error"]
l2_writes_sent = Kstat["kstat.zfs.misc.arcstats.l2_writes_sent"]
+ l2_mfu_asize = Kstat["kstat.zfs.misc.arcstats.l2_mfu_asize"]
+ l2_mru_asize = Kstat["kstat.zfs.misc.arcstats.l2_mru_asize"]
+ l2_prefetch_asize = Kstat["kstat.zfs.misc.arcstats.l2_prefetch_asize"]
+ l2_bufc_data_asize = Kstat["kstat.zfs.misc.arcstats.l2_bufc_data_asize"]
+ l2_bufc_metadata_asize = Kstat["kstat.zfs.misc.arcstats.l2_bufc_metadata_asize"]
l2_access_total = (l2_hits + l2_misses)
output['l2_health_count'] = (l2_writes_error + l2_cksum_bad + l2_io_error)
@@ -675,7 +734,7 @@ def get_l2arc_summary(Kstat):
output["io_errors"] = fHits(l2_io_error)
output["l2_arc_size"] = {}
- output["l2_arc_size"]["adative"] = fBytes(l2_size)
+ output["l2_arc_size"]["adaptive"] = fBytes(l2_size)
output["l2_arc_size"]["actual"] = {
'per': fPerc(l2_asize, l2_size),
'num': fBytes(l2_asize)
@@ -684,6 +743,26 @@ def get_l2arc_summary(Kstat):
'per': fPerc(l2_hdr_size, l2_size),
'num': fBytes(l2_hdr_size),
}
+ output["l2_arc_size"]["mfu_asize"] = {
+ 'per': fPerc(l2_mfu_asize, l2_asize),
+ 'num': fBytes(l2_mfu_asize),
+ }
+ output["l2_arc_size"]["mru_asize"] = {
+ 'per': fPerc(l2_mru_asize, l2_asize),
+ 'num': fBytes(l2_mru_asize),
+ }
+ output["l2_arc_size"]["prefetch_asize"] = {
+ 'per': fPerc(l2_prefetch_asize, l2_asize),
+ 'num': fBytes(l2_prefetch_asize),
+ }
+ output["l2_arc_size"]["bufc_data_asize"] = {
+ 'per': fPerc(l2_bufc_data_asize, l2_asize),
+ 'num': fBytes(l2_bufc_data_asize),
+ }
+ output["l2_arc_size"]["bufc_metadata_asize"] = {
+ 'per': fPerc(l2_bufc_metadata_asize, l2_asize),
+ 'num': fBytes(l2_bufc_metadata_asize),
+ }
output["l2_arc_evicts"] = {}
output["l2_arc_evicts"]['lock_retries'] = fHits(l2_evict_lock_retry)
@@ -748,7 +827,7 @@ def _l2arc_summary(Kstat):
sys.stdout.write("\n")
sys.stdout.write("L2 ARC Size: (Adaptive)\t\t\t\t%s\n" %
- arc["l2_arc_size"]["adative"])
+ arc["l2_arc_size"]["adaptive"])
sys.stdout.write("\tCompressed:\t\t\t%s\t%s\n" % (
arc["l2_arc_size"]["actual"]["per"],
arc["l2_arc_size"]["actual"]["num"],
@@ -759,11 +838,36 @@ def _l2arc_summary(Kstat):
arc["l2_arc_size"]["head_size"]["num"],
)
)
+ sys.stdout.write("\tMFU Alloc. Size:\t\t%s\t%s\n" % (
+ arc["l2_arc_size"]["mfu_asize"]["per"],
+ arc["l2_arc_size"]["mfu_asize"]["num"],
+ )
+ )
+ sys.stdout.write("\tMRU Alloc. Size:\t\t%s\t%s\n" % (
+ arc["l2_arc_size"]["mru_asize"]["per"],
+ arc["l2_arc_size"]["mru_asize"]["num"],
+ )
+ )
+ sys.stdout.write("\tPrefetch Alloc. Size:\t\t%s\t%s\n" % (
+ arc["l2_arc_size"]["prefetch_asize"]["per"],
+ arc["l2_arc_size"]["prefetch_asize"]["num"],
+ )
+ )
+ sys.stdout.write("\tData (buf content) Alloc. Size:\t%s\t%s\n" % (
+ arc["l2_arc_size"]["bufc_data_asize"]["per"],
+ arc["l2_arc_size"]["bufc_data_asize"]["num"],
+ )
+ )
+ sys.stdout.write("\tMetadata (buf content) Size:\t%s\t%s\n" % (
+ arc["l2_arc_size"]["bufc_metadata_asize"]["per"],
+ arc["l2_arc_size"]["bufc_metadata_asize"]["num"],
+ )
+ )
sys.stdout.write("\n")
if arc["l2_arc_evicts"]['lock_retries'] != '0' or \
arc["l2_arc_evicts"]["reading"] != '0':
- sys.stdout.write("L2 ARC Evicts:\n")
+ sys.stdout.write("L2 ARC Evictions:\n")
sys.stdout.write("\tLock Retries:\t\t\t\t%s\n" %
arc["l2_arc_evicts"]['lock_retries'])
sys.stdout.write("\tUpon Reading:\t\t\t\t%s\n" %
@@ -921,14 +1025,7 @@ def _tunable_summary(Kstat):
global show_tunable_descriptions
global alternate_tunable_layout
- names = os.listdir("/sys/module/zfs/parameters/")
-
- values = {}
- for name in names:
- with open("/sys/module/zfs/parameters/" + name) as f:
- value = f.read()
- values[name] = value.strip()
-
+ tunables = load_tunables()
descriptions = {}
if show_tunable_descriptions:
@@ -966,22 +1063,17 @@ def _tunable_summary(Kstat):
sys.stderr.write("Tunable descriptions will be disabled.\n")
sys.stdout.write("ZFS Tunables:\n")
- names.sort()
if alternate_tunable_layout:
fmt = "\t%s=%s\n"
else:
fmt = "\t%-50s%s\n"
- for name in names:
-
- if not name:
- continue
-
+ for name in sorted(tunables.keys()):
if show_tunable_descriptions and name in descriptions:
sys.stdout.write("\t# %s\n" % descriptions[name])
- sys.stdout.write(fmt % (name, values[name]))
+ sys.stdout.write(fmt % (name, tunables[name]))
unSub = [
@@ -1033,48 +1125,55 @@ def main():
global alternate_tunable_layout
try:
- opts, args = getopt.getopt(
- sys.argv[1:],
- "adp:h", ["alternate", "description", "page=", "help"]
- )
- except getopt.error as e:
- sys.stderr.write("Error: %s\n" % e.msg)
- usage()
- sys.exit(1)
-
- args = {}
- for opt, arg in opts:
- if opt in ('-a', '--alternate'):
- args['a'] = True
- if opt in ('-d', '--description'):
- args['d'] = True
- if opt in ('-p', '--page'):
- args['p'] = arg
- if opt in ('-h', '--help'):
- usage()
- sys.exit(0)
-
- Kstat = get_Kstat()
-
- alternate_tunable_layout = 'a' in args
- show_tunable_descriptions = 'd' in args
-
- pages = []
-
- if 'p' in args:
try:
- pages.append(unSub[int(args['p']) - 1])
- except IndexError:
- sys.stderr.write('the argument to -p must be between 1 and ' +
- str(len(unSub)) + '\n')
+ opts, args = getopt.getopt(
+ sys.argv[1:],
+ "adp:h", ["alternate", "description", "page=", "help"]
+ )
+ except getopt.error as e:
+ sys.stderr.write("Error: %s\n" % e.msg)
+ usage()
sys.exit(1)
- else:
- pages = unSub
- zfs_header()
- for page in pages:
- page(Kstat)
- sys.stdout.write("\n")
+ args = {}
+ for opt, arg in opts:
+ if opt in ('-a', '--alternate'):
+ args['a'] = True
+ if opt in ('-d', '--description'):
+ args['d'] = True
+ if opt in ('-p', '--page'):
+ args['p'] = arg
+ if opt in ('-h', '--help'):
+ usage()
+ sys.exit(0)
+
+ Kstat = get_Kstat()
+
+ alternate_tunable_layout = 'a' in args
+ show_tunable_descriptions = 'd' in args
+
+ pages = []
+
+ if 'p' in args:
+ try:
+ pages.append(unSub[int(args['p']) - 1])
+ except IndexError:
+ sys.stderr.write('the argument to -p must be between 1 and ' +
+ str(len(unSub)) + '\n')
+ sys.exit(1)
+ else:
+ pages = unSub
+
+ zfs_header()
+ for page in pages:
+ page(Kstat)
+ sys.stdout.write("\n")
+ except IOError as ex:
+ if (ex.errno == errno.EPIPE):
+ sys.exit(0)
+ raise
+ except KeyboardInterrupt:
+ sys.exit(0)
if __name__ == '__main__':
diff --git a/cmd/arc_summary/arc_summary3 b/cmd/arc_summary/arc_summary3
index fc5e1e4b64..7b28012ede 100755
--- a/cmd/arc_summary/arc_summary3
+++ b/cmd/arc_summary/arc_summary3
@@ -1,4 +1,4 @@
-#!/usr/bin/python3
+#!/usr/bin/env python3
#
# Copyright (c) 2008 Ben Rockwood ,
# Copyright (c) 2010 Martin Matuska ,
@@ -32,7 +32,7 @@
Provides basic information on the ARC, its efficiency, the L2ARC (if present),
the Data Management Unit (DMU), Virtual Devices (VDEVs), and tunables. See
the in-source documentation and code at
-https://github.com/zfsonlinux/zfs/blob/master/module/zfs/arc.c for details.
+https://github.com/openzfs/zfs/blob/master/module/zfs/arc.c for details.
The original introduction to arc_summary can be found at
http://cuddletech.com/?p=454
"""
@@ -42,13 +42,17 @@ import os
import subprocess
import sys
import time
+import errno
-DECRIPTION = 'Print ARC and other statistics for ZFS on Linux'
+# We can't use env -S portably, and we need python3 -u to handle pipes in
+# the shell abruptly closing the way we want to, so...
+import io
+if isinstance(sys.__stderr__.buffer, io.BufferedWriter):
+ os.execv(sys.executable, [sys.executable, "-u"] + sys.argv)
+
+DESCRIPTION = 'Print ARC and other statistics for OpenZFS'
INDENT = ' '*8
LINE_LENGTH = 72
-PROC_PATH = '/proc/spl/kstat/zfs/'
-SPL_PATH = '/sys/module/spl/parameters/'
-TUNABLES_PATH = '/sys/module/zfs/parameters/'
DATE_FORMAT = '%a %b %d %H:%M:%S %Y'
TITLE = 'ZFS Subsystem Report'
@@ -61,11 +65,10 @@ SECTION_PATHS = {'arc': 'arcstats',
'dmu': 'dmu_tx',
'l2arc': 'arcstats', # L2ARC stuff lives in arcstats
'vdev': 'vdev_cache_stats',
- 'xuio': 'xuio_stats',
'zfetch': 'zfetchstats',
'zil': 'zil'}
-parser = argparse.ArgumentParser(description=DECRIPTION)
+parser = argparse.ArgumentParser(description=DESCRIPTION)
parser.add_argument('-a', '--alternate', action='store_true', default=False,
help='use alternate formatting for tunables and SPL',
dest='alt')
@@ -83,6 +86,172 @@ parser.add_argument('-s', '--section', dest='section', help=SECTION_HELP)
ARGS = parser.parse_args()
+if sys.platform.startswith('freebsd'):
+ # Requires py36-sysctl on FreeBSD
+ import sysctl
+
+ VDEV_CACHE_SIZE = 'vdev.cache_size'
+
+ def is_value(ctl):
+ return ctl.type != sysctl.CTLTYPE_NODE
+
+ def namefmt(ctl, base='vfs.zfs.'):
+ # base is removed from the name
+ cut = len(base)
+ return ctl.name[cut:]
+
+ def load_kstats(section):
+ base = 'kstat.zfs.misc.{section}.'.format(section=section)
+ fmt = lambda kstat: '{name} : {value}'.format(name=namefmt(kstat, base),
+ value=kstat.value)
+ kstats = sysctl.filter(base)
+ return [fmt(kstat) for kstat in kstats if is_value(kstat)]
+
+ def get_params(base):
+ ctls = sysctl.filter(base)
+ return {namefmt(ctl): str(ctl.value) for ctl in ctls if is_value(ctl)}
+
+ def get_tunable_params():
+ return get_params('vfs.zfs')
+
+ def get_vdev_params():
+ return get_params('vfs.zfs.vdev')
+
+ def get_version_impl(request):
+ # FreeBSD reports versions for zpl and spa instead of zfs and spl.
+ name = {'zfs': 'zpl',
+ 'spl': 'spa'}[request]
+ mib = 'vfs.zfs.version.{}'.format(name)
+ version = sysctl.filter(mib)[0].value
+ return '{} version {}'.format(name, version)
+
+ def get_descriptions(_request):
+ ctls = sysctl.filter('vfs.zfs')
+ return {namefmt(ctl): ctl.description for ctl in ctls if is_value(ctl)}
+
+
+elif sys.platform.startswith('linux'):
+ KSTAT_PATH = '/proc/spl/kstat/zfs'
+ SPL_PATH = '/sys/module/spl/parameters'
+ TUNABLES_PATH = '/sys/module/zfs/parameters'
+
+ VDEV_CACHE_SIZE = 'zfs_vdev_cache_size'
+
+ def load_kstats(section):
+ path = os.path.join(KSTAT_PATH, section)
+ with open(path) as f:
+ return list(f)[2:] # Get rid of header
+
+ def get_params(basepath):
+ """Collect information on the Solaris Porting Layer (SPL) or the
+ tunables, depending on the PATH given. Does not check if PATH is
+ legal.
+ """
+ result = {}
+ for name in os.listdir(basepath):
+ path = os.path.join(basepath, name)
+ with open(path) as f:
+ value = f.read()
+ result[name] = value.strip()
+ return result
+
+ def get_spl_params():
+ return get_params(SPL_PATH)
+
+ def get_tunable_params():
+ return get_params(TUNABLES_PATH)
+
+ def get_vdev_params():
+ return get_params(TUNABLES_PATH)
+
+ def get_version_impl(request):
+ # The original arc_summary called /sbin/modinfo/{spl,zfs} to get
+ # the version information. We switch to /sys/module/{spl,zfs}/version
+ # to make sure we get what is really loaded in the kernel
+ try:
+ with open("/sys/module/{}/version".format(request)) as f:
+ return f.read().strip()
+ except:
+ return "(unknown)"
+
+ def get_descriptions(request):
+ """Get the descriptions of the Solaris Porting Layer (SPL) or the
+ tunables, return with minimal formatting.
+ """
+
+ if request not in ('spl', 'zfs'):
+ print('ERROR: description of "{0}" requested)'.format(request))
+ sys.exit(1)
+
+ descs = {}
+ target_prefix = 'parm:'
+
+ # We would prefer to do this with /sys/modules -- see the discussion at
+ # get_version() -- but there isn't a way to get the descriptions from
+ # there, so we fall back on modinfo
+ command = ["/sbin/modinfo", request, "-0"]
+
+ # The recommended way to do this is with subprocess.run(). However,
+ # some installed versions of Python are < 3.5, so we offer them
+ # the option of doing it the old way (for now)
+ info = ''
+
+ try:
+
+ if 'run' in dir(subprocess):
+ info = subprocess.run(command, stdout=subprocess.PIPE,
+ universal_newlines=True)
+ raw_output = info.stdout.split('\0')
+ else:
+ info = subprocess.check_output(command,
+ universal_newlines=True)
+ raw_output = info.split('\0')
+
+ except subprocess.CalledProcessError:
+ print("Error: Descriptions not available",
+ "(can't access kernel module)")
+ sys.exit(1)
+
+ for line in raw_output:
+
+ if not line.startswith(target_prefix):
+ continue
+
+ line = line[len(target_prefix):].strip()
+ name, raw_desc = line.split(':', 1)
+ desc = raw_desc.rsplit('(', 1)[0]
+
+ if desc == '':
+ desc = '(No description found)'
+
+ descs[name.strip()] = desc.strip()
+
+ return descs
+
+def handle_unraisableException(exc_type, exc_value=None, exc_traceback=None,
+ err_msg=None, object=None):
+ handle_Exception(exc_type, object, exc_traceback)
+
+def handle_Exception(ex_cls, ex, tb):
+ if ex_cls is KeyboardInterrupt:
+ sys.exit()
+
+ if ex_cls is BrokenPipeError:
+ # It turns out that while sys.exit() triggers an exception
+ # not handled message on Python 3.8+, os._exit() does not.
+ os._exit(0)
+
+ if ex_cls is OSError:
+ if ex.errno == errno.ENOTCONN:
+ sys.exit()
+
+ raise ex
+
+if hasattr(sys,'unraisablehook'): # Python 3.8+
+ sys.unraisablehook = handle_unraisableException
+sys.excepthook = handle_Exception
+
+
def cleanup_line(single_line):
"""Format a raw line of data from /proc and isolate the name value
part, returning a tuple with each. Currently, this gets rid of the
@@ -238,139 +407,48 @@ def format_raw_line(name, value):
if ARGS.alt:
result = '{0}{1}={2}'.format(INDENT, name, value)
else:
- spc = LINE_LENGTH-(len(INDENT)+len(value))
- result = '{0}{1:<{spc}}{2}'.format(INDENT, name, value, spc=spc)
+ # Right-align the value within the line length if it fits,
+ # otherwise just separate it from the name by a single space.
+ fit = LINE_LENGTH - len(INDENT) - len(name)
+ overflow = len(value) + 1
+ w = max(fit, overflow)
+ result = '{0}{1}{2:>{w}}'.format(INDENT, name, value, w=w)
return result
def get_kstats():
- """Collect information on the ZFS subsystem from the /proc Linux virtual
- file system. The step does not perform any further processing, giving us
- the option to only work on what is actually needed. The name "kstat" is a
- holdover from the Solaris utility of the same name.
+ """Collect information on the ZFS subsystem. The step does not perform any
+ further processing, giving us the option to only work on what is actually
+ needed. The name "kstat" is a holdover from the Solaris utility of the same
+ name.
"""
result = {}
- secs = SECTION_PATHS.values()
- for section in secs:
-
- with open(PROC_PATH+section, 'r') as proc_location:
- lines = [line for line in proc_location]
-
- del lines[0:2] # Get rid of header
- result[section] = lines
+ for section in SECTION_PATHS.values():
+ if section not in result:
+ result[section] = load_kstats(section)
return result
-def get_spl_tunables(PATH):
- """Collect information on the Solaris Porting Layer (SPL) or the
- tunables, depending on the PATH given. Does not check if PATH is
- legal.
- """
-
- result = {}
- parameters = os.listdir(PATH)
-
- for name in parameters:
-
- with open(PATH+name, 'r') as para_file:
- value = para_file.read()
- result[name] = value.strip()
-
- return result
-
-
-def get_descriptions(request):
- """Get the decriptions of the Solaris Porting Layer (SPL) or the
- tunables, return with minimal formatting.
- """
-
- if request not in ('spl', 'zfs'):
- print('ERROR: description of "{0}" requested)'.format(request))
- sys.exit(1)
-
- descs = {}
- target_prefix = 'parm:'
-
- # We would prefer to do this with /sys/modules -- see the discussion at
- # get_version() -- but there isn't a way to get the descriptions from
- # there, so we fall back on modinfo
- command = ["/sbin/modinfo", request, "-0"]
-
- # The recommended way to do this is with subprocess.run(). However,
- # some installed versions of Python are < 3.5, so we offer them
- # the option of doing it the old way (for now)
- info = ''
-
- try:
-
- if 'run' in dir(subprocess):
- info = subprocess.run(command, stdout=subprocess.PIPE,
- universal_newlines=True)
- raw_output = info.stdout.split('\0')
- else:
- info = subprocess.check_output(command, universal_newlines=True)
- raw_output = info.split('\0')
-
- except subprocess.CalledProcessError:
- print("Error: Descriptions not available (can't access kernel module)")
- sys.exit(1)
-
- for line in raw_output:
-
- if not line.startswith(target_prefix):
- continue
-
- line = line[len(target_prefix):].strip()
- name, raw_desc = line.split(':', 1)
- desc = raw_desc.rsplit('(', 1)[0]
-
- if desc == '':
- desc = '(No description found)'
-
- descs[name.strip()] = desc.strip()
-
- return descs
-
-
def get_version(request):
"""Get the version number of ZFS or SPL on this machine for header.
Returns an error string, but does not raise an error, if we can't
- get the ZFS/SPL version via modinfo.
+ get the ZFS/SPL version.
"""
if request not in ('spl', 'zfs'):
error_msg = '(ERROR: "{0}" requested)'.format(request)
return error_msg
- # The original arc_summary called /sbin/modinfo/{spl,zfs} to get
- # the version information. We switch to /sys/module/{spl,zfs}/version
- # to make sure we get what is really loaded in the kernel
- command = ["cat", "/sys/module/{0}/version".format(request)]
- req = request.upper()
- version = "(Can't get {0} version)".format(req)
-
- # The recommended way to do this is with subprocess.run(). However,
- # some installed versions of Python are < 3.5, so we offer them
- # the option of doing it the old way (for now)
- info = ''
- if 'run' in dir(subprocess):
- info = subprocess.run(command, stdout=subprocess.PIPE,
- universal_newlines=True)
- version = info.stdout.strip()
- else:
- info = subprocess.check_output(command, universal_newlines=True)
- version = info.strip()
-
- return version
+ return get_version_impl(request)
def print_header():
"""Print the initial heading with date and time as well as info on the
- Linux and ZFS versions. This is not called for the graph.
+ kernel and ZFS versions. This is not called for the graph.
"""
# datetime is now recommended over time but we keep the exact formatting
@@ -534,6 +612,20 @@ def section_arc(kstats_dict):
prt_i1('Deleted:', f_hits(arc_stats['deleted']))
prt_i1('Mutex misses:', f_hits(arc_stats['mutex_miss']))
prt_i1('Eviction skips:', f_hits(arc_stats['evict_skip']))
+ prt_i1('Eviction skips due to L2 writes:',
+ f_hits(arc_stats['evict_l2_skip']))
+ prt_i1('L2 cached evictions:', f_bytes(arc_stats['evict_l2_cached']))
+ prt_i1('L2 eligible evictions:', f_bytes(arc_stats['evict_l2_eligible']))
+ prt_i2('L2 eligible MFU evictions:',
+ f_perc(arc_stats['evict_l2_eligible_mfu'],
+ arc_stats['evict_l2_eligible']),
+ f_bytes(arc_stats['evict_l2_eligible_mfu']))
+ prt_i2('L2 eligible MRU evictions:',
+ f_perc(arc_stats['evict_l2_eligible_mru'],
+ arc_stats['evict_l2_eligible']),
+ f_bytes(arc_stats['evict_l2_eligible_mru']))
+ prt_i1('L2 ineligible evictions:',
+ f_bytes(arc_stats['evict_l2_ineligible']))
print()
@@ -672,15 +764,30 @@ def section_l2arc(kstats_dict):
prt_i2('Header size:',
f_perc(arc_stats['l2_hdr_size'], arc_stats['l2_size']),
f_bytes(arc_stats['l2_hdr_size']))
+ prt_i2('MFU allocated size:',
+ f_perc(arc_stats['l2_mfu_asize'], arc_stats['l2_asize']),
+ f_bytes(arc_stats['l2_mfu_asize']))
+ prt_i2('MRU allocated size:',
+ f_perc(arc_stats['l2_mru_asize'], arc_stats['l2_asize']),
+ f_bytes(arc_stats['l2_mru_asize']))
+ prt_i2('Prefetch allocated size:',
+ f_perc(arc_stats['l2_prefetch_asize'], arc_stats['l2_asize']),
+ f_bytes(arc_stats['l2_prefetch_asize']))
+ prt_i2('Data (buffer content) allocated size:',
+ f_perc(arc_stats['l2_bufc_data_asize'], arc_stats['l2_asize']),
+ f_bytes(arc_stats['l2_bufc_data_asize']))
+ prt_i2('Metadata (buffer content) allocated size:',
+ f_perc(arc_stats['l2_bufc_metadata_asize'], arc_stats['l2_asize']),
+ f_bytes(arc_stats['l2_bufc_metadata_asize']))
print()
prt_1('L2ARC breakdown:', f_hits(l2_access_total))
prt_i2('Hit ratio:',
f_perc(arc_stats['l2_hits'], l2_access_total),
- f_bytes(arc_stats['l2_hits']))
+ f_hits(arc_stats['l2_hits']))
prt_i2('Miss ratio:',
f_perc(arc_stats['l2_misses'], l2_access_total),
- f_bytes(arc_stats['l2_misses']))
+ f_hits(arc_stats['l2_misses']))
prt_i1('Feeds:', f_hits(arc_stats['l2_feeds']))
print()
@@ -691,13 +798,13 @@ def section_l2arc(kstats_dict):
prt_i2('Done ratio:',
f_perc(arc_stats['l2_writes_done'],
arc_stats['l2_writes_sent']),
- f_bytes(arc_stats['l2_writes_done']))
+ f_hits(arc_stats['l2_writes_done']))
prt_i2('Error ratio:',
f_perc(arc_stats['l2_writes_error'],
arc_stats['l2_writes_sent']),
- f_bytes(arc_stats['l2_writes_error']))
+ f_hits(arc_stats['l2_writes_error']))
else:
- prt_i2('Writes sent:', '100 %', f_bytes(arc_stats['l2_writes_sent']))
+ prt_i2('Writes sent:', '100 %', f_hits(arc_stats['l2_writes_sent']))
print()
print('L2ARC evicts:')
@@ -708,10 +815,14 @@ def section_l2arc(kstats_dict):
def section_spl(*_):
"""Print the SPL parameters, if requested with alternative format
- and/or decriptions. This does not use kstats.
+ and/or descriptions. This does not use kstats.
"""
- spls = get_spl_tunables(SPL_PATH)
+ if sys.platform.startswith('freebsd'):
+ # No SPL support in FreeBSD
+ return
+
+ spls = get_spl_params()
keylist = sorted(spls.keys())
print('Solaris Porting Layer (SPL):')
@@ -725,7 +836,7 @@ def section_spl(*_):
try:
print(INDENT+'#', descriptions[key])
except KeyError:
- print(INDENT+'# (No decription found)') # paranoid
+ print(INDENT+'# (No description found)') # paranoid
print(format_raw_line(key, value))
@@ -734,10 +845,10 @@ def section_spl(*_):
def section_tunables(*_):
"""Print the tunables, if requested with alternative format and/or
- decriptions. This does not use kstasts.
+ descriptions. This does not use kstats.
"""
- tunables = get_spl_tunables(TUNABLES_PATH)
+ tunables = get_tunable_params()
keylist = sorted(tunables.keys())
print('Tunables:')
@@ -751,7 +862,7 @@ def section_tunables(*_):
try:
print(INDENT+'#', descriptions[key])
except KeyError:
- print(INDENT+'# (No decription found)') # paranoid
+ print(INDENT+'# (No description found)') # paranoid
print(format_raw_line(key, value))
@@ -763,11 +874,11 @@ def section_vdev(kstats_dict):
# Currently [Nov 2017] the VDEV cache is disabled, because it is actually
# harmful. When this is the case, we just skip the whole entry. See
- # https://github.com/zfsonlinux/zfs/blob/master/module/zfs/vdev_cache.c
+ # https://github.com/openzfs/zfs/blob/master/module/zfs/vdev_cache.c
# for details
- tunables = get_spl_tunables(TUNABLES_PATH)
+ tunables = get_vdev_params()
- if tunables['zfs_vdev_cache_size'] == '0':
+ if tunables[VDEV_CACHE_SIZE] == '0':
print('VDEV cache disabled, skipping section\n')
return
@@ -789,7 +900,7 @@ def section_vdev(kstats_dict):
def section_zil(kstats_dict):
"""Collect information on the ZFS Intent Log. Some of the information
- taken from https://github.com/zfsonlinux/zfs/blob/master/include/sys/zil.h
+ taken from https://github.com/openzfs/zfs/blob/master/include/sys/zil.h
"""
zil_stats = isolate_section('zil', kstats_dict)
diff --git a/cmd/arcstat/.gitignore b/cmd/arcstat/.gitignore
new file mode 100644
index 0000000000..6d6cd1ab75
--- /dev/null
+++ b/cmd/arcstat/.gitignore
@@ -0,0 +1 @@
+arcstat
diff --git a/cmd/arcstat/Makefile.am b/cmd/arcstat/Makefile.am
index 462e9a6197..d1ba989a0c 100644
--- a/cmd/arcstat/Makefile.am
+++ b/cmd/arcstat/Makefile.am
@@ -1,13 +1,5 @@
-dist_bin_SCRIPTS = arcstat
+include $(top_srcdir)/config/Substfiles.am
-#
-# The arcstat script is compatibile with both Python 2.6 and 3.4.
-# As such the python 3 shebang can be replaced at install time when
-# targeting a python 2 system. This allows us to maintain a single
-# version of the source.
-#
-if USING_PYTHON_2
-install-exec-hook:
- sed --in-place 's|^#!/usr/bin/python3|#!/usr/bin/python2|' \
- $(DESTDIR)$(bindir)/arcstat
-endif
+bin_SCRIPTS = arcstat
+
+SUBSTFILES += $(bin_SCRIPTS)
diff --git a/cmd/arcstat/arcstat b/cmd/arcstat/arcstat.in
similarity index 60%
rename from cmd/arcstat/arcstat
rename to cmd/arcstat/arcstat.in
index 57a2d621f3..cd9a803a24 100755
--- a/cmd/arcstat/arcstat
+++ b/cmd/arcstat/arcstat.in
@@ -1,20 +1,25 @@
-#!/usr/bin/python3
+#!/usr/bin/env @PYTHON_SHEBANG@
#
# Print out ZFS ARC Statistics exported via kstat(1)
-# For a definition of fields, or usage, use arctstat.pl -v
+# For a definition of fields, or usage, use arcstat -v
#
-# This script is a fork of the original arcstat.pl (0.1) by
-# Neelakanth Nadgir, originally published on his Sun blog on
+# This script was originally a fork of the original arcstat.pl (0.1)
+# by Neelakanth Nadgir, originally published on his Sun blog on
# 09/18/2007
# http://blogs.sun.com/realneel/entry/zfs_arc_statistics
#
-# This version aims to improve upon the original by adding features
-# and fixing bugs as needed. This version is maintained by
-# Mike Harsch and is hosted in a public open source repository:
+# A new version aimed to improve upon the original by adding features
+# and fixing bugs as needed. This version was maintained by Mike
+# Harsch and was hosted in a public open source repository:
# http://github.com/mharsch/arcstat
#
-# Comments, Questions, or Suggestions are always welcome.
-# Contact the maintainer at ( mike at harschsystems dot com )
+# but has since moved to the illumos-gate repository.
+#
+# This Python port was written by John Hixson for FreeNAS, introduced
+# in commit e2c29f:
+# https://github.com/freenas/freenas
+#
+# and has been improved by many people since.
#
# CDDL HEADER START
#
@@ -51,16 +56,16 @@ import getopt
import re
import copy
-from decimal import Decimal
from signal import signal, SIGINT, SIGWINCH, SIG_DFL
+
cols = {
# HDR: [Size, Scale, Description]
"time": [8, -1, "Time"],
"hits": [4, 1000, "ARC reads per second"],
"miss": [4, 1000, "ARC misses per second"],
"read": [4, 1000, "Total ARC accesses per second"],
- "hit%": [4, 100, "ARC Hit percentage"],
+ "hit%": [4, 100, "ARC hit percentage"],
"miss%": [5, 100, "ARC miss percentage"],
"dhit": [4, 1000, "Demand hits per second"],
"dmis": [4, 1000, "Demand misses per second"],
@@ -75,13 +80,20 @@ cols = {
"mread": [5, 1000, "Metadata accesses per second"],
"mh%": [3, 100, "Metadata hit percentage"],
"mm%": [3, 100, "Metadata miss percentage"],
- "arcsz": [5, 1024, "ARC Size"],
- "c": [4, 1024, "ARC Target Size"],
- "mfu": [4, 1000, "MFU List hits per second"],
- "mru": [4, 1000, "MRU List hits per second"],
- "mfug": [4, 1000, "MFU Ghost List hits per second"],
- "mrug": [4, 1000, "MRU Ghost List hits per second"],
+ "arcsz": [5, 1024, "ARC size"],
+ "size": [4, 1024, "ARC size"],
+ "c": [4, 1024, "ARC target size"],
+ "mfu": [4, 1000, "MFU list hits per second"],
+ "mru": [4, 1000, "MRU list hits per second"],
+ "mfug": [4, 1000, "MFU ghost list hits per second"],
+ "mrug": [4, 1000, "MRU ghost list hits per second"],
"eskip": [5, 1000, "evict_skip per second"],
+ "el2skip": [7, 1000, "evict skip, due to l2 writes, per second"],
+ "el2cach": [7, 1024, "Size of L2 cached evictions per second"],
+ "el2el": [5, 1024, "Size of L2 eligible evictions per second"],
+ "el2mfu": [6, 1024, "Size of L2 eligible MFU evictions per second"],
+ "el2mru": [6, 1024, "Size of L2 eligible MRU evictions per second"],
+ "el2inel": [7, 1024, "Size of L2 ineligible evictions per second"],
"mtxmis": [6, 1000, "mutex_miss per second"],
"dread": [5, 1000, "Demand accesses per second"],
"pread": [5, 1000, "Prefetch accesses per second"],
@@ -90,17 +102,29 @@ cols = {
"l2read": [6, 1000, "Total L2ARC accesses per second"],
"l2hit%": [6, 100, "L2ARC access hit percentage"],
"l2miss%": [7, 100, "L2ARC access miss percentage"],
+ "l2pref": [6, 1024, "L2ARC prefetch allocated size"],
+ "l2mfu": [5, 1024, "L2ARC MFU allocated size"],
+ "l2mru": [5, 1024, "L2ARC MRU allocated size"],
+ "l2data": [6, 1024, "L2ARC data allocated size"],
+ "l2meta": [6, 1024, "L2ARC metadata allocated size"],
+ "l2pref%": [7, 100, "L2ARC prefetch percentage"],
+ "l2mfu%": [6, 100, "L2ARC MFU percentage"],
+ "l2mru%": [6, 100, "L2ARC MRU percentage"],
+ "l2data%": [7, 100, "L2ARC data percentage"],
+ "l2meta%": [7, 100, "L2ARC metadata percentage"],
"l2asize": [7, 1024, "Actual (compressed) size of the L2ARC"],
"l2size": [6, 1024, "Size of the L2ARC"],
- "l2bytes": [7, 1024, "bytes read per second from the L2ARC"],
- "grow": [4, 1000, "ARC Grow disabled"],
- "need": [4, 1024, "ARC Reclaim need"],
- "free": [4, 1024, "ARC Free memory"],
+ "l2bytes": [7, 1024, "Bytes read per second from the L2ARC"],
+ "grow": [4, 1000, "ARC grow disabled"],
+ "need": [4, 1024, "ARC reclaim need"],
+ "free": [4, 1024, "ARC free memory"],
+ "avail": [5, 1024, "ARC available memory"],
+ "waste": [5, 1024, "Wasted memory due to round up to pagesize"],
}
v = {}
hdr = ["time", "read", "miss", "miss%", "dmis", "dm%", "pmis", "pm%", "mmis",
- "mm%", "arcsz", "c"]
+ "mm%", "size", "c", "avail"]
xhdr = ["time", "mfu", "mru", "mfug", "mrug", "eskip", "mtxmis", "dread",
"pread", "read"]
sint = 1 # Default interval is 1 second
@@ -110,12 +134,56 @@ opfile = None
sep = " " # Default separator is 2 spaces
version = "0.4"
l2exist = False
-cmd = ("Usage: arcstat [-hvx] [-f fields] [-o file] [-s string] [interval "
+cmd = ("Usage: arcstat [-havxp] [-f fields] [-o file] [-s string] [interval "
"[count]]\n")
cur = {}
d = {}
out = None
kstat = None
+pretty_print = True
+
+
+if sys.platform.startswith('freebsd'):
+ # Requires py-sysctl on FreeBSD
+ import sysctl
+
+ def kstat_update():
+ global kstat
+
+ k = [ctl for ctl in sysctl.filter('kstat.zfs.misc.arcstats')
+ if ctl.type != sysctl.CTLTYPE_NODE]
+
+ if not k:
+ sys.exit(1)
+
+ kstat = {}
+
+ for s in k:
+ if not s:
+ continue
+
+ name, value = s.name, s.value
+ # Trims 'kstat.zfs.misc.arcstats' from the name
+ kstat[name[24:]] = int(value)
+
+elif sys.platform.startswith('linux'):
+ def kstat_update():
+ global kstat
+
+ k = [line.strip() for line in open('/proc/spl/kstat/zfs/arcstats')]
+
+ if not k:
+ sys.exit(1)
+
+ del k[0:2]
+ kstat = {}
+
+ for s in k:
+ if not s:
+ continue
+
+ name, unused, value = s.split()
+ kstat[name] = int(value)
def detailed_usage():
@@ -131,6 +199,7 @@ def detailed_usage():
def usage():
sys.stderr.write("%s\n" % cmd)
sys.stderr.write("\t -h : Print this help message\n")
+ sys.stderr.write("\t -a : Print all possible stats\n")
sys.stderr.write("\t -v : List all possible field headers and definitions"
"\n")
sys.stderr.write("\t -x : Print extended stats\n")
@@ -138,6 +207,7 @@ def usage():
sys.stderr.write("\t -o : Redirect output to the specified file\n")
sys.stderr.write("\t -s : Override default field separator with custom "
"character or string\n")
+ sys.stderr.write("\t -p : Disable auto-scaling of numerical fields\n")
sys.stderr.write("\nExamples:\n")
sys.stderr.write("\tarcstat -o /tmp/a.log 2 10\n")
sys.stderr.write("\tarcstat -s \",\" -o /tmp/a.log 2 10\n")
@@ -148,25 +218,6 @@ def usage():
sys.exit(1)
-def kstat_update():
- global kstat
-
- k = [line.strip() for line in open('/proc/spl/kstat/zfs/arcstats')]
-
- if not k:
- sys.exit(1)
-
- del k[0:2]
- kstat = {}
-
- for s in k:
- if not s:
- continue
-
- name, unused, value = s.split()
- kstat[name] = Decimal(value)
-
-
def snap_stats():
global cur
global kstat
@@ -197,7 +248,7 @@ def prettynum(sz, scale, num=0):
elif 0 < num < 1:
num = 0
- while num > scale and index < 5:
+ while abs(num) > scale and index < 5:
save = num
num = num / scale
index += 1
@@ -205,7 +256,7 @@ def prettynum(sz, scale, num=0):
if index == 0:
return "%*d" % (sz, num)
- if (save / scale) < 10:
+ if abs(save / scale) < 10:
return "%*.1f%s" % (sz - 1, num, suffix[index])
else:
return "%*d%s" % (sz - 1, num, suffix[index])
@@ -215,12 +266,14 @@ def print_values():
global hdr
global sep
global v
+ global pretty_print
- for col in hdr:
- sys.stdout.write("%s%s" % (
- prettynum(cols[col][0], cols[col][1], v[col]),
- sep
- ))
+ if pretty_print:
+ fmt = lambda col: prettynum(cols[col][0], cols[col][1], v[col])
+ else:
+ fmt = lambda col: v[col]
+
+ sys.stdout.write(sep.join(fmt(col) for col in hdr))
sys.stdout.write("\n")
sys.stdout.flush()
@@ -228,9 +281,14 @@ def print_values():
def print_header():
global hdr
global sep
+ global pretty_print
- for col in hdr:
- sys.stdout.write("%*s%s" % (cols[col][0], col, sep))
+ if pretty_print:
+ fmt = lambda col: "%*s" % (cols[col][0], col)
+ else:
+ fmt = lambda col: col
+
+ sys.stdout.write(sep.join(fmt(col) for col in hdr))
sys.stdout.write("\n")
@@ -267,8 +325,10 @@ def init():
global sep
global out
global l2exist
+ global pretty_print
desired_cols = None
+ aflag = False
xflag = False
hflag = False
vflag = False
@@ -277,14 +337,16 @@ def init():
try:
opts, args = getopt.getopt(
sys.argv[1:],
- "xo:hvs:f:",
+ "axo:hvs:f:p",
[
+ "all",
"extended",
"outfile",
"help",
"verbose",
"separator",
- "columns"
+ "columns",
+ "parsable"
]
)
except getopt.error as msg:
@@ -293,6 +355,8 @@ def init():
opts = None
for opt, arg in opts:
+ if opt in ('-a', '--all'):
+ aflag = True
if opt in ('-x', '--extended'):
xflag = True
if opt in ('-o', '--outfile'):
@@ -308,19 +372,13 @@ def init():
if opt in ('-f', '--columns'):
desired_cols = arg
i += 1
+ if opt in ('-p', '--parsable'):
+ pretty_print = False
i += 1
argv = sys.argv[i:]
- sint = Decimal(argv[0]) if argv else sint
- count = int(argv[1]) if len(argv) > 1 else count
-
- if len(argv) > 1:
- sint = Decimal(argv[0])
- count = int(argv[1])
-
- elif len(argv) > 0:
- sint = Decimal(argv[0])
- count = 0
+ sint = int(argv[0]) if argv else sint
+ count = int(argv[1]) if len(argv) > 1 else (0 if len(argv) > 0 else 1)
if hflag or (xflag and desired_cols):
usage()
@@ -360,6 +418,12 @@ def init():
incompat)
usage()
+ if aflag:
+ if l2exist:
+ hdr = cols.keys()
+ else:
+ hdr = [col for col in cols.keys() if not col.startswith("l2")]
+
if opfile:
try:
out = open(opfile, "w")
@@ -377,59 +441,79 @@ def calculate():
v = dict()
v["time"] = time.strftime("%H:%M:%S", time.localtime())
- v["hits"] = d["hits"] / sint
- v["miss"] = d["misses"] / sint
+ v["hits"] = d["hits"] // sint
+ v["miss"] = d["misses"] // sint
v["read"] = v["hits"] + v["miss"]
- v["hit%"] = 100 * v["hits"] / v["read"] if v["read"] > 0 else 0
+ v["hit%"] = 100 * v["hits"] // v["read"] if v["read"] > 0 else 0
v["miss%"] = 100 - v["hit%"] if v["read"] > 0 else 0
- v["dhit"] = (d["demand_data_hits"] + d["demand_metadata_hits"]) / sint
- v["dmis"] = (d["demand_data_misses"] + d["demand_metadata_misses"]) / sint
+ v["dhit"] = (d["demand_data_hits"] + d["demand_metadata_hits"]) // sint
+ v["dmis"] = (d["demand_data_misses"] + d["demand_metadata_misses"]) // sint
v["dread"] = v["dhit"] + v["dmis"]
- v["dh%"] = 100 * v["dhit"] / v["dread"] if v["dread"] > 0 else 0
+ v["dh%"] = 100 * v["dhit"] // v["dread"] if v["dread"] > 0 else 0
v["dm%"] = 100 - v["dh%"] if v["dread"] > 0 else 0
- v["phit"] = (d["prefetch_data_hits"] + d["prefetch_metadata_hits"]) / sint
+ v["phit"] = (d["prefetch_data_hits"] + d["prefetch_metadata_hits"]) // sint
v["pmis"] = (d["prefetch_data_misses"] +
- d["prefetch_metadata_misses"]) / sint
+ d["prefetch_metadata_misses"]) // sint
v["pread"] = v["phit"] + v["pmis"]
- v["ph%"] = 100 * v["phit"] / v["pread"] if v["pread"] > 0 else 0
+ v["ph%"] = 100 * v["phit"] // v["pread"] if v["pread"] > 0 else 0
v["pm%"] = 100 - v["ph%"] if v["pread"] > 0 else 0
v["mhit"] = (d["prefetch_metadata_hits"] +
- d["demand_metadata_hits"]) / sint
+ d["demand_metadata_hits"]) // sint
v["mmis"] = (d["prefetch_metadata_misses"] +
- d["demand_metadata_misses"]) / sint
+ d["demand_metadata_misses"]) // sint
v["mread"] = v["mhit"] + v["mmis"]
- v["mh%"] = 100 * v["mhit"] / v["mread"] if v["mread"] > 0 else 0
+ v["mh%"] = 100 * v["mhit"] // v["mread"] if v["mread"] > 0 else 0
v["mm%"] = 100 - v["mh%"] if v["mread"] > 0 else 0
v["arcsz"] = cur["size"]
+ v["size"] = cur["size"]
v["c"] = cur["c"]
- v["mfu"] = d["mfu_hits"] / sint
- v["mru"] = d["mru_hits"] / sint
- v["mrug"] = d["mru_ghost_hits"] / sint
- v["mfug"] = d["mfu_ghost_hits"] / sint
- v["eskip"] = d["evict_skip"] / sint
- v["mtxmis"] = d["mutex_miss"] / sint
+ v["mfu"] = d["mfu_hits"] // sint
+ v["mru"] = d["mru_hits"] // sint
+ v["mrug"] = d["mru_ghost_hits"] // sint
+ v["mfug"] = d["mfu_ghost_hits"] // sint
+ v["eskip"] = d["evict_skip"] // sint
+ v["el2skip"] = d["evict_l2_skip"] // sint
+ v["el2cach"] = d["evict_l2_cached"] // sint
+ v["el2el"] = d["evict_l2_eligible"] // sint
+ v["el2mfu"] = d["evict_l2_eligible_mfu"] // sint
+ v["el2mru"] = d["evict_l2_eligible_mru"] // sint
+ v["el2inel"] = d["evict_l2_ineligible"] // sint
+ v["mtxmis"] = d["mutex_miss"] // sint
if l2exist:
- v["l2hits"] = d["l2_hits"] / sint
- v["l2miss"] = d["l2_misses"] / sint
+ v["l2hits"] = d["l2_hits"] // sint
+ v["l2miss"] = d["l2_misses"] // sint
v["l2read"] = v["l2hits"] + v["l2miss"]
- v["l2hit%"] = 100 * v["l2hits"] / v["l2read"] if v["l2read"] > 0 else 0
+ v["l2hit%"] = 100 * v["l2hits"] // v["l2read"] if v["l2read"] > 0 else 0
v["l2miss%"] = 100 - v["l2hit%"] if v["l2read"] > 0 else 0
v["l2asize"] = cur["l2_asize"]
v["l2size"] = cur["l2_size"]
- v["l2bytes"] = d["l2_read_bytes"] / sint
+ v["l2bytes"] = d["l2_read_bytes"] // sint
+
+ v["l2pref"] = cur["l2_prefetch_asize"]
+ v["l2mfu"] = cur["l2_mfu_asize"]
+ v["l2mru"] = cur["l2_mru_asize"]
+ v["l2data"] = cur["l2_bufc_data_asize"]
+ v["l2meta"] = cur["l2_bufc_metadata_asize"]
+ v["l2pref%"] = 100 * v["l2pref"] // v["l2asize"]
+ v["l2mfu%"] = 100 * v["l2mfu"] // v["l2asize"]
+ v["l2mru%"] = 100 * v["l2mru"] // v["l2asize"]
+ v["l2data%"] = 100 * v["l2data"] // v["l2asize"]
+ v["l2meta%"] = 100 * v["l2meta"] // v["l2asize"]
v["grow"] = 0 if cur["arc_no_grow"] else 1
v["need"] = cur["arc_need_free"]
- v["free"] = cur["arc_sys_free"]
+ v["free"] = cur["memory_free_bytes"]
+ v["avail"] = cur["memory_available_bytes"]
+ v["waste"] = cur["abd_chunk_waste_size"]
def main():
diff --git a/cmd/dbufstat/.gitignore b/cmd/dbufstat/.gitignore
new file mode 100644
index 0000000000..2c2e913cef
--- /dev/null
+++ b/cmd/dbufstat/.gitignore
@@ -0,0 +1 @@
+dbufstat
diff --git a/cmd/dbufstat/Makefile.am b/cmd/dbufstat/Makefile.am
index 968a760779..e672a01a42 100644
--- a/cmd/dbufstat/Makefile.am
+++ b/cmd/dbufstat/Makefile.am
@@ -1,13 +1,5 @@
-dist_bin_SCRIPTS = dbufstat
+include $(top_srcdir)/config/Substfiles.am
-#
-# The dbufstat script is compatibile with both Python 2.6 and 3.4.
-# As such the python 3 shebang can be replaced at install time when
-# targeting a python 2 system. This allows us to maintain a single
-# version of the source.
-#
-if USING_PYTHON_2
-install-exec-hook:
- sed --in-place 's|^#!/usr/bin/python3|#!/usr/bin/python2|' \
- $(DESTDIR)$(bindir)/dbufstat
-endif
+bin_SCRIPTS = dbufstat
+
+SUBSTFILES += $(bin_SCRIPTS)
diff --git a/cmd/dbufstat/dbufstat b/cmd/dbufstat/dbufstat.in
similarity index 97%
rename from cmd/dbufstat/dbufstat
rename to cmd/dbufstat/dbufstat.in
index e6c947fbcb..82250353f5 100755
--- a/cmd/dbufstat/dbufstat
+++ b/cmd/dbufstat/dbufstat.in
@@ -1,4 +1,4 @@
-#!/usr/bin/python3
+#!/usr/bin/env @PYTHON_SHEBANG@
#
# Print out statistics for all cached dmu buffers. This information
# is available through the dbufs kstat and may be post-processed as
@@ -113,10 +113,25 @@ cmd = ("Usage: dbufstat [-bdhnrtvx] [-i file] [-f fields] [-o file] "
raw = 0
+if sys.platform.startswith("freebsd"):
+ import io
+ # Requires py-sysctl on FreeBSD
+ import sysctl
+
+ def default_ifile():
+ dbufs = sysctl.filter("kstat.zfs.misc.dbufs")[0].value
+ sys.stdin = io.StringIO(dbufs)
+ return "-"
+
+elif sys.platform.startswith("linux"):
+ def default_ifile():
+ return "/proc/spl/kstat/zfs/dbufs"
+
+
def print_incompat_helper(incompat):
cnt = 0
for key in sorted(incompat):
- if cnt is 0:
+ if cnt == 0:
sys.stderr.write("\t")
elif cnt > 8:
sys.stderr.write(",\n\t")
@@ -343,7 +358,7 @@ def get_compstring(c):
"ZIO_COMPRESS_GZIP_6", "ZIO_COMPRESS_GZIP_7",
"ZIO_COMPRESS_GZIP_8", "ZIO_COMPRESS_GZIP_9",
"ZIO_COMPRESS_ZLE", "ZIO_COMPRESS_LZ4",
- "ZIO_COMPRESS_FUNCTION"]
+ "ZIO_COMPRESS_ZSTD", "ZIO_COMPRESS_FUNCTION"]
# If "-rr" option is used, don't convert to string representation
if raw > 1:
@@ -645,9 +660,9 @@ def main():
sys.exit(1)
if not ifile:
- ifile = '/proc/spl/kstat/zfs/dbufs'
+ ifile = default_ifile()
- if ifile is not "-":
+ if ifile != "-":
try:
tmp = open(ifile, "r")
sys.stdin = tmp
diff --git a/cmd/fsck_zfs/.gitignore b/cmd/fsck_zfs/.gitignore
new file mode 100644
index 0000000000..0edf0309e9
--- /dev/null
+++ b/cmd/fsck_zfs/.gitignore
@@ -0,0 +1 @@
+/fsck.zfs
diff --git a/cmd/fsck_zfs/Makefile.am b/cmd/fsck_zfs/Makefile.am
index 2380f56fa4..f8139f117f 100644
--- a/cmd/fsck_zfs/Makefile.am
+++ b/cmd/fsck_zfs/Makefile.am
@@ -1 +1,6 @@
+include $(top_srcdir)/config/Substfiles.am
+include $(top_srcdir)/config/Shellcheck.am
+
dist_sbin_SCRIPTS = fsck.zfs
+
+SUBSTFILES += $(dist_sbin_SCRIPTS)
diff --git a/cmd/fsck_zfs/fsck.zfs b/cmd/fsck_zfs/fsck.zfs
deleted file mode 100755
index f1685db652..0000000000
--- a/cmd/fsck_zfs/fsck.zfs
+++ /dev/null
@@ -1,9 +0,0 @@
-#!/bin/sh
-#
-# fsck.zfs: A fsck helper to accomidate distributions that expect
-# to be able to execute a fsck on all filesystem types. Currently
-# this script does nothing but it could be extended to act as a
-# compatibility wrapper for 'zpool scrub'.
-#
-
-exit 0
diff --git a/cmd/fsck_zfs/fsck.zfs.in b/cmd/fsck_zfs/fsck.zfs.in
new file mode 100755
index 0000000000..37096902cb
--- /dev/null
+++ b/cmd/fsck_zfs/fsck.zfs.in
@@ -0,0 +1,44 @@
+#!/bin/sh
+#
+# fsck.zfs: A fsck helper to accommodate distributions that expect
+# to be able to execute a fsck on all filesystem types.
+#
+# This script simply bubbles up some already-known-about errors,
+# see fsck.zfs(8)
+#
+
+if [ "$#" = "0" ]; then
+ echo "Usage: $0 [options] dataset…" >&2
+ exit 16
+fi
+
+ret=0
+for dataset in "$@"; do
+ case "$dataset" in
+ -*)
+ continue
+ ;;
+ *)
+ ;;
+ esac
+
+ pool="${dataset%%/*}"
+
+ case "$(@sbindir@/zpool list -Ho health "$pool")" in
+ DEGRADED)
+ ret=$(( ret | 4 ))
+ ;;
+ FAULTED)
+ awk '!/^([[:space:]]*#.*)?$/ && $1 == "'"$dataset"'" && $3 == "zfs" {exit 1}' /etc/fstab || \
+ ret=$(( ret | 8 ))
+ ;;
+ "")
+ # Pool not found, error printed by zpool(8)
+ ret=$(( ret | 8 ))
+ ;;
+ *)
+ ;;
+ esac
+done
+
+exit "$ret"
diff --git a/cmd/mount_zfs/Makefile.am b/cmd/mount_zfs/Makefile.am
index 7adedd63b6..3957602d27 100644
--- a/cmd/mount_zfs/Makefile.am
+++ b/cmd/mount_zfs/Makefile.am
@@ -1,9 +1,5 @@
include $(top_srcdir)/config/Rules.am
-DEFAULT_INCLUDES += \
- -I$(top_srcdir)/include \
- -I$(top_srcdir)/lib/libspl/include
-
#
# Ignore the prefix for the mount helper. It must be installed in /sbin/
# because this path is hardcoded in the mount(8) for security reasons.
@@ -17,5 +13,10 @@ mount_zfs_SOURCES = \
mount_zfs.c
mount_zfs_LDADD = \
- $(top_builddir)/lib/libnvpair/libnvpair.la \
- $(top_builddir)/lib/libzfs/libzfs.la
+ $(abs_top_builddir)/lib/libzfs/libzfs.la \
+ $(abs_top_builddir)/lib/libzfs_core/libzfs_core.la \
+ $(abs_top_builddir)/lib/libnvpair/libnvpair.la
+
+mount_zfs_LDADD += $(LTLIBINTL)
+
+include $(top_srcdir)/config/CppCheck.am
diff --git a/cmd/mount_zfs/mount_zfs.c b/cmd/mount_zfs/mount_zfs.c
index a9b1e166b4..434d53cbad 100644
--- a/cmd/mount_zfs/mount_zfs.c
+++ b/cmd/mount_zfs/mount_zfs.c
@@ -42,247 +42,46 @@
libzfs_handle_t *g_zfs;
-typedef struct option_map {
- const char *name;
- unsigned long mntmask;
- unsigned long zfsmask;
-} option_map_t;
-
-static const option_map_t option_map[] = {
- /* Canonicalized filesystem independent options from mount(8) */
- { MNTOPT_NOAUTO, MS_COMMENT, ZS_COMMENT },
- { MNTOPT_DEFAULTS, MS_COMMENT, ZS_COMMENT },
- { MNTOPT_NODEVICES, MS_NODEV, ZS_COMMENT },
- { MNTOPT_DIRSYNC, MS_DIRSYNC, ZS_COMMENT },
- { MNTOPT_NOEXEC, MS_NOEXEC, ZS_COMMENT },
- { MNTOPT_GROUP, MS_GROUP, ZS_COMMENT },
- { MNTOPT_NETDEV, MS_COMMENT, ZS_COMMENT },
- { MNTOPT_NOFAIL, MS_COMMENT, ZS_COMMENT },
- { MNTOPT_NOSUID, MS_NOSUID, ZS_COMMENT },
- { MNTOPT_OWNER, MS_OWNER, ZS_COMMENT },
- { MNTOPT_REMOUNT, MS_REMOUNT, ZS_COMMENT },
- { MNTOPT_RO, MS_RDONLY, ZS_COMMENT },
- { MNTOPT_RW, MS_COMMENT, ZS_COMMENT },
- { MNTOPT_SYNC, MS_SYNCHRONOUS, ZS_COMMENT },
- { MNTOPT_USER, MS_USERS, ZS_COMMENT },
- { MNTOPT_USERS, MS_USERS, ZS_COMMENT },
- /* acl flags passed with util-linux-2.24 mount command */
- { MNTOPT_ACL, MS_POSIXACL, ZS_COMMENT },
- { MNTOPT_NOACL, MS_COMMENT, ZS_COMMENT },
- { MNTOPT_POSIXACL, MS_POSIXACL, ZS_COMMENT },
-#ifdef MS_NOATIME
- { MNTOPT_NOATIME, MS_NOATIME, ZS_COMMENT },
-#endif
-#ifdef MS_NODIRATIME
- { MNTOPT_NODIRATIME, MS_NODIRATIME, ZS_COMMENT },
-#endif
-#ifdef MS_RELATIME
- { MNTOPT_RELATIME, MS_RELATIME, ZS_COMMENT },
-#endif
-#ifdef MS_STRICTATIME
- { MNTOPT_STRICTATIME, MS_STRICTATIME, ZS_COMMENT },
-#endif
-#ifdef MS_LAZYTIME
- { MNTOPT_LAZYTIME, MS_LAZYTIME, ZS_COMMENT },
-#endif
- { MNTOPT_CONTEXT, MS_COMMENT, ZS_COMMENT },
- { MNTOPT_FSCONTEXT, MS_COMMENT, ZS_COMMENT },
- { MNTOPT_DEFCONTEXT, MS_COMMENT, ZS_COMMENT },
- { MNTOPT_ROOTCONTEXT, MS_COMMENT, ZS_COMMENT },
-#ifdef MS_I_VERSION
- { MNTOPT_IVERSION, MS_I_VERSION, ZS_COMMENT },
-#endif
-#ifdef MS_MANDLOCK
- { MNTOPT_NBMAND, MS_MANDLOCK, ZS_COMMENT },
-#endif
- /* Valid options not found in mount(8) */
- { MNTOPT_BIND, MS_BIND, ZS_COMMENT },
-#ifdef MS_REC
- { MNTOPT_RBIND, MS_BIND|MS_REC, ZS_COMMENT },
-#endif
- { MNTOPT_COMMENT, MS_COMMENT, ZS_COMMENT },
-#ifdef MS_NOSUB
- { MNTOPT_NOSUB, MS_NOSUB, ZS_COMMENT },
-#endif
-#ifdef MS_SILENT
- { MNTOPT_QUIET, MS_SILENT, ZS_COMMENT },
-#endif
- /* Custom zfs options */
- { MNTOPT_XATTR, MS_COMMENT, ZS_COMMENT },
- { MNTOPT_NOXATTR, MS_COMMENT, ZS_COMMENT },
- { MNTOPT_ZFSUTIL, MS_COMMENT, ZS_ZFSUTIL },
- { NULL, 0, 0 } };
-
/*
- * Break the mount option in to a name/value pair. The name is
- * validated against the option map and mount flags set accordingly.
+ * Opportunistically convert a target string into a pool name. If the
+ * string does not represent a block device with a valid zfs label
+ * then it is passed through without modification.
*/
-static int
-parse_option(char *mntopt, unsigned long *mntflags,
- unsigned long *zfsflags, int sloppy)
+static void
+parse_dataset(const char *target, char **dataset)
{
- const option_map_t *opt;
- char *ptr, *name, *value = NULL;
- int error = 0;
-
- name = strdup(mntopt);
- if (name == NULL)
- return (ENOMEM);
-
- for (ptr = name; ptr && *ptr; ptr++) {
- if (*ptr == '=') {
- *ptr = '\0';
- value = ptr+1;
- VERIFY3P(value, !=, NULL);
- break;
- }
- }
-
- for (opt = option_map; opt->name != NULL; opt++) {
- if (strncmp(name, opt->name, strlen(name)) == 0) {
- *mntflags |= opt->mntmask;
- *zfsflags |= opt->zfsmask;
- error = 0;
- goto out;
- }
- }
-
- if (!sloppy)
- error = ENOENT;
-out:
- /* If required further process on the value may be done here */
- free(name);
- return (error);
-}
-
-/*
- * Translate the mount option string in to MS_* mount flags for the
- * kernel vfs. When sloppy is non-zero unknown options will be ignored
- * otherwise they are considered fatal are copied in to badopt.
- */
-static int
-parse_options(char *mntopts, unsigned long *mntflags, unsigned long *zfsflags,
- int sloppy, char *badopt, char *mtabopt)
-{
- int error = 0, quote = 0, flag = 0, count = 0;
- char *ptr, *opt, *opts;
-
- opts = strdup(mntopts);
- if (opts == NULL)
- return (ENOMEM);
-
- *mntflags = 0;
- opt = NULL;
-
/*
- * Scan through all mount options which must be comma delimited.
- * We must be careful to notice regions which are double quoted
- * and skip commas in these regions. Each option is then checked
- * to determine if it is a known option.
+ * Prior to util-linux 2.36.2, if a file or directory in the
+ * current working directory was named 'dataset' then mount(8)
+ * would prepend the current working directory to the dataset.
+ * Check for it and strip the prepended path when it is added.
*/
- for (ptr = opts; ptr && !flag; ptr++) {
- if (opt == NULL)
- opt = ptr;
-
- if (*ptr == '"')
- quote = !quote;
-
- if (quote)
- continue;
-
- if (*ptr == '\0')
- flag = 1;
-
- if ((*ptr == ',') || (*ptr == '\0')) {
- *ptr = '\0';
-
- error = parse_option(opt, mntflags, zfsflags, sloppy);
- if (error) {
- strcpy(badopt, opt);
- goto out;
-
- }
-
- if (!(*mntflags & MS_REMOUNT) &&
- !(*zfsflags & ZS_ZFSUTIL)) {
- if (count > 0)
- strlcat(mtabopt, ",", MNT_LINE_MAX);
-
- strlcat(mtabopt, opt, MNT_LINE_MAX);
- count++;
- }
-
- opt = NULL;
- }
- }
-
-out:
- free(opts);
- return (error);
-}
-
-/*
- * Return the pool/dataset to mount given the name passed to mount. This
- * is expected to be of the form pool/dataset, however may also refer to
- * a block device if that device contains a valid zfs label.
- */
-static char *
-parse_dataset(char *dataset)
-{
char cwd[PATH_MAX];
- struct stat64 statbuf;
- int error;
- int len;
-
- /*
- * We expect a pool/dataset to be provided, however if we're
- * given a device which is a member of a zpool we attempt to
- * extract the pool name stored in the label. Given the pool
- * name we can mount the root dataset.
- */
- error = stat64(dataset, &statbuf);
- if (error == 0) {
- nvlist_t *config;
- char *name;
- int fd;
-
- fd = open(dataset, O_RDONLY);
- if (fd < 0)
- goto out;
-
- error = zpool_read_label(fd, &config, NULL);
- (void) close(fd);
- if (error)
- goto out;
-
- error = nvlist_lookup_string(config,
- ZPOOL_CONFIG_POOL_NAME, &name);
- if (error) {
- nvlist_free(config);
- } else {
- dataset = strdup(name);
- nvlist_free(config);
- return (dataset);
- }
+ if (getcwd(cwd, PATH_MAX) == NULL) {
+ perror("getcwd");
+ return;
}
-out:
- /*
- * If a file or directory in your current working directory is
- * named 'dataset' then mount(8) will prepend your current working
- * directory to the dataset. There is no way to prevent this
- * behavior so we simply check for it and strip the prepended
- * patch when it is added.
- */
- if (getcwd(cwd, PATH_MAX) == NULL)
- return (dataset);
+ int len = strlen(cwd);
+ if (strncmp(cwd, target, len) == 0)
+ target += len;
- len = strlen(cwd);
+ /* Assume pool/dataset is more likely */
+ strlcpy(*dataset, target, PATH_MAX);
- /* Do not add one when cwd already ends in a trailing '/' */
- if (strncmp(cwd, dataset, len) == 0)
- return (dataset + len + (cwd[len-1] != '/'));
+ int fd = open(target, O_RDONLY | O_CLOEXEC);
+ if (fd < 0)
+ return;
- return (dataset);
+ nvlist_t *cfg = NULL;
+ if (zpool_read_label(fd, &cfg, NULL) == 0) {
+ char *nm = NULL;
+ if (!nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &nm))
+ strlcpy(*dataset, nm, PATH_MAX);
+ nvlist_free(cfg);
+ }
+
+ if (close(fd))
+ perror("close");
}
/*
@@ -326,8 +125,8 @@ mtab_update(char *dataset, char *mntpoint, char *type, char *mntopts)
if (!fp) {
(void) fprintf(stderr, gettext(
"filesystem '%s' was mounted, but /etc/mtab "
- "could not be opened due to error %d\n"),
- dataset, errno);
+ "could not be opened due to error: %s\n"),
+ dataset, strerror(errno));
return (MOUNT_FILEIO);
}
@@ -335,8 +134,8 @@ mtab_update(char *dataset, char *mntpoint, char *type, char *mntopts)
if (error) {
(void) fprintf(stderr, gettext(
"filesystem '%s' was mounted, but /etc/mtab "
- "could not be updated due to error %d\n"),
- dataset, errno);
+ "could not be updated due to error: %s\n"),
+ dataset, strerror(errno));
return (MOUNT_FILEIO);
}
@@ -345,34 +144,6 @@ mtab_update(char *dataset, char *mntpoint, char *type, char *mntopts)
return (MOUNT_SUCCESS);
}
-static void
-append_mntopt(const char *name, const char *val, char *mntopts,
- char *mtabopt, boolean_t quote)
-{
- char tmp[MNT_LINE_MAX];
-
- snprintf(tmp, MNT_LINE_MAX, quote ? ",%s=\"%s\"" : ",%s=%s", name, val);
-
- if (mntopts)
- strlcat(mntopts, tmp, MNT_LINE_MAX);
-
- if (mtabopt)
- strlcat(mtabopt, tmp, MNT_LINE_MAX);
-}
-
-static void
-zfs_selinux_setcontext(zfs_handle_t *zhp, zfs_prop_t zpt, const char *name,
- char *mntopts, char *mtabopt)
-{
- char context[ZFS_MAXPROPLEN];
-
- if (zfs_prop_get(zhp, zpt, context, sizeof (context),
- NULL, NULL, 0, B_FALSE) == 0) {
- if (strcmp(context, "none") != 0)
- append_mntopt(name, context, mntopts, mtabopt, B_TRUE);
- }
-}
-
int
main(int argc, char **argv)
{
@@ -383,12 +154,13 @@ main(int argc, char **argv)
char badopt[MNT_LINE_MAX] = { '\0' };
char mtabopt[MNT_LINE_MAX] = { '\0' };
char mntpoint[PATH_MAX];
- char *dataset;
+ char dataset[PATH_MAX], *pdataset = dataset;
unsigned long mntflags = 0, zfsflags = 0, remount = 0;
int sloppy = 0, fake = 0, verbose = 0, nomtab = 0, zfsutil = 0;
int error, c;
(void) setlocale(LC_ALL, "");
+ (void) setlocale(LC_NUMERIC, "C");
(void) textdomain(TEXT_DOMAIN);
opterr = 0;
@@ -413,10 +185,11 @@ main(int argc, char **argv)
break;
case 'h':
case '?':
- (void) fprintf(stderr, gettext("Invalid option '%c'\n"),
- optopt);
+ if (optopt)
+ (void) fprintf(stderr,
+ gettext("Invalid option '%c'\n"), optopt);
(void) fprintf(stderr, gettext("Usage: mount.zfs "
- "[-sfnv] [-o options] \n"));
+ "[-sfnvh] [-o options] \n"));
return (MOUNT_USAGE);
}
}
@@ -438,18 +211,18 @@ main(int argc, char **argv)
return (MOUNT_USAGE);
}
- dataset = parse_dataset(argv[0]);
+ parse_dataset(argv[0], &pdataset);
/* canonicalize the mount point */
if (realpath(argv[1], mntpoint) == NULL) {
(void) fprintf(stderr, gettext("filesystem '%s' cannot be "
- "mounted at '%s' due to canonicalization error %d.\n"),
- dataset, argv[1], errno);
+ "mounted at '%s' due to canonicalization error: %s\n"),
+ dataset, argv[1], strerror(errno));
return (MOUNT_SYSERR);
}
/* validate mount options and set mntflags */
- error = parse_options(mntopts, &mntflags, &zfsflags, sloppy,
+ error = zfs_parse_mount_options(mntopts, &mntflags, &zfsflags, sloppy,
badopt, mtabopt);
if (error) {
switch (error) {
@@ -489,7 +262,7 @@ main(int argc, char **argv)
zfsutil = 1;
if ((g_zfs = libzfs_init()) == NULL) {
- (void) fprintf(stderr, "%s", libzfs_error_init(errno));
+ (void) fprintf(stderr, "%s\n", libzfs_error_init(errno));
return (MOUNT_SYSERR);
}
@@ -502,32 +275,7 @@ main(int argc, char **argv)
return (MOUNT_USAGE);
}
- /*
- * Checks to see if the ZFS_PROP_SELINUX_CONTEXT exists
- * if it does, create a tmp variable in case it's needed
- * checks to see if the selinux context is set to the default
- * if it is, allow the setting of the other context properties
- * this is needed because the 'context' property overrides others
- * if it is not the default, set the 'context' property
- */
- if (zfs_prop_get(zhp, ZFS_PROP_SELINUX_CONTEXT, prop, sizeof (prop),
- NULL, NULL, 0, B_FALSE) == 0) {
- if (strcmp(prop, "none") == 0) {
- zfs_selinux_setcontext(zhp, ZFS_PROP_SELINUX_FSCONTEXT,
- MNTOPT_FSCONTEXT, mntopts, mtabopt);
- zfs_selinux_setcontext(zhp, ZFS_PROP_SELINUX_DEFCONTEXT,
- MNTOPT_DEFCONTEXT, mntopts, mtabopt);
- zfs_selinux_setcontext(zhp,
- ZFS_PROP_SELINUX_ROOTCONTEXT, MNTOPT_ROOTCONTEXT,
- mntopts, mtabopt);
- } else {
- append_mntopt(MNTOPT_CONTEXT, prop,
- mntopts, mtabopt, B_TRUE);
- }
- }
-
- /* A hint used to determine an auto-mounted snapshot mount point */
- append_mntopt(MNTOPT_MNTPOINT, mntpoint, mntopts, NULL, B_FALSE);
+ zfs_adjust_mount_options(zhp, mntpoint, mntopts, mtabopt);
/* treat all snapshots as legacy mount points */
if (zfs_get_type(zhp) == ZFS_TYPE_SNAPSHOT)
@@ -620,8 +368,8 @@ main(int argc, char **argv)
"mount the filesystem again.\n"), dataset);
return (MOUNT_SYSERR);
}
- /* fallthru */
#endif
+ fallthrough;
default:
(void) fprintf(stderr, gettext("filesystem "
"'%s' can not be mounted: %s\n"), dataset,
diff --git a/cmd/raidz_test/Makefile.am b/cmd/raidz_test/Makefile.am
index a394a0dde3..983ff25dc9 100644
--- a/cmd/raidz_test/Makefile.am
+++ b/cmd/raidz_test/Makefile.am
@@ -4,11 +4,7 @@ include $(top_srcdir)/config/Rules.am
AM_CFLAGS += $(FRAME_LARGER_THAN)
# Unconditionally enable ASSERTs
-AM_CPPFLAGS += -DDEBUG -UNDEBUG
-
-DEFAULT_INCLUDES += \
- -I$(top_srcdir)/include \
- -I$(top_srcdir)/lib/libspl/include
+AM_CPPFLAGS += -DDEBUG -UNDEBUG -DZFS_DEBUG
bin_PROGRAMS = raidz_test
@@ -18,6 +14,9 @@ raidz_test_SOURCES = \
raidz_bench.c
raidz_test_LDADD = \
- $(top_builddir)/lib/libzpool/libzpool.la
+ $(abs_top_builddir)/lib/libzpool/libzpool.la \
+ $(abs_top_builddir)/lib/libzfs_core/libzfs_core.la
-raidz_test_LDADD += -lm -ldl
+raidz_test_LDADD += -lm
+
+include $(top_srcdir)/config/CppCheck.am
diff --git a/cmd/raidz_test/raidz_bench.c b/cmd/raidz_test/raidz_bench.c
index 4863b8d97b..f44d6fbde7 100644
--- a/cmd/raidz_test/raidz_bench.c
+++ b/cmd/raidz_test/raidz_bench.c
@@ -31,8 +31,6 @@
#include
#include
-#include
-
#include "raidz_test.h"
#define GEN_BENCH_MEMORY (((uint64_t)1ULL)<<32)
@@ -83,8 +81,17 @@ run_gen_bench_impl(const char *impl)
/* create suitable raidz_map */
ncols = rto_opts.rto_dcols + fn + 1;
zio_bench.io_size = 1ULL << ds;
- rm_bench = vdev_raidz_map_alloc(&zio_bench,
- BENCH_ASHIFT, ncols, fn+1);
+
+ if (rto_opts.rto_expand) {
+ rm_bench = vdev_raidz_map_alloc_expanded(
+ zio_bench.io_abd,
+ zio_bench.io_size, zio_bench.io_offset,
+ rto_opts.rto_ashift, ncols+1, ncols,
+ fn+1, rto_opts.rto_expand_offset);
+ } else {
+ rm_bench = vdev_raidz_map_alloc(&zio_bench,
+ BENCH_ASHIFT, ncols, fn+1);
+ }
/* estimate iteration count */
iter_cnt = GEN_BENCH_MEMORY;
@@ -113,7 +120,7 @@ run_gen_bench_impl(const char *impl)
}
}
-void
+static void
run_gen_bench(void)
{
char **impl_name;
@@ -163,8 +170,16 @@ run_rec_bench_impl(const char *impl)
(1ULL << BENCH_ASHIFT))
continue;
- rm_bench = vdev_raidz_map_alloc(&zio_bench,
- BENCH_ASHIFT, ncols, PARITY_PQR);
+ if (rto_opts.rto_expand) {
+ rm_bench = vdev_raidz_map_alloc_expanded(
+ zio_bench.io_abd,
+ zio_bench.io_size, zio_bench.io_offset,
+ BENCH_ASHIFT, ncols+1, ncols,
+ PARITY_PQR, rto_opts.rto_expand_offset);
+ } else {
+ rm_bench = vdev_raidz_map_alloc(&zio_bench,
+ BENCH_ASHIFT, ncols, PARITY_PQR);
+ }
/* estimate iteration count */
iter_cnt = (REC_BENCH_MEMORY);
@@ -197,7 +212,7 @@ run_rec_bench_impl(const char *impl)
}
}
-void
+static void
run_rec_bench(void)
{
char **impl_name;
diff --git a/cmd/raidz_test/raidz_test.c b/cmd/raidz_test/raidz_test.c
index a05070399c..c1610a8d1b 100644
--- a/cmd/raidz_test/raidz_test.c
+++ b/cmd/raidz_test/raidz_test.c
@@ -37,11 +37,11 @@
static int *rand_data;
raidz_test_opts_t rto_opts;
-static char gdb[256];
-static const char gdb_tmpl[] = "gdb -ex \"set pagination 0\" -p %d";
+static char pid_s[16];
static void sig_handler(int signo)
{
+ int old_errno = errno;
struct sigaction action;
/*
* Restore default action and re-raise signal so SIGSEGV and
@@ -52,10 +52,19 @@ static void sig_handler(int signo)
action.sa_flags = 0;
(void) sigaction(signo, &action, NULL);
- if (rto_opts.rto_gdb)
- if (system(gdb)) { }
+ if (rto_opts.rto_gdb) {
+ pid_t pid = fork();
+ if (pid == 0) {
+ execlp("gdb", "gdb", "-ex", "set pagination 0",
+ "-p", pid_s, NULL);
+ _exit(-1);
+ } else if (pid > 0)
+ while (waitpid(pid, NULL, 0) == -1 && errno == EINTR)
+ ;
+ }
raise(signo);
+ errno = old_errno;
}
static void print_opts(raidz_test_opts_t *opts, boolean_t force)
@@ -77,16 +86,20 @@ static void print_opts(raidz_test_opts_t *opts, boolean_t force)
(void) fprintf(stdout, DBLSEP "Running with options:\n"
" (-a) zio ashift : %zu\n"
" (-o) zio offset : 1 << %zu\n"
+ " (-e) expanded map : %s\n"
+ " (-r) reflow offset : %llx\n"
" (-d) number of raidz data columns : %zu\n"
" (-s) size of DATA : 1 << %zu\n"
" (-S) sweep parameters : %s \n"
" (-v) verbose : %s \n\n",
- opts->rto_ashift, /* -a */
- ilog2(opts->rto_offset), /* -o */
- opts->rto_dcols, /* -d */
- ilog2(opts->rto_dsize), /* -s */
- opts->rto_sweep ? "yes" : "no", /* -S */
- verbose); /* -v */
+ opts->rto_ashift, /* -a */
+ ilog2(opts->rto_offset), /* -o */
+ opts->rto_expand ? "yes" : "no", /* -e */
+ (u_longlong_t)opts->rto_expand_offset, /* -r */
+ opts->rto_dcols, /* -d */
+ ilog2(opts->rto_dsize), /* -s */
+ opts->rto_sweep ? "yes" : "no", /* -S */
+ verbose); /* -v */
}
}
@@ -104,6 +117,8 @@ static void usage(boolean_t requested)
"\t[-S parameter sweep (default: %s)]\n"
"\t[-t timeout for parameter sweep test]\n"
"\t[-B benchmark all raidz implementations]\n"
+ "\t[-e use expanded raidz map (default: %s)]\n"
+ "\t[-r expanded raidz map reflow offset (default: %llx)]\n"
"\t[-v increase verbosity (default: %zu)]\n"
"\t[-h (print help)]\n"
"\t[-T test the test, see if failure would be detected]\n"
@@ -114,6 +129,8 @@ static void usage(boolean_t requested)
o->rto_dcols, /* -d */
ilog2(o->rto_dsize), /* -s */
rto_opts.rto_sweep ? "yes" : "no", /* -S */
+ rto_opts.rto_expand ? "yes" : "no", /* -e */
+ (u_longlong_t)o->rto_expand_offset, /* -r */
o->rto_v); /* -d */
exit(requested ? 0 : 1);
@@ -128,7 +145,7 @@ static void process_options(int argc, char **argv)
bcopy(&rto_opts_defaults, o, sizeof (*o));
- while ((opt = getopt(argc, argv, "TDBSvha:o:d:s:t:")) != -1) {
+ while ((opt = getopt(argc, argv, "TDBSvha:er:o:d:s:t:")) != -1) {
value = 0;
switch (opt) {
@@ -136,6 +153,12 @@ static void process_options(int argc, char **argv)
value = strtoull(optarg, NULL, 0);
o->rto_ashift = MIN(13, MAX(9, value));
break;
+ case 'e':
+ o->rto_expand = 1;
+ break;
+ case 'r':
+ o->rto_expand_offset = strtoull(optarg, NULL, 0);
+ break;
case 'o':
value = strtoull(optarg, NULL, 0);
o->rto_offset = ((1ULL << MIN(12, value)) >> 9) << 9;
@@ -179,25 +202,34 @@ static void process_options(int argc, char **argv)
}
}
-#define DATA_COL(rm, i) ((rm)->rm_col[raidz_parity(rm) + (i)].rc_abd)
-#define DATA_COL_SIZE(rm, i) ((rm)->rm_col[raidz_parity(rm) + (i)].rc_size)
+#define DATA_COL(rr, i) ((rr)->rr_col[rr->rr_firstdatacol + (i)].rc_abd)
+#define DATA_COL_SIZE(rr, i) ((rr)->rr_col[rr->rr_firstdatacol + (i)].rc_size)
-#define CODE_COL(rm, i) ((rm)->rm_col[(i)].rc_abd)
-#define CODE_COL_SIZE(rm, i) ((rm)->rm_col[(i)].rc_size)
+#define CODE_COL(rr, i) ((rr)->rr_col[(i)].rc_abd)
+#define CODE_COL_SIZE(rr, i) ((rr)->rr_col[(i)].rc_size)
static int
cmp_code(raidz_test_opts_t *opts, const raidz_map_t *rm, const int parity)
{
- int i, ret = 0;
+ int r, i, ret = 0;
VERIFY(parity >= 1 && parity <= 3);
- for (i = 0; i < parity; i++) {
- if (abd_cmp(CODE_COL(rm, i), CODE_COL(opts->rm_golden, i))
- != 0) {
- ret++;
- LOG_OPT(D_DEBUG, opts,
- "\nParity block [%d] different!\n", i);
+ for (r = 0; r < rm->rm_nrows; r++) {
+ raidz_row_t * const rr = rm->rm_row[r];
+ raidz_row_t * const rrg = opts->rm_golden->rm_row[r];
+ for (i = 0; i < parity; i++) {
+ if (CODE_COL_SIZE(rrg, i) == 0) {
+ VERIFY0(CODE_COL_SIZE(rr, i));
+ continue;
+ }
+
+ if (abd_cmp(CODE_COL(rr, i),
+ CODE_COL(rrg, i)) != 0) {
+ ret++;
+ LOG_OPT(D_DEBUG, opts,
+ "\nParity block [%d] different!\n", i);
+ }
}
}
return (ret);
@@ -206,16 +238,26 @@ cmp_code(raidz_test_opts_t *opts, const raidz_map_t *rm, const int parity)
static int
cmp_data(raidz_test_opts_t *opts, raidz_map_t *rm)
{
- int i, ret = 0;
- int dcols = opts->rm_golden->rm_cols - raidz_parity(opts->rm_golden);
+ int r, i, dcols, ret = 0;
- for (i = 0; i < dcols; i++) {
- if (abd_cmp(DATA_COL(opts->rm_golden, i), DATA_COL(rm, i))
- != 0) {
- ret++;
+ for (r = 0; r < rm->rm_nrows; r++) {
+ raidz_row_t *rr = rm->rm_row[r];
+ raidz_row_t *rrg = opts->rm_golden->rm_row[r];
+ dcols = opts->rm_golden->rm_row[0]->rr_cols -
+ raidz_parity(opts->rm_golden);
+ for (i = 0; i < dcols; i++) {
+ if (DATA_COL_SIZE(rrg, i) == 0) {
+ VERIFY0(DATA_COL_SIZE(rr, i));
+ continue;
+ }
- LOG_OPT(D_DEBUG, opts,
- "\nData block [%d] different!\n", i);
+ if (abd_cmp(DATA_COL(rrg, i),
+ DATA_COL(rr, i)) != 0) {
+ ret++;
+
+ LOG_OPT(D_DEBUG, opts,
+ "\nData block [%d] different!\n", i);
+ }
}
}
return (ret);
@@ -236,12 +278,13 @@ init_rand(void *data, size_t size, void *private)
static void
corrupt_colums(raidz_map_t *rm, const int *tgts, const int cnt)
{
- int i;
- raidz_col_t *col;
-
- for (i = 0; i < cnt; i++) {
- col = &rm->rm_col[tgts[i]];
- abd_iterate_func(col->rc_abd, 0, col->rc_size, init_rand, NULL);
+ for (int r = 0; r < rm->rm_nrows; r++) {
+ raidz_row_t *rr = rm->rm_row[r];
+ for (int i = 0; i < cnt; i++) {
+ raidz_col_t *col = &rr->rr_col[tgts[i]];
+ abd_iterate_func(col->rc_abd, 0, col->rc_size,
+ init_rand, NULL);
+ }
}
}
@@ -288,10 +331,22 @@ init_raidz_golden_map(raidz_test_opts_t *opts, const int parity)
VERIFY0(vdev_raidz_impl_set("original"));
- opts->rm_golden = vdev_raidz_map_alloc(opts->zio_golden,
- opts->rto_ashift, total_ncols, parity);
- rm_test = vdev_raidz_map_alloc(zio_test,
- opts->rto_ashift, total_ncols, parity);
+ if (opts->rto_expand) {
+ opts->rm_golden =
+ vdev_raidz_map_alloc_expanded(opts->zio_golden->io_abd,
+ opts->zio_golden->io_size, opts->zio_golden->io_offset,
+ opts->rto_ashift, total_ncols+1, total_ncols,
+ parity, opts->rto_expand_offset);
+ rm_test = vdev_raidz_map_alloc_expanded(zio_test->io_abd,
+ zio_test->io_size, zio_test->io_offset,
+ opts->rto_ashift, total_ncols+1, total_ncols,
+ parity, opts->rto_expand_offset);
+ } else {
+ opts->rm_golden = vdev_raidz_map_alloc(opts->zio_golden,
+ opts->rto_ashift, total_ncols, parity);
+ rm_test = vdev_raidz_map_alloc(zio_test,
+ opts->rto_ashift, total_ncols, parity);
+ }
VERIFY(opts->zio_golden);
VERIFY(opts->rm_golden);
@@ -312,6 +367,187 @@ init_raidz_golden_map(raidz_test_opts_t *opts, const int parity)
return (err);
}
+/*
+ * If reflow is not in progress, reflow_offset should be UINT64_MAX.
+ * For each row, if the row is entirely before reflow_offset, it will
+ * come from the new location. Otherwise this row will come from the
+ * old location. Therefore, rows that straddle the reflow_offset will
+ * come from the old location.
+ *
+ * NOTE: Until raidz expansion is implemented, this function is only
+ * needed by raidz_test.c to test the multi-row raidz_map_t functionality.
+ */
+raidz_map_t *
+vdev_raidz_map_alloc_expanded(abd_t *abd, uint64_t size, uint64_t offset,
+ uint64_t ashift, uint64_t physical_cols, uint64_t logical_cols,
+ uint64_t nparity, uint64_t reflow_offset)
+{
+ /* The zio's size in units of the vdev's minimum sector size. */
+ uint64_t s = size >> ashift;
+ uint64_t q, r, bc, devidx, asize = 0, tot;
+
+ /*
+ * "Quotient": The number of data sectors for this stripe on all but
+ * the "big column" child vdevs that also contain "remainder" data.
+ * AKA "full rows"
+ */
+ q = s / (logical_cols - nparity);
+
+ /*
+ * "Remainder": The number of partial stripe data sectors in this I/O.
+ * This will add a sector to some, but not all, child vdevs.
+ */
+ r = s - q * (logical_cols - nparity);
+
+ /* The number of "big columns" - those which contain remainder data. */
+ bc = (r == 0 ? 0 : r + nparity);
+
+ /*
+ * The total number of data and parity sectors associated with
+ * this I/O.
+ */
+ tot = s + nparity * (q + (r == 0 ? 0 : 1));
+
+ /* How many rows contain data (not skip) */
+ uint64_t rows = howmany(tot, logical_cols);
+ int cols = MIN(tot, logical_cols);
+
+ raidz_map_t *rm = kmem_zalloc(offsetof(raidz_map_t, rm_row[rows]),
+ KM_SLEEP);
+ rm->rm_nrows = rows;
+
+ for (uint64_t row = 0; row < rows; row++) {
+ raidz_row_t *rr = kmem_alloc(offsetof(raidz_row_t,
+ rr_col[cols]), KM_SLEEP);
+ rm->rm_row[row] = rr;
+
+ /* The starting RAIDZ (parent) vdev sector of the row. */
+ uint64_t b = (offset >> ashift) + row * logical_cols;
+
+ /*
+ * If we are in the middle of a reflow, and any part of this
+ * row has not been copied, then use the old location of
+ * this row.
+ */
+ int row_phys_cols = physical_cols;
+ if (b + (logical_cols - nparity) > reflow_offset >> ashift)
+ row_phys_cols--;
+
+ /* starting child of this row */
+ uint64_t child_id = b % row_phys_cols;
+ /* The starting byte offset on each child vdev. */
+ uint64_t child_offset = (b / row_phys_cols) << ashift;
+
+ /*
+ * We set cols to the entire width of the block, even
+ * if this row is shorter. This is needed because parity
+ * generation (for Q and R) needs to know the entire width,
+ * because it treats the short row as though it was
+ * full-width (and the "phantom" sectors were zero-filled).
+ *
+ * Another approach to this would be to set cols shorter
+ * (to just the number of columns that we might do i/o to)
+ * and have another mechanism to tell the parity generation
+ * about the "entire width". Reconstruction (at least
+ * vdev_raidz_reconstruct_general()) would also need to
+ * know about the "entire width".
+ */
+ rr->rr_cols = cols;
+ rr->rr_bigcols = bc;
+ rr->rr_missingdata = 0;
+ rr->rr_missingparity = 0;
+ rr->rr_firstdatacol = nparity;
+ rr->rr_abd_empty = NULL;
+ rr->rr_nempty = 0;
+
+ for (int c = 0; c < rr->rr_cols; c++, child_id++) {
+ if (child_id >= row_phys_cols) {
+ child_id -= row_phys_cols;
+ child_offset += 1ULL << ashift;
+ }
+ rr->rr_col[c].rc_devidx = child_id;
+ rr->rr_col[c].rc_offset = child_offset;
+ rr->rr_col[c].rc_orig_data = NULL;
+ rr->rr_col[c].rc_error = 0;
+ rr->rr_col[c].rc_tried = 0;
+ rr->rr_col[c].rc_skipped = 0;
+ rr->rr_col[c].rc_need_orig_restore = B_FALSE;
+
+ uint64_t dc = c - rr->rr_firstdatacol;
+ if (c < rr->rr_firstdatacol) {
+ rr->rr_col[c].rc_size = 1ULL << ashift;
+ rr->rr_col[c].rc_abd =
+ abd_alloc_linear(rr->rr_col[c].rc_size,
+ B_TRUE);
+ } else if (row == rows - 1 && bc != 0 && c >= bc) {
+ /*
+				 * Past the end; this is only for parity generation.
+ */
+ rr->rr_col[c].rc_size = 0;
+ rr->rr_col[c].rc_abd = NULL;
+ } else {
+ /*
+ * "data column" (col excluding parity)
+ * Add an ASCII art diagram here
+ */
+ uint64_t off;
+
+ if (c < bc || r == 0) {
+ off = dc * rows + row;
+ } else {
+ off = r * rows +
+ (dc - r) * (rows - 1) + row;
+ }
+ rr->rr_col[c].rc_size = 1ULL << ashift;
+ rr->rr_col[c].rc_abd = abd_get_offset_struct(
+ &rr->rr_col[c].rc_abdstruct,
+ abd, off << ashift, 1 << ashift);
+ }
+
+ asize += rr->rr_col[c].rc_size;
+ }
+ /*
+ * If all data stored spans all columns, there's a danger that
+ * parity will always be on the same device and, since parity
+ * isn't read during normal operation, that that device's I/O
+ * bandwidth won't be used effectively. We therefore switch
+ * the parity every 1MB.
+ *
+ * ...at least that was, ostensibly, the theory. As a practical
+ * matter unless we juggle the parity between all devices
+ * evenly, we won't see any benefit. Further, occasional writes
+ * that aren't a multiple of the LCM of the number of children
+ * and the minimum stripe width are sufficient to avoid pessimal
+ * behavior. Unfortunately, this decision created an implicit
+ * on-disk format requirement that we need to support for all
+ * eternity, but only for single-parity RAID-Z.
+ *
+ * If we intend to skip a sector in the zeroth column for
+ * padding we must make sure to note this swap. We will never
+ * intend to skip the first column since at least one data and
+ * one parity column must appear in each row.
+ */
+ if (rr->rr_firstdatacol == 1 && rr->rr_cols > 1 &&
+ (offset & (1ULL << 20))) {
+ ASSERT(rr->rr_cols >= 2);
+ ASSERT(rr->rr_col[0].rc_size == rr->rr_col[1].rc_size);
+ devidx = rr->rr_col[0].rc_devidx;
+ uint64_t o = rr->rr_col[0].rc_offset;
+ rr->rr_col[0].rc_devidx = rr->rr_col[1].rc_devidx;
+ rr->rr_col[0].rc_offset = rr->rr_col[1].rc_offset;
+ rr->rr_col[1].rc_devidx = devidx;
+ rr->rr_col[1].rc_offset = o;
+ }
+
+ }
+ ASSERT3U(asize, ==, tot << ashift);
+
+ /* init RAIDZ parity ops */
+ rm->rm_ops = vdev_raidz_math_get_ops();
+
+ return (rm);
+}
+
static raidz_map_t *
init_raidz_map(raidz_test_opts_t *opts, zio_t **zio, const int parity)
{
@@ -330,8 +566,15 @@ init_raidz_map(raidz_test_opts_t *opts, zio_t **zio, const int parity)
(*zio)->io_abd = raidz_alloc(alloc_dsize);
init_zio_abd(*zio);
- rm = vdev_raidz_map_alloc(*zio, opts->rto_ashift,
- total_ncols, parity);
+ if (opts->rto_expand) {
+ rm = vdev_raidz_map_alloc_expanded((*zio)->io_abd,
+ (*zio)->io_size, (*zio)->io_offset,
+ opts->rto_ashift, total_ncols+1, total_ncols,
+ parity, opts->rto_expand_offset);
+ } else {
+ rm = vdev_raidz_map_alloc(*zio, opts->rto_ashift,
+ total_ncols, parity);
+ }
VERIFY(rm);
/* Make sure code columns are destroyed */
@@ -420,7 +663,7 @@ run_rec_check_impl(raidz_test_opts_t *opts, raidz_map_t *rm, const int fn)
if (fn < RAIDZ_REC_PQ) {
/* can reconstruct 1 failed data disk */
for (x0 = 0; x0 < opts->rto_dcols; x0++) {
- if (x0 >= rm->rm_cols - raidz_parity(rm))
+ if (x0 >= rm->rm_row[0]->rr_cols - raidz_parity(rm))
continue;
/* Check if should stop */
@@ -445,10 +688,11 @@ run_rec_check_impl(raidz_test_opts_t *opts, raidz_map_t *rm, const int fn)
} else if (fn < RAIDZ_REC_PQR) {
/* can reconstruct 2 failed data disk */
for (x0 = 0; x0 < opts->rto_dcols; x0++) {
- if (x0 >= rm->rm_cols - raidz_parity(rm))
+ if (x0 >= rm->rm_row[0]->rr_cols - raidz_parity(rm))
continue;
for (x1 = x0 + 1; x1 < opts->rto_dcols; x1++) {
- if (x1 >= rm->rm_cols - raidz_parity(rm))
+ if (x1 >= rm->rm_row[0]->rr_cols -
+ raidz_parity(rm))
continue;
/* Check if should stop */
@@ -475,14 +719,15 @@ run_rec_check_impl(raidz_test_opts_t *opts, raidz_map_t *rm, const int fn)
} else {
/* can reconstruct 3 failed data disk */
for (x0 = 0; x0 < opts->rto_dcols; x0++) {
- if (x0 >= rm->rm_cols - raidz_parity(rm))
+ if (x0 >= rm->rm_row[0]->rr_cols - raidz_parity(rm))
continue;
for (x1 = x0 + 1; x1 < opts->rto_dcols; x1++) {
- if (x1 >= rm->rm_cols - raidz_parity(rm))
+ if (x1 >= rm->rm_row[0]->rr_cols -
+ raidz_parity(rm))
continue;
for (x2 = x1 + 1; x2 < opts->rto_dcols; x2++) {
- if (x2 >=
- rm->rm_cols - raidz_parity(rm))
+ if (x2 >= rm->rm_row[0]->rr_cols -
+ raidz_parity(rm))
continue;
/* Check if should stop */
@@ -700,6 +945,8 @@ run_sweep(void)
opts->rto_dcols = dcols_v[d];
opts->rto_offset = (1 << ashift_v[a]) * rand();
opts->rto_dsize = size_v[s];
+ opts->rto_expand = rto_opts.rto_expand;
+ opts->rto_expand_offset = rto_opts.rto_expand_offset;
opts->rto_v = 0; /* be quiet */
VERIFY3P(thread_create(NULL, 0, sweep_thread, (void *) opts,
@@ -732,6 +979,7 @@ exit:
return (sweep_state == SWEEP_ERROR ? SWEEP_ERROR : 0);
}
+
int
main(int argc, char **argv)
{
@@ -739,8 +987,8 @@ main(int argc, char **argv)
struct sigaction action;
int err = 0;
- /* init gdb string early */
- (void) sprintf(gdb, gdb_tmpl, getpid());
+ /* init gdb pid string early */
+ (void) sprintf(pid_s, "%d", getpid());
action.sa_handler = sig_handler;
sigemptyset(&action.sa_mask);
@@ -757,7 +1005,7 @@ main(int argc, char **argv)
process_options(argc, argv);
- kernel_init(FREAD);
+ kernel_init(SPA_MODE_READ);
/* setup random data because rand() is not reentrant */
rand_data = (int *)umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
diff --git a/cmd/raidz_test/raidz_test.h b/cmd/raidz_test/raidz_test.h
index a7fd26b8b2..0f7f4cee3e 100644
--- a/cmd/raidz_test/raidz_test.h
+++ b/cmd/raidz_test/raidz_test.h
@@ -38,18 +38,21 @@ static const char *raidz_impl_names[] = {
"avx512bw",
"aarch64_neon",
"aarch64_neonx2",
+ "powerpc_altivec",
NULL
};
typedef struct raidz_test_opts {
size_t rto_ashift;
- size_t rto_offset;
+ uint64_t rto_offset;
size_t rto_dcols;
size_t rto_dsize;
size_t rto_v;
size_t rto_sweep;
size_t rto_sweep_timeout;
size_t rto_benchmark;
+ size_t rto_expand;
+ uint64_t rto_expand_offset;
size_t rto_sanity;
size_t rto_gdb;
@@ -68,6 +71,8 @@ static const raidz_test_opts_t rto_opts_defaults = {
.rto_v = 0,
.rto_sweep = 0,
.rto_benchmark = 0,
+ .rto_expand = 0,
+ .rto_expand_offset = -1ULL,
.rto_sanity = 0,
.rto_gdb = 0,
.rto_should_stop = B_FALSE
@@ -112,4 +117,7 @@ void init_zio_abd(zio_t *zio);
void run_raidz_benchmark(void);
+struct raidz_map *vdev_raidz_map_alloc_expanded(abd_t *, uint64_t, uint64_t,
+ uint64_t, uint64_t, uint64_t, uint64_t, uint64_t);
+
#endif /* RAIDZ_TEST_H */
diff --git a/cmd/vdev_id/Makefile.am b/cmd/vdev_id/Makefile.am
index fb815faad0..4071c6d5ed 100644
--- a/cmd/vdev_id/Makefile.am
+++ b/cmd/vdev_id/Makefile.am
@@ -1 +1,3 @@
+include $(top_srcdir)/config/Shellcheck.am
+
dist_udev_SCRIPTS = vdev_id
diff --git a/cmd/vdev_id/vdev_id b/cmd/vdev_id/vdev_id
index 3796ab4885..8cc4399a56 100755
--- a/cmd/vdev_id/vdev_id
+++ b/cmd/vdev_id/vdev_id
@@ -79,6 +79,34 @@
# channel 86:00.0 1 A
# channel 86:00.0 0 B
+# #
+# # Example vdev_id.conf - multipath / multijbod-daisychaining
+# #
+#
+# multipath yes
+# multijbod yes
+#
+# # PCI_ID HBA PORT CHANNEL NAME
+# channel 85:00.0 1 A
+# channel 85:00.0 0 B
+# channel 86:00.0 1 A
+# channel 86:00.0 0 B
+
+# #
+# # Example vdev_id.conf - multipath / mixed
+# #
+#
+# multipath yes
+# slot mix
+#
+# # PCI_ID HBA PORT CHANNEL NAME
+# channel 85:00.0 3 A
+# channel 85:00.0 2 B
+# channel 86:00.0 3 A
+# channel 86:00.0 2 B
+# channel af:00.0 0 C
+# channel af:00.0 1 C
+
# #
# # Example vdev_id.conf - alias
# #
@@ -92,9 +120,10 @@ PATH=/bin:/sbin:/usr/bin:/usr/sbin
CONFIG=/etc/zfs/vdev_id.conf
PHYS_PER_PORT=
DEV=
-MULTIPATH=
TOPOLOGY=
BAY=
+ENCL_ID=""
+UNIQ_ENCL_ID=""
usage() {
cat << EOF
@@ -102,71 +131,153 @@ Usage: vdev_id [-h]
vdev_id <-d device> [-c config_file] [-p phys_per_port]
[-g sas_direct|sas_switch|scsi] [-m]
- -c specify name of alernate config file [default=$CONFIG]
+ -c specify name of an alternative config file [default=$CONFIG]
-d specify basename of device (i.e. sda)
-e Create enclose device symlinks only (/dev/by-enclosure)
-g Storage network topology [default="$TOPOLOGY"]
-m Run in multipath mode
+ -j Run in multijbod mode
-p number of phy's per switch port [default=$PHYS_PER_PORT]
-h show this summary
EOF
- exit 0
+ exit 1
+	# Exit with an error so that a udev rule does not treat the usage
}
map_slot() {
- local LINUX_SLOT=$1
- local CHANNEL=$2
- local MAPPED_SLOT=
+ LINUX_SLOT=$1
+ CHANNEL=$2
- MAPPED_SLOT=`awk "\\$1 == \"slot\" && \\$2 == ${LINUX_SLOT} && \
- \\$4 ~ /^${CHANNEL}$|^$/ { print \\$3; exit }" $CONFIG`
+ MAPPED_SLOT=$(awk -v linux_slot="$LINUX_SLOT" -v channel="$CHANNEL" \
+ '$1 == "slot" && $2 == linux_slot && \
+ ($4 ~ "^"channel"$" || $4 ~ /^$/) { print $3; exit}' $CONFIG)
if [ -z "$MAPPED_SLOT" ] ; then
MAPPED_SLOT=$LINUX_SLOT
fi
- printf "%d" ${MAPPED_SLOT}
+ printf "%d" "${MAPPED_SLOT}"
}
map_channel() {
- local MAPPED_CHAN=
- local PCI_ID=$1
- local PORT=$2
+ MAPPED_CHAN=
+ PCI_ID=$1
+ PORT=$2
case $TOPOLOGY in
"sas_switch")
- MAPPED_CHAN=`awk "\\$1 == \"channel\" && \\$2 == ${PORT} \
- { print \\$3; exit }" $CONFIG`
+ MAPPED_CHAN=$(awk -v port="$PORT" \
+ '$1 == "channel" && $2 == port \
+ { print $3; exit }' $CONFIG)
;;
"sas_direct"|"scsi")
- MAPPED_CHAN=`awk "\\$1 == \"channel\" && \
- \\$2 == \"${PCI_ID}\" && \\$3 == ${PORT} \
- { print \\$4; exit }" $CONFIG`
+ MAPPED_CHAN=$(awk -v pciID="$PCI_ID" -v port="$PORT" \
+ '$1 == "channel" && $2 == pciID && $3 == port \
+ {print $4}' $CONFIG)
;;
esac
- printf "%s" ${MAPPED_CHAN}
+ printf "%s" "${MAPPED_CHAN}"
+}
+
+get_encl_id() {
+ set -- $(echo $1)
+ count=$#
+
+ i=1
+ while [ $i -le $count ] ; do
+ d=$(eval echo '$'{$i})
+ id=$(cat "/sys/class/enclosure/${d}/id")
+ ENCL_ID="${ENCL_ID} $id"
+ i=$((i + 1))
+ done
+}
+
+get_uniq_encl_id() {
+ for uuid in ${ENCL_ID}; do
+ found=0
+
+ for count in ${UNIQ_ENCL_ID}; do
+ if [ $count = $uuid ]; then
+ found=1
+ break
+ fi
+ done
+
+ if [ $found -eq 0 ]; then
+ UNIQ_ENCL_ID="${UNIQ_ENCL_ID} $uuid"
+ fi
+ done
+}
+
+# map_jbod explainer: The bsg driver knows the difference between a SAS
+# expander and a fanout expander. Use the hostX instance along with the
+# top-level (whole enclosure) expander instances in /sys/class/enclosure,
+# matching a field in an array of expanders and using the index of the
+# matched array field as the enclosure instance, thereby making jbod IDs
+# dynamic. This avoids reliance on high-overhead userspace commands like
+# multipath and lsscsi, using existing sysfs data instead. The $HOSTCHAN
+# variable is derived from devpath gymnastics in the sas_handler() function.
+map_jbod() {
+ DEVEXP=$(ls -l "/sys/block/$DEV/device/" | grep enclos | awk -F/ '{print $(NF-1) }')
+ DEV=$1
+
+ # Use "set --" to create index values (Arrays)
+ set -- $(ls -l /sys/class/enclosure | grep -v "^total" | awk '{print $9}')
+ # Get count of total elements
+ JBOD_COUNT=$#
+ JBOD_ITEM=$*
+
+	# Build JBOD (enclosure) ids from /sys/class/enclosure/<enclosure>/id
+ get_encl_id "$JBOD_ITEM"
+	# Different expander instances exist for each path;
+	# filter them down and keep only the unique ids.
+ get_uniq_encl_id
+
+ # Identify final 'mapped jbod'
+ j=0
+ for count in ${UNIQ_ENCL_ID}; do
+ i=1
+ j=$((j + 1))
+ while [ $i -le $JBOD_COUNT ] ; do
+ d=$(eval echo '$'{$i})
+ id=$(cat "/sys/class/enclosure/${d}/id")
+ if [ "$d" = "$DEVEXP" ] && [ $id = $count ] ; then
+ MAPPED_JBOD=$j
+ break
+ fi
+ i=$((i + 1))
+ done
+ done
+
+ printf "%d" "${MAPPED_JBOD}"
}
sas_handler() {
if [ -z "$PHYS_PER_PORT" ] ; then
- PHYS_PER_PORT=`awk "\\$1 == \"phys_per_port\" \
- {print \\$2; exit}" $CONFIG`
+ PHYS_PER_PORT=$(awk '$1 == "phys_per_port" \
+ {print $2; exit}' $CONFIG)
fi
PHYS_PER_PORT=${PHYS_PER_PORT:-4}
- if ! echo $PHYS_PER_PORT | grep -q -E '^[0-9]+$' ; then
+
+ if ! echo "$PHYS_PER_PORT" | grep -q -E '^[0-9]+$' ; then
echo "Error: phys_per_port value $PHYS_PER_PORT is non-numeric"
exit 1
fi
if [ -z "$MULTIPATH_MODE" ] ; then
- MULTIPATH_MODE=`awk "\\$1 == \"multipath\" \
- {print \\$2; exit}" $CONFIG`
+ MULTIPATH_MODE=$(awk '$1 == "multipath" \
+ {print $2; exit}' $CONFIG)
+ fi
+
+ if [ -z "$MULTIJBOD_MODE" ] ; then
+ MULTIJBOD_MODE=$(awk '$1 == "multijbod" \
+ {print $2; exit}' $CONFIG)
fi
# Use first running component device if we're handling a dm-mpath device
if [ "$MULTIPATH_MODE" = "yes" ] ; then
# If udev didn't tell us the UUID via DM_NAME, check /dev/mapper
if [ -z "$DM_NAME" ] ; then
- DM_NAME=`ls -l --full-time /dev/mapper |
- awk "/\/$DEV$/{print \\$9}"`
+ DM_NAME=$(ls -l --full-time /dev/mapper |
+ grep "$DEV"$ | awk '{print $9}')
fi
# For raw disks udev exports DEVTYPE=partition when
@@ -176,28 +287,50 @@ sas_handler() {
# we have to append the -part suffix directly in the
# helper.
if [ "$DEVTYPE" != "partition" ] ; then
- PART=`echo $DM_NAME | awk -Fp '/p/{print "-part"$2}'`
+ # Match p[number], remove the 'p' and prepend "-part"
+ PART=$(echo "$DM_NAME" |
+ awk 'match($0,/p[0-9]+$/) {print "-part"substr($0,RSTART+1,RLENGTH-1)}')
fi
# Strip off partition information.
- DM_NAME=`echo $DM_NAME | sed 's/p[0-9][0-9]*$//'`
+ DM_NAME=$(echo "$DM_NAME" | sed 's/p[0-9][0-9]*$//')
if [ -z "$DM_NAME" ] ; then
return
fi
- # Get the raw scsi device name from multipath -ll. Strip off
- # leading pipe symbols to make field numbering consistent.
- DEV=`multipath -ll $DM_NAME |
- awk '/running/{gsub("^[|]"," "); print $3 ; exit}'`
+ # Utilize DM device name to gather subordinate block devices
+ # using sysfs to avoid userspace utilities
+
+ # If our DEVNAME is something like /dev/dm-177, then we may be
+ # able to get our DMDEV from it.
+ DMDEV=$(echo $DEVNAME | sed 's;/dev/;;g')
+ if [ ! -e /sys/block/$DMDEV/slaves/* ] ; then
+ # It's not there, try looking in /dev/mapper
+ DMDEV=$(ls -l --full-time /dev/mapper | grep $DM_NAME |
+ awk '{gsub("../", " "); print $NF}')
+ fi
+
+ # Use sysfs pointers in /sys/block/dm-X/slaves because using
+ # userspace tools creates lots of overhead and should be avoided
+ # whenever possible. Use awk to isolate lowest instance of
+ # sd device member in dm device group regardless of string
+ # length.
+ DEV=$(ls "/sys/block/$DMDEV/slaves" | awk '
+ { len=sprintf ("%20s",length($0)); gsub(/ /,0,str); a[NR]=len "_" $0; }
+ END {
+ asort(a)
+ print substr(a[1],22)
+ }')
+
if [ -z "$DEV" ] ; then
return
fi
fi
- if echo $DEV | grep -q ^/devices/ ; then
+ if echo "$DEV" | grep -q ^/devices/ ; then
sys_path=$DEV
else
- sys_path=`udevadm info -q path -p /sys/block/$DEV 2>/dev/null`
+ sys_path=$(udevadm info -q path -p "/sys/block/$DEV" 2>/dev/null)
fi
# Use positional parameters as an ad-hoc array
@@ -207,84 +340,104 @@ sas_handler() {
# Get path up to /sys/.../hostX
i=1
- while [ $i -le $num_dirs ] ; do
- d=$(eval echo \${$i})
+
+ while [ $i -le "$num_dirs" ] ; do
+ d=$(eval echo '$'{$i})
scsi_host_dir="$scsi_host_dir/$d"
- echo $d | grep -q -E '^host[0-9]+$' && break
- i=$(($i + 1))
+ echo "$d" | grep -q -E '^host[0-9]+$' && break
+ i=$((i + 1))
done
- if [ $i = $num_dirs ] ; then
+	# Let's grab the SAS host channel number and save it for JBOD sorting later
+ HOSTCHAN=$(echo "$d" | awk -F/ '{ gsub("host","",$NF); print $NF}')
+
+ if [ $i = "$num_dirs" ] ; then
return
fi
- PCI_ID=$(eval echo \${$(($i -1))} | awk -F: '{print $2":"$3}')
+ PCI_ID=$(eval echo '$'{$((i -1))} | awk -F: '{print $2":"$3}')
# In sas_switch mode, the directory four levels beneath
# /sys/.../hostX contains symlinks to phy devices that reveal
# the switch port number. In sas_direct mode, the phy links one
# directory down reveal the HBA port.
port_dir=$scsi_host_dir
+
case $TOPOLOGY in
- "sas_switch") j=$(($i + 4)) ;;
- "sas_direct") j=$(($i + 1)) ;;
+ "sas_switch") j=$((i + 4)) ;;
+ "sas_direct") j=$((i + 1)) ;;
esac
- i=$(($i + 1))
+ i=$((i + 1))
+
while [ $i -le $j ] ; do
- port_dir="$port_dir/$(eval echo \${$i})"
- i=$(($i + 1))
+ port_dir="$port_dir/$(eval echo '$'{$i})"
+ i=$((i + 1))
done
- PHY=`ls -d $port_dir/phy* 2>/dev/null | head -1 | awk -F: '{print $NF}'`
+ PHY=$(ls -vd "$port_dir"/phy* 2>/dev/null | head -1 | awk -F: '{print $NF}')
if [ -z "$PHY" ] ; then
PHY=0
fi
- PORT=$(( $PHY / $PHYS_PER_PORT ))
+ PORT=$((PHY / PHYS_PER_PORT))
# Look in /sys/.../sas_device/end_device-X for the bay_identifier
# attribute.
end_device_dir=$port_dir
- while [ $i -lt $num_dirs ] ; do
- d=$(eval echo \${$i})
+
+ while [ $i -lt "$num_dirs" ] ; do
+ d=$(eval echo '$'{$i})
end_device_dir="$end_device_dir/$d"
- if echo $d | grep -q '^end_device' ; then
+ if echo "$d" | grep -q '^end_device' ; then
end_device_dir="$end_device_dir/sas_device/$d"
break
fi
- i=$(($i + 1))
+ i=$((i + 1))
done
+	# Add 'mix' slot type for environments where dm-multipath devices
+	# include end-devices connected via SAS expanders or directly to the
+	# SAS HBA -- a mixed-connectivity environment, e.g. pool devices
+	# contained in a SAS JBOD plus spare or log devices directly
+	# connected to a server backplane without expanders in the I/O path.
SLOT=
+
case $BAY in
"bay")
- SLOT=`cat $end_device_dir/bay_identifier 2>/dev/null`
+ SLOT=$(cat "$end_device_dir/bay_identifier" 2>/dev/null)
+ ;;
+ "mix")
+ if [ $(cat "$end_device_dir/bay_identifier" 2>/dev/null) ] ; then
+ SLOT=$(cat "$end_device_dir/bay_identifier" 2>/dev/null)
+ else
+ SLOT=$(cat "$end_device_dir/phy_identifier" 2>/dev/null)
+ fi
;;
"phy")
- SLOT=`cat $end_device_dir/phy_identifier 2>/dev/null`
+ SLOT=$(cat "$end_device_dir/phy_identifier" 2>/dev/null)
;;
"port")
- d=$(eval echo \${$i})
- SLOT=`echo $d | sed -e 's/^.*://'`
+ d=$(eval echo '$'{$i})
+ SLOT=$(echo "$d" | sed -e 's/^.*://')
;;
"id")
- i=$(($i + 1))
- d=$(eval echo \${$i})
- SLOT=`echo $d | sed -e 's/^.*://'`
+ i=$((i + 1))
+ d=$(eval echo '$'{$i})
+ SLOT=$(echo "$d" | sed -e 's/^.*://')
;;
"lun")
- i=$(($i + 2))
- d=$(eval echo \${$i})
- SLOT=`echo $d | sed -e 's/^.*://'`
+ i=$((i + 2))
+ d=$(eval echo '$'{$i})
+ SLOT=$(echo "$d" | sed -e 's/^.*://')
;;
"ses")
# look for this SAS path in all SCSI Enclosure Services
# (SES) enclosures
- sas_address=`cat $end_device_dir/sas_address 2>/dev/null`
- enclosures=`lsscsi -g | \
- sed -n -e '/enclosu/s/^.* \([^ ][^ ]*\) *$/\1/p'`
+ sas_address=$(cat "$end_device_dir/sas_address" 2>/dev/null)
+ enclosures=$(lsscsi -g | \
+ sed -n -e '/enclosu/s/^.* \([^ ][^ ]*\) *$/\1/p')
for enclosure in $enclosures; do
- set -- $(sg_ses -p aes $enclosure | \
+ set -- $(sg_ses -p aes "$enclosure" | \
awk "/device slot number:/{slot=\$12} \
/SAS address: $sas_address/\
{print slot}")
@@ -299,42 +452,55 @@ sas_handler() {
return
fi
- CHAN=`map_channel $PCI_ID $PORT`
- SLOT=`map_slot $SLOT $CHAN`
- if [ -z "$CHAN" ] ; then
- return
+ if [ "$MULTIJBOD_MODE" = "yes" ] ; then
+ CHAN=$(map_channel "$PCI_ID" "$PORT")
+ SLOT=$(map_slot "$SLOT" "$CHAN")
+ JBOD=$(map_jbod "$DEV")
+
+ if [ -z "$CHAN" ] ; then
+ return
+ fi
+ echo "${CHAN}"-"${JBOD}"-"${SLOT}${PART}"
+ else
+ CHAN=$(map_channel "$PCI_ID" "$PORT")
+ SLOT=$(map_slot "$SLOT" "$CHAN")
+
+ if [ -z "$CHAN" ] ; then
+ return
+ fi
+ echo "${CHAN}${SLOT}${PART}"
fi
- echo ${CHAN}${SLOT}${PART}
}
scsi_handler() {
if [ -z "$FIRST_BAY_NUMBER" ] ; then
- FIRST_BAY_NUMBER=`awk "\\$1 == \"first_bay_number\" \
- {print \\$2; exit}" $CONFIG`
+ FIRST_BAY_NUMBER=$(awk '$1 == "first_bay_number" \
+ {print $2; exit}' $CONFIG)
fi
FIRST_BAY_NUMBER=${FIRST_BAY_NUMBER:-0}
if [ -z "$PHYS_PER_PORT" ] ; then
- PHYS_PER_PORT=`awk "\\$1 == \"phys_per_port\" \
- {print \\$2; exit}" $CONFIG`
+ PHYS_PER_PORT=$(awk '$1 == "phys_per_port" \
+ {print $2; exit}' $CONFIG)
fi
PHYS_PER_PORT=${PHYS_PER_PORT:-4}
- if ! echo $PHYS_PER_PORT | grep -q -E '^[0-9]+$' ; then
+
+ if ! echo "$PHYS_PER_PORT" | grep -q -E '^[0-9]+$' ; then
echo "Error: phys_per_port value $PHYS_PER_PORT is non-numeric"
exit 1
fi
if [ -z "$MULTIPATH_MODE" ] ; then
- MULTIPATH_MODE=`awk "\\$1 == \"multipath\" \
- {print \\$2; exit}" $CONFIG`
+ MULTIPATH_MODE=$(awk '$1 == "multipath" \
+ {print $2; exit}' $CONFIG)
fi
# Use first running component device if we're handling a dm-mpath device
if [ "$MULTIPATH_MODE" = "yes" ] ; then
# If udev didn't tell us the UUID via DM_NAME, check /dev/mapper
if [ -z "$DM_NAME" ] ; then
- DM_NAME=`ls -l --full-time /dev/mapper |
- awk "/\/$DEV$/{print \\$9}"`
+ DM_NAME=$(ls -l --full-time /dev/mapper |
+ grep "$DEV"$ | awk '{print $9}')
fi
# For raw disks udev exports DEVTYPE=partition when
@@ -344,28 +510,30 @@ scsi_handler() {
# we have to append the -part suffix directly in the
# helper.
if [ "$DEVTYPE" != "partition" ] ; then
- PART=`echo $DM_NAME | awk -Fp '/p/{print "-part"$2}'`
+ # Match p[number], remove the 'p' and prepend "-part"
+ PART=$(echo "$DM_NAME" |
+ awk 'match($0,/p[0-9]+$/) {print "-part"substr($0,RSTART+1,RLENGTH-1)}')
fi
# Strip off partition information.
- DM_NAME=`echo $DM_NAME | sed 's/p[0-9][0-9]*$//'`
+ DM_NAME=$(echo "$DM_NAME" | sed 's/p[0-9][0-9]*$//')
if [ -z "$DM_NAME" ] ; then
return
fi
# Get the raw scsi device name from multipath -ll. Strip off
# leading pipe symbols to make field numbering consistent.
- DEV=`multipath -ll $DM_NAME |
- awk '/running/{gsub("^[|]"," "); print $3 ; exit}'`
+ DEV=$(multipath -ll "$DM_NAME" |
+ awk '/running/{gsub("^[|]"," "); print $3 ; exit}')
if [ -z "$DEV" ] ; then
return
fi
fi
- if echo $DEV | grep -q ^/devices/ ; then
+ if echo "$DEV" | grep -q ^/devices/ ; then
sys_path=$DEV
else
- sys_path=`udevadm info -q path -p /sys/block/$DEV 2>/dev/null`
+ sys_path=$(udevadm info -q path -p "/sys/block/$DEV" 2>/dev/null)
fi
# expect sys_path like this, for example:
@@ -378,44 +546,47 @@ scsi_handler() {
# Get path up to /sys/.../hostX
i=1
- while [ $i -le $num_dirs ] ; do
- d=$(eval echo \${$i})
+
+ while [ $i -le "$num_dirs" ] ; do
+ d=$(eval echo '$'{$i})
scsi_host_dir="$scsi_host_dir/$d"
- echo $d | grep -q -E '^host[0-9]+$' && break
- i=$(($i + 1))
+
+ echo "$d" | grep -q -E '^host[0-9]+$' && break
+ i=$((i + 1))
done
- if [ $i = $num_dirs ] ; then
+ if [ $i = "$num_dirs" ] ; then
return
fi
- PCI_ID=$(eval echo \${$(($i -1))} | awk -F: '{print $2":"$3}')
+ PCI_ID=$(eval echo '$'{$((i -1))} | awk -F: '{print $2":"$3}')
# In scsi mode, the directory two levels beneath
# /sys/.../hostX reveals the port and slot.
port_dir=$scsi_host_dir
- j=$(($i + 2))
+ j=$((i + 2))
- i=$(($i + 1))
+ i=$((i + 1))
while [ $i -le $j ] ; do
- port_dir="$port_dir/$(eval echo \${$i})"
- i=$(($i + 1))
+ port_dir="$port_dir/$(eval echo '$'{$i})"
+ i=$((i + 1))
done
- set -- $(echo $port_dir | sed -e 's/^.*:\([^:]*\):\([^:]*\)$/\1 \2/')
+ set -- $(echo "$port_dir" | sed -e 's/^.*:\([^:]*\):\([^:]*\)$/\1 \2/')
PORT=$1
- SLOT=$(($2 + $FIRST_BAY_NUMBER))
+ SLOT=$(($2 + FIRST_BAY_NUMBER))
if [ -z "$SLOT" ] ; then
return
fi
- CHAN=`map_channel $PCI_ID $PORT`
- SLOT=`map_slot $SLOT $CHAN`
+ CHAN=$(map_channel "$PCI_ID" "$PORT")
+ SLOT=$(map_slot "$SLOT" "$CHAN")
+
if [ -z "$CHAN" ] ; then
return
fi
- echo ${CHAN}${SLOT}${PART}
+ echo "${CHAN}${SLOT}${PART}"
}
# Figure out the name for the enclosure symlink
@@ -426,7 +597,7 @@ enclosure_handler () {
# Get the enclosure ID ("0:0:0:0")
ENC=$(basename $(readlink -m "/sys/$DEVPATH/../.."))
- if [ ! -d /sys/class/enclosure/$ENC ] ; then
+ if [ ! -d "/sys/class/enclosure/$ENC" ] ; then
# Not an enclosure, bail out
return
fi
@@ -434,14 +605,14 @@ enclosure_handler () {
# Get the long sysfs device path to our enclosure. Looks like:
# /devices/pci0000:00/0000:00:03.0/0000:05:00.0/host0/port-0:0/ ... /enclosure/0:0:0:0
- ENC_DEVICE=$(readlink /sys/class/enclosure/$ENC)
+ ENC_DEVICE=$(readlink "/sys/class/enclosure/$ENC")
# Grab the full path to the hosts port dir:
# /devices/pci0000:00/0000:00:03.0/0000:05:00.0/host0/port-0:0
- PORT_DIR=$(echo $ENC_DEVICE | grep -Eo '.+host[0-9]+/port-[0-9]+:[0-9]+')
+ PORT_DIR=$(echo "$ENC_DEVICE" | grep -Eo '.+host[0-9]+/port-[0-9]+:[0-9]+')
# Get the port number
- PORT_ID=$(echo $PORT_DIR | grep -Eo "[0-9]+$")
+ PORT_ID=$(echo "$PORT_DIR" | grep -Eo "[0-9]+$")
# The PCI directory is two directories up from the port directory
# /sys/devices/pci0000:00/0000:00:03.0/0000:05:00.0
@@ -452,7 +623,7 @@ enclosure_handler () {
# Name our device according to vdev_id.conf (like "L0" or "U1").
NAME=$(awk "/channel/{if (\$1 == \"channel\" && \$2 == \"$PCI_ID\" && \
- \$3 == \"$PORT_ID\") {print \$4int(count[\$4])}; count[\$4]++}" $CONFIG)
+ \$3 == \"$PORT_ID\") {print \$4\$3}}" $CONFIG)
echo "${NAME}"
}
@@ -487,10 +658,12 @@ alias_handler () {
# digits as partitions, causing alias creation to fail. This
# ambiguity seems unavoidable, so devices using this facility
# must not use such names.
- local DM_PART=
- if echo $DM_NAME | grep -q -E 'p[0-9][0-9]*$' ; then
+ DM_PART=
+ if echo "$DM_NAME" | grep -q -E 'p[0-9][0-9]*$' ; then
if [ "$DEVTYPE" != "partition" ] ; then
- DM_PART=`echo $DM_NAME | awk -Fp '/p/{print "-part"$2}'`
+ # Match p[number], remove the 'p' and prepend "-part"
+ DM_PART=$(echo "$DM_NAME" |
+ awk 'match($0,/p[0-9]+$/) {print "-part"substr($0,RSTART+1,RLENGTH-1)}')
fi
fi
@@ -498,21 +671,25 @@ alias_handler () {
for link in $DEVLINKS ; do
# Remove partition information to match key of top-level device.
if [ -n "$DM_PART" ] ; then
- link=`echo $link | sed 's/p[0-9][0-9]*$//'`
+ link=$(echo "$link" | sed 's/p[0-9][0-9]*$//')
fi
# Check both the fully qualified and the base name of link.
- for l in $link `basename $link` ; do
- alias=`awk "\\$1 == \"alias\" && \\$3 == \"${l}\" \
- { print \\$2; exit }" $CONFIG`
- if [ -n "$alias" ] ; then
- echo ${alias}${DM_PART}
- return
+ for l in $link $(basename "$link") ; do
+ if [ ! -z "$l" ]; then
+ alias=$(awk -v var="$l" '($1 == "alias") && \
+ ($3 == var) \
+ { print $2; exit }' $CONFIG)
+ if [ -n "$alias" ] ; then
+ echo "${alias}${DM_PART}"
+ return
+ fi
fi
done
done
}
-while getopts 'c:d:eg:mp:h' OPTION; do
+# main
+while getopts 'c:d:eg:jmp:h' OPTION; do
case ${OPTION} in
c)
CONFIG=${OPTARG}
@@ -525,7 +702,9 @@ while getopts 'c:d:eg:mp:h' OPTION; do
# create the enclosure device symlinks only. We also need
# "enclosure_symlinks yes" set in vdev_id.config to actually create the
# symlink.
- ENCLOSURE_MODE=$(awk '{if ($1 == "enclosure_symlinks") print $2}' $CONFIG)
+ ENCLOSURE_MODE=$(awk '{if ($1 == "enclosure_symlinks") \
+ print $2}' "$CONFIG")
+
if [ "$ENCLOSURE_MODE" != "yes" ] ; then
exit 0
fi
@@ -536,6 +715,9 @@ while getopts 'c:d:eg:mp:h' OPTION; do
p)
PHYS_PER_PORT=${OPTARG}
;;
+ j)
+ MULTIJBOD_MODE=yes
+ ;;
m)
MULTIPATH_MODE=yes
;;
@@ -545,34 +727,35 @@ while getopts 'c:d:eg:mp:h' OPTION; do
esac
done
-if [ ! -r $CONFIG ] ; then
- exit 0
+if [ ! -r "$CONFIG" ] ; then
+ echo "Error: Config file \"$CONFIG\" not found"
+ exit 1
fi
-if [ -z "$DEV" -a -z "$ENCLOSURE_MODE" ] ; then
+if [ -z "$DEV" ] && [ -z "$ENCLOSURE_MODE" ] ; then
echo "Error: missing required option -d"
exit 1
fi
if [ -z "$TOPOLOGY" ] ; then
- TOPOLOGY=`awk "\\$1 == \"topology\" {print \\$2; exit}" $CONFIG`
+ TOPOLOGY=$(awk '($1 == "topology") {print $2; exit}' "$CONFIG")
fi
if [ -z "$BAY" ] ; then
- BAY=`awk "\\$1 == \"slot\" {print \\$2; exit}" $CONFIG`
+ BAY=$(awk '($1 == "slot") {print $2; exit}' "$CONFIG")
fi
TOPOLOGY=${TOPOLOGY:-sas_direct}
# Should we create /dev/by-enclosure symlinks?
-if [ "$ENCLOSURE_MODE" = "yes" -a "$TOPOLOGY" = "sas_direct" ] ; then
+if [ "$ENCLOSURE_MODE" = "yes" ] && [ "$TOPOLOGY" = "sas_direct" ] ; then
ID_ENCLOSURE=$(enclosure_handler)
if [ -z "$ID_ENCLOSURE" ] ; then
exit 0
fi
# Just create the symlinks to the enclosure devices and then exit.
- ENCLOSURE_PREFIX=$(awk '/enclosure_symlinks_prefix/{print $2}' $CONFIG)
+ ENCLOSURE_PREFIX=$(awk '/enclosure_symlinks_prefix/{print $2}' "$CONFIG")
if [ -z "$ENCLOSURE_PREFIX" ] ; then
ENCLOSURE_PREFIX="enc"
fi
@@ -582,16 +765,16 @@ if [ "$ENCLOSURE_MODE" = "yes" -a "$TOPOLOGY" = "sas_direct" ] ; then
fi
# First check if an alias was defined for this device.
-ID_VDEV=`alias_handler`
+ID_VDEV=$(alias_handler)
if [ -z "$ID_VDEV" ] ; then
BAY=${BAY:-bay}
case $TOPOLOGY in
sas_direct|sas_switch)
- ID_VDEV=`sas_handler`
+ ID_VDEV=$(sas_handler)
;;
scsi)
- ID_VDEV=`scsi_handler`
+ ID_VDEV=$(scsi_handler)
;;
*)
echo "Error: unknown topology $TOPOLOGY"
diff --git a/cmd/zdb/Makefile.am b/cmd/zdb/Makefile.am
index 1fa7ec651b..c5858c2980 100644
--- a/cmd/zdb/Makefile.am
+++ b/cmd/zdb/Makefile.am
@@ -1,11 +1,7 @@
include $(top_srcdir)/config/Rules.am
# Unconditionally enable debugging for zdb
-AM_CPPFLAGS += -DDEBUG -UNDEBUG
-
-DEFAULT_INCLUDES += \
- -I$(top_srcdir)/include \
- -I$(top_srcdir)/lib/libspl/include
+AM_CPPFLAGS += -DDEBUG -UNDEBUG -DZFS_DEBUG
sbin_PROGRAMS = zdb
@@ -15,5 +11,8 @@ zdb_SOURCES = \
zdb.h
zdb_LDADD = \
- $(top_builddir)/lib/libnvpair/libnvpair.la \
- $(top_builddir)/lib/libzpool/libzpool.la
+ $(abs_top_builddir)/lib/libzpool/libzpool.la \
+ $(abs_top_builddir)/lib/libzfs_core/libzfs_core.la \
+ $(abs_top_builddir)/lib/libnvpair/libnvpair.la
+
+include $(top_srcdir)/config/CppCheck.am
diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c
index 4b07cdb8e0..8bbb77479b 100644
--- a/cmd/zdb/zdb.c
+++ b/cmd/zdb/zdb.c
@@ -21,16 +21,22 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2019 by Delphix. All rights reserved.
* Copyright (c) 2014 Integros [integros.com]
* Copyright 2016 Nexenta Systems, Inc.
* Copyright (c) 2017, 2018 Lawrence Livermore National Security, LLC.
* Copyright (c) 2015, 2017, Intel Corporation.
+ * Copyright (c) 2020 Datto Inc.
+ * Copyright (c) 2020, The FreeBSD Foundation [1]
+ *
+ * [1] Portions of this software were developed by Allan Jude
+ * under sponsorship from the FreeBSD Foundation.
+ * Copyright (c) 2021 Allan Jude
+ * Copyright (c) 2021 Toomas Soome
*/
#include
#include
-#include
#include
#include
#include
@@ -50,23 +56,28 @@
#include
#include
#include
+#include
#include
#include
#include
#include
#include
+#include
#include
#include
#include
#include
#include
+#include
#include
#include
#include
#include
#include
#include
+#include
#include
+#include
#include
#include
@@ -83,6 +94,13 @@
(idx) == DMU_OTN_UINT64_DATA || (idx) == DMU_OTN_UINT64_METADATA ? \
DMU_OT_UINT64_OTHER : DMU_OT_NUMTYPES)
+/* Some platforms require part of inode IDs to be remapped */
+#ifdef __APPLE__
+#define ZDB_MAP_OBJECT_ID(obj) INO_XNUTOZFS(obj, 2)
+#else
+#define ZDB_MAP_OBJECT_ID(obj) (obj)
+#endif
+
static char *
zdb_ot_name(dmu_object_type_t type)
{
@@ -97,25 +115,650 @@ zdb_ot_name(dmu_object_type_t type)
extern int reference_tracking_enable;
extern int zfs_recover;
-extern uint64_t zfs_arc_max, zfs_arc_meta_limit;
+extern unsigned long zfs_arc_meta_min, zfs_arc_meta_limit;
extern int zfs_vdev_async_read_max_active;
extern boolean_t spa_load_verify_dryrun;
+extern boolean_t spa_mode_readable_spacemaps;
extern int zfs_reconstruct_indirect_combinations_max;
+extern int zfs_btree_verify_intensity;
static const char cmdname[] = "zdb";
uint8_t dump_opt[256];
typedef void object_viewer_t(objset_t *, uint64_t, void *data, size_t size);
-uint64_t *zopt_object = NULL;
-static unsigned zopt_objects = 0;
-uint64_t max_inflight = 1000;
+uint64_t *zopt_metaslab = NULL;
+static unsigned zopt_metaslab_args = 0;
+
+typedef struct zopt_object_range {
+ uint64_t zor_obj_start;
+ uint64_t zor_obj_end;
+ uint64_t zor_flags;
+} zopt_object_range_t;
+zopt_object_range_t *zopt_object_ranges = NULL;
+static unsigned zopt_object_args = 0;
+
+static int flagbits[256];
+
+#define ZOR_FLAG_PLAIN_FILE 0x0001
+#define ZOR_FLAG_DIRECTORY 0x0002
+#define ZOR_FLAG_SPACE_MAP 0x0004
+#define ZOR_FLAG_ZAP 0x0008
+#define ZOR_FLAG_ALL_TYPES -1
+#define ZOR_SUPPORTED_FLAGS (ZOR_FLAG_PLAIN_FILE | \
+ ZOR_FLAG_DIRECTORY | \
+ ZOR_FLAG_SPACE_MAP | \
+ ZOR_FLAG_ZAP)
+
+#define ZDB_FLAG_CHECKSUM 0x0001
+#define ZDB_FLAG_DECOMPRESS 0x0002
+#define ZDB_FLAG_BSWAP 0x0004
+#define ZDB_FLAG_GBH 0x0008
+#define ZDB_FLAG_INDIRECT 0x0010
+#define ZDB_FLAG_RAW 0x0020
+#define ZDB_FLAG_PRINT_BLKPTR 0x0040
+#define ZDB_FLAG_VERBOSE 0x0080
+
+uint64_t max_inflight_bytes = 256 * 1024 * 1024; /* 256MB */
static int leaked_objects = 0;
static range_tree_t *mos_refd_objs;
-static void snprintf_blkptr_compact(char *, size_t, const blkptr_t *);
+static void snprintf_blkptr_compact(char *, size_t, const blkptr_t *,
+ boolean_t);
static void mos_obj_refd(uint64_t);
static void mos_obj_refd_multiple(uint64_t);
+static int dump_bpobj_cb(void *arg, const blkptr_t *bp, boolean_t free,
+ dmu_tx_t *tx);
+
+typedef struct sublivelist_verify {
+ /* FREE's that haven't yet matched to an ALLOC, in one sub-livelist */
+ zfs_btree_t sv_pair;
+
+ /* ALLOC's without a matching FREE, accumulates across sub-livelists */
+ zfs_btree_t sv_leftover;
+} sublivelist_verify_t;
+
+static int
+livelist_compare(const void *larg, const void *rarg)
+{
+ const blkptr_t *l = larg;
+ const blkptr_t *r = rarg;
+
+ /* Sort them according to dva[0] */
+ uint64_t l_dva0_vdev, r_dva0_vdev;
+ l_dva0_vdev = DVA_GET_VDEV(&l->blk_dva[0]);
+ r_dva0_vdev = DVA_GET_VDEV(&r->blk_dva[0]);
+ if (l_dva0_vdev < r_dva0_vdev)
+ return (-1);
+ else if (l_dva0_vdev > r_dva0_vdev)
+ return (+1);
+
+ /* if vdevs are equal, sort by offsets. */
+ uint64_t l_dva0_offset;
+ uint64_t r_dva0_offset;
+ l_dva0_offset = DVA_GET_OFFSET(&l->blk_dva[0]);
+ r_dva0_offset = DVA_GET_OFFSET(&r->blk_dva[0]);
+ if (l_dva0_offset < r_dva0_offset) {
+ return (-1);
+ } else if (l_dva0_offset > r_dva0_offset) {
+ return (+1);
+ }
+
+ /*
+ * Since we're storing blkptrs without cancelling FREE/ALLOC pairs,
+ * it's possible the offsets are equal. In that case, sort by txg
+ */
+ if (l->blk_birth < r->blk_birth) {
+ return (-1);
+ } else if (l->blk_birth > r->blk_birth) {
+ return (+1);
+ }
+ return (0);
+}
+
+typedef struct sublivelist_verify_block {
+ dva_t svb_dva;
+
+ /*
+ * We need this to check if the block marked as allocated
+ * in the livelist was freed (and potentially reallocated)
+ * in the metaslab spacemaps at a later TXG.
+ */
+ uint64_t svb_allocated_txg;
+} sublivelist_verify_block_t;
+
+static void zdb_print_blkptr(const blkptr_t *bp, int flags);
+
+typedef struct sublivelist_verify_block_refcnt {
+ /* block pointer entry in livelist being verified */
+ blkptr_t svbr_blk;
+
+ /*
+ * Refcount gets incremented to 1 when we encounter the first
+ * FREE entry for the svbr_blk block pointer and a node for it
+ * is created in our ZDB verification/tracking metadata.
+ *
+ * As we encounter more FREE entries we increment this counter
+ * and similarly decrement it whenever we find the respective
+ * ALLOC entries for this block.
+ *
+ * When the refcount gets to 0 it means that all the FREE and
+ * ALLOC entries of this block have paired up and we no longer
+ * need to track it in our verification logic (e.g. the node
+ * containing this struct in our verification data structure
+ * should be freed).
+ *
+ * [refer to sublivelist_verify_blkptr() for the actual code]
+ */
+ uint32_t svbr_refcnt;
+} sublivelist_verify_block_refcnt_t;
+
+static int
+sublivelist_block_refcnt_compare(const void *larg, const void *rarg)
+{
+ const sublivelist_verify_block_refcnt_t *l = larg;
+ const sublivelist_verify_block_refcnt_t *r = rarg;
+ return (livelist_compare(&l->svbr_blk, &r->svbr_blk));
+}
+
+static int
+sublivelist_verify_blkptr(void *arg, const blkptr_t *bp, boolean_t free,
+ dmu_tx_t *tx)
+{
+ ASSERT3P(tx, ==, NULL);
+ struct sublivelist_verify *sv = arg;
+ sublivelist_verify_block_refcnt_t current = {
+ .svbr_blk = *bp,
+
+ /*
+ * Start with 1 in case this is the first free entry.
+ * This field is not used for our B-Tree comparisons
+ * anyway.
+ */
+ .svbr_refcnt = 1,
+ };
+
+ zfs_btree_index_t where;
+ sublivelist_verify_block_refcnt_t *pair =
+ zfs_btree_find(&sv->sv_pair, ¤t, &where);
+ if (free) {
+ if (pair == NULL) {
+ /* first free entry for this block pointer */
+ zfs_btree_add(&sv->sv_pair, ¤t);
+ } else {
+ pair->svbr_refcnt++;
+ }
+ } else {
+ if (pair == NULL) {
+ /* block that is currently marked as allocated */
+ for (int i = 0; i < SPA_DVAS_PER_BP; i++) {
+ if (DVA_IS_EMPTY(&bp->blk_dva[i]))
+ break;
+ sublivelist_verify_block_t svb = {
+ .svb_dva = bp->blk_dva[i],
+ .svb_allocated_txg = bp->blk_birth
+ };
+
+ if (zfs_btree_find(&sv->sv_leftover, &svb,
+ &where) == NULL) {
+ zfs_btree_add_idx(&sv->sv_leftover,
+ &svb, &where);
+ }
+ }
+ } else {
+ /* alloc matches a free entry */
+ pair->svbr_refcnt--;
+ if (pair->svbr_refcnt == 0) {
+ /* all allocs and frees have been matched */
+ zfs_btree_remove_idx(&sv->sv_pair, &where);
+ }
+ }
+ }
+
+ return (0);
+}
+
+static int
+sublivelist_verify_func(void *args, dsl_deadlist_entry_t *dle)
+{
+ int err;
+ struct sublivelist_verify *sv = args;
+
+ zfs_btree_create(&sv->sv_pair, sublivelist_block_refcnt_compare,
+ sizeof (sublivelist_verify_block_refcnt_t));
+
+ err = bpobj_iterate_nofree(&dle->dle_bpobj, sublivelist_verify_blkptr,
+ sv, NULL);
+
+ sublivelist_verify_block_refcnt_t *e;
+ zfs_btree_index_t *cookie = NULL;
+ while ((e = zfs_btree_destroy_nodes(&sv->sv_pair, &cookie)) != NULL) {
+ char blkbuf[BP_SPRINTF_LEN];
+ snprintf_blkptr_compact(blkbuf, sizeof (blkbuf),
+ &e->svbr_blk, B_TRUE);
+ (void) printf("\tERROR: %d unmatched FREE(s): %s\n",
+ e->svbr_refcnt, blkbuf);
+ }
+ zfs_btree_destroy(&sv->sv_pair);
+
+ return (err);
+}
+
+static int
+livelist_block_compare(const void *larg, const void *rarg)
+{
+ const sublivelist_verify_block_t *l = larg;
+ const sublivelist_verify_block_t *r = rarg;
+
+ if (DVA_GET_VDEV(&l->svb_dva) < DVA_GET_VDEV(&r->svb_dva))
+ return (-1);
+ else if (DVA_GET_VDEV(&l->svb_dva) > DVA_GET_VDEV(&r->svb_dva))
+ return (+1);
+
+ if (DVA_GET_OFFSET(&l->svb_dva) < DVA_GET_OFFSET(&r->svb_dva))
+ return (-1);
+ else if (DVA_GET_OFFSET(&l->svb_dva) > DVA_GET_OFFSET(&r->svb_dva))
+ return (+1);
+
+ if (DVA_GET_ASIZE(&l->svb_dva) < DVA_GET_ASIZE(&r->svb_dva))
+ return (-1);
+ else if (DVA_GET_ASIZE(&l->svb_dva) > DVA_GET_ASIZE(&r->svb_dva))
+ return (+1);
+
+ return (0);
+}
+
+/*
+ * Check for errors in a livelist while tracking all unfreed ALLOCs in the
+ * sublivelist_verify_t: sv->sv_leftover
+ */
+static void
+livelist_verify(dsl_deadlist_t *dl, void *arg)
+{
+ sublivelist_verify_t *sv = arg;
+ dsl_deadlist_iterate(dl, sublivelist_verify_func, sv);
+}
+
+/*
+ * Check for errors in the livelist entry and discard the intermediary
+ * data structures
+ */
+/* ARGSUSED */
+static int
+sublivelist_verify_lightweight(void *args, dsl_deadlist_entry_t *dle)
+{
+ sublivelist_verify_t sv;
+ zfs_btree_create(&sv.sv_leftover, livelist_block_compare,
+ sizeof (sublivelist_verify_block_t));
+ int err = sublivelist_verify_func(&sv, dle);
+ zfs_btree_clear(&sv.sv_leftover);
+ zfs_btree_destroy(&sv.sv_leftover);
+ return (err);
+}
+
+typedef struct metaslab_verify {
+ /*
+ * Tree containing all the leftover ALLOCs from the livelists
+ * that are part of this metaslab.
+ */
+ zfs_btree_t mv_livelist_allocs;
+
+ /*
+ * Metaslab information.
+ */
+ uint64_t mv_vdid;
+ uint64_t mv_msid;
+ uint64_t mv_start;
+ uint64_t mv_end;
+
+ /*
+ * What's currently allocated for this metaslab.
+ */
+ range_tree_t *mv_allocated;
+} metaslab_verify_t;
+
+typedef void ll_iter_t(dsl_deadlist_t *ll, void *arg);
+
+typedef int (*zdb_log_sm_cb_t)(spa_t *spa, space_map_entry_t *sme, uint64_t txg,
+ void *arg);
+
+typedef struct unflushed_iter_cb_arg {
+ spa_t *uic_spa;
+ uint64_t uic_txg;
+ void *uic_arg;
+ zdb_log_sm_cb_t uic_cb;
+} unflushed_iter_cb_arg_t;
+
+static int
+iterate_through_spacemap_logs_cb(space_map_entry_t *sme, void *arg)
+{
+ unflushed_iter_cb_arg_t *uic = arg;
+ return (uic->uic_cb(uic->uic_spa, sme, uic->uic_txg, uic->uic_arg));
+}
+
+static void
+iterate_through_spacemap_logs(spa_t *spa, zdb_log_sm_cb_t cb, void *arg)
+{
+ if (!spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP))
+ return;
+
+ spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
+ for (spa_log_sm_t *sls = avl_first(&spa->spa_sm_logs_by_txg);
+ sls; sls = AVL_NEXT(&spa->spa_sm_logs_by_txg, sls)) {
+ space_map_t *sm = NULL;
+ VERIFY0(space_map_open(&sm, spa_meta_objset(spa),
+ sls->sls_sm_obj, 0, UINT64_MAX, SPA_MINBLOCKSHIFT));
+
+ unflushed_iter_cb_arg_t uic = {
+ .uic_spa = spa,
+ .uic_txg = sls->sls_txg,
+ .uic_arg = arg,
+ .uic_cb = cb
+ };
+ VERIFY0(space_map_iterate(sm, space_map_length(sm),
+ iterate_through_spacemap_logs_cb, &uic));
+ space_map_close(sm);
+ }
+ spa_config_exit(spa, SCL_CONFIG, FTAG);
+}
+
+static void
+verify_livelist_allocs(metaslab_verify_t *mv, uint64_t txg,
+ uint64_t offset, uint64_t size)
+{
+ sublivelist_verify_block_t svb;
+ DVA_SET_VDEV(&svb.svb_dva, mv->mv_vdid);
+ DVA_SET_OFFSET(&svb.svb_dva, offset);
+ DVA_SET_ASIZE(&svb.svb_dva, size);
+ zfs_btree_index_t where;
+ uint64_t end_offset = offset + size;
+
+ /*
+ * Look for an exact match for spacemap entry in the livelist entries.
+ * Then, look for other livelist entries that fall within the range
+ * of the spacemap entry as it may have been condensed
+ */
+ sublivelist_verify_block_t *found =
+ zfs_btree_find(&mv->mv_livelist_allocs, &svb, &where);
+ if (found == NULL) {
+ found = zfs_btree_next(&mv->mv_livelist_allocs, &where, &where);
+ }
+ for (; found != NULL && DVA_GET_VDEV(&found->svb_dva) == mv->mv_vdid &&
+ DVA_GET_OFFSET(&found->svb_dva) < end_offset;
+ found = zfs_btree_next(&mv->mv_livelist_allocs, &where, &where)) {
+ if (found->svb_allocated_txg <= txg) {
+ (void) printf("ERROR: Livelist ALLOC [%llx:%llx] "
+ "from TXG %llx FREED at TXG %llx\n",
+ (u_longlong_t)DVA_GET_OFFSET(&found->svb_dva),
+ (u_longlong_t)DVA_GET_ASIZE(&found->svb_dva),
+ (u_longlong_t)found->svb_allocated_txg,
+ (u_longlong_t)txg);
+ }
+ }
+}
+
+static int
+metaslab_spacemap_validation_cb(space_map_entry_t *sme, void *arg)
+{
+ metaslab_verify_t *mv = arg;
+ uint64_t offset = sme->sme_offset;
+ uint64_t size = sme->sme_run;
+ uint64_t txg = sme->sme_txg;
+
+ if (sme->sme_type == SM_ALLOC) {
+ if (range_tree_contains(mv->mv_allocated,
+ offset, size)) {
+ (void) printf("ERROR: DOUBLE ALLOC: "
+ "%llu [%llx:%llx] "
+ "%llu:%llu LOG_SM\n",
+ (u_longlong_t)txg, (u_longlong_t)offset,
+ (u_longlong_t)size, (u_longlong_t)mv->mv_vdid,
+ (u_longlong_t)mv->mv_msid);
+ } else {
+ range_tree_add(mv->mv_allocated,
+ offset, size);
+ }
+ } else {
+ if (!range_tree_contains(mv->mv_allocated,
+ offset, size)) {
+ (void) printf("ERROR: DOUBLE FREE: "
+ "%llu [%llx:%llx] "
+ "%llu:%llu LOG_SM\n",
+ (u_longlong_t)txg, (u_longlong_t)offset,
+ (u_longlong_t)size, (u_longlong_t)mv->mv_vdid,
+ (u_longlong_t)mv->mv_msid);
+ } else {
+ range_tree_remove(mv->mv_allocated,
+ offset, size);
+ }
+ }
+
+ if (sme->sme_type != SM_ALLOC) {
+ /*
+ * If something is freed in the spacemap, verify that
+ * it is not listed as allocated in the livelist.
+ */
+ verify_livelist_allocs(mv, txg, offset, size);
+ }
+ return (0);
+}
+
+static int
+spacemap_check_sm_log_cb(spa_t *spa, space_map_entry_t *sme,
+ uint64_t txg, void *arg)
+{
+ metaslab_verify_t *mv = arg;
+ uint64_t offset = sme->sme_offset;
+ uint64_t vdev_id = sme->sme_vdev;
+
+ vdev_t *vd = vdev_lookup_top(spa, vdev_id);
+
+ /* skip indirect vdevs */
+ if (!vdev_is_concrete(vd))
+ return (0);
+
+ if (vdev_id != mv->mv_vdid)
+ return (0);
+
+ metaslab_t *ms = vd->vdev_ms[offset >> vd->vdev_ms_shift];
+ if (ms->ms_id != mv->mv_msid)
+ return (0);
+
+ if (txg < metaslab_unflushed_txg(ms))
+ return (0);
+
+
+ ASSERT3U(txg, ==, sme->sme_txg);
+ return (metaslab_spacemap_validation_cb(sme, mv));
+}
+
+static void
+spacemap_check_sm_log(spa_t *spa, metaslab_verify_t *mv)
+{
+ iterate_through_spacemap_logs(spa, spacemap_check_sm_log_cb, mv);
+}
+
+static void
+spacemap_check_ms_sm(space_map_t *sm, metaslab_verify_t *mv)
+{
+ if (sm == NULL)
+ return;
+
+ VERIFY0(space_map_iterate(sm, space_map_length(sm),
+ metaslab_spacemap_validation_cb, mv));
+}
+
+static void iterate_deleted_livelists(spa_t *spa, ll_iter_t func, void *arg);
+
+/*
+ * Transfer blocks from sv_leftover tree to the mv_livelist_allocs if
+ * they are part of that metaslab (mv_msid).
+ */
+static void
+mv_populate_livelist_allocs(metaslab_verify_t *mv, sublivelist_verify_t *sv)
+{
+ zfs_btree_index_t where;
+ sublivelist_verify_block_t *svb;
+ ASSERT3U(zfs_btree_numnodes(&mv->mv_livelist_allocs), ==, 0);
+ for (svb = zfs_btree_first(&sv->sv_leftover, &where);
+ svb != NULL;
+ svb = zfs_btree_next(&sv->sv_leftover, &where, &where)) {
+ if (DVA_GET_VDEV(&svb->svb_dva) != mv->mv_vdid)
+ continue;
+
+ if (DVA_GET_OFFSET(&svb->svb_dva) < mv->mv_start &&
+ (DVA_GET_OFFSET(&svb->svb_dva) +
+ DVA_GET_ASIZE(&svb->svb_dva)) > mv->mv_start) {
+ (void) printf("ERROR: Found block that crosses "
+ "metaslab boundary: <%llu:%llx:%llx>\n",
+ (u_longlong_t)DVA_GET_VDEV(&svb->svb_dva),
+ (u_longlong_t)DVA_GET_OFFSET(&svb->svb_dva),
+ (u_longlong_t)DVA_GET_ASIZE(&svb->svb_dva));
+ continue;
+ }
+
+ if (DVA_GET_OFFSET(&svb->svb_dva) < mv->mv_start)
+ continue;
+
+ if (DVA_GET_OFFSET(&svb->svb_dva) >= mv->mv_end)
+ continue;
+
+ if ((DVA_GET_OFFSET(&svb->svb_dva) +
+ DVA_GET_ASIZE(&svb->svb_dva)) > mv->mv_end) {
+ (void) printf("ERROR: Found block that crosses "
+ "metaslab boundary: <%llu:%llx:%llx>\n",
+ (u_longlong_t)DVA_GET_VDEV(&svb->svb_dva),
+ (u_longlong_t)DVA_GET_OFFSET(&svb->svb_dva),
+ (u_longlong_t)DVA_GET_ASIZE(&svb->svb_dva));
+ continue;
+ }
+
+ zfs_btree_add(&mv->mv_livelist_allocs, svb);
+ }
+
+ for (svb = zfs_btree_first(&mv->mv_livelist_allocs, &where);
+ svb != NULL;
+ svb = zfs_btree_next(&mv->mv_livelist_allocs, &where, &where)) {
+ zfs_btree_remove(&sv->sv_leftover, svb);
+ }
+}
+
+/*
+ * [Livelist Check]
+ * Iterate through all the sublivelists and:
+ * - report leftover frees (**)
+ * - record leftover ALLOCs together with their TXG [see Cross Check]
+ *
+ * (**) Note: Double ALLOCs are valid in datasets that have dedup
+ * enabled. Similarly double FREEs are allowed as well but
+ * only if they pair up with a corresponding ALLOC entry once
+ * we are done with our sublivelist iteration.
+ *
+ * [Spacemap Check]
+ * for each metaslab:
+ * - iterate over spacemap and then the metaslab's entries in the
+ * spacemap log, then report any double FREEs and ALLOCs (do not
+ * blow up).
+ *
+ * [Cross Check]
+ * After finishing the Livelist Check phase and while being in the
+ * Spacemap Check phase, we find all the recorded leftover ALLOCs
+ * of the livelist check that are part of the metaslab that we are
+ * currently looking at in the Spacemap Check. We report any entries
+ * that are marked as ALLOCs in the livelists but have been actually
+ * freed (and potentially allocated again) after their TXG stamp in
+ * the spacemaps. Also report any ALLOCs from the livelists that
+ * belong to indirect vdevs (e.g. their vdev completed removal).
+ *
+ * Note that this will miss Log Spacemap entries that cancelled each other
+ * out before being flushed to the metaslab, so we are not guaranteed
+ * to match all erroneous ALLOCs.
+ */
+static void
+livelist_metaslab_validate(spa_t *spa)
+{
+ (void) printf("Verifying deleted livelist entries\n");
+
+ sublivelist_verify_t sv;
+ zfs_btree_create(&sv.sv_leftover, livelist_block_compare,
+ sizeof (sublivelist_verify_block_t));
+ iterate_deleted_livelists(spa, livelist_verify, &sv);
+
+ (void) printf("Verifying metaslab entries\n");
+ vdev_t *rvd = spa->spa_root_vdev;
+ for (uint64_t c = 0; c < rvd->vdev_children; c++) {
+ vdev_t *vd = rvd->vdev_child[c];
+
+ if (!vdev_is_concrete(vd))
+ continue;
+
+ for (uint64_t mid = 0; mid < vd->vdev_ms_count; mid++) {
+ metaslab_t *m = vd->vdev_ms[mid];
+
+ (void) fprintf(stderr,
+ "\rverifying concrete vdev %llu, "
+ "metaslab %llu of %llu ...",
+ (longlong_t)vd->vdev_id,
+ (longlong_t)mid,
+ (longlong_t)vd->vdev_ms_count);
+
+ uint64_t shift, start;
+ range_seg_type_t type =
+ metaslab_calculate_range_tree_type(vd, m,
+ &start, &shift);
+ metaslab_verify_t mv;
+ mv.mv_allocated = range_tree_create(NULL,
+ type, NULL, start, shift);
+ mv.mv_vdid = vd->vdev_id;
+ mv.mv_msid = m->ms_id;
+ mv.mv_start = m->ms_start;
+ mv.mv_end = m->ms_start + m->ms_size;
+ zfs_btree_create(&mv.mv_livelist_allocs,
+ livelist_block_compare,
+ sizeof (sublivelist_verify_block_t));
+
+ mv_populate_livelist_allocs(&mv, &sv);
+
+ spacemap_check_ms_sm(m->ms_sm, &mv);
+ spacemap_check_sm_log(spa, &mv);
+
+ range_tree_vacate(mv.mv_allocated, NULL, NULL);
+ range_tree_destroy(mv.mv_allocated);
+ zfs_btree_clear(&mv.mv_livelist_allocs);
+ zfs_btree_destroy(&mv.mv_livelist_allocs);
+ }
+ }
+ (void) fprintf(stderr, "\n");
+
+ /*
+ * If there are any segments in the leftover tree after we walked
+ * through all the metaslabs in the concrete vdevs then this means
+ * that we have segments in the livelists that belong to indirect
+ * vdevs and are marked as allocated.
+ */
+ if (zfs_btree_numnodes(&sv.sv_leftover) == 0) {
+ zfs_btree_destroy(&sv.sv_leftover);
+ return;
+ }
+ (void) printf("ERROR: Found livelist blocks marked as allocated "
+ "for indirect vdevs:\n");
+
+ zfs_btree_index_t *where = NULL;
+ sublivelist_verify_block_t *svb;
+ while ((svb = zfs_btree_destroy_nodes(&sv.sv_leftover, &where)) !=
+ NULL) {
+ int vdev_id = DVA_GET_VDEV(&svb->svb_dva);
+ ASSERT3U(vdev_id, <, rvd->vdev_children);
+ vdev_t *vd = rvd->vdev_child[vdev_id];
+ ASSERT(!vdev_is_concrete(vd));
+ (void) printf("<%d:%llx:%llx> TXG %llx\n",
+ vdev_id, (u_longlong_t)DVA_GET_OFFSET(&svb->svb_dva),
+ (u_longlong_t)DVA_GET_ASIZE(&svb->svb_dva),
+ (u_longlong_t)svb->svb_allocated_txg);
+ }
+ (void) printf("\n");
+ zfs_btree_destroy(&sv.sv_leftover);
+}
/*
* These libumem hooks provide a reasonable set of defaults for the allocator's
@@ -137,31 +780,45 @@ static void
usage(void)
{
(void) fprintf(stderr,
- "Usage:\t%s [-AbcdDFGhikLMPsvX] [-e [-V] [-p ...]] "
+ "Usage:\t%s [-AbcdDFGhikLMPsvXy] [-e [-V] [-p ...]] "
"[-I ]\n"
"\t\t[-o =]... [-t ] [-U ] [-x ]\n"
- "\t\t[ [