From 9290fb7361fb2692ee62f0f12fa384ad885dbe56 Mon Sep 17 00:00:00 2001 From: Ryan Moeller Date: Thu, 4 Jun 2020 13:32:32 -0400 Subject: [PATCH] Add zfsd for FreeBSD Signed-off-by: Ryan Moeller --- Makefile.am | 2 +- cmd/Makefile.am | 3 + cmd/zfsd/.gitignore | 1 + cmd/zfsd/Makefile.am | 25 + cmd/zfsd/callout.cc | 220 +++++++ cmd/zfsd/callout.h | 185 ++++++ cmd/zfsd/case_file.cc | 1195 ++++++++++++++++++++++++++++++++++++ cmd/zfsd/case_file.h | 452 ++++++++++++++ cmd/zfsd/vdev.cc | 358 +++++++++++ cmd/zfsd/vdev.h | 188 ++++++ cmd/zfsd/vdev_iterator.cc | 172 ++++++ cmd/zfsd/vdev_iterator.h | 123 ++++ cmd/zfsd/zfsd.8 | 154 +++++ cmd/zfsd/zfsd.cc | 449 ++++++++++++++ cmd/zfsd/zfsd.h | 228 +++++++ cmd/zfsd/zfsd_event.cc | 488 +++++++++++++++ cmd/zfsd/zfsd_event.h | 144 +++++ cmd/zfsd/zfsd_exception.cc | 135 ++++ cmd/zfsd/zfsd_exception.h | 109 ++++ cmd/zfsd/zfsd_main.cc | 90 +++ cmd/zfsd/zpool_list.cc | 122 ++++ cmd/zfsd/zpool_list.h | 133 ++++ config/Rules.am | 9 + configure.ac | 2 + etc/Makefile.am | 8 + etc/rc.d/.gitignore | 1 + etc/rc.d/zfsd.in | 14 + 27 files changed, 5009 insertions(+), 1 deletion(-) create mode 100644 cmd/zfsd/.gitignore create mode 100644 cmd/zfsd/Makefile.am create mode 100644 cmd/zfsd/callout.cc create mode 100644 cmd/zfsd/callout.h create mode 100644 cmd/zfsd/case_file.cc create mode 100644 cmd/zfsd/case_file.h create mode 100644 cmd/zfsd/vdev.cc create mode 100644 cmd/zfsd/vdev.h create mode 100644 cmd/zfsd/vdev_iterator.cc create mode 100644 cmd/zfsd/vdev_iterator.h create mode 100644 cmd/zfsd/zfsd.8 create mode 100644 cmd/zfsd/zfsd.cc create mode 100644 cmd/zfsd/zfsd.h create mode 100644 cmd/zfsd/zfsd_event.cc create mode 100644 cmd/zfsd/zfsd_event.h create mode 100644 cmd/zfsd/zfsd_exception.cc create mode 100644 cmd/zfsd/zfsd_exception.h create mode 100644 cmd/zfsd/zfsd_main.cc create mode 100644 cmd/zfsd/zpool_list.cc create mode 100644 cmd/zfsd/zpool_list.h create mode 100644 etc/rc.d/.gitignore create mode 100644 etc/rc.d/zfsd.in diff --git a/Makefile.am b/Makefile.am index 76ec01f2aa..f49766a45a 100644 --- a/Makefile.am +++ b/Makefile.am @@ -119,7 +119,7 @@ cstyle_line = -exec ${top_srcdir}/scripts/cstyle.pl -cpP {} + endif CHECKS += cstyle cstyle: - $(AM_V_at)find $(top_srcdir) -name build -prune \ + $(AM_V_at)find $(top_srcdir) -name build -prune -o -name zfsd -prune \ -o -type f -name '*.[hc]' \ ! -name 'zfs_config.*' ! -name '*.mod.c' \ ! -name 'nfs41acl_xdr.c' ! 
-name 'nfs41acl.h' \ diff --git a/cmd/Makefile.am b/cmd/Makefile.am index 2bd9d039f2..74a93c08a9 100644 --- a/cmd/Makefile.am +++ b/cmd/Makefile.am @@ -64,6 +64,9 @@ include $(srcdir)/%D%/zpool/Makefile.am include $(srcdir)/%D%/zpool_influxdb/Makefile.am include $(srcdir)/%D%/zstream/Makefile.am +if BUILD_FREEBSD +include $(srcdir)/%D%/zfsd/Makefile.am +endif if BUILD_LINUX mounthelper_PROGRAMS += mount.zfs diff --git a/cmd/zfsd/.gitignore b/cmd/zfsd/.gitignore new file mode 100644 index 0000000000..5e3efca4d9 --- /dev/null +++ b/cmd/zfsd/.gitignore @@ -0,0 +1 @@ +/zfsd diff --git a/cmd/zfsd/Makefile.am b/cmd/zfsd/Makefile.am new file mode 100644 index 0000000000..0371d389be --- /dev/null +++ b/cmd/zfsd/Makefile.am @@ -0,0 +1,25 @@ +zfsd_CFLAGS = $(AM_CFLAGS) +zfsd_CXXFLAGS = $(AM_CXXFLAGS) +zfsd_CPPFLAGS = $(AM_CPPFLAGS) + +sbin_PROGRAMS += zfsd + +zfsd_SOURCES = \ + %D%/callout.cc \ + %D%/case_file.cc \ + %D%/vdev.cc \ + %D%/vdev_iterator.cc \ + %D%/zfsd.cc \ + %D%/zfsd_event.cc \ + %D%/zfsd_exception.cc \ + %D%/zfsd_main.cc \ + %D%/zpool_list.cc + +zfsd_LDADD = \ + libnvpair.la \ + libuutil.la \ + libzfs_core.la \ + libzfs.la + +zfsd_LDADD += -lrt -lprivatedevdctl -lgeom -lbsdxml -lsbuf +zfsd_LDFLAGS = -pthread diff --git a/cmd/zfsd/callout.cc b/cmd/zfsd/callout.cc new file mode 100644 index 0000000000..3e5cd57795 --- /dev/null +++ b/cmd/zfsd/callout.cc @@ -0,0 +1,220 @@ +/*- + * Copyright (c) 2011, 2012, 2013 Spectra Logic Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * substantially similar to the "NO WARRANTY" disclaimer below + * ("Disclaimer") and any redistribution must be conditioned upon + * including a substantially similar Disclaimer requirement for further + * binary redistribution. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGES. + * + * Authors: Justin T. Gibbs (Spectra Logic Corporation) + * + * $FreeBSD$ + */ + +/** + * \file callout.cc + * + * \brief Implementation of the Callout class - multi-client + * timer services built on top of the POSIX interval timer. 
+ */ + +#include +#include + +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "callout.h" +#include "vdev_iterator.h" +#include "zfsd.h" +#include "zfsd_exception.h" + +std::list<Callout *> Callout::s_activeCallouts; +bool Callout::s_alarmFired(false); + +void +Callout::Init() +{ + signal(SIGALRM, Callout::AlarmSignalHandler); +} + +bool +Callout::Stop() +{ + if (!IsPending()) + return (false); + + for (std::list<Callout *>::iterator it(s_activeCallouts.begin()); + it != s_activeCallouts.end(); it++) { + if (*it != this) + continue; + + it = s_activeCallouts.erase(it); + if (it != s_activeCallouts.end()) { + + /* + * Maintain correct interval for the + * callouts that follow the just removed + * entry. + */ + timeradd(&(*it)->m_interval, &m_interval, + &(*it)->m_interval); + } + break; + } + m_pending = false; + return (true); +} + +bool +Callout::Reset(const timeval &interval, CalloutFunc_t *func, void *arg) +{ + bool cancelled(false); + + if (!timerisset(&interval)) + throw ZfsdException("Callout::Reset: interval of 0"); + + cancelled = Stop(); + + m_interval = interval; + m_func = func; + m_arg = arg; + m_pending = true; + + std::list<Callout *>::iterator it(s_activeCallouts.begin()); + for (; it != s_activeCallouts.end(); it++) { + + if (timercmp(&(*it)->m_interval, &m_interval, <=)) { + /* + * Decrease our interval by those that come + * before us. + */ + timersub(&m_interval, &(*it)->m_interval, &m_interval); + } else { + /* + * Account for the time between the newly + * inserted event and those that follow. + */ + timersub(&(*it)->m_interval, &m_interval, + &(*it)->m_interval); + break; + } + } + s_activeCallouts.insert(it, this); + + + if (s_activeCallouts.front() == this) { + itimerval timerval = { {0, 0}, m_interval }; + + setitimer(ITIMER_REAL, &timerval, NULL); + } + + return (cancelled); +} + +void +Callout::AlarmSignalHandler(int) +{ + s_alarmFired = true; + ZfsDaemon::WakeEventLoop(); +} + +void +Callout::ExpireCallouts() +{ + if (!s_alarmFired) + return; + + s_alarmFired = false; + if (s_activeCallouts.empty()) { + /* Callout removal/SIGALRM race was lost. */ + return; + } + + /* + * Expire the first callout (the one we used to set the + * interval timer) as well as any callouts following that + * expire at the same time (have a zero interval from + * the callout before it). + */ + do { + Callout *cur(s_activeCallouts.front()); + s_activeCallouts.pop_front(); + cur->m_pending = false; + cur->m_func(cur->m_arg); + } while (!s_activeCallouts.empty() + && timerisset(&s_activeCallouts.front()->m_interval) == 0); + + if (!s_activeCallouts.empty()) { + Callout *next(s_activeCallouts.front()); + itimerval timerval = { { 0, 0 }, next->m_interval }; + + setitimer(ITIMER_REAL, &timerval, NULL); + } +} + +timeval +Callout::TimeRemaining() const +{ + /* + * Outline: Add the m_interval for each callout in s_activeCallouts + * ahead of this, except for the first callout. Add to that the result + * of getitimer (That's because the first callout stores its original + * interval setting while the timer is ticking).
+ */ + itimerval timervalToAlarm; + timeval timeToExpiry; + std::list<Callout *>::iterator it; + + if (!IsPending()) { + timeToExpiry.tv_sec = INT_MAX; + timeToExpiry.tv_usec = 999999; /*maximum normalized value*/ + return (timeToExpiry); + } + + timerclear(&timeToExpiry); + getitimer(ITIMER_REAL, &timervalToAlarm); + timeval& timeToAlarm = timervalToAlarm.it_value; + timeradd(&timeToExpiry, &timeToAlarm, &timeToExpiry); + + it = s_activeCallouts.begin(); + it++; /*skip the first callout in the list*/ + for (; it != s_activeCallouts.end(); it++) { + timeradd(&timeToExpiry, &(*it)->m_interval, &timeToExpiry); + if ((*it) == this) + break; + } + return (timeToExpiry); +} diff --git a/cmd/zfsd/callout.h b/cmd/zfsd/callout.h new file mode 100644 index 0000000000..d6b83bcfe5 --- /dev/null +++ b/cmd/zfsd/callout.h @@ -0,0 +1,185 @@ +/*- + * Copyright (c) 2011, 2012, 2013 Spectra Logic Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * substantially similar to the "NO WARRANTY" disclaimer below + * ("Disclaimer") and any redistribution must be conditioned upon + * including a substantially similar Disclaimer requirement for further + * binary redistribution. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGES. + * + * Authors: Justin T. Gibbs (Spectra Logic Corporation) + * + * $FreeBSD$ + */ + +/** + * \file callout.h + * + * \brief Interface for timer based callback services. + * + * Header requirements: + * + * #include + * + * #include + */ + +#ifndef _CALLOUT_H_ +#define _CALLOUT_H_ + +/** + * \brief Type of the function callback from a Callout. + */ +typedef void CalloutFunc_t(void *); + +/** + * \brief Interface to a schedulable one-shot timer with the granularity + * of the system clock (see setitimer(2)). + * + * Determination of callback expiration is triggered by the SIGALRM + * signal. Callout callbacks are always delivered from Zfsd's event + * processing loop. + * + * Periodic actions can be triggered via the Callout mechanisms by + * resetting the Callout from within its callback. + */ +class Callout +{ +public: + + /** + * Initialize the Callout subsystem. + */ + static void Init(); + + /** + * Function called (via SIGALRM) when our interval + * timer expires. + */ + static void AlarmSignalHandler(int); + + /** + * Execute callbacks for all callouts that have the same + * expiration time as the first callout in the list. + */ + static void ExpireCallouts(); + + /** Constructor.
*/ + Callout(); + + /** + * Returns true if callout has not been stopped, + * or deactivated since the last time the callout was + * reset. + */ + bool IsActive() const; + + /** + * Returns true if callout is still waiting to expire. + */ + bool IsPending() const; + + /** + * Disestablish a callout. + */ + bool Stop(); + + /** + * \brief Establish or change a timeout. + * + * \param interval Timeval indicating the time which must elapse + * before this callout fires. + * \param func Pointer to the callback function + * \param arg Argument pointer to pass to callback function + * + * \return Cancellation status. + * true: The previous callback was pending and therefore + * was cancelled. + * false: The callout was not pending at the time of this + * reset request. + * In all cases, a new callout is established. + */ + bool Reset(const timeval &interval, CalloutFunc_t *func, void *arg); + + /** + * \brief Calculate the remaining time until this Callout's timer + * expires. + * + * The return value will be slightly greater than the actual time to + * expiry. + * + * If the callout is not pending, returns INT_MAX. + */ + timeval TimeRemaining() const; + +private: + /** + * All active callouts sorted by expiration time. The callout + * with the nearest expiration time is at the head of the list. + */ + static std::list<Callout *> s_activeCallouts; + + /** + * The interval timer has expired. This variable is set from + * signal handler context and tested from Zfsd::EventLoop() + * context via ExpireCallouts(). + */ + static bool s_alarmFired; + + /** + * Time, relative to others in the active list, until + * this callout is fired. + */ + timeval m_interval; + + /** Callback function argument. */ + void *m_arg; + + /** + * The callback function associated with this timer + * entry. + */ + CalloutFunc_t *m_func; + + /** State of this callout. */ + bool m_pending; +}; + +//- Callout public const methods ---------------------------------------------- +inline bool +Callout::IsPending() const +{ + return (m_pending); +} + +//- Callout public methods ---------------------------------------------------- +inline +Callout::Callout() + : m_arg(0), + m_func(NULL), + m_pending(false) +{ + timerclear(&m_interval); +} + +#endif /* _CALLOUT_H_ */ diff --git a/cmd/zfsd/case_file.cc b/cmd/zfsd/case_file.cc new file mode 100644 index 0000000000..5064d3c266 --- /dev/null +++ b/cmd/zfsd/case_file.cc @@ -0,0 +1,1195 @@ +/*- + * Copyright (c) 2011, 2012, 2013, 2014, 2016 Spectra Logic Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * substantially similar to the "NO WARRANTY" disclaimer below + * ("Disclaimer") and any redistribution must be conditioned upon + * including a substantially similar Disclaimer requirement for further + * binary redistribution. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGES. + * + * Authors: Justin T. Gibbs (Spectra Logic Corporation) + */ + +/** + * \file case_file.cc + * + * We keep case files for any leaf vdev that is not in the optimal state. + * However, we only serialize to disk those events that need to be preserved + * across reboots. For now, this is just a log of soft errors which we + * accumulate in order to mark a device as degraded. + */ +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "callout.h" +#include "vdev_iterator.h" +#include "zfsd_event.h" +#include "case_file.h" +#include "vdev.h" +#include "zfsd.h" +#include "zfsd_exception.h" +#include "zpool_list.h" + +__FBSDID("$FreeBSD$"); + +/*============================ Namespace Control =============================*/ +using std::hex; +using std::ifstream; +using std::stringstream; +using std::setfill; +using std::setw; + +using DevdCtl::Event; +using DevdCtl::EventFactory; +using DevdCtl::EventList; +using DevdCtl::Guid; +using DevdCtl::ParseException; + +/*--------------------------------- CaseFile ---------------------------------*/ +//- CaseFile Static Data ------------------------------------------------------- + +CaseFileList CaseFile::s_activeCases; +const string CaseFile::s_caseFilePath = "/var/db/zfsd/cases"; +const timeval CaseFile::s_removeGracePeriod = { 60 /*sec*/, 0 /*usec*/}; + +//- CaseFile Static Public Methods --------------------------------------------- +CaseFile * +CaseFile::Find(Guid poolGUID, Guid vdevGUID) +{ + for (CaseFileList::iterator curCase = s_activeCases.begin(); + curCase != s_activeCases.end(); curCase++) { + + if (((*curCase)->PoolGUID() != poolGUID + && Guid::InvalidGuid() != poolGUID) + || (*curCase)->VdevGUID() != vdevGUID) + continue; + + /* + * We only carry one active case per-vdev. + */ + return (*curCase); + } + return (NULL); +} + +void +CaseFile::Find(Guid poolGUID, Guid vdevGUID, CaseFileList &cases) +{ + for (CaseFileList::iterator curCase = s_activeCases.begin(); + curCase != s_activeCases.end(); curCase++) { + if (((*curCase)->PoolGUID() != poolGUID && + Guid::InvalidGuid() != poolGUID) || + (*curCase)->VdevGUID() != vdevGUID) + continue; + + /* + * We can have multiple cases for spare vdevs + */ + cases.push_back(*curCase); + if (!(*curCase)->IsSpare()) { + return; + } + } +} + +CaseFile * +CaseFile::Find(const string &physPath) +{ + CaseFile *result = NULL; + + for (CaseFileList::iterator curCase = s_activeCases.begin(); + curCase != s_activeCases.end(); curCase++) { + + if ((*curCase)->PhysicalPath() != physPath) + continue; + + if (result != NULL) { + syslog(LOG_WARNING, "Multiple casefiles found for " + "physical path %s. 
" + "This is most likely a bug in zfsd", + physPath.c_str()); + } + result = *curCase; + } + return (result); +} + + +void +CaseFile::ReEvaluateByGuid(Guid poolGUID, const ZfsEvent &event) +{ + CaseFileList::iterator casefile; + for (casefile = s_activeCases.begin(); casefile != s_activeCases.end();){ + CaseFileList::iterator next = casefile; + next++; + if (poolGUID == (*casefile)->PoolGUID()) + (*casefile)->ReEvaluate(event); + casefile = next; + } +} + +CaseFile & +CaseFile::Create(Vdev &vdev) +{ + CaseFile *activeCase; + + activeCase = Find(vdev.PoolGUID(), vdev.GUID()); + if (activeCase == NULL) + activeCase = new CaseFile(vdev); + + return (*activeCase); +} + +void +CaseFile::DeSerialize() +{ + struct dirent **caseFiles; + + int numCaseFiles(scandir(s_caseFilePath.c_str(), &caseFiles, + DeSerializeSelector, /*compar*/NULL)); + + if (numCaseFiles == -1) + return; + if (numCaseFiles == 0) { + free(caseFiles); + return; + } + + for (int i = 0; i < numCaseFiles; i++) { + + DeSerializeFile(caseFiles[i]->d_name); + free(caseFiles[i]); + } + free(caseFiles); +} + +bool +CaseFile::Empty() +{ + return (s_activeCases.empty()); +} + +void +CaseFile::LogAll() +{ + for (CaseFileList::iterator curCase = s_activeCases.begin(); + curCase != s_activeCases.end(); curCase++) + (*curCase)->Log(); +} + +void +CaseFile::PurgeAll() +{ + /* + * Serialize casefiles before deleting them so that they can be reread + * and revalidated during BuildCaseFiles. + * CaseFiles remove themselves from this list on destruction. + */ + while (s_activeCases.size() != 0) { + CaseFile *casefile = s_activeCases.front(); + casefile->Serialize(); + delete casefile; + } + +} + +int +CaseFile::IsSpare() +{ + return (m_is_spare); +} + +//- CaseFile Public Methods ---------------------------------------------------- +bool +CaseFile::RefreshVdevState() +{ + ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); + zpool_handle_t *casePool(zpl.empty() ? NULL : zpl.front()); + if (casePool == NULL) + return (false); + + Vdev vd(casePool, CaseVdev(casePool)); + if (vd.DoesNotExist()) + return (false); + + m_vdevState = vd.State(); + m_vdevPhysPath = vd.PhysicalPath(); + return (true); +} + +bool +CaseFile::ReEvaluate(const string &devPath, const string &physPath, Vdev *vdev) +{ + ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); + zpool_handle_t *pool(zpl.empty() ? NULL : zpl.front()); + int flags = ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE; + + if (pool == NULL || !RefreshVdevState()) { + /* + * The pool or vdev for this case file is no longer + * part of the configuration. This can happen + * if we process a device arrival notification + * before seeing the ZFS configuration change + * event. + */ + syslog(LOG_INFO, + "CaseFile::ReEvaluate(%s,%s) Pool/Vdev unconfigured. " + "Closing\n", + PoolGUIDString().c_str(), + VdevGUIDString().c_str()); + Close(); + + /* + * Since this event was not used to close this + * case, do not report it as consumed. + */ + return (/*consumed*/false); + } + + if (VdevState() > VDEV_STATE_CANT_OPEN) { + /* + * For now, newly discovered devices only help for + * devices that are missing. In the future, we might + * use a newly inserted spare to replace a degraded + * or faulted device. 
+ */ + syslog(LOG_INFO, "CaseFile::ReEvaluate(%s,%s): Pool/Vdev ignored", + PoolGUIDString().c_str(), VdevGUIDString().c_str()); + return (/*consumed*/false); + } + + if (vdev != NULL + && ( vdev->PoolGUID() == m_poolGUID + || vdev->PoolGUID() == Guid::InvalidGuid()) + && vdev->GUID() == m_vdevGUID) { + + if (IsSpare()) + flags |= ZFS_ONLINE_SPARE; + if (zpool_vdev_online(pool, vdev->GUIDString().c_str(), + flags, &m_vdevState) != 0) { + syslog(LOG_ERR, + "Failed to online vdev(%s/%s:%s): %s: %s\n", + zpool_get_name(pool), vdev->GUIDString().c_str(), + devPath.c_str(), libzfs_error_action(g_zfsHandle), + libzfs_error_description(g_zfsHandle)); + return (/*consumed*/false); + } + + syslog(LOG_INFO, "Onlined vdev(%s/%s:%s). State now %s.\n", + zpool_get_name(pool), vdev->GUIDString().c_str(), + devPath.c_str(), + zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); + + /* + * Check the vdev state post the online action to see + * if we can retire this case. + */ + CloseIfSolved(); + + return (/*consumed*/true); + } + + /* + * If the auto-replace policy is enabled, and we have physical + * path information, try a physical path replacement. + */ + if (zpool_get_prop_int(pool, ZPOOL_PROP_AUTOREPLACE, NULL) == 0) { + syslog(LOG_INFO, + "CaseFile(%s:%s:%s): AutoReplace not set. " + "Ignoring device insertion.\n", + PoolGUIDString().c_str(), + VdevGUIDString().c_str(), + zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); + return (/*consumed*/false); + } + + if (PhysicalPath().empty()) { + syslog(LOG_INFO, + "CaseFile(%s:%s:%s): No physical path information. " + "Ignoring device insertion.\n", + PoolGUIDString().c_str(), + VdevGUIDString().c_str(), + zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); + return (/*consumed*/false); + } + + if (physPath != PhysicalPath()) { + syslog(LOG_INFO, + "CaseFile(%s:%s:%s): Physical path mismatch. " + "Ignoring device insertion.\n", + PoolGUIDString().c_str(), + VdevGUIDString().c_str(), + zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); + return (/*consumed*/false); + } + + /* Write a label on the newly inserted disk. */ + if (zpool_label_disk(g_zfsHandle, pool, devPath.c_str()) != 0) { + syslog(LOG_ERR, + "Replace vdev(%s/%s) by physical path (label): %s: %s\n", + zpool_get_name(pool), VdevGUIDString().c_str(), + libzfs_error_action(g_zfsHandle), + libzfs_error_description(g_zfsHandle)); + return (/*consumed*/false); + } + + syslog(LOG_INFO, "CaseFile::ReEvaluate(%s/%s): Replacing with %s", + PoolGUIDString().c_str(), VdevGUIDString().c_str(), + devPath.c_str()); + return (Replace(VDEV_TYPE_DISK, devPath.c_str(), /*isspare*/false)); +} + +bool +CaseFile::ReEvaluate(const ZfsEvent &event) +{ + bool consumed(false); + + if (event.Value("type") == "misc.fs.zfs.vdev_remove") { + /* + * The Vdev we represent has been removed from the + * configuration. This case is no longer of value. + */ + Close(); + + return (/*consumed*/true); + } else if (event.Value("type") == "misc.fs.zfs.pool_destroy") { + /* This Pool has been destroyed. Discard the case */ + Close(); + + return (/*consumed*/true); + } else if (event.Value("type") == "misc.fs.zfs.config_sync") { + RefreshVdevState(); + if (VdevState() < VDEV_STATE_HEALTHY) + consumed = ActivateSpare(); + } + + + if (event.Value("class") == "resource.fs.zfs.removed") { + bool spare_activated; + + if (!RefreshVdevState()) { + /* + * The pool or vdev for this case file is no longer + * part of the configuration. 
This can happen + * if we process a device arrival notification + * before seeing the ZFS configuration change + * event. + */ + syslog(LOG_INFO, + "CaseFile::ReEvaluate(%s,%s) Pool/Vdev " + "unconfigured. Closing\n", + PoolGUIDString().c_str(), + VdevGUIDString().c_str()); + /* + * Close the case now so we won't waste cycles in the + * system rescan + */ + Close(); + + /* + * Since this event was not used to close this + * case, do not report it as consumed. + */ + return (/*consumed*/false); + } + + /* + * Discard any tentative I/O error events for + * this case. They were most likely caused by the + * hot-unplug of this device. + */ + PurgeTentativeEvents(); + + /* Try to activate spares if they are available */ + spare_activated = ActivateSpare(); + + /* + * Rescan the drives in the system to see if a recent + * drive arrival can be used to solve this case. + */ + ZfsDaemon::RequestSystemRescan(); + + /* + * Consume the event if we successfully activated a spare. + * Otherwise, leave it in the unconsumed events list so that the + * future addition of a spare to this pool might be able to + * close the case + */ + consumed = spare_activated; + } else if (event.Value("class") == "resource.fs.zfs.statechange") { + RefreshVdevState(); + /* + * If this vdev is DEGRADED, FAULTED, or UNAVAIL, try to + * activate a hotspare. Otherwise, ignore the event + */ + if (VdevState() == VDEV_STATE_FAULTED || + VdevState() == VDEV_STATE_DEGRADED || + VdevState() == VDEV_STATE_CANT_OPEN) + (void) ActivateSpare(); + consumed = true; + } + else if (event.Value("class") == "ereport.fs.zfs.io" || + event.Value("class") == "ereport.fs.zfs.checksum") { + + m_tentativeEvents.push_front(event.DeepCopy()); + RegisterCallout(event); + consumed = true; + } + + bool closed(CloseIfSolved()); + + return (consumed || closed); +} + +/* Find a Vdev containing the vdev with the given GUID */ +static nvlist_t* +find_parent(nvlist_t *pool_config, nvlist_t *config, DevdCtl::Guid child_guid) +{ + nvlist_t **vdevChildren; + int error; + unsigned ch, numChildren; + + error = nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_CHILDREN, + &vdevChildren, &numChildren); + + if (error != 0 || numChildren == 0) + return (NULL); + + for (ch = 0; ch < numChildren; ch++) { + nvlist *result; + Vdev vdev(pool_config, vdevChildren[ch]); + + if (vdev.GUID() == child_guid) + return (config); + + result = find_parent(pool_config, vdevChildren[ch], child_guid); + if (result != NULL) + return (result); + } + + return (NULL); +} + +bool +CaseFile::ActivateSpare() { + nvlist_t *config, *nvroot, *parent_config; + nvlist_t **spares; + const char *devPath, *poolname, *vdev_type; + u_int nspares, i; + int error; + + ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); + zpool_handle_t *zhp(zpl.empty() ? 
NULL : zpl.front()); + if (zhp == NULL) { + syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool " + "for pool_guid %" PRIu64".", (uint64_t)m_poolGUID); + return (false); + } + poolname = zpool_get_name(zhp); + config = zpool_get_config(zhp, NULL); + if (config == NULL) { + syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool " + "config for pool %s", poolname); + return (false); + } + error = nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot); + if (error != 0) { + syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find vdev " + "tree for pool %s", poolname); + return (false); + } + + parent_config = find_parent(config, nvroot, m_vdevGUID); + if (parent_config != NULL) { + const char *parent_type; + + /* + * Don't activate spares for members of a "replacing" vdev. + * They're already dealt with. Sparing them will just drag out + * the resilver process. + */ + error = nvlist_lookup_string(parent_config, + ZPOOL_CONFIG_TYPE, &parent_type); + if (error == 0 && strcmp(parent_type, VDEV_TYPE_REPLACING) == 0) + return (false); + } + + nspares = 0; + nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares, + &nspares); + if (nspares == 0) { + /* The pool has no spares configured */ + syslog(LOG_INFO, "CaseFile::ActivateSpare: " + "No spares available for pool %s", poolname); + return (false); + } + for (i = 0; i < nspares; i++) { + uint64_t *nvlist_array; + vdev_stat_t *vs; + uint_t nstats; + + if (nvlist_lookup_uint64_array(spares[i], + ZPOOL_CONFIG_VDEV_STATS, &nvlist_array, &nstats) != 0) { + syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not " + "find vdev stats for pool %s, spare %d", + poolname, i); + return (false); + } + vs = reinterpret_cast<vdev_stat_t *>(nvlist_array); + + if ((vs->vs_aux != VDEV_AUX_SPARED) + && (vs->vs_state == VDEV_STATE_HEALTHY)) { + /* We found a usable spare */ + break; + } + } + + if (i == nspares) { + /* No available spares were found */ + return (false); + } + + error = nvlist_lookup_string(spares[i], ZPOOL_CONFIG_PATH, &devPath); + if (error != 0) { + syslog(LOG_ERR, "CaseFile::ActivateSpare: Cannot determine " + "the path of pool %s, spare %d. Error %d", + poolname, i, error); + return (false); + } + + error = nvlist_lookup_string(spares[i], ZPOOL_CONFIG_TYPE, &vdev_type); + if (error != 0) { + syslog(LOG_ERR, "CaseFile::ActivateSpare: Cannot determine " + "the vdev type of pool %s, spare %d. Error %d", + poolname, i, error); + return (false); + } + + return (Replace(vdev_type, devPath, /*isspare*/true)); +} + +void +CaseFile::RegisterCallout(const Event &event) +{ + timeval now, countdown, elapsed, timestamp, zero, remaining; + + gettimeofday(&now, 0); + timestamp = event.GetTimestamp(); + timersub(&now, &timestamp, &elapsed); + timersub(&s_removeGracePeriod, &elapsed, &countdown); + /* + * If countdown is <= zero, Reset the timer to the + * smallest positive time value instead + */ + timerclear(&zero); + if (timercmp(&countdown, &zero, <=)) { + timerclear(&countdown); + countdown.tv_usec = 1; + } + + remaining = m_tentativeTimer.TimeRemaining(); + + if (!m_tentativeTimer.IsPending() + || timercmp(&countdown, &remaining, <)) + m_tentativeTimer.Reset(countdown, OnGracePeriodEnded, this); +} + + +bool +CaseFile::CloseIfSolved() +{ + if (m_events.empty() + && m_tentativeEvents.empty()) { + + /* + * We currently do not track or take actions on + * devices in the degraded or faulted state. + * Once we have support for spare pools, we'll + * retain these cases so that any spares added in + * the future can be applied to them.
+ */ + switch (VdevState()) { + case VDEV_STATE_HEALTHY: + /* No need to keep cases for healthy vdevs */ + Close(); + return (true); + case VDEV_STATE_REMOVED: + case VDEV_STATE_CANT_OPEN: + /* + * Keep open. We may solve it with a newly inserted + * device. + */ + case VDEV_STATE_FAULTED: + case VDEV_STATE_DEGRADED: + /* + * Keep open. We may solve it with the future + * addition of a spare to the pool + */ + case VDEV_STATE_UNKNOWN: + case VDEV_STATE_CLOSED: + case VDEV_STATE_OFFLINE: + /* + * Keep open? This may not be the correct behavior, + * but it's what we've always done + */ + ; + } + + /* + * Re-serialize the case in order to remove any + * previous event data. + */ + Serialize(); + } + + return (false); +} + +void +CaseFile::Log() +{ + syslog(LOG_INFO, "CaseFile(%s,%s,%s)\n", PoolGUIDString().c_str(), + VdevGUIDString().c_str(), PhysicalPath().c_str()); + syslog(LOG_INFO, "\tVdev State = %s\n", + zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); + if (m_tentativeEvents.size() != 0) { + syslog(LOG_INFO, "\t=== Tentative Events ===\n"); + for (EventList::iterator event(m_tentativeEvents.begin()); + event != m_tentativeEvents.end(); event++) + (*event)->Log(LOG_INFO); + } + if (m_events.size() != 0) { + syslog(LOG_INFO, "\t=== Events ===\n"); + for (EventList::iterator event(m_events.begin()); + event != m_events.end(); event++) + (*event)->Log(LOG_INFO); + } +} + +//- CaseFile Static Protected Methods ------------------------------------------ +void +CaseFile::OnGracePeriodEnded(void *arg) +{ + CaseFile &casefile(*static_cast(arg)); + + casefile.OnGracePeriodEnded(); +} + +int +CaseFile::DeSerializeSelector(const struct dirent *dirEntry) +{ + uint64_t poolGUID; + uint64_t vdevGUID; + + if (dirEntry->d_type == DT_REG + && sscanf(dirEntry->d_name, "pool_%" PRIu64 "_vdev_%" PRIu64 ".case", + &poolGUID, &vdevGUID) == 2) + return (1); + return (0); +} + +void +CaseFile::DeSerializeFile(const char *fileName) +{ + string fullName(s_caseFilePath + '/' + fileName); + CaseFile *existingCaseFile(NULL); + CaseFile *caseFile(NULL); + + try { + uint64_t poolGUID; + uint64_t vdevGUID; + nvlist_t *vdevConf; + + if (sscanf(fileName, "pool_%" PRIu64 "_vdev_%" PRIu64 ".case", + &poolGUID, &vdevGUID) != 2) { + throw ZfsdException("CaseFile::DeSerialize: " + "Unintelligible CaseFile filename %s.\n", fileName); + } + existingCaseFile = Find(Guid(poolGUID), Guid(vdevGUID)); + if (existingCaseFile != NULL) { + /* + * If the vdev is already degraded or faulted, + * there's no point in keeping the state around + * that we use to put a drive into the degraded + * state. However, if the vdev is simply missing, + * preserve the case data in the hopes that it will + * return. + */ + caseFile = existingCaseFile; + vdev_state curState(caseFile->VdevState()); + if (curState > VDEV_STATE_CANT_OPEN + && curState < VDEV_STATE_HEALTHY) { + unlink(fileName); + return; + } + } else { + ZpoolList zpl(ZpoolList::ZpoolByGUID, &poolGUID); + if (zpl.empty() + || (vdevConf = VdevIterator(zpl.front()) + .Find(vdevGUID)) == NULL) { + /* + * Either the pool no longer exists + * or this vdev is no longer a member of + * the pool. + */ + unlink(fullName.c_str()); + return; + } + + /* + * Any vdev we find that does not have a case file + * must be in the healthy state and thus worthy of + * continued SERD data tracking. 
+ */ + caseFile = new CaseFile(Vdev(zpl.front(), vdevConf)); + } + + ifstream caseStream(fullName.c_str()); + if (!caseStream) + throw ZfsdException("CaseFile::DeSerialize: Unable to " + "read %s.\n", fileName); + + caseFile->DeSerialize(caseStream); + } catch (const ParseException &exp) { + + exp.Log(); + if (caseFile != existingCaseFile) + delete caseFile; + + /* + * Since we can't parse the file, unlink it so we don't + * trip over it again. + */ + unlink(fileName); + } catch (const ZfsdException &zfsException) { + + zfsException.Log(); + if (caseFile != existingCaseFile) + delete caseFile; + } +} + +//- CaseFile Protected Methods ------------------------------------------------- +CaseFile::CaseFile(const Vdev &vdev) + : m_poolGUID(vdev.PoolGUID()), + m_vdevGUID(vdev.GUID()), + m_vdevState(vdev.State()), + m_vdevPhysPath(vdev.PhysicalPath()), + m_is_spare(vdev.IsSpare()) +{ + stringstream guidString; + + guidString << m_vdevGUID; + m_vdevGUIDString = guidString.str(); + guidString.str(""); + guidString << m_poolGUID; + m_poolGUIDString = guidString.str(); + + s_activeCases.push_back(this); + + syslog(LOG_INFO, "Creating new CaseFile:\n"); + Log(); +} + +CaseFile::~CaseFile() +{ + PurgeEvents(); + PurgeTentativeEvents(); + m_tentativeTimer.Stop(); + s_activeCases.remove(this); +} + +void +CaseFile::PurgeEvents() +{ + for (EventList::iterator event(m_events.begin()); + event != m_events.end(); event++) + delete *event; + + m_events.clear(); +} + +void +CaseFile::PurgeTentativeEvents() +{ + for (EventList::iterator event(m_tentativeEvents.begin()); + event != m_tentativeEvents.end(); event++) + delete *event; + + m_tentativeEvents.clear(); +} + +void +CaseFile::SerializeEvList(const EventList events, int fd, + const char* prefix) const +{ + if (events.empty()) + return; + for (EventList::const_iterator curEvent = events.begin(); + curEvent != events.end(); curEvent++) { + const string &eventString((*curEvent)->GetEventString()); + + // TODO: replace many write(2) calls with a single writev(2) + if (prefix) + write(fd, prefix, strlen(prefix)); + write(fd, eventString.c_str(), eventString.length()); + } +} + +void +CaseFile::Serialize() +{ + stringstream saveFile; + + saveFile << setfill('0') + << s_caseFilePath << "/" + << "pool_" << PoolGUIDString() + << "_vdev_" << VdevGUIDString() + << ".case"; + + if (m_events.empty() && m_tentativeEvents.empty()) { + unlink(saveFile.str().c_str()); + return; + } + + int fd(open(saveFile.str().c_str(), O_CREAT|O_TRUNC|O_WRONLY, 0644)); + if (fd == -1) { + syslog(LOG_ERR, "CaseFile::Serialize: Unable to open %s.\n", + saveFile.str().c_str()); + return; + } + SerializeEvList(m_events, fd); + SerializeEvList(m_tentativeEvents, fd, "tentative "); + close(fd); +} + +/* + * XXX: This method assumes that events may not contain embedded newlines. If + * ever events can contain embedded newlines, then CaseFile must switch + * serialization formats + */ +void +CaseFile::DeSerialize(ifstream &caseStream) +{ + string evString; + const EventFactory &factory(ZfsDaemon::Get().GetFactory()); + + caseStream >> std::noskipws >> std::ws; + while (caseStream.good()) { + /* + * Outline: + * read the beginning of a line and check it for + * "tentative". If found, discard "tentative". 
+ * Create a new event + * continue + */ + EventList* destEvents; + const string tentFlag("tentative "); + string line; + std::stringbuf lineBuf; + + caseStream.get(lineBuf); + caseStream.ignore(); /*discard the newline character*/ + line = lineBuf.str(); + if (line.compare(0, tentFlag.size(), tentFlag) == 0) { + /* Discard "tentative" */ + line.erase(0, tentFlag.size()); + destEvents = &m_tentativeEvents; + } else { + destEvents = &m_events; + } + Event *event(Event::CreateEvent(factory, line)); + if (event != NULL) { + destEvents->push_back(event); + RegisterCallout(*event); + } + } +} + +void +CaseFile::Close() +{ + /* + * This case is no longer relevant. Clean up our + * serialization file, and delete the case. + */ + syslog(LOG_INFO, "CaseFile(%s,%s) closed - State %s\n", + PoolGUIDString().c_str(), VdevGUIDString().c_str(), + zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); + + /* + * Serialization of a Case with no event data clears the + * serialization data for that event. + */ + PurgeEvents(); + Serialize(); + + delete this; +} + +void +CaseFile::OnGracePeriodEnded() +{ + bool should_fault, should_degrade; + ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); + zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front()); + + m_events.splice(m_events.begin(), m_tentativeEvents); + should_fault = ShouldFault(); + should_degrade = ShouldDegrade(); + + if (should_fault || should_degrade) { + if (zhp == NULL + || (VdevIterator(zhp).Find(m_vdevGUID)) == NULL) { + /* + * Either the pool no longer exists + * or this vdev is no longer a member of + * the pool. + */ + Close(); + return; + } + + } + + /* A fault condition has priority over a degrade condition */ + if (ShouldFault()) { + /* Fault the vdev and close the case. */ + if (zpool_vdev_fault(zhp, (uint64_t)m_vdevGUID, + VDEV_AUX_ERR_EXCEEDED) == 0) { + syslog(LOG_INFO, "Faulting vdev(%s/%s)", + PoolGUIDString().c_str(), + VdevGUIDString().c_str()); + Close(); + return; + } + else { + syslog(LOG_ERR, "Fault vdev(%s/%s): %s: %s\n", + PoolGUIDString().c_str(), + VdevGUIDString().c_str(), + libzfs_error_action(g_zfsHandle), + libzfs_error_description(g_zfsHandle)); + } + } + else if (ShouldDegrade()) { + /* Degrade the vdev and close the case. */ + if (zpool_vdev_degrade(zhp, (uint64_t)m_vdevGUID, + VDEV_AUX_ERR_EXCEEDED) == 0) { + syslog(LOG_INFO, "Degrading vdev(%s/%s)", + PoolGUIDString().c_str(), + VdevGUIDString().c_str()); + Close(); + return; + } + else { + syslog(LOG_ERR, "Degrade vdev(%s/%s): %s: %s\n", + PoolGUIDString().c_str(), + VdevGUIDString().c_str(), + libzfs_error_action(g_zfsHandle), + libzfs_error_description(g_zfsHandle)); + } + } + Serialize(); +} + +Vdev +CaseFile::BeingReplacedBy(zpool_handle_t *zhp) { + Vdev vd(zhp, CaseVdev(zhp)); + std::list<Vdev> children; + std::list<Vdev>::iterator children_it; + + Vdev parent(vd.Parent()); + Vdev replacing(NonexistentVdev); + + /* + * To determine whether we are being replaced by another spare that + * is still working, make sure that it is currently spared and + * that the spare is either resilvering or healthy. If any of these + * conditions fail, then we are not being replaced by a spare. + * + * If the spare is healthy, then the case file should be closed very + * soon after this check. + */ + if (parent.DoesNotExist() + || parent.Name(zhp, /*verbose*/false) != "spare") + return (NonexistentVdev); + + children = parent.Children(); + children_it = children.begin(); + for (; children_it != children.end(); children_it++) { + Vdev child = *children_it; + + /* Skip our vdev.
*/ + if (child.GUID() == VdevGUID()) + continue; + /* + * Accept the first child that doesn't match our GUID, or + * any resilvering/healthy device if one exists. + */ + if (replacing.DoesNotExist() || child.IsResilvering() + || child.State() == VDEV_STATE_HEALTHY) + replacing = child; + } + + return (replacing); +} + +bool +CaseFile::Replace(const char* vdev_type, const char* path, bool isspare) { + nvlist_t *nvroot, *newvd; + const char *poolname; + string oldstr(VdevGUIDString()); + bool retval = true; + + /* Figure out what pool we're working on */ + ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); + zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front()); + if (zhp == NULL) { + syslog(LOG_ERR, "CaseFile::Replace: could not find pool for " + "pool_guid %" PRIu64 ".", (uint64_t)m_poolGUID); + return (false); + } + poolname = zpool_get_name(zhp); + Vdev vd(zhp, CaseVdev(zhp)); + Vdev replaced(BeingReplacedBy(zhp)); + + if (isspare && !vd.IsSpare() && !replaced.DoesNotExist()) { + /* If we are already being replaced by a working spare, pass. */ + if (replaced.IsResilvering() + || replaced.State() == VDEV_STATE_HEALTHY) { + syslog(LOG_INFO, "CaseFile::Replace(%s->%s): already " + "replaced", VdevGUIDString().c_str(), path); + return (/*consumed*/false); + } + /* + * If we have already been replaced by a spare, but that spare + * is broken, we must spare the spare, not the original device. + */ + oldstr = replaced.GUIDString(); + syslog(LOG_INFO, "CaseFile::Replace(%s->%s): sparing " + "broken spare %s instead", VdevGUIDString().c_str(), + path, oldstr.c_str()); + } + + /* + * Build a root vdev/leaf vdev configuration suitable for + * zpool_vdev_attach. Only enough data for the kernel to find + * the device (i.e. type and disk device node path) are needed. + */ + nvroot = NULL; + newvd = NULL; + + if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0 + || nvlist_alloc(&newvd, NV_UNIQUE_NAME, 0) != 0) { + syslog(LOG_ERR, "Replace vdev(%s/%s): Unable to allocate " + "configuration data.", poolname, oldstr.c_str()); + if (nvroot != NULL) + nvlist_free(nvroot); + return (false); + } + if (nvlist_add_string(newvd, ZPOOL_CONFIG_TYPE, vdev_type) != 0 + || nvlist_add_string(newvd, ZPOOL_CONFIG_PATH, path) != 0 + || nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) != 0 + || nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, + &newvd, 1) != 0) { + syslog(LOG_ERR, "Replace vdev(%s/%s): Unable to initialize " + "configuration data.", poolname, oldstr.c_str()); + nvlist_free(newvd); + nvlist_free(nvroot); + return (false); + } + + /* Data was copied when added to the root vdev. */ + nvlist_free(newvd); + + retval = (zpool_vdev_attach(zhp, oldstr.c_str(), path, nvroot, + /*replace*/B_TRUE, /*rebuild*/ B_FALSE) == 0); + if (retval) + syslog(LOG_INFO, "Replacing vdev(%s/%s) with %s\n", + poolname, oldstr.c_str(), path); + else + syslog(LOG_ERR, "Replace vdev(%s/%s): %s: %s\n", + poolname, oldstr.c_str(), libzfs_error_action(g_zfsHandle), + libzfs_error_description(g_zfsHandle)); + nvlist_free(nvroot); + + return (retval); +} + +/* Does the argument event refer to a checksum error? */ +static bool +IsChecksumEvent(const Event* const event) +{ + return ("ereport.fs.zfs.checksum" == event->Value("type")); +} + +/* Does the argument event refer to an IO error?
*/ +static bool +IsIOEvent(const Event* const event) +{ + return ("ereport.fs.zfs.io" == event->Value("type")); +} + +bool +CaseFile::ShouldDegrade() const +{ + return (std::count_if(m_events.begin(), m_events.end(), + IsChecksumEvent) > ZFS_DEGRADE_IO_COUNT); +} + +bool +CaseFile::ShouldFault() const +{ + return (std::count_if(m_events.begin(), m_events.end(), + IsIOEvent) > ZFS_DEGRADE_IO_COUNT); +} + +nvlist_t * +CaseFile::CaseVdev(zpool_handle_t *zhp) const +{ + return (VdevIterator(zhp).Find(VdevGUID())); +} diff --git a/cmd/zfsd/case_file.h b/cmd/zfsd/case_file.h new file mode 100644 index 0000000000..d7475d331a --- /dev/null +++ b/cmd/zfsd/case_file.h @@ -0,0 +1,452 @@ +/*- + * Copyright (c) 2011, 2012, 2013 Spectra Logic Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * substantially similar to the "NO WARRANTY" disclaimer below + * ("Disclaimer") and any redistribution must be conditioned upon + * including a substantially similar Disclaimer requirement for further + * binary redistribution. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGES. + * + * Authors: Justin T. Gibbs (Spectra Logic Corporation) + * + * $FreeBSD$ + */ + +/** + * \file case_file.h + * + * CaseFile objects aggregate vdev faults that may require ZFSD action + * in order to maintain the health of a ZFS pool. + * + * Header requirements: + * + * #include + * + * #include "callout.h" + * #include "zfsd_event.h" + */ +#ifndef _CASE_FILE_H_ +#define _CASE_FILE_H_ + +/*=========================== Forward Declarations ===========================*/ +class CaseFile; +class Vdev; + +/*============================= Class Definitions ============================*/ +/*------------------------------- CaseFileList -------------------------------*/ +/** + * CaseFileList is a specialization of the standard list STL container. + */ +typedef std::list< CaseFile *> CaseFileList; + +/*--------------------------------- CaseFile ---------------------------------*/ +/** + * A CaseFile object is instantiated anytime a vdev for an active pool + * experiences an I/O error, is faulted by ZFS, or is determined to be + * missing/removed. + * + * A vdev may have at most one CaseFile. + * + * CaseFiles are retired when a vdev leaves an active pool configuration + * or an action is taken to resolve the issues recorded in the CaseFile. 
+ * + * Logging a case against a vdev does not imply that an immediate action + * to resolve a fault is required or even desired. For example, a CaseFile + * must accumulate a number of I/O errors in order to flag a device as + * degraded. + * + * Vdev I/O errors are not recorded in ZFS label information. For this + * reason, CaseFile%%s with accumulated I/O error events are serialized + * to the file system so that they survive across boots. Currently all + * other fault types can be reconstructed from ZFS label information, so + * CaseFile%%s for missing, faulted, or degraded members are just recreated + * at ZFSD startup instead of being deserialized from the file system. + */ +class CaseFile +{ +public: + /** + * \brief Find a CaseFile object by a vdev's pool/vdev GUID tuple. + * + * \param poolGUID Pool GUID for the vdev of the CaseFile to find. + * If InvalidGuid, then only match the vdev GUID + * instead of both pool and vdev GUIDs. + * \param vdevGUID Vdev GUID for the vdev of the CaseFile to find. + * + * \return If found, a pointer to a valid CaseFile object. + * Otherwise NULL. + */ + static CaseFile *Find(DevdCtl::Guid poolGUID, DevdCtl::Guid vdevGUID); + + /** + * \brief Find multiple CaseFile objects by a vdev's pool/vdev + * GUID tuple (special case for spare vdevs) + * + * \param poolGUID Pool GUID for the vdev of the CaseFile to find. + * If InvalidGuid, then only match the vdev GUID + * instead of both pool and vdev GUIDs. + * \param vdevGUID Vdev GUID for the vdev of the CaseFile to find. + * \param caseList List of cases associated with the vdev. + */ + static void Find(DevdCtl::Guid poolGUID, DevdCtl::Guid vdevGUID, + CaseFileList &caseList); + + /** + * \brief Find a CaseFile object by a vdev's current/last known + * physical path. + * + * \param physPath Physical path of the vdev of the CaseFile to find. + * + * \return If found, a pointer to a valid CaseFile object. + * Otherwise NULL. + */ + static CaseFile *Find(const string &physPath); + + /** + * \brief ReEvaluate all open cases whose pool guid matches the argument + * + * \param poolGUID Only reevaluate cases for this pool + * \param event Try to consume this event with the casefile + */ + static void ReEvaluateByGuid(DevdCtl::Guid poolGUID, + const ZfsEvent &event); + + /** + * \brief Create or return an existing active CaseFile for the + * specified vdev. + * + * \param vdev The vdev object for which to find/create a CaseFile. + * + * \return A reference to a valid CaseFile object. + */ + static CaseFile &Create(Vdev &vdev); + + /** + * \brief Deserialize all serialized CaseFile objects found in + * the file system. + */ + static void DeSerialize(); + + /** + * \brief Returns true if there are no CaseFiles + */ + static bool Empty(); + + /** + * \brief Emit syslog data on all active CaseFile%%s in the system. + */ + static void LogAll(); + + /** + * \brief Destroy the in-core cache of CaseFile data. + * + * This routine does not disturb the on disk, serialized, CaseFile + * data. + */ + static void PurgeAll(); + + DevdCtl::Guid PoolGUID() const; + DevdCtl::Guid VdevGUID() const; + vdev_state VdevState() const; + const string &PoolGUIDString() const; + const string &VdevGUIDString() const; + const string &PhysicalPath() const; + + /** + * \brief Attempt to resolve this CaseFile using the disk + * resource at the given device/physical path/vdev object + * tuple. + * + * \param devPath The devfs path for the disk resource. + * \param physPath The physical path information reported by + * the disk resource.
* \param vdev If the disk contains ZFS label information, + * a pointer to the disk label's vdev object + * data. Otherwise NULL. + * + * \return True if this event was consumed by this CaseFile. + */ + bool ReEvaluate(const string &devPath, const string &physPath, + Vdev *vdev); + + /** + * \brief Update this CaseFile in light of the provided ZfsEvent. + * + * Must be virtual so it can be overridden in the unit tests + * + * \param event The ZfsEvent to evaluate. + * + * \return True if this event was consumed by this CaseFile. + */ + virtual bool ReEvaluate(const ZfsEvent &event); + + /** + * \brief Register an itimer callout for the given event, if necessary + */ + virtual void RegisterCallout(const DevdCtl::Event &event); + + /** + * \brief Close a case if it is no longer relevant. + * + * This method deals with cases tracking soft errors. Soft errors + * will be discarded should a remove event occur within a short period + * of the soft errors being reported. We also discard the events + * if the vdev is marked degraded or failed. + * + * \return True if the case is closed. False otherwise. + */ + bool CloseIfSolved(); + + /** + * \brief Emit data about this CaseFile via syslog(3). + */ + void Log(); + + /** + * \brief Whether we should degrade this vdev + */ + bool ShouldDegrade() const; + + /** + * \brief Whether we should fault this vdev + */ + bool ShouldFault() const; + + /** + * \brief If this vdev is a spare + */ + int IsSpare(); + +protected: + enum { + /** + * The number of soft errors on a vdev required + * to transition a vdev from healthy to degraded + * status. + */ + ZFS_DEGRADE_IO_COUNT = 50 + }; + + static CalloutFunc_t OnGracePeriodEnded; + + /** + * \brief scandir(3) filter function used to find files containing + * serialized CaseFile data. + * + * \param dirEntry Directory entry for the file to filter. + * + * \return Non-zero for a file to include in the selection, + * otherwise 0. + */ + static int DeSerializeSelector(const struct dirent *dirEntry); + + /** + * \brief Given the name of a file containing serialized events from a + * CaseFile object, create/update an in-core CaseFile object + * representing the serialized data. + * + * \param fileName The name of a file containing serialized events + * from a CaseFile object. + */ + static void DeSerializeFile(const char *fileName); + + /** Constructor. */ + CaseFile(const Vdev &vdev); + + /** + * Destructor. + * Must be virtual so it can be subclassed in the unit tests + */ + virtual ~CaseFile(); + + /** + * \brief Reload state for the vdev associated with this CaseFile. + * + * \return True if the refresh was successful. False if the system + * has no record of the pool or vdev for this CaseFile. + */ + virtual bool RefreshVdevState(); + + /** + * \brief Free all events in the m_events list. + */ + void PurgeEvents(); + + /** + * \brief Free all events in the m_tentativeEvents list. + */ + void PurgeTentativeEvents(); + + /** + * \brief Commit to file system storage. + */ + void Serialize(); + + /** + * \brief Retrieve event data from a serialization stream. + * + * \param caseStream The serialization stream to parse. + */ + void DeSerialize(std::ifstream &caseStream); + + /** + * \brief Serializes the supplied event list and writes it to fd + * + * \param prefix If not NULL, this prefix will be prepended to + * every event in the file. + */ + void SerializeEvList(const DevdCtl::EventList events, int fd, + const char* prefix=NULL) const; + + /** + * \brief Unconditionally close a CaseFile.
+ */ + virtual void Close(); + + /** + * \brief Callout callback invoked when the remove timer grace + * period expires. + * + * If no remove events are received prior to the grace period + * firing, then any tentative events are promoted and counted + * against the health of the vdev. + */ + void OnGracePeriodEnded(); + + /** + * \brief Attempt to activate a spare on this case's pool. + * + * Call this whenever a pool becomes degraded. It will look for any + * spare devices and activate one to replace the casefile's vdev. It + * will _not_ close the casefile; that should only happen when the + * missing drive is replaced or the user promotes the spare. + * + * \return True if a spare was activated + */ + bool ActivateSpare(); + + /** + * \brief replace a pool's vdev with another + * + * \param vdev_type The type of the new vdev. Usually either + * VDEV_TYPE_DISK or VDEV_TYPE_FILE + * \param path The file system path to the new vdev + * \param isspare Whether the new vdev is a spare + * + * \return true iff the replacement was successful + */ + bool Replace(const char* vdev_type, const char* path, bool isspare); + + /** + * \brief Which vdev, if any, is replacing ours. + * + * \param zhp Pool handle state from the caller context + * + * \return the vdev that is currently replacing ours, + * or NonexistentVdev if there isn't one. + */ + Vdev BeingReplacedBy(zpool_handle_t *zhp); + + /** + * \brief All CaseFiles being tracked by ZFSD. + */ + static CaseFileList s_activeCases; + + /** + * \brief The file system path to serialized CaseFile data. + */ + static const string s_caseFilePath; + + /** + * \brief The time ZFSD waits before promoting a tentative event + * into a permanent event. + */ + static const timeval s_removeGracePeriod; + + /** + * \brief A list of soft error events counted against the health of + * a vdev. + */ + DevdCtl::EventList m_events; + + /** + * \brief A list of soft error events waiting for a grace period + * expiration before being counted against the health of + * a vdev. + */ + DevdCtl::EventList m_tentativeEvents; + + DevdCtl::Guid m_poolGUID; + DevdCtl::Guid m_vdevGUID; + vdev_state m_vdevState; + string m_poolGUIDString; + string m_vdevGUIDString; + string m_vdevPhysPath; + int m_is_spare; + + /** + * \brief Callout activated when a grace period + */ + Callout m_tentativeTimer; + +private: + nvlist_t *CaseVdev(zpool_handle_t *zhp) const; +}; + +inline DevdCtl::Guid +CaseFile::PoolGUID() const +{ + return (m_poolGUID); +} + +inline DevdCtl::Guid +CaseFile::VdevGUID() const +{ + return (m_vdevGUID); +} + +inline vdev_state +CaseFile::VdevState() const +{ + return (m_vdevState); +} + +inline const string & +CaseFile::PoolGUIDString() const +{ + return (m_poolGUIDString); +} + +inline const string & +CaseFile::VdevGUIDString() const +{ + return (m_vdevGUIDString); +} + +inline const string & +CaseFile::PhysicalPath() const +{ + return (m_vdevPhysPath); +} + +#endif /* _CASE_FILE_H_ */ diff --git a/cmd/zfsd/vdev.cc b/cmd/zfsd/vdev.cc new file mode 100644 index 0000000000..a0e089a5bd --- /dev/null +++ b/cmd/zfsd/vdev.cc @@ -0,0 +1,358 @@ +/*- + * Copyright (c) 2011, 2012, 2013, 2014 Spectra Logic Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. 
Redistributions in binary form must reproduce at minimum a disclaimer + * substantially similar to the "NO WARRANTY" disclaimer below + * ("Disclaimer") and any redistribution must be conditioned upon + * including a substantially similar Disclaimer requirement for further + * binary redistribution. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGES. + * + * Authors: Justin T. Gibbs (Spectra Logic Corporation) + * + * $FreeBSD$ + */ + +/** + * \file vdev.cc + * + * Implementation of the Vdev class. + */ +#include +#include +#include +#include + +#include +/* + * Undefine flush, defined by cpufunc.h on sparc64, because it conflicts with + * C++ flush methods + */ +#undef flush + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "vdev.h" +#include "vdev_iterator.h" +#include "zfsd.h" +#include "zfsd_exception.h" +#include "zpool_list.h" + +__FBSDID("$FreeBSD$"); +/*============================ Namespace Control =============================*/ +using std::string; +using std::stringstream; + +//- Special objects ----------------------------------------------------------- +Vdev NonexistentVdev; + +//- Vdev Inline Public Methods ------------------------------------------------ +/*=========================== Class Implementations ==========================*/ +/*----------------------------------- Vdev -----------------------------------*/ + +/* Special constructor for NonexistentVdev. */ +Vdev::Vdev() + : m_poolConfig(NULL), + m_config(NULL) +{} + +bool +Vdev::VdevLookupPoolGuid() +{ + uint64_t guid; + if (nvlist_lookup_uint64(m_poolConfig, ZPOOL_CONFIG_POOL_GUID, &guid)) + return (false); + m_poolGUID = guid; + return (true); +} + +void +Vdev::VdevLookupGuid() +{ + uint64_t guid; + if (nvlist_lookup_uint64(m_config, ZPOOL_CONFIG_GUID, &guid) != 0) + throw ZfsdException("Unable to extract vdev GUID " + "from vdev config data."); + m_vdevGUID = guid; +} + +Vdev::Vdev(zpool_handle_t *pool, nvlist_t *config) + : m_poolConfig(zpool_get_config(pool, NULL)), + m_config(config) +{ + if (!VdevLookupPoolGuid()) + throw ZfsdException("Can't extract pool GUID from handle."); + VdevLookupGuid(); +} + +Vdev::Vdev(nvlist_t *poolConfig, nvlist_t *config) + : m_poolConfig(poolConfig), + m_config(config) +{ + if (!VdevLookupPoolGuid()) + throw ZfsdException("Can't extract pool GUID from config."); + VdevLookupGuid(); +} + +Vdev::Vdev(nvlist_t *labelConfig) + : m_poolConfig(labelConfig), + m_config(labelConfig) +{ + /* + * Spares do not have a Pool GUID. Tolerate its absence. + * Code accessing this Vdev in a context where the Pool GUID is + * required will find it invalid (as it is upon Vdev construction) + * and act accordingly. 
+	 */
+	(void) VdevLookupPoolGuid();
+	VdevLookupGuid();
+
+	try {
+		m_config = VdevIterator(labelConfig).Find(m_vdevGUID);
+	} catch (const ZfsdException &exp) {
+		/*
+		 * When reading a spare's label, it is normal not to find
+		 * a list of vdevs
+		 */
+		m_config = NULL;
+	}
+}
+
+bool
+Vdev::IsSpare() const
+{
+	uint64_t is_spare(0);
+
+	if (m_config == NULL)
+		return (false);
+
+	(void)nvlist_lookup_uint64(m_config, ZPOOL_CONFIG_IS_SPARE, &is_spare);
+	return (bool(is_spare));
+}
+
+vdev_state
+Vdev::State() const
+{
+	uint64_t *nvlist_array;
+	vdev_stat_t *vs;
+	uint_t vsc;
+
+	if (m_config == NULL) {
+		/*
+		 * If we couldn't find the list of vdevs, that normally means
+		 * that this is an available hotspare.  In that case, we will
+		 * presume it to be healthy.  Even if this spare had formerly
+		 * been in use, been degraded, and been replaced, the act of
+		 * replacement wipes the degraded bit from the label.  So we
+		 * have no choice but to presume that it is healthy.
+		 */
+		return (VDEV_STATE_HEALTHY);
+	}
+
+	if (nvlist_lookup_uint64_array(m_config, ZPOOL_CONFIG_VDEV_STATS,
+	    &nvlist_array, &vsc) == 0) {
+		vs = reinterpret_cast<vdev_stat_t *>(nvlist_array);
+		return (static_cast<vdev_state>(vs->vs_state));
+	}
+
+	/*
+	 * Stats are not available.  This vdev was created from a label.
+	 * Synthesize a state based on available data.
+	 */
+	uint64_t faulted(0);
+	uint64_t degraded(0);
+	(void)nvlist_lookup_uint64(m_config, ZPOOL_CONFIG_FAULTED, &faulted);
+	(void)nvlist_lookup_uint64(m_config, ZPOOL_CONFIG_DEGRADED, &degraded);
+	if (faulted)
+		return (VDEV_STATE_FAULTED);
+	if (degraded)
+		return (VDEV_STATE_DEGRADED);
+	return (VDEV_STATE_HEALTHY);
+}
+
+std::list<Vdev>
+Vdev::Children()
+{
+	nvlist_t **vdevChildren;
+	int result;
+	u_int numChildren;
+	std::list<Vdev> children;
+
+	if (m_poolConfig == NULL || m_config == NULL)
+		return (children);
+
+	result = nvlist_lookup_nvlist_array(m_config,
+	    ZPOOL_CONFIG_CHILDREN, &vdevChildren, &numChildren);
+	if (result != 0)
+		return (children);
+
+	for (u_int c = 0; c < numChildren; c++)
+		children.push_back(Vdev(m_poolConfig, vdevChildren[c]));
+
+	return (children);
+}
+
+Vdev
+Vdev::RootVdev()
+{
+	nvlist_t *rootVdev;
+
+	if (m_poolConfig == NULL)
+		return (NonexistentVdev);
+
+	if (nvlist_lookup_nvlist(m_poolConfig, ZPOOL_CONFIG_VDEV_TREE,
+	    &rootVdev) != 0)
+		return (NonexistentVdev);
+	return (Vdev(m_poolConfig, rootVdev));
+}
+
+/*
+ * Find our parent.  This requires doing a traversal of the config; we can't
+ * cache it as leaf vdevs may change their pool config location (spare,
+ * replacing, mirror, etc).
+ */
+Vdev
+Vdev::Parent()
+{
+	std::list<Vdev> to_examine;
+	std::list<Vdev> children;
+	std::list<Vdev>::iterator children_it;
+
+	to_examine.push_back(RootVdev());
+	for (;;) {
+		if (to_examine.empty())
+			return (NonexistentVdev);
+		Vdev vd = to_examine.front();
+		if (vd.DoesNotExist())
+			return (NonexistentVdev);
+		to_examine.pop_front();
+		children = vd.Children();
+		children_it = children.begin();
+		for (; children_it != children.end(); children_it++) {
+			Vdev child = *children_it;
+
+			if (child.GUID() == GUID())
+				return (vd);
+			to_examine.push_front(child);
+		}
+	}
+}
+
+bool
+Vdev::IsAvailableSpare() const
+{
+	/* If we have a pool guid, we cannot be an available spare.
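+	 * Available (unactivated) spares are constructed from their own
+	 * labels, which carry no pool GUID (see Vdev(nvlist_t *labelConfig)
+	 * above), so a valid pool GUID implies this vdev is not one.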
+	 */
+	if (PoolGUID())
+		return (false);
+
+	return (true);
+}
+
+bool
+Vdev::IsSpare()
+{
+	uint64_t spare;
+	if (nvlist_lookup_uint64(m_config, ZPOOL_CONFIG_IS_SPARE, &spare) != 0)
+		return (false);
+	return (spare != 0);
+}
+
+bool
+Vdev::IsActiveSpare() const
+{
+	vdev_stat_t *vs;
+	uint_t c;
+
+	if (m_poolConfig == NULL)
+		return (false);
+
+	(void) nvlist_lookup_uint64_array(m_config, ZPOOL_CONFIG_VDEV_STATS,
+	    reinterpret_cast<uint64_t **>(&vs), &c);
+	if (vs == NULL || vs->vs_aux != VDEV_AUX_SPARED)
+		return (false);
+	return (true);
+}
+
+bool
+Vdev::IsResilvering() const
+{
+	pool_scan_stat_t *ps = NULL;
+	uint_t c;
+
+	if (State() != VDEV_STATE_HEALTHY)
+		return (false);
+
+	(void) nvlist_lookup_uint64_array(m_config, ZPOOL_CONFIG_SCAN_STATS,
+	    reinterpret_cast<uint64_t **>(&ps), &c);
+	if (ps == NULL || ps->pss_func != POOL_SCAN_RESILVER)
+		return (false);
+	return (true);
+}
+
+string
+Vdev::GUIDString() const
+{
+	stringstream vdevGUIDString;
+
+	vdevGUIDString << GUID();
+	return (vdevGUIDString.str());
+}
+
+string
+Vdev::Name(zpool_handle_t *zhp, bool verbose) const
+{
+	return (zpool_vdev_name(g_zfsHandle, zhp, m_config,
+	    verbose ? B_TRUE : B_FALSE));
+}
+
+string
+Vdev::Path() const
+{
+	const char *path(NULL);
+
+	if ((m_config != NULL)
+	 && (nvlist_lookup_string(m_config, ZPOOL_CONFIG_PATH, &path) == 0))
+		return (path);
+
+	return ("");
+}
+
+string
+Vdev::PhysicalPath() const
+{
+	const char *path(NULL);
+
+	if ((m_config != NULL) && (nvlist_lookup_string(m_config,
+	    ZPOOL_CONFIG_PHYS_PATH, &path) == 0))
+		return (path);
+
+	return ("");
+}
diff --git a/cmd/zfsd/vdev.h b/cmd/zfsd/vdev.h
new file mode 100644
index 0000000000..322efc8f4e
--- /dev/null
+++ b/cmd/zfsd/vdev.h
@@ -0,0 +1,188 @@
+/*-
+ * Copyright (c) 2011, 2012, 2013 Spectra Logic Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions, and the following disclaimer,
+ *    without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ *    substantially similar to the "NO WARRANTY" disclaimer below
+ *    ("Disclaimer") and any redistribution must be conditioned upon
+ *    including a substantially similar Disclaimer requirement for further
+ *    binary redistribution.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ *
+ * Authors: Justin T. Gibbs     (Spectra Logic Corporation)
+ *
+ * $FreeBSD$
+ */
+
+/**
+ * \file vdev.h
+ *
+ * Definition of the Vdev class.
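+ *
+ * An illustrative sketch of typical read-only use follows (``label''
+ * stands in for an nvlist read from a device, e.g. with
+ * zpool_read_label(); note that this constructor may throw
+ * ZfsdException):
+ * \code
+ *	Vdev vdev(label);
+ *	if (vdev.State() != VDEV_STATE_HEALTHY)
+ *		syslog(LOG_NOTICE, "vdev %s is not healthy",
+ *		    vdev.GUIDString().c_str());
+ * \endcode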
+ *
+ * Header requirements:
+ *
+ *    #include
+ *    #include
+ *
+ *    #include
+ */
+#ifndef	_VDEV_H_
+#define	_VDEV_H_
+
+/*=========================== Forward Declarations ===========================*/
+struct zpool_handle;
+typedef struct zpool_handle zpool_handle_t;
+
+struct nvlist;
+typedef struct nvlist nvlist_t;
+
+/*============================= Class Definitions ============================*/
+/*----------------------------------- Vdev -----------------------------------*/
+/**
+ * \brief Wrapper class for a vdev's name/value configuration list
+ *        simplifying access to commonly used vdev attributes.
+ */
+class Vdev
+{
+public:
+	/**
+	 * \brief Instantiate a vdev object for a vdev that is a member
+	 *        of an imported pool.
+	 *
+	 * \param pool        The pool object containing the vdev with
+	 *                    configuration data provided in vdevConfig.
+	 * \param vdevConfig  Vdev configuration data.
+	 *
+	 * This method should be used whenever dealing with vdevs
+	 * enumerated via the ZpoolList class.  The in-core configuration
+	 * data for a vdev does not contain all of the items found in
+	 * the on-disk label.  This requires the vdev class to augment
+	 * the data in vdevConfig with data found in the pool object.
+	 */
+	Vdev(zpool_handle_t *pool, nvlist_t *vdevConfig);
+
+	/**
+	 * \brief Instantiate a vdev object for a vdev that is a member
+	 *        of a pool configuration.
+	 *
+	 * \param poolConfig  The pool configuration containing the vdev
+	 *                    configuration data provided in vdevConfig.
+	 * \param vdevConfig  Vdev configuration data.
+	 *
+	 * This method should be used whenever dealing with vdevs
+	 * enumerated via the ZpoolList class.  The in-core configuration
+	 * data for a vdev does not contain all of the items found in
+	 * the on-disk label.  This requires the vdev class to augment
+	 * the data in vdevConfig with data found in the pool object.
+	 */
+	Vdev(nvlist_t *poolConfig, nvlist_t *vdevConfig);
+
+	/**
+	 * \brief Instantiate a vdev object from a ZFS label stored on
+	 *        the device.
+	 *
+	 * \param vdevConfig  The name/value list retrieved by reading
+	 *                    the label information on a leaf vdev.
+	 */
+	Vdev(nvlist_t *vdevConfig);
+
+	/**
+	 * \brief No-op default constructor for nonexistent vdevs.
+	 */
+	Vdev();
+
+	/**
+	 * \brief No-op virtual destructor, since this class has virtual
+	 *        functions.
+	 */
+	virtual ~Vdev();
+	bool DoesNotExist() const;
+
+	/**
+	 * \brief Return a list of the vdev's children.
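+	 *
+	 * Illustrative only (``vdev'' is any existing Vdev instance):
+	 * \code
+	 *	std::list<Vdev> kids(vdev.Children());
+	 *	syslog(LOG_DEBUG, "vdev has %zu children", kids.size());
+	 * \endcode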
+	 */
+	std::list<Vdev> Children();
+
+	virtual DevdCtl::Guid GUID() const;
+	bool IsSpare() const;
+	virtual DevdCtl::Guid PoolGUID() const;
+	virtual vdev_state State() const;
+	std::string Path() const;
+	virtual std::string PhysicalPath() const;
+	std::string GUIDString() const;
+	nvlist_t *PoolConfig() const;
+	nvlist_t *Config() const;
+	Vdev Parent();
+	Vdev RootVdev();
+	std::string Name(zpool_handle_t *, bool verbose) const;
+	bool IsSpare();
+	bool IsAvailableSpare() const;
+	bool IsActiveSpare() const;
+	bool IsResilvering() const;
+
+private:
+	void VdevLookupGuid();
+	bool VdevLookupPoolGuid();
+	DevdCtl::Guid	m_poolGUID;
+	DevdCtl::Guid	m_vdevGUID;
+	nvlist_t	*m_poolConfig;
+	nvlist_t	*m_config;
+};
+
+//- Special objects -----------------------------------------------------------
+extern Vdev NonexistentVdev;
+
+//- Vdev Inline Public Methods ------------------------------------------------
+inline Vdev::~Vdev()
+{
+}
+
+inline DevdCtl::Guid
+Vdev::PoolGUID() const
+{
+	return (m_poolGUID);
+}
+
+inline DevdCtl::Guid
+Vdev::GUID() const
+{
+	return (m_vdevGUID);
+}
+
+inline nvlist_t *
+Vdev::PoolConfig() const
+{
+	return (m_poolConfig);
+}
+
+inline nvlist_t *
+Vdev::Config() const
+{
+	return (m_config);
+}
+
+inline bool
+Vdev::DoesNotExist() const
+{
+	return (m_config == NULL);
+}
+
+#endif /* _VDEV_H_ */
diff --git a/cmd/zfsd/vdev_iterator.cc b/cmd/zfsd/vdev_iterator.cc
new file mode 100644
index 0000000000..c23399ed68
--- /dev/null
+++ b/cmd/zfsd/vdev_iterator.cc
@@ -0,0 +1,172 @@
+/*-
+ * Copyright (c) 2011, 2012, 2013 Spectra Logic Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions, and the following disclaimer,
+ *    without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ *    substantially similar to the "NO WARRANTY" disclaimer below
+ *    ("Disclaimer") and any redistribution must be conditioned upon
+ *    including a substantially similar Disclaimer requirement for further
+ *    binary redistribution.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ *
+ * Authors: Justin T. Gibbs     (Spectra Logic Corporation)
+ *
+ * $FreeBSD$
+ */
+
+/**
+ * \file vdev_iterator.cc
+ *
+ * Implementation of the VdevIterator class.
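+ *
+ * A minimal traversal sketch (illustrative only; ``zhp'' stands in for
+ * an imported pool handle obtained elsewhere):
+ * \code
+ *	VdevIterator iter(zhp);
+ *	nvlist_t *cfg;
+ *
+ *	while ((cfg = iter.Next()) != NULL) {
+ *		// cfg is the config nvlist of one leaf, cache, or
+ *		// spare vdev, visited in depth-first order.
+ *	}
+ * \endcode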
+ */ +#include +#include +#include + +#include +#include + +#include + +#include +#include + +#include +#include + +#include "vdev.h" +#include "vdev_iterator.h" +#include "zfsd_exception.h" + +/*============================ Namespace Control =============================*/ +using DevdCtl::Guid; + +/*=========================== Class Implementations ==========================*/ +/*------------------------------- VdevIterator -------------------------------*/ +VdevIterator::VdevIterator(zpool_handle_t *pool) + : m_poolConfig(zpool_get_config(pool, NULL)) +{ + Reset(); +} + +VdevIterator::VdevIterator(nvlist_t *poolConfig) + : m_poolConfig(poolConfig) +{ + Reset(); +} + +void +VdevIterator::Reset() +{ + nvlist_t *rootVdev; + nvlist **cache_child; + nvlist **spare_child; + int result; + uint_t cache_children; + uint_t spare_children; + + result = nvlist_lookup_nvlist(m_poolConfig, + ZPOOL_CONFIG_VDEV_TREE, + &rootVdev); + if (result != 0) + throw ZfsdException(m_poolConfig, "Unable to extract " + "ZPOOL_CONFIG_VDEV_TREE from pool."); + m_vdevQueue.assign(1, rootVdev); + result = nvlist_lookup_nvlist_array(rootVdev, + ZPOOL_CONFIG_L2CACHE, + &cache_child, + &cache_children); + if (result == 0) + for (uint_t c = 0; c < cache_children; c++) + m_vdevQueue.push_back(cache_child[c]); + result = nvlist_lookup_nvlist_array(rootVdev, + ZPOOL_CONFIG_SPARES, + &spare_child, + &spare_children); + if (result == 0) + for (uint_t c = 0; c < spare_children; c++) + m_vdevQueue.push_back(spare_child[c]); +} + +nvlist_t * +VdevIterator::Next() +{ + nvlist_t *vdevConfig; + + if (m_vdevQueue.empty()) + return (NULL); + + for (;;) { + nvlist_t **vdevChildren; + int result; + u_int numChildren; + + vdevConfig = m_vdevQueue.front(); + m_vdevQueue.pop_front(); + + /* Expand non-leaf vdevs. */ + result = nvlist_lookup_nvlist_array(vdevConfig, + ZPOOL_CONFIG_CHILDREN, + &vdevChildren, &numChildren); + if (result != 0) { + /* leaf vdev */ + break; + } + + /* + * Insert children at the head of the queue to effect a + * depth first traversal of the tree. + */ + m_vdevQueue.insert(m_vdevQueue.begin(), vdevChildren, + vdevChildren + numChildren); + } + + return (vdevConfig); +} + +void +VdevIterator::Each(VdevCallback_t *callBack, void *callBackArg) +{ + nvlist_t *vdevConfig; + + Reset(); + while ((vdevConfig = Next()) != NULL) { + Vdev vdev(m_poolConfig, vdevConfig); + + if (callBack(vdev, callBackArg)) + break; + } +} + +nvlist_t * +VdevIterator::Find(Guid vdevGUID) +{ + nvlist_t *vdevConfig; + + Reset(); + while ((vdevConfig = Next()) != NULL) { + Vdev vdev(m_poolConfig, vdevConfig); + + if (vdev.GUID() == vdevGUID) + return (vdevConfig); + } + return (NULL); +} diff --git a/cmd/zfsd/vdev_iterator.h b/cmd/zfsd/vdev_iterator.h new file mode 100644 index 0000000000..435582ec1f --- /dev/null +++ b/cmd/zfsd/vdev_iterator.h @@ -0,0 +1,123 @@ +/*- + * Copyright (c) 2011, 2012, 2013 Spectra Logic Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. 
Redistributions in binary form must reproduce at minimum a disclaimer
+ *    substantially similar to the "NO WARRANTY" disclaimer below
+ *    ("Disclaimer") and any redistribution must be conditioned upon
+ *    including a substantially similar Disclaimer requirement for further
+ *    binary redistribution.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ *
+ * Authors: Justin T. Gibbs     (Spectra Logic Corporation)
+ *
+ * $FreeBSD$
+ */
+
+/**
+ * \file vdev_iterator.h
+ *
+ * VdevIterator class definition.
+ *
+ * Header requirements:
+ *
+ *    #include
+ */
+#ifndef	_VDEV_ITERATOR_H_
+#define	_VDEV_ITERATOR_H_
+
+/*=========================== Forward Declarations ===========================*/
+struct zpool_handle;
+typedef struct zpool_handle zpool_handle_t;
+
+struct nvlist;
+typedef struct nvlist nvlist_t;
+
+class Vdev;
+
+/*============================= Class Definitions ============================*/
+/*------------------------------- VdevIterator -------------------------------*/
+typedef bool VdevCallback_t(Vdev &vdev, void *cbArg);
+
+/**
+ * \brief VdevIterator provides mechanisms for traversing and searching
+ *        the leaf vdevs contained in a ZFS pool configuration.
+ */
+class VdevIterator
+{
+public:
+	/**
+	 * \brief Instantiate a VdevIterator for the given ZFS pool.
+	 *
+	 * \param pool  The ZFS pool to traverse/search.
+	 */
+	VdevIterator(zpool_handle_t *pool);
+
+	/**
+	 * \brief Instantiate a VdevIterator for the given ZFS pool.
+	 *
+	 * \param poolConfig  The configuration data for the ZFS pool
+	 *                    to traverse/search.
+	 */
+	VdevIterator(nvlist_t *poolConfig);
+
+	/**
+	 * \brief Reset this iterator's cursor so that Next() will
+	 *        report the first member of the pool.
+	 */
+	void Reset();
+
+	/**
+	 * \brief Report the leaf vdev at this iterator's cursor and increment
+	 *        the cursor to the next leaf pool member.
+	 */
+	nvlist_t *Next();
+
+	/**
+	 * \brief Traverse the entire pool configuration starting at its
+	 *        first member, returning the vdev with the given
+	 *        vdev GUID if found.
+	 *
+	 * \param vdevGUID  The vdev GUID of the vdev object to find.
+	 *
+	 * \return  The configuration data (nvlist) for the matching vdev
+	 *          if found.  Otherwise NULL.
+	 *
+	 * Upon return, the VdevIterator's cursor points to the vdev just
+	 * past the returned vdev or end() if no matching vdev is found.
+	 */
+	nvlist_t *Find(DevdCtl::Guid vdevGUID);
+
+	/**
+	 * \brief Perform the specified operation on each leaf member of
+	 *        a pool's vdev membership.
+	 *
+	 * \param cb     Callback function to execute for each member.
+	 * \param cbArg  Argument to pass to cb.
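+	 *
+	 * Example callback (an illustrative sketch only; the function name
+	 * CountLeaves and the pool handle ``zhp'' are hypothetical): count
+	 * the leaf vdevs in a pool.
+	 * \code
+	 *	static bool
+	 *	CountLeaves(Vdev &vdev, void *cbArg)
+	 *	{
+	 *		(*static_cast<u_int *>(cbArg))++;
+	 *		return (false);	// false: continue iterating
+	 *	}
+	 *
+	 *	u_int count(0);
+	 *	VdevIterator(zhp).Each(CountLeaves, &count);
+	 * \endcode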
+	 */
+	void Each(VdevCallback_t *cb, void *cbArg);
+
+private:
+	nvlist_t		*m_poolConfig;
+	std::list<nvlist_t *>	 m_vdevQueue;
+};
+
+#endif /* _VDEV_ITERATOR_H_ */
diff --git a/cmd/zfsd/zfsd.8 b/cmd/zfsd/zfsd.8
new file mode 100644
index 0000000000..ce7550b891
--- /dev/null
+++ b/cmd/zfsd/zfsd.8
@@ -0,0 +1,154 @@
+.\"-
+.\" Copyright (c) 2016 Allan Jude
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd April 18, 2020
+.Dt ZFSD 8
+.Os
+.Sh NAME
+.Nm zfsd
+.Nd ZFS fault management daemon
+.Sh SYNOPSIS
+.Nm
+.Op Fl d
+.Sh DESCRIPTION
+.Nm
+attempts to resolve ZFS faults that the kernel can't resolve by itself.
+It listens to
+.Xr devctl 4
+events, which are how the kernel notifies userland of events such as I/O
+errors and disk removals.
+.Nm
+attempts to resolve these faults by activating or deactivating hot spares
+and onlining offline vdevs.
+.Pp
+The following options are available:
+.Bl -tag -width indent
+.It Fl d
+Run in the foreground instead of daemonizing.
+.El
+.Pp
+System administrators never interact with
+.Nm
+directly.
+Instead, they control its behavior indirectly through zpool configuration.
+There are two ways to influence
+.Nm :
+assigning hotspares and setting pool properties.
+Currently, only the
+.Em autoreplace
+property has any effect.
+See
+.Xr zpool 8
+for details.
+.Pp
+.Nm
+will attempt to resolve the following types of fault:
+.Bl -tag -width a
+.It device removal
+When a leaf vdev disappears,
+.Nm
+will activate any available hotspare.
+.It device arrival
+When a new GEOM device appears,
+.Nm
+will attempt to read its ZFS label, if any.
+If it matches a previously removed vdev on an active pool,
+.Nm
+will online it.
+Once resilvering completes, any active hotspare will detach automatically.
+.Pp
+If the new device has no ZFS label but its physical path matches the
+physical path of a previously removed vdev on an active pool, and that
+pool has the autoreplace property set, then
+.Nm
+will replace the missing vdev with the newly arrived device.
+Once resilvering completes, any active hotspare will detach automatically.
+.It vdev degrade or fault events
+If a vdev becomes degraded or faulted,
+.Nm
+will activate any available hotspare.
+.It I/O errors
+If a leaf vdev generates more than 50 I/O errors in a 60 second period, then
+.Nm
+will mark that vdev as
+.Em FAULTED .
+ZFS will no longer issue any I/Os to it.
+.Nm
+will activate a hotspare if one is available.
+.It Checksum errors
+If a leaf vdev generates more than 50 checksum errors in a 60 second
+period, then
+.Nm
+will mark that vdev as
+.Em DEGRADED .
+ZFS will still use it, but
+.Nm
+will activate a spare anyway.
+.It Spare addition
+If the system administrator adds a hotspare to a pool that is already degraded,
+.Nm
+will activate the spare.
+.It Resilver complete
+.Nm
+will detach any hotspare once a permanent replacement finishes resilvering.
+.It Physical path change
+If the physical path of an existing disk changes,
+.Nm
+will attempt to replace any missing disk with the same physical path,
+if its pool's autoreplace property is set.
+.El
+.Pp
+.Nm
+will log interesting events and its actions to syslog with facility
+.Em daemon
+and identity
+.Em zfsd .
+.Sh FILES
+.Bl -tag -width a -compact
+.It Pa /var/db/zfsd/cases
+When
+.Nm
+exits, it serializes any unresolved casefiles here,
+then reads them back in when next it starts up.
+.El
+.Sh SEE ALSO
+.Xr devctl 4 ,
+.Xr zpool 8
+.Sh HISTORY
+.Nm
+first appeared in
+.Fx 11.0 .
+.Sh AUTHORS
+.Nm
+was originally written by
+.An Justin Gibbs Aq Mt gibbs@FreeBSD.org
+and
+.An Alan Somers Aq Mt asomers@FreeBSD.org
+.Sh TODO
+In the future,
+.Nm
+should be able to resume a pool that became suspended due to device
+removals, if enough missing devices have returned.
diff --git a/cmd/zfsd/zfsd.cc b/cmd/zfsd/zfsd.cc
new file mode 100644
index 0000000000..29c6b1ae22
--- /dev/null
+++ b/cmd/zfsd/zfsd.cc
@@ -0,0 +1,449 @@
+/*-
+ * Copyright (c) 2011, 2012, 2013, 2014, 2015, 2016 Spectra Logic Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions, and the following disclaimer,
+ *    without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ *    substantially similar to the "NO WARRANTY" disclaimer below
+ *    ("Disclaimer") and any redistribution must be conditioned upon
+ *    including a substantially similar Disclaimer requirement for further
+ *    binary redistribution.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ *
+ * Authors: Justin T. Gibbs     (Spectra Logic Corporation)
+ */
+
+/**
+ * \file zfsd.cc
+ *
+ * The ZFS daemon consumes kernel devdctl(4) event data via devd(8)'s
+ * unix domain socket in order to react to system changes that impact
+ * the function of ZFS storage pools.
The goal of this daemon is to + * provide similar functionality to the Solaris ZFS Diagnostic Engine + * (zfs-diagnosis), the Solaris ZFS fault handler (zfs-retire), and + * the Solaris ZFS vdev insertion agent (zfs-mod sysevent handler). + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "callout.h" +#include "vdev_iterator.h" +#include "zfsd_event.h" +#include "case_file.h" +#include "vdev.h" +#include "vdev_iterator.h" +#include "zfsd.h" +#include "zfsd_exception.h" +#include "zpool_list.h" + +__FBSDID("$FreeBSD$"); + +/*================================== Macros ==================================*/ +#define NUM_ELEMENTS(x) (sizeof(x) / sizeof(*x)) + +/*============================ Namespace Control =============================*/ +using DevdCtl::Event; +using DevdCtl::EventFactory; +using DevdCtl::EventList; + +/*================================ Global Data ===============================*/ +int g_debug = 0; +libzfs_handle_t *g_zfsHandle; + +/*--------------------------------- ZfsDaemon --------------------------------*/ +//- ZfsDaemon Static Private Data ---------------------------------------------- +ZfsDaemon *ZfsDaemon::s_theZfsDaemon; +bool ZfsDaemon::s_logCaseFiles; +bool ZfsDaemon::s_terminateEventLoop; +char ZfsDaemon::s_pidFilePath[] = "/var/run/zfsd.pid"; +pidfh *ZfsDaemon::s_pidFH; +int ZfsDaemon::s_signalPipeFD[2]; +bool ZfsDaemon::s_systemRescanRequested(false); +EventFactory::Record ZfsDaemon::s_registryEntries[] = +{ + { Event::NOTIFY, "GEOM", &GeomEvent::Builder }, + { Event::NOTIFY, "ZFS", &ZfsEvent::Builder } +}; + +//- ZfsDaemon Static Public Methods -------------------------------------------- +ZfsDaemon & +ZfsDaemon::Get() +{ + return (*s_theZfsDaemon); +} + +void +ZfsDaemon::WakeEventLoop() +{ + write(s_signalPipeFD[1], "+", 1); +} + +void +ZfsDaemon::RequestSystemRescan() +{ + s_systemRescanRequested = true; + ZfsDaemon::WakeEventLoop(); +} + +void +ZfsDaemon::Run() +{ + ZfsDaemon daemon; + + while (s_terminateEventLoop == false) { + + try { + daemon.DisconnectFromDevd(); + + if (daemon.ConnectToDevd() == false) { + sleep(30); + continue; + } + + daemon.DetectMissedEvents(); + + daemon.EventLoop(); + + } catch (const DevdCtl::Exception &exp) { + exp.Log(); + } + } + + daemon.DisconnectFromDevd(); +} + +//- ZfsDaemon Private Methods -------------------------------------------------- +ZfsDaemon::ZfsDaemon() + : Consumer(/*defBuilder*/NULL, s_registryEntries, + NUM_ELEMENTS(s_registryEntries)) +{ + if (s_theZfsDaemon != NULL) + errx(1, "Multiple ZfsDaemon instances created. Exiting"); + + s_theZfsDaemon = this; + + if (pipe(s_signalPipeFD) != 0) + errx(1, "Unable to allocate signal pipe. Exiting"); + + if (fcntl(s_signalPipeFD[0], F_SETFL, O_NONBLOCK) == -1) + errx(1, "Unable to set pipe as non-blocking. Exiting"); + + if (fcntl(s_signalPipeFD[1], F_SETFL, O_NONBLOCK) == -1) + errx(1, "Unable to set pipe as non-blocking. Exiting"); + + signal(SIGHUP, ZfsDaemon::RescanSignalHandler); + signal(SIGINFO, ZfsDaemon::InfoSignalHandler); + signal(SIGINT, ZfsDaemon::QuitSignalHandler); + signal(SIGTERM, ZfsDaemon::QuitSignalHandler); + signal(SIGUSR1, ZfsDaemon::RescanSignalHandler); + + g_zfsHandle = libzfs_init(); + if (g_zfsHandle == NULL) + errx(1, "Unable to initialize ZFS library. 
Exiting"); + + Callout::Init(); + InitializeSyslog(); + OpenPIDFile(); + + if (g_debug == 0) + daemon(0, 0); + + UpdatePIDFile(); +} + +ZfsDaemon::~ZfsDaemon() +{ + PurgeCaseFiles(); + ClosePIDFile(); +} + +void +ZfsDaemon::PurgeCaseFiles() +{ + CaseFile::PurgeAll(); +} + +bool +ZfsDaemon::VdevAddCaseFile(Vdev &vdev, void *cbArg) +{ + if (vdev.State() != VDEV_STATE_HEALTHY) + CaseFile::Create(vdev); + + return (/*break early*/false); +} + +void +ZfsDaemon::BuildCaseFiles() +{ + ZpoolList zpl; + ZpoolList::iterator pool; + + /* Add CaseFiles for vdevs with issues. */ + for (pool = zpl.begin(); pool != zpl.end(); pool++) + VdevIterator(*pool).Each(VdevAddCaseFile, NULL); + + /* De-serialize any saved cases. */ + CaseFile::DeSerialize(); + + /* Simulate config_sync events to force CaseFile reevaluation */ + for (pool = zpl.begin(); pool != zpl.end(); pool++) { + char evString[160]; + Event *event; + nvlist_t *config; + uint64_t poolGUID; + const char *poolname; + + poolname = zpool_get_name(*pool); + config = zpool_get_config(*pool, NULL); + if (config == NULL) { + syslog(LOG_ERR, "ZFSDaemon::BuildCaseFiles: Could not " + "find pool config for pool %s", poolname); + continue; + } + if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, + &poolGUID) != 0) { + syslog(LOG_ERR, "ZFSDaemon::BuildCaseFiles: Could not " + "find pool guid for pool %s", poolname); + continue; + } + + + snprintf(evString, 160, "!system=ZFS subsystem=ZFS " + "type=misc.fs.zfs.config_sync sub_type=synthesized " + "pool_name=%s pool_guid=%" PRIu64 "\n", poolname, poolGUID); + event = Event::CreateEvent(GetFactory(), string(evString)); + if (event != NULL) { + event->Process(); + delete event; + } + } +} + +void +ZfsDaemon::RescanSystem() +{ + struct gmesh mesh; + struct gclass *mp; + struct ggeom *gp; + struct gprovider *pp; + int result; + + /* + * The devdctl system doesn't replay events for new consumers + * of the interface. Emit manufactured DEVFS arrival events + * for any devices that already before we started or during + * periods where we've lost our connection to devd. + */ + result = geom_gettree(&mesh); + if (result != 0) { + syslog(LOG_ERR, "ZfsDaemon::RescanSystem: " + "geom_gettree failed with error %d\n", result); + return; + } + + const string evStart("!system=DEVFS subsystem=CDEV type=CREATE " + "sub_type=synthesized cdev="); + LIST_FOREACH(mp, &mesh.lg_class, lg_class) { + LIST_FOREACH(gp, &mp->lg_geom, lg_geom) { + LIST_FOREACH(pp, &gp->lg_provider, lg_provider) { + Event *event; + + string evString(evStart + pp->lg_name + "\n"); + event = Event::CreateEvent(GetFactory(), + evString); + if (event != NULL) { + if (event->Process()) + SaveEvent(*event); + delete event; + } + } + } + } + geom_deletetree(&mesh); +} + +void +ZfsDaemon::DetectMissedEvents() +{ + do { + PurgeCaseFiles(); + + /* + * Discard any events waiting for us. We don't know + * if they still apply to the current state of the + * system. + */ + FlushEvents(); + + BuildCaseFiles(); + + /* + * If the system state has changed during our + * interrogation, start over. + */ + } while (s_terminateEventLoop == false && EventsPending()); + + RescanSystem(); +} + +void +ZfsDaemon::EventLoop() +{ + while (s_terminateEventLoop == false) { + struct pollfd fds[2]; + int result; + + if (s_logCaseFiles == true) { + EventList::iterator event(m_unconsumedEvents.begin()); + s_logCaseFiles = false; + CaseFile::LogAll(); + while (event != m_unconsumedEvents.end()) + (*event++)->Log(LOG_INFO); + } + + Callout::ExpireCallouts(); + + /* Wait for data. 
+		 */
+		fds[0].fd      = m_devdSockFD;
+		fds[0].events  = POLLIN;
+		fds[0].revents = 0;
+		fds[1].fd      = s_signalPipeFD[0];
+		fds[1].events  = POLLIN;
+		fds[1].revents = 0;
+		result = poll(fds, NUM_ELEMENTS(fds), /*timeout*/INFTIM);
+		if (result == -1) {
+			if (errno == EINTR)
+				continue;
+			else
+				err(1, "Polling for devd events failed");
+		} else if (result == 0) {
+			errx(1, "Unexpected result of 0 from poll. Exiting");
+		}
+
+		if ((fds[0].revents & POLLIN) != 0)
+			ProcessEvents();
+
+		if ((fds[1].revents & POLLIN) != 0) {
+			static char discardBuf[128];
+
+			/*
+			 * This pipe exists just to close the signal
+			 * race.  Its contents are of no interest to
+			 * us, but we must ensure that future signals
+			 * have space in the pipe to write.
+			 */
+			while (read(s_signalPipeFD[0], discardBuf,
+			    sizeof(discardBuf)) > 0)
+				;
+		}
+
+		if (s_systemRescanRequested == true) {
+			s_systemRescanRequested = false;
+			syslog(LOG_INFO, "System Rescan request processed.");
+			RescanSystem();
+		}
+
+		if ((fds[0].revents & POLLERR) != 0) {
+			syslog(LOG_INFO, "POLLERROR detected on devd socket.");
+			break;
+		}
+
+		if ((fds[0].revents & POLLHUP) != 0) {
+			syslog(LOG_INFO, "POLLHUP detected on devd socket.");
+			break;
+		}
+	}
+}
+//- ZfsDaemon Static Private Methods -------------------------------------------
+void
+ZfsDaemon::InfoSignalHandler(int)
+{
+	s_logCaseFiles = true;
+	ZfsDaemon::WakeEventLoop();
+}
+
+void
+ZfsDaemon::RescanSignalHandler(int)
+{
+	RequestSystemRescan();
+}
+
+void
+ZfsDaemon::QuitSignalHandler(int)
+{
+	s_terminateEventLoop = true;
+	ZfsDaemon::WakeEventLoop();
+}
+
+void
+ZfsDaemon::OpenPIDFile()
+{
+	pid_t otherPID;
+
+	s_pidFH = pidfile_open(s_pidFilePath, 0600, &otherPID);
+	if (s_pidFH == NULL) {
+		if (errno == EEXIST)
+			errx(1, "already running as PID %d. Exiting", otherPID);
+		warn("cannot open PID file");
+	}
+}
+
+void
+ZfsDaemon::UpdatePIDFile()
+{
+	if (s_pidFH != NULL)
+		pidfile_write(s_pidFH);
+}
+
+void
+ZfsDaemon::ClosePIDFile()
+{
+	if (s_pidFH != NULL)
+		pidfile_remove(s_pidFH);
+}
+
+void
+ZfsDaemon::InitializeSyslog()
+{
+	openlog("zfsd", LOG_NDELAY, LOG_DAEMON);
+}
+
diff --git a/cmd/zfsd/zfsd.h b/cmd/zfsd/zfsd.h
new file mode 100644
index 0000000000..7b4019c7ae
--- /dev/null
+++ b/cmd/zfsd/zfsd.h
@@ -0,0 +1,228 @@
+/*-
+ * Copyright (c) 2011, 2012, 2013, 2014 Spectra Logic Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions, and the following disclaimer,
+ *    without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ *    substantially similar to the "NO WARRANTY" disclaimer below
+ *    ("Disclaimer") and any redistribution must be conditioned upon
+ *    including a substantially similar Disclaimer requirement for further
+ *    binary redistribution.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ *
+ * Authors: Justin T. Gibbs     (Spectra Logic Corporation)
+ *
+ * $FreeBSD$
+ */
+
+/**
+ * \file zfsd.h
+ *
+ * Class definitions and supporting data structures for the ZFS fault
+ * management daemon.
+ *
+ * Header requirements:
+ *
+ *    #include
+ *
+ *    #include
+ *
+ *    #include
+ *    #include
+ *    #include
+ *
+ *    #include
+ *    #include
+ *    #include
+ *    #include
+ *
+ *    #include "vdev_iterator.h"
+ */
+#ifndef	_ZFSD_H_
+#define	_ZFSD_H_
+
+/*=========================== Forward Declarations ===========================*/
+struct pidfh;
+
+struct zpool_handle;
+typedef struct zpool_handle zpool_handle_t;
+
+struct zfs_handle;
+typedef struct libzfs_handle libzfs_handle_t;
+
+struct nvlist;
+typedef struct nvlist nvlist_t;
+
+typedef int LeafIterFunc(zpool_handle_t *, nvlist_t *, void *);
+
+/*================================ Global Data ===============================*/
+extern int		 g_debug;
+extern libzfs_handle_t	*g_zfsHandle;
+
+/*============================= Class Definitions ============================*/
+/*--------------------------------- ZfsDaemon --------------------------------*/
+/**
+ * Static singleton orchestrating the operations of the ZFS daemon program.
+ */
+class ZfsDaemon : public DevdCtl::Consumer
+{
+public:
+	/** Return the ZfsDaemon singleton. */
+	static ZfsDaemon &Get();
+
+	/**
+	 * Used by signal handlers to ensure, in a race free way, that
+	 * the event loop will perform at least one more full loop
+	 * before sleeping again.
+	 */
+	static void WakeEventLoop();
+
+	/**
+	 * Schedules a rescan of devices in the system for potential
+	 * candidates to replace a missing vdev.  The scan is performed
+	 * during the next run of the event loop.
+	 */
+	static void RequestSystemRescan();
+
+	/** Daemonize and perform all functions of the ZFS daemon. */
+	static void Run();
+
+private:
+	ZfsDaemon();
+	~ZfsDaemon();
+
+	static VdevCallback_t VdevAddCaseFile;
+
+	/** Purge our cache of outstanding ZFS issues in the system. */
+	void PurgeCaseFiles();
+
+	/** Build a cache of outstanding ZFS issues in the system. */
+	void BuildCaseFiles();
+
+	/**
+	 * Iterate over all known issues and attempt to solve them
+	 * given resources currently available in the system.
+	 */
+	void RescanSystem();
+
+	/**
+	 * Interrogate the system looking for previously unknown
+	 * faults that occurred either before ZFSD was started,
+	 * or during a period of lost communication with Devd.
+	 */
+	void DetectMissedEvents();
+
+	/**
+	 * Wait for and process event source activity.
+	 */
+	void EventLoop();
+
+	/**
+	 * Signal handler for which our response is to
+	 * log the current state of the daemon.
+	 *
+	 * \param sigNum  The signal caught.
+	 */
+	static void InfoSignalHandler(int sigNum);
+
+	/**
+	 * Signal handler for which our response is to
+	 * request a case rescan.
+	 *
+	 * \param sigNum  The signal caught.
+	 */
+	static void RescanSignalHandler(int sigNum);
+
+	/**
+	 * Signal handler for which our response is to
+	 * gracefully terminate.
+	 *
+	 * \param sigNum  The signal caught.
+	 */
+	static void QuitSignalHandler(int sigNum);
+
+	/**
+	 * Open and lock our PID file.
+	 */
+	static void OpenPIDFile();
+
+	/**
+	 * Update our PID file with our PID.
+	 */
+	static void UpdatePIDFile();
+
+	/**
+	 * Close and release the lock on our PID file.
+	 */
+	static void ClosePIDFile();
+
+	/**
+	 * Perform syslog configuration.
+	 */
+	static void InitializeSyslog();
+
+	static ZfsDaemon *s_theZfsDaemon;
+
+	/**
+	 * Set to true when our program is signaled to
+	 * log the current state of its case files.
+	 */
+	static bool s_logCaseFiles;
+
+	/**
+	 * Set to true when our program is signaled to
+	 * gracefully exit.
+	 */
+	static bool s_terminateEventLoop;
+
+	/**
+	 * The canonical path and file name of zfsd's PID file.
+	 */
+	static char s_pidFilePath[];
+
+	/**
+	 * Control structure for PIDFILE(3) API.
+	 */
+	static pidfh *s_pidFH;
+
+	/**
+	 * Pipe file descriptors used to close races with our
+	 * signal handlers.
+	 */
+	static int s_signalPipeFD[2];
+
+	/**
+	 * Flag controlling a rescan from ZFSD's event loop of all
+	 * GEOM providers in the system to find candidates for solving
+	 * cases.
+	 */
+	static bool s_systemRescanRequested;
+
+	/**
+	 * Flag controlling whether events can be queued.  This boolean
+	 * is set during event replay to ensure that events for pools or
+	 * devices no longer in the system are not retained forever.
	 */
+	static bool s_consumingEvents;
+
+	static DevdCtl::EventFactory::Record s_registryEntries[];
+};
+
+#endif /* _ZFSD_H_ */
diff --git a/cmd/zfsd/zfsd_event.cc b/cmd/zfsd/zfsd_event.cc
new file mode 100644
index 0000000000..45fc7adbb3
--- /dev/null
+++ b/cmd/zfsd/zfsd_event.cc
@@ -0,0 +1,488 @@
+/*-
+ * Copyright (c) 2011, 2012, 2013, 2014, 2016 Spectra Logic Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions, and the following disclaimer,
+ *    without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ *    substantially similar to the "NO WARRANTY" disclaimer below
+ *    ("Disclaimer") and any redistribution must be conditioned upon
+ *    including a substantially similar Disclaimer requirement for further
+ *    binary redistribution.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ *
+ * Authors: Justin T.
Gibbs (Spectra Logic Corporation) + */ + +/** + * \file zfsd_event.cc + */ +#include +#include +#include +#include +#include + +#include + +#include +#include +/* + * Undefine flush, defined by cpufunc.h on sparc64, because it conflicts with + * C++ flush methods + */ +#undef flush +#undef __init +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "callout.h" +#include "vdev_iterator.h" +#include "zfsd_event.h" +#include "case_file.h" +#include "vdev.h" +#include "zfsd.h" +#include "zfsd_exception.h" +#include "zpool_list.h" + +__FBSDID("$FreeBSD$"); +/*============================ Namespace Control =============================*/ +using DevdCtl::Event; +using DevdCtl::Guid; +using DevdCtl::NVPairMap; +using std::stringstream; + +/*=========================== Class Implementations ==========================*/ + +/*-------------------------------- GeomEvent --------------------------------*/ + +//- GeomEvent Static Public Methods ------------------------------------------- +Event * +GeomEvent::Builder(Event::Type type, + NVPairMap &nvPairs, + const string &eventString) +{ + return (new GeomEvent(type, nvPairs, eventString)); +} + +//- GeomEvent Virtual Public Methods ------------------------------------------ +Event * +GeomEvent::DeepCopy() const +{ + return (new GeomEvent(*this)); +} + +bool +GeomEvent::Process() const +{ + /* + * We only use GEOM events to repair damaged pools. So return early if + * there are no damaged pools + */ + if (CaseFile::Empty()) + return (false); + + /* + * We are only concerned with arrivals and physical path changes, + * because those can be used to satisfy online and autoreplace + * operations + */ + if (Value("type") != "GEOM::physpath" && Value("type") != "CREATE") + return (false); + + /* Log the event since it is of interest. */ + Log(LOG_INFO); + + string devPath; + if (!DevPath(devPath)) + return (false); + + int devFd(open(devPath.c_str(), O_RDONLY)); + if (devFd == -1) + return (false); + + bool inUse; + bool degraded; + nvlist_t *devLabel(ReadLabel(devFd, inUse, degraded)); + + string physPath; + bool havePhysPath(PhysicalPath(physPath)); + + string devName; + DevName(devName); + close(devFd); + + if (inUse && devLabel != NULL) { + OnlineByLabel(devPath, physPath, devLabel); + } else if (degraded) { + syslog(LOG_INFO, "%s is marked degraded. 
Ignoring " + "as a replace by physical path candidate.\n", + devName.c_str()); + } else if (havePhysPath) { + /* + * TODO: attempt to resolve events using every casefile + * that matches this physpath + */ + CaseFile *caseFile(CaseFile::Find(physPath)); + if (caseFile != NULL) { + syslog(LOG_INFO, + "Found CaseFile(%s:%s:%s) - ReEvaluating\n", + caseFile->PoolGUIDString().c_str(), + caseFile->VdevGUIDString().c_str(), + zpool_state_to_name(caseFile->VdevState(), + VDEV_AUX_NONE)); + caseFile->ReEvaluate(devPath, physPath, /*vdev*/NULL); + } + } + return (false); +} + +//- GeomEvent Protected Methods ----------------------------------------------- +GeomEvent::GeomEvent(Event::Type type, NVPairMap &nvpairs, + const string &eventString) + : DevdCtl::GeomEvent(type, nvpairs, eventString) +{ +} + +GeomEvent::GeomEvent(const GeomEvent &src) + : DevdCtl::GeomEvent::GeomEvent(src) +{ +} + +nvlist_t * +GeomEvent::ReadLabel(int devFd, bool &inUse, bool °raded) +{ + pool_state_t poolState; + char *poolName; + boolean_t b_inuse; + int nlabels; + + inUse = false; + degraded = false; + poolName = NULL; + if (zpool_in_use(g_zfsHandle, devFd, &poolState, + &poolName, &b_inuse) == 0) { + nvlist_t *devLabel = NULL; + + inUse = b_inuse == B_TRUE; + if (poolName != NULL) + free(poolName); + + if (zpool_read_label(devFd, &devLabel, &nlabels) != 0) + return (NULL); + /* + * If we find a disk with fewer than the maximum number of + * labels, it might be the whole disk of a partitioned disk + * where ZFS resides on a partition. In that case, we should do + * nothing and wait for the partition to appear. Or, the disk + * might be damaged. In that case, zfsd should do nothing and + * wait for the sysadmin to decide. + */ + if (nlabels != VDEV_LABELS || devLabel == NULL) { + nvlist_free(devLabel); + return (NULL); + } + + try { + Vdev vdev(devLabel); + degraded = vdev.State() != VDEV_STATE_HEALTHY; + return (devLabel); + } catch (ZfsdException &exp) { + string devName = fdevname(devFd); + string devPath = _PATH_DEV + devName; + string context("GeomEvent::ReadLabel: " + + devPath + ": "); + + exp.GetString().insert(0, context); + exp.Log(); + nvlist_free(devLabel); + } + } + return (NULL); +} + +bool +GeomEvent::OnlineByLabel(const string &devPath, const string& physPath, + nvlist_t *devConfig) +{ + bool ret = false; + try { + CaseFileList case_list; + /* + * A device with ZFS label information has been + * inserted. If it matches a device for which we + * have a case, see if we can solve that case. 
+ */ + syslog(LOG_INFO, "Interrogating VDEV label for %s\n", + devPath.c_str()); + Vdev vdev(devConfig); + CaseFile::Find(vdev.PoolGUID(),vdev.GUID(), case_list); + for (CaseFileList::iterator curr = case_list.begin(); + curr != case_list.end(); curr++) { + ret |= (*curr)->ReEvaluate(devPath, physPath, &vdev); + } + return (ret); + + } catch (ZfsdException &exp) { + string context("GeomEvent::OnlineByLabel: " + devPath + ": "); + + exp.GetString().insert(0, context); + exp.Log(); + } + return (ret); +} + + +/*--------------------------------- ZfsEvent ---------------------------------*/ +//- ZfsEvent Static Public Methods --------------------------------------------- +DevdCtl::Event * +ZfsEvent::Builder(Event::Type type, NVPairMap &nvpairs, + const string &eventString) +{ + return (new ZfsEvent(type, nvpairs, eventString)); +} + +//- ZfsEvent Virtual Public Methods -------------------------------------------- +Event * +ZfsEvent::DeepCopy() const +{ + return (new ZfsEvent(*this)); +} + +bool +ZfsEvent::Process() const +{ + string logstr(""); + + if (!Contains("class") && !Contains("type")) { + syslog(LOG_ERR, + "ZfsEvent::Process: Missing class or type data."); + return (false); + } + + /* On config syncs, replay any queued events first. */ + if (Value("type").find("misc.fs.zfs.config_sync") == 0) { + /* + * Even if saved events are unconsumed the second time + * around, drop them. Any events that still can't be + * consumed are probably referring to vdevs or pools that + * no longer exist. + */ + ZfsDaemon::Get().ReplayUnconsumedEvents(/*discard*/true); + CaseFile::ReEvaluateByGuid(PoolGUID(), *this); + } + + if (Value("type").find("misc.fs.zfs.") == 0) { + /* Configuration changes, resilver events, etc. */ + ProcessPoolEvent(); + return (false); + } + + if (!Contains("pool_guid") || !Contains("vdev_guid")) { + /* Only currently interested in Vdev related events. */ + return (false); + } + + CaseFile *caseFile(CaseFile::Find(PoolGUID(), VdevGUID())); + if (caseFile != NULL) { + Log(LOG_INFO); + syslog(LOG_INFO, "Evaluating existing case file\n"); + caseFile->ReEvaluate(*this); + return (false); + } + + /* Skip events that can't be handled. */ + Guid poolGUID(PoolGUID()); + /* If there are no replicas for a pool, then it's not manageable. */ + if (Value("class").find("fs.zfs.vdev.no_replicas") == 0) { + stringstream msg; + msg << "No replicas available for pool " << poolGUID; + msg << ", ignoring"; + Log(LOG_INFO); + syslog(LOG_INFO, "%s", msg.str().c_str()); + return (false); + } + + /* + * Create a case file for this vdev, and have it + * evaluate the event. 
+ */ + ZpoolList zpl(ZpoolList::ZpoolByGUID, &poolGUID); + if (zpl.empty()) { + stringstream msg; + int priority = LOG_INFO; + msg << "ZfsEvent::Process: Event for unknown pool "; + msg << poolGUID << " "; + msg << "queued"; + Log(LOG_INFO); + syslog(priority, "%s", msg.str().c_str()); + return (true); + } + + nvlist_t *vdevConfig = VdevIterator(zpl.front()).Find(VdevGUID()); + if (vdevConfig == NULL) { + stringstream msg; + int priority = LOG_INFO; + msg << "ZfsEvent::Process: Event for unknown vdev "; + msg << VdevGUID() << " "; + msg << "queued"; + Log(LOG_INFO); + syslog(priority, "%s", msg.str().c_str()); + return (true); + } + + Vdev vdev(zpl.front(), vdevConfig); + caseFile = &CaseFile::Create(vdev); + if (caseFile->ReEvaluate(*this) == false) { + stringstream msg; + int priority = LOG_INFO; + msg << "ZfsEvent::Process: Unconsumed event for vdev("; + msg << zpool_get_name(zpl.front()) << ","; + msg << vdev.GUID() << ") "; + msg << "queued"; + Log(LOG_INFO); + syslog(priority, "%s", msg.str().c_str()); + return (true); + } + return (false); +} + +//- ZfsEvent Protected Methods ------------------------------------------------- +ZfsEvent::ZfsEvent(Event::Type type, NVPairMap &nvpairs, + const string &eventString) + : DevdCtl::ZfsEvent(type, nvpairs, eventString) +{ +} + +ZfsEvent::ZfsEvent(const ZfsEvent &src) + : DevdCtl::ZfsEvent(src) +{ +} + +/* + * Sometimes the kernel won't detach a spare when it is no longer needed. This + * can happen for example if a drive is removed, then either the pool is + * exported or the machine is powered off, then the drive is reinserted, then + * the machine is powered on or the pool is imported. ZFSD must detach these + * spares itself. + */ +void +ZfsEvent::CleanupSpares() const +{ + Guid poolGUID(PoolGUID()); + ZpoolList zpl(ZpoolList::ZpoolByGUID, &poolGUID); + if (!zpl.empty()) { + zpool_handle_t* hdl; + + hdl = zpl.front(); + VdevIterator(hdl).Each(TryDetach, (void*)hdl); + } +} + +void +ZfsEvent::ProcessPoolEvent() const +{ + bool degradedDevice(false); + + /* The pool is destroyed. Discard any open cases */ + if (Value("type") == "misc.fs.zfs.pool_destroy") { + Log(LOG_INFO); + CaseFile::ReEvaluateByGuid(PoolGUID(), *this); + return; + } + + CaseFile *caseFile(CaseFile::Find(PoolGUID(), VdevGUID())); + if (caseFile != NULL) { + if (caseFile->VdevState() != VDEV_STATE_UNKNOWN + && caseFile->VdevState() < VDEV_STATE_HEALTHY) + degradedDevice = true; + + Log(LOG_INFO); + caseFile->ReEvaluate(*this); + } + else if (Value("type") == "misc.fs.zfs.resilver_finish") + { + /* + * It's possible to get a resilver_finish event with no + * corresponding casefile. For example, if a damaged pool were + * exported, repaired, then reimported. + */ + Log(LOG_INFO); + CleanupSpares(); + } + + if (Value("type") == "misc.fs.zfs.vdev_remove" + && degradedDevice == false) { + + /* See if any other cases can make use of this device. */ + Log(LOG_INFO); + ZfsDaemon::RequestSystemRescan(); + } +} + +bool +ZfsEvent::TryDetach(Vdev &vdev, void *cbArg) +{ + /* + * Outline: + * if this device is a spare, and its parent includes one healthy, + * non-spare child, then detach this device. 
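+ *
+ * Illustrative config shape (device names hypothetical) in which the
+ * spare below would be detached, because its parent has a healthy,
+ * non-spare child:
+ *
+ *	mirror-0
+ *	  spare-0
+ *	    da0p3	(non-spare, HEALTHY: permanent replacement)
+ *	    da1p3	(spare: no longer needed)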
+	 */
+	zpool_handle_t *hdl(static_cast<zpool_handle_t *>(cbArg));
+
+	if (vdev.IsSpare()) {
+		std::list<Vdev> siblings;
+		std::list<Vdev>::iterator siblings_it;
+		boolean_t cleanup = B_FALSE;
+
+		Vdev parent = vdev.Parent();
+		siblings = parent.Children();
+
+		/* Determine whether the parent should be cleaned up */
+		for (siblings_it = siblings.begin();
+		     siblings_it != siblings.end();
+		     siblings_it++) {
+			Vdev sibling = *siblings_it;
+
+			if (!sibling.IsSpare() &&
+			     sibling.State() == VDEV_STATE_HEALTHY) {
+				cleanup = B_TRUE;
+				break;
+			}
+		}
+
+		if (cleanup) {
+			syslog(LOG_INFO, "Detaching spare vdev %s from pool %s",
+			    vdev.Path().c_str(), zpool_get_name(hdl));
+			zpool_vdev_detach(hdl, vdev.Path().c_str());
+		}
+
+	}
+
+	/* Always return false, because there may be other spares to detach */
+	return (false);
+}
diff --git a/cmd/zfsd/zfsd_event.h b/cmd/zfsd/zfsd_event.h
new file mode 100644
index 0000000000..fd3f9f7c52
--- /dev/null
+++ b/cmd/zfsd/zfsd_event.h
@@ -0,0 +1,144 @@
+/*-
+ * Copyright (c) 2011, 2012, 2013, 2014, 2016 Spectra Logic Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions, and the following disclaimer,
+ *    without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ *    substantially similar to the "NO WARRANTY" disclaimer below
+ *    ("Disclaimer") and any redistribution must be conditioned upon
+ *    including a substantially similar Disclaimer requirement for further
+ *    binary redistribution.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ *
+ * Authors: Justin T. Gibbs     (Spectra Logic Corporation)
+ *
+ * $FreeBSD$
+ */
+
+/**
+ * \file zfsd_event.h
+ *
+ * \brief Class hierarchy used to express events received via
+ *        the devdctl API.
+ *
+ * Header requirements:
+ *    #include <string>
+ *    #include <list>
+ *    #include <map>
+ *
+ *    #include <devdctl/guid.h>
+ *    #include <devdctl/event.h>
+ */
+
+#ifndef _ZFSD_EVENT_H_
+#define	_ZFSD_EVENT_H_
+
+/*============================ Namespace Control =============================*/
+using std::string;
+
+/*=========================== Forward Declarations ===========================*/
+struct zpool_handle;
+typedef struct zpool_handle zpool_handle_t;
+
+struct nvlist;
+typedef struct nvlist nvlist_t;
+
+/*--------------------------------- ZfsEvent ---------------------------------*/
+class ZfsEvent : public DevdCtl::ZfsEvent
+{
+public:
+	/** Specialized DevdCtlEvent object factory for ZFS events. */
+	static BuildMethod Builder;
+
+	virtual DevdCtl::Event *DeepCopy() const;
+
+	/**
+	 * Interpret and perform any actions necessary to
+	 * consume the event.
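+	 * Events for pools or vdevs that cannot be identified yet are
+	 * reported as unconsumed so that ZfsDaemon can queue them and
+	 * replay them once more information becomes available.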
+ * \return True if this event should be queued for later reevaluation + */ + virtual bool Process() const; + +protected: + /** DeepCopy Constructor. */ + ZfsEvent(const ZfsEvent &src); + + /** Constructor */ + ZfsEvent(Type, DevdCtl::NVPairMap &, const string &); + + /** + * Detach any spares that are no longer needed, but were not + * automatically detached by the kernel + */ + virtual void CleanupSpares() const; + virtual void ProcessPoolEvent() const; + static VdevCallback_t TryDetach; +}; + +class GeomEvent : public DevdCtl::GeomEvent +{ +public: + static BuildMethod Builder; + + virtual DevdCtl::Event *DeepCopy() const; + + virtual bool Process() const; + +protected: + /** DeepCopy Constructor. */ + GeomEvent(const GeomEvent &src); + + /** Constructor */ + GeomEvent(Type, DevdCtl::NVPairMap &, const string &); + + /** + * Attempt to match the ZFS labeled device at devPath with an active + * CaseFile for a missing vdev. If a CaseFile is found, attempt + * to re-integrate the device with its pool. + * + * \param devPath The devfs path to the potential leaf vdev. + * \param physPath The physical path string reported by the device + * at devPath. + * \param devConfig The ZFS label information found on the device + * at devPath. + * + * \return true if the event that caused the online action can + * be considered consumed. + */ + static bool OnlineByLabel(const string &devPath, + const string& physPath, + nvlist_t *devConfig); + + /** + * \brief Read and return label information for a device. + * + * \param devFd The device from which to read ZFS label information. + * \param inUse The device is part of an active or potentially + * active configuration. + * \param degraded The device label indicates the vdev is not healthy. + * + * \return If label information is available, an nvlist describing + * the vdev configuraiton found on the device specified by + * devFd. Otherwise NULL. + */ + static nvlist_t *ReadLabel(int devFd, bool &inUse, bool °raded); + +}; +#endif /*_ZFSD_EVENT_H_ */ diff --git a/cmd/zfsd/zfsd_exception.cc b/cmd/zfsd/zfsd_exception.cc new file mode 100644 index 0000000000..e83dd0f6e5 --- /dev/null +++ b/cmd/zfsd/zfsd_exception.cc @@ -0,0 +1,135 @@ +/*- + * Copyright (c) 2011, 2012, 2013 Spectra Logic Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * substantially similar to the "NO WARRANTY" disclaimer below + * ("Disclaimer") and any redistribution must be conditioned upon + * including a substantially similar Disclaimer requirement for further + * binary redistribution. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ *
+ * Authors: Justin T. Gibbs     (Spectra Logic Corporation)
+ */
+
+/**
+ * \file zfsd_exception.cc
+ *
+ * Implementation of the ZfsdException class.
+ */
+#include <sys/cdefs.h>
+#include <sys/byteorder.h>
+#include <sys/time.h>
+
+#include <syslog.h>
+
+#include <string>
+#include <list>
+#include <sstream>
+
+#include <devdctl/exception.h>
+#include <devdctl/guid.h>
+
+#include <libzfs.h>
+
+#include "vdev.h"
+#include "zfsd_exception.h"
+
+__FBSDID("$FreeBSD$");
+/*============================ Namespace Control =============================*/
+using std::endl;
+using std::string;
+using std::stringstream;
+
+/*=========================== Class Implementations ==========================*/
+/*------------------------------- ZfsdException ------------------------------*/
+ZfsdException::ZfsdException(const char *fmt, ...)
+ : DevdCtl::Exception(),
+   m_poolConfig(NULL),
+   m_vdevConfig(NULL)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	FormatLog(fmt, ap);
+	va_end(ap);
+}
+
+ZfsdException::ZfsdException(zpool_handle_t *pool, const char *fmt, ...)
+ : DevdCtl::Exception(),
+   m_poolConfig(zpool_get_config(pool, NULL)),
+   m_vdevConfig(NULL)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	FormatLog(fmt, ap);
+	va_end(ap);
+}
+
+ZfsdException::ZfsdException(nvlist_t *poolConfig, const char *fmt, ...)
+ : DevdCtl::Exception(),
+   m_poolConfig(poolConfig),
+   m_vdevConfig(NULL)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	FormatLog(fmt, ap);
+	va_end(ap);
+}
+
+void
+ZfsdException::Log() const
+{
+	stringstream output;
+
+	if (m_poolConfig != NULL) {
+
+		output << "Pool ";
+
+		const char *poolName;
+		if (nvlist_lookup_string(m_poolConfig, ZPOOL_CONFIG_POOL_NAME,
+		    &poolName) == 0)
+			output << poolName;
+		else
+			output << "Unknown";
+		output << ": ";
+	}
+
+	if (m_vdevConfig != NULL) {
+
+		if (m_poolConfig != NULL) {
+			Vdev vdev(m_poolConfig, m_vdevConfig);
+
+			output << "Vdev " << vdev.GUID() << ": ";
+		} else {
+			Vdev vdev(m_vdevConfig);
+
+			output << "Pool " << vdev.PoolGUID() << ": ";
+			output << "Vdev " << vdev.GUID() << ": ";
+		}
+	}
+
+	output << m_log << endl;
+	syslog(LOG_ERR, "%s", output.str().c_str());
+}
+
diff --git a/cmd/zfsd/zfsd_exception.h b/cmd/zfsd/zfsd_exception.h
new file mode 100644
index 0000000000..5170b2d0db
--- /dev/null
+++ b/cmd/zfsd/zfsd_exception.h
@@ -0,0 +1,109 @@
+/*-
+ * Copyright (c) 2011, 2012, 2013 Spectra Logic Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions, and the following disclaimer,
+ *    without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ *    substantially similar to the "NO WARRANTY" disclaimer below
+ *    ("Disclaimer") and any redistribution must be conditioned upon
+ *    including a substantially similar Disclaimer requirement for further
+ *    binary redistribution.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ *
+ * Authors: Justin T. Gibbs     (Spectra Logic Corporation)
+ *
+ * $FreeBSD$
+ */
+
+/**
+ * \file zfsd_exception.h
+ *
+ * Definition of the ZfsdException class hierarchy.  All exceptions
+ * explicitly thrown by Zfsd are defined here.
+ *
+ * Header requirements:
+ *    #include <string>
+ *
+ *    #include <devdctl/exception.h>
+ */
+#ifndef _ZFSD_EXCEPTION_H_
+#define	_ZFSD_EXCEPTION_H_
+
+/*=========================== Forward Declarations ===========================*/
+struct zpool_handle;
+typedef struct zpool_handle zpool_handle_t;
+
+struct nvlist;
+typedef struct nvlist nvlist_t;
+
+/*============================= Class Definitions ============================*/
+/*------------------------------- ZfsdException ------------------------------*/
+/**
+ * \brief Class allowing unified reporting/logging of exceptional events.
+ */
+class ZfsdException : public DevdCtl::Exception
+{
+public:
+	/**
+	 * \brief ZfsdException constructor allowing arbitrary string
+	 *        data to be reported.
+	 *
+	 * \param fmt  Printf-like string format specifier.
+	 */
+	ZfsdException(const char *fmt, ...);
+
+	/**
+	 * \brief ZfsdException constructor allowing arbitrary string
+	 *        data to be reported and associated with the configuration
+	 *        data for a ZFS pool.
+	 *
+	 * \param pool  Pool handle describing the pool to which this
+	 *              exception is associated.
+	 * \param fmt   Printf-like string format specifier.
+	 *
+	 * Instantiation with this method is used to report global
+	 * pool errors.
+	 */
+	ZfsdException(zpool_handle_t *pool, const char *, ...);
+
+	/**
+	 * \brief ZfsdException constructor allowing arbitrary string
+	 *        data to be reported and associated with the configuration
+	 *        data for a ZFS pool.
+	 *
+	 * \param poolConfig  Pool configuration describing the pool to
+	 *                    which this exception is associated.
+	 * \param fmt         Printf-like string format specifier.
+	 *
+	 * Instantiation with this method is used to report global
+	 * pool errors.
+	 */
+	ZfsdException(nvlist_t *poolConfig, const char *, ...);
+
+	/**
+	 * \brief Emit exception data to syslog(3).
+	 */
+	virtual void Log() const;
+private:
+	nvlist_t	*m_poolConfig;
+	nvlist_t	*m_vdevConfig;
+};
+
+#endif /* _ZFSD_EXCEPTION_H_ */
diff --git a/cmd/zfsd/zfsd_main.cc b/cmd/zfsd/zfsd_main.cc
new file mode 100644
index 0000000000..f090631e21
--- /dev/null
+++ b/cmd/zfsd/zfsd_main.cc
@@ -0,0 +1,90 @@
+/*-
+ * Copyright (c) 2012, 2013 Spectra Logic Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions, and the following disclaimer,
+ *    without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ *    substantially similar to the "NO WARRANTY" disclaimer below
+ *    ("Disclaimer") and any redistribution must be conditioned upon
+ *    including a substantially similar Disclaimer requirement for further
+ *    binary redistribution.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ *
+ * Authors: Alan Somers         (Spectra Logic Corporation)
+ */
+
+/**
+ * \file zfsd_main.cc
+ *
+ * main function for the ZFS Daemon.  Separated to facilitate testing.
+ *
+ */
+
+#include <sys/cdefs.h>
+
+#include <cstdio>
+#include <cstdlib>
+#include <unistd.h>
+
+#include <list>
+#include <map>
+#include <string>
+
+#include <devdctl/guid.h>
+#include <devdctl/event.h>
+#include <devdctl/event_factory.h>
+#include <devdctl/exception.h>
+#include <devdctl/consumer.h>
+
+#include "vdev_iterator.h"
+#include "zfsd.h"
+
+__FBSDID("$FreeBSD$");
+
+/*=============================== Program Main ===============================*/
+static void
+usage()
+{
+	fprintf(stderr, "usage: %s [-d]\n", getprogname());
+	exit(1);
+}
+
+/**
+ * Program entry point.
+ */
+int
+main(int argc, char **argv)
+{
+	int ch;
+
+	while ((ch = getopt(argc, argv, "d")) != -1) {
+		switch (ch) {
+		case 'd':
+			g_debug++;
+			break;
+		default:
+			usage();
+		}
+	}
+
+	ZfsDaemon::Run();
+
+	return (0);
+}
diff --git a/cmd/zfsd/zpool_list.cc b/cmd/zfsd/zpool_list.cc
new file mode 100644
index 0000000000..82c35736df
--- /dev/null
+++ b/cmd/zfsd/zpool_list.cc
@@ -0,0 +1,122 @@
+/*-
+ * Copyright (c) 2011, 2012, 2013 Spectra Logic Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions, and the following disclaimer,
+ *    without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ *    substantially similar to the "NO WARRANTY" disclaimer below
+ *    ("Disclaimer") and any redistribution must be conditioned upon
+ *    including a substantially similar Disclaimer requirement for further
+ *    binary redistribution.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ *
+ * Authors: Justin T. Gibbs     (Spectra Logic Corporation)
+ *
+ * $FreeBSD$
+ */
+
+/**
+ * \file zpool_list.cc
+ *
+ * Implementation of the ZpoolList class.
+ */
+#include <sys/cdefs.h>
+#include <sys/fcntl.h>
+#include <sys/param.h>
+
+#include <syslog.h>
+
+#include <libzfs.h>
+
+#include <list>
+#include <map>
+#include <string>
+
+#include <devdctl/guid.h>
+#include <devdctl/event.h>
+#include <devdctl/event_factory.h>
+#include <devdctl/exception.h>
+#include <devdctl/consumer.h>
+
+#include "vdev.h"
+#include "vdev_iterator.h"
+#include "zpool_list.h"
+#include "zfsd.h"
+
+/*============================ Namespace Control =============================*/
+using DevdCtl::Guid;
+
+/*=========================== Class Implementations ==========================*/
+/*--------------------------------- ZpoolList --------------------------------*/
+bool
+ZpoolList::ZpoolAll(zpool_handle_t *pool, nvlist_t *poolConfig, void *cbArg)
+{
+	return (true);
+}
+
+bool
+ZpoolList::ZpoolByGUID(zpool_handle_t *pool, nvlist_t *poolConfig,
+		       void *cbArg)
+{
+	Guid *desiredPoolGUID(static_cast<Guid *>(cbArg));
+	uint64_t poolGUID;
+
+	/* We are only interested in the pool that matches our pool GUID. */
+	return (nvlist_lookup_uint64(poolConfig, ZPOOL_CONFIG_POOL_GUID,
+	    &poolGUID) == 0
+	    && poolGUID == (uint64_t)*desiredPoolGUID);
+}
+
+bool
+ZpoolList::ZpoolByName(zpool_handle_t *pool, nvlist_t *poolConfig, void *cbArg)
+{
+	const string &desiredPoolName(*static_cast<const string *>(cbArg));
+
+	/* We are only interested in the pool that matches our pool name. */
+	return (desiredPoolName == zpool_get_name(pool));
+}
+
+int
+ZpoolList::LoadIterator(zpool_handle_t *pool, void *data)
+{
+	ZpoolList *zpl(reinterpret_cast<ZpoolList *>(data));
+	nvlist_t *poolConfig(zpool_get_config(pool, NULL));
+
+	if (zpl->m_filter(pool, poolConfig, zpl->m_filterArg))
+		zpl->push_back(pool);
+	else
+		zpool_close(pool);
+	return (0);
+}
+
+ZpoolList::ZpoolList(PoolFilter_t *filter, void *filterArg)
+ : m_filter(filter),
+   m_filterArg(filterArg)
+{
+	zpool_iter(g_zfsHandle, LoadIterator, this);
+}
+
+ZpoolList::~ZpoolList()
+{
+	for (iterator it(begin()); it != end(); it++)
+		zpool_close(*it);
+
+	clear();
+}
diff --git a/cmd/zfsd/zpool_list.h b/cmd/zfsd/zpool_list.h
new file mode 100644
index 0000000000..7d1b176ac7
--- /dev/null
+++ b/cmd/zfsd/zpool_list.h
@@ -0,0 +1,133 @@
+/*-
+ * Copyright (c) 2011, 2012, 2013 Spectra Logic Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions, and the following disclaimer,
+ *    without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ *    substantially similar to the "NO WARRANTY" disclaimer below
+ *    ("Disclaimer") and any redistribution must be conditioned upon
+ *    including a substantially similar Disclaimer requirement for further
+ *    binary redistribution.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ *
+ * Authors: Justin T. Gibbs     (Spectra Logic Corporation)
+ *
+ * $FreeBSD$
+ */
+
+/**
+ * \file zpool_list.h
+ *
+ * ZpoolList class definition.  ZpoolList is a standard container
+ * allowing filtering and iteration of imported ZFS pool information.
+ *
+ * Header requirements:
+ *
+ *    #include <list>
+ *    #include <string>
+ */
+#ifndef _ZPOOL_LIST_H_
+#define	_ZPOOL_LIST_H_
+
+/*============================ Namespace Control =============================*/
+using std::string;
+
+/*=========================== Forward Declarations ===========================*/
+struct zpool_handle;
+typedef struct zpool_handle zpool_handle_t;
+
+struct nvlist;
+typedef struct nvlist nvlist_t;
+
+class Vdev;
+
+/*============================= Class Definitions ============================*/
+/*--------------------------------- ZpoolList --------------------------------*/
+class ZpoolList;
+typedef bool PoolFilter_t(zpool_handle_t *pool, nvlist_t *poolConfig,
+			  void *filterArg);
+
+/**
+ * \brief Container of imported ZFS pool data.
+ *
+ * ZpoolList is a convenience class that converts libzfs's ZFS
+ * pool methods into a standard list container.
+ */
+class ZpoolList : public std::list<zpool_handle_t *>
+{
+public:
+	/**
+	 * \brief Utility ZpoolList construction filter that causes all
+	 *        pools known to the system to be included in the
+	 *        instantiated ZpoolList.
+	 */
+	static PoolFilter_t ZpoolAll;
+
+	/**
+	 * \brief Utility ZpoolList construction filter that causes only
+	 *        a pool known to the system and having the specified GUID
+	 *        to be included in the instantiated ZpoolList.
+	 */
+	static PoolFilter_t ZpoolByGUID;
+
+	/**
+	 * \brief Utility ZpoolList construction filter that causes only
+	 *        pools known to the system and having the specified name
+	 *        to be included in the instantiated ZpoolList.
+	 */
+	static PoolFilter_t ZpoolByName;
+
+	/**
+	 * \brief ZpoolList constructor
+	 *
+	 * \param filter     The filter function to use when constructing
+	 *                   the ZpoolList.  This may be one of the static
+	 *                   utility filters defined for ZpoolList or a
+	 *                   user defined function.
+	 * \param filterArg  A single argument to pass into the filter function
+	 *                   when it is invoked on each candidate pool.
+	 */
+	ZpoolList(PoolFilter_t *filter = ZpoolAll, void *filterArg = NULL);
+	~ZpoolList();
+
+private:
+	/**
+	 * \brief Helper routine used to populate the internal
+	 *        data store of ZFS pool objects using libzfs's
+	 *        zpool_iter() function.
+	 *
+	 * \param pool  The ZFS pool object to filter.
+	 * \param data  User argument passed through zpool_iter().
+	 */
+	static int LoadIterator(zpool_handle_t *pool, void *data);
+
+	/**
+	 * \brief The filter with which this ZpoolList was constructed.
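+	 *
+	 * The filter is invoked on each imported pool and returns true
+	 * for every pool handle that should be retained in the list;
+	 * handles it rejects are closed immediately by LoadIterator.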
+	 */
+	PoolFilter_t	*m_filter;
+
+	/**
+	 * \brief The filter argument with which this ZpoolList was
+	 *        constructed.
+	 */
+	void		*m_filterArg;
+};
+
+#endif /* _ZPOOL_LIST_H_ */
diff --git a/config/Rules.am b/config/Rules.am
index 30c5f353cd..01d3ff086c 100644
--- a/config/Rules.am
+++ b/config/Rules.am
@@ -28,6 +28,15 @@ if BUILD_FREEBSD
 AM_CFLAGS += -fPIC -Werror -Wno-unknown-pragmas -Wno-enum-conversion
 AM_CFLAGS += -include $(top_srcdir)/include/os/freebsd/spl/sys/ccompile.h
 AM_CFLAGS += -I/usr/include -I/usr/local/include
+
+AM_CXXFLAGS = -std=c++17 -Wall -fno-strict-aliasing
+AM_CXXFLAGS += $(NO_OMIT_FRAME_POINTER)
+AM_CXXFLAGS += $(DEBUG_CFLAGS)
+AM_CXXFLAGS += $(ASAN_CFLAGS)
+AM_CXXFLAGS += $(CODE_COVERAGE_FLAGS) $(NO_FORMAT_ZERO_LENGTH)
+AM_CXXFLAGS += -fPIC -Werror -Wno-unknown-pragmas -Wno-enum-conversion
+AM_CXXFLAGS += -include $(top_srcdir)/include/os/freebsd/spl/sys/ccompile.h
+AM_CXXFLAGS += -I/usr/include -I/usr/local/include
 endif
 
 AM_CPPFLAGS += -D_GNU_SOURCE
diff --git a/configure.ac b/configure.ac
index 2ce049c582..45de740063 100644
--- a/configure.ac
+++ b/configure.ac
@@ -49,6 +49,8 @@ AC_CONFIG_HEADERS([zfs_config.h], [
 LT_INIT
 AC_PROG_INSTALL
 AC_PROG_CC
+AC_PROG_CXX
+AC_PROG_LIBTOOL
 AC_PROG_LN_S
 PKG_PROG_PKG_CONFIG
 AM_PROG_AS
diff --git a/etc/Makefile.am b/etc/Makefile.am
index 7187762d38..df7795097d 100644
--- a/etc/Makefile.am
+++ b/etc/Makefile.am
@@ -21,6 +21,14 @@ SUBSTFILES += $(sysconf_zfs_DATA)
 SHELLCHECKSCRIPTS += $(sysconf_zfs_DATA)
 $(call SHELLCHECK_OPTS,$(sysconf_zfs_DATA)): SHELLCHECK_SHELL = sh
 
+if BUILD_FREEBSD
+rcdir = $(sysconfdir)/rc.d
+rc_SCRIPTS = %D%/rc.d/zfsd
+
+SUBSTFILES += $(rc_SCRIPTS)
+SHELLCHECKSCRIPTS += $(rc_SCRIPTS)
+$(call SHELLCHECK_OPTS,$(rc_SCRIPTS)): SHELLCHECK_SHELL = sh
+endif
 
 if BUILD_LINUX
 initconf_DATA = \
diff --git a/etc/rc.d/.gitignore b/etc/rc.d/.gitignore
new file mode 100644
index 0000000000..5e3efca4d9
--- /dev/null
+++ b/etc/rc.d/.gitignore
@@ -0,0 +1 @@
+/zfsd
diff --git a/etc/rc.d/zfsd.in b/etc/rc.d/zfsd.in
new file mode 100644
index 0000000000..942b03f1e4
--- /dev/null
+++ b/etc/rc.d/zfsd.in
@@ -0,0 +1,14 @@
+#!/bin/sh
+
+# PROVIDE: zfsd
+# REQUIRE: devd zfs
+# KEYWORD: nojail shutdown
+
+. /etc/rc.subr
+
+name="zfsd"
+rcvar="zfsd_enable"
+command="@sbindir@/${name}"
+
+load_rc_config ${name}
+run_rc_command "${1}"