From 36b313dacf2f60f526fe98b7e9d1a6bbcbb250d2 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Wed, 4 Feb 2009 15:15:41 -0800 Subject: [PATCH] Linux VM integration / device special files Support added to provide reasonable values for the global Solaris VM variables: minfree, desfree, lotsfree, needfree. These values are set to the sum of their per-zone linux counterparts which should be close enough for Solaris consumers. When a non-GPL app links against the SPL we cannot use the udev interfaces, which means none of the device special files are created. Because of this I have added a poor man's udev which causes the SPL to invoke an upcall and create the basic devices when a minor is registered. When a minor is unregistered we use the vnode interface to unlink the special file. --- config/spl-build.m4 | 36 +++++++++ configure | 117 +++++++++++++++++++++++++++++ configure.ac | 3 + include/sys/dnlc.h | 2 + include/sys/kmem.h | 1 + include/sys/sunddi.h | 34 ++++++++- include/sys/sysmacros.h | 5 +- include/sys/vmsystm.h | 62 +++++++++------- module/spl/spl-generic.c | 3 - module/spl/spl-kmem.c | 109 +++++++++++++++++++++++++++ module/spl/spl-module.c | 25 +++++++ module/spl/spl-proc.c | 156 +++++++++++++++++++++++++++++++-------- spl_config.h.in | 9 +++ 13 files changed, 498 insertions(+), 64 deletions(-) diff --git a/config/spl-build.m4 b/config/spl-build.m4 index 2f346f6c8b..b7aa024e43 100644 --- a/config/spl-build.m4 +++ b/config/spl-build.m4 @@ -705,3 +705,39 @@ AC_DEFUN([SPL_AC_3ARGS_ON_EACH_CPU], [ AC_MSG_RESULT(no) ]) ]) + +dnl # +dnl # Distro specific first_online_pgdat symbol export. +dnl # +AC_DEFUN([SPL_AC_FIRST_ONLINE_PGDAT], [ + SPL_CHECK_SYMBOL_EXPORT( + [first_online_pgdat], + [], + [AC_DEFINE(HAVE_FIRST_ONLINE_PGDAT, 1, + [first_online_pgdat() is available])], + []) +]) + +dnl # +dnl # Distro specific next_online_pgdat symbol export.
+dnl # +AC_DEFUN([SPL_AC_NEXT_ONLINE_PGDAT], [ + SPL_CHECK_SYMBOL_EXPORT( + [next_online_pgdat], + [], + [AC_DEFINE(HAVE_NEXT_ONLINE_PGDAT, 1, + [next_online_pgdat() is available])], + []) +]) + +dnl # +dnl # Distro specific next_zone symbol export. +dnl # +AC_DEFUN([SPL_AC_NEXT_ZONE], [ + SPL_CHECK_SYMBOL_EXPORT( + [next_zone], + [], + [AC_DEFINE(HAVE_NEXT_ZONE, 1, + [next_zone() is available])], + []) +]) diff --git a/configure b/configure index 8acd050232..7e59d67fdd 100755 --- a/configure +++ b/configure @@ -20481,6 +20481,123 @@ rm -f build/conftest.o build/conftest.mod.c build/conftest.mod.o build/conftest. + echo "$as_me:$LINENO: checking whether symbol first_online_pgdat is exported" >&5 +echo $ECHO_N "checking whether symbol first_online_pgdat is exported... $ECHO_C" >&6 + grep -q -E '[[:space:]]first_online_pgdat[[:space:]]' $LINUX/Module.symvers 2>/dev/null + rc=$? + if test $rc -ne 0; then + export=0 + for file in ; do + grep -q -E "EXPORT_SYMBOL.*(first_online_pgdat)" "$LINUX/$file" 2>/dev/null + rc=$? + if test $rc -eq 0; then + export=1 + break; + fi + done + if test $export -eq 0; then + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 + + else + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +cat >>confdefs.h <<\_ACEOF +#define HAVE_FIRST_ONLINE_PGDAT 1 +_ACEOF + + fi + else + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +cat >>confdefs.h <<\_ACEOF +#define HAVE_FIRST_ONLINE_PGDAT 1 +_ACEOF + + fi + + + + echo "$as_me:$LINENO: checking whether symbol next_online_pgdat is exported" >&5 +echo $ECHO_N "checking whether symbol next_online_pgdat is exported... $ECHO_C" >&6 + grep -q -E '[[:space:]]next_online_pgdat[[:space:]]' $LINUX/Module.symvers 2>/dev/null + rc=$? + if test $rc -ne 0; then + export=0 + for file in ; do + grep -q -E "EXPORT_SYMBOL.*(next_online_pgdat)" "$LINUX/$file" 2>/dev/null + rc=$? 
+ if test $rc -eq 0; then + export=1 + break; + fi + done + if test $export -eq 0; then + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 + + else + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +cat >>confdefs.h <<\_ACEOF +#define HAVE_NEXT_ONLINE_PGDAT 1 +_ACEOF + + fi + else + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +cat >>confdefs.h <<\_ACEOF +#define HAVE_NEXT_ONLINE_PGDAT 1 +_ACEOF + + fi + + + + echo "$as_me:$LINENO: checking whether symbol next_zone is exported" >&5 +echo $ECHO_N "checking whether symbol next_zone is exported... $ECHO_C" >&6 + grep -q -E '[[:space:]]next_zone[[:space:]]' $LINUX/Module.symvers 2>/dev/null + rc=$? + if test $rc -ne 0; then + export=0 + for file in ; do + grep -q -E "EXPORT_SYMBOL.*(next_zone)" "$LINUX/$file" 2>/dev/null + rc=$? + if test $rc -eq 0; then + export=1 + break; + fi + done + if test $export -eq 0; then + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 + + else + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +cat >>confdefs.h <<\_ACEOF +#define HAVE_NEXT_ZONE 1 +_ACEOF + + fi + else + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +cat >>confdefs.h <<\_ACEOF +#define HAVE_NEXT_ZONE 1 +_ACEOF + + fi + + + TOPDIR=`/bin/pwd` # Add "V=1" to KERNELMAKE_PARAMS to enable verbose module build diff --git a/configure.ac b/configure.ac index e0fcb60772..bd4f699769 100644 --- a/configure.ac +++ b/configure.ac @@ -69,6 +69,9 @@ SPL_AC_MONOTONIC_CLOCK SPL_AC_INODE_I_MUTEX SPL_AC_DIV64_64 SPL_AC_3ARGS_ON_EACH_CPU +SPL_AC_FIRST_ONLINE_PGDAT +SPL_AC_NEXT_ONLINE_PGDAT +SPL_AC_NEXT_ZONE TOPDIR=`/bin/pwd` diff --git a/include/sys/dnlc.h b/include/sys/dnlc.h index 91fb57e06c..38f8dd6b1c 100644 --- a/include/sys/dnlc.h +++ b/include/sys/dnlc.h @@ -1,4 +1,6 @@ #ifndef _SPL_DNLC_H #define _SPL_DNLC_H +#define dnlc_reduce_cache(percent) ((void)0) + #endif /* SPL_DNLC_H */ diff --git a/include/sys/kmem.h b/include/sys/kmem.h 
index 5f2695d521..7281f1063d 100644 --- a/include/sys/kmem.h +++ b/include/sys/kmem.h @@ -45,6 +45,7 @@ extern "C" { #include #include #include +#include /* * Memory allocation interfaces diff --git a/include/sys/sunddi.h b/include/sys/sunddi.h index d2e71ebd50..404b14b383 100644 --- a/include/sys/sunddi.h +++ b/include/sys/sunddi.h @@ -32,12 +32,15 @@ #include #include #include +#include #include #include #include #include #include +#define DDI_MAX_NAME_LEN 32 + typedef int ddi_devid_t; typedef enum { @@ -80,6 +83,7 @@ typedef struct pollhead { typedef struct dev_info { kmutex_t di_lock; + char di_name[DDI_MAX_NAME_LEN]; struct dev_ops *di_ops; struct cdev *di_cdev; spl_class *di_class; @@ -202,6 +206,7 @@ extern void __ddi_remove_minor_node(dev_info_t *dip, char *name); extern int ddi_quiesce_not_needed(dev_info_t *dip); extern int __mod_install(struct modlinkage *modlp); extern int __mod_remove(struct modlinkage *modlp); +extern int __mod_mknod(char *name, char *type, int major, int minor); extern int ddi_strtoul(const char *, char **, int, unsigned long *); extern int ddi_strtol(const char *, char **, int, long *); @@ -226,7 +231,16 @@ ddi_remove_minor_node(dev_info_t *di, char *name) di->di_class = NULL; di->di_dev = 0; } -#endif +#else + /* When we do not have access to the GPL-only device interfaces we + * are forced to do something crude. We unlink the special device + * file in /dev/ ourselves from within the kernel. On the upside we + * are already providing this functionality for Solaris, and it is + * easy to leverage the Solaris API to perform the unlink. 
*/ + if (strlen(di->di_name) > 0) + vn_remove(di->di_name, UIO_SYSSPACE, RMFILE); + +#endif /* HAVE_GPL_ONLY_SYMBOLS */ __ddi_remove_minor_node(di, name); } @@ -254,14 +268,28 @@ ddi_create_minor_node(dev_info_t *di, char *name, int spec_type, di->di_class = NULL; ddi_remove_minor_node(di, name); CERROR("Error creating %s class, %d\n", name, rc); - RETURN(DDI_FAILURE); + return DDI_FAILURE; } /* Do not append a 0 to devices with minor nums of 0 */ di->di_device = spl_device_create(di->di_class, NULL, di->di_dev, NULL, (di->di_minor == 0) ? "%s" : "%s%d", name, di->di_minor); -#endif +#else + /* When we do not have access to the GPL-only device interfaces we + * are forced to do something horrible. We use a user mode helper to + * create the special device file in /dev/. By further extending the + * Solaris vnode implementation we could potentially do a vn_create() + * from within the kernel but that's still a hack. */ + if (name) { + rc = __mod_mknod(di->di_name, "c", di->di_major, di->di_minor); + if (rc) { + CERROR("Error mknod %s, %d\n", di->di_name, rc); + ddi_remove_minor_node(di, name); + } + } + +#endif /* HAVE_GPL_ONLY_SYMBOLS */ return rc; } diff --git a/include/sys/sysmacros.h b/include/sys/sysmacros.h index b828123857..65c1025857 100644 --- a/include/sys/sysmacros.h +++ b/include/sys/sysmacros.h @@ -67,10 +67,13 @@ extern "C" { #define DEV_BSIZE 512 #define DEV_BSHIFT 9 /* log2(DEV_BSIZE) */ +#define curproc current +#define proc_pageout NULL #define max_ncpus 64 #define CPU_SEQID smp_processor_id() /* I think...
*/ #define _NOTE(x) + #define RLIM64_INFINITY RLIM_INFINITY /* 0..MAX_PRIO-1: Process priority @@ -135,8 +138,6 @@ extern int p0; /* Missing misc functions */ extern int highbit(unsigned long i); -extern int ddi_strtoul(const char *str, char **nptr, - int base, unsigned long *result); #define makedevice(maj,min) makedev(maj,min) diff --git a/include/sys/vmsystm.h b/include/sys/vmsystm.h index 1cb716f13c..a6e9e7d7e4 100644 --- a/include/sys/vmsystm.h +++ b/include/sys/vmsystm.h @@ -27,19 +27,50 @@ #ifndef _SPL_VMSYSTM_H #define _SPL_VMSYSTM_H +#include #include #include #include #include -extern vmem_t *zio_alloc_arena; /* arena for zio caches */ +/* These values are loosely coupled with the VM page reclaim. + * Linux uses its own heuristics to trigger page reclamation, and + * because those interfaces are difficult to interface with, these + * values should only be considered as a rough guide to the system + * memory state and not as direct evidence that page reclamation + * is or is not currently in progress.
+ */ +#define ptob(pages) (pages * PAGE_SIZE) +#define membar_producer() smp_wmb() #define physmem num_physpages #define freemem nr_free_pages() -#define minfree 0 -#define needfree 0 /* # of needed pages */ -#define ptob(pages) (pages * PAGE_SIZE) -#define membar_producer() smp_wmb() + +extern pgcnt_t minfree; /* Sum of zone->pages_min */ +extern pgcnt_t desfree; /* Sum of zone->pages_low */ +extern pgcnt_t lotsfree; /* Sum of zone->pages_high */ +extern pgcnt_t needfree; /* Always 0 */ +extern pgcnt_t swapfs_minfree; +extern pgcnt_t swapfs_desfree; +extern pgcnt_t swapfs_reserve; +extern pgcnt_t availrmem; + +extern vmem_t *heap_arena; /* primary kernel heap arena */ +extern vmem_t *zio_alloc_arena; /* arena for zio caches */ +extern vmem_t *zio_arena; /* arena for allocating zio memory */ + +#define VMEM_ALLOC 0x01 +#define VMEM_FREE 0x02 + +static __inline__ size_t +vmem_size(vmem_t *vmp, int typemask) +{ + /* Arena's unsupported */ + ASSERT(vmp == NULL); + ASSERT(typemask & (VMEM_ALLOC | VMEM_FREE)); + + return 0; +} #define xcopyin(from, to, size) copy_from_user(to, from, size) #define xcopyout(from, to, size) copy_to_user(to, from, size) @@ -82,25 +113,4 @@ copyinstr(const void *from, void *to, size_t len, size_t *done) return 0; } -#if 0 -/* The average number of free pages over the last 5 seconds */ -#define avefree 0 - -/* The average number of free pages over the last 30 seconds */ -#define avefree30 0 - -/* A guess as to how much memory has been promised to - * processes but not yet allocated */ -#define deficit 0 - -/* A bootlean the controls the setting of deficit */ -#define desperate - -/* When free memory is above this limit, no paging or swapping is done */ -#define lotsfree 0 - -/* When free memory is above this limit, swapping is not performed */ -#define desfree 0 -#endif - #endif /* SPL_VMSYSTM_H */ diff --git a/module/spl/spl-generic.c b/module/spl/spl-generic.c index c09d9d4e36..a15cac41ec 100644 --- a/module/spl/spl-generic.c +++ 
b/module/spl/spl-generic.c @@ -53,9 +53,6 @@ EXPORT_SYMBOL(hw_serial); int p0 = 0; EXPORT_SYMBOL(p0); -vmem_t *zio_alloc_arena = NULL; -EXPORT_SYMBOL(zio_alloc_arena); - int highbit(unsigned long i) { diff --git a/module/spl/spl-kmem.c b/module/spl/spl-kmem.c index 79f7872715..c39636e06b 100644 --- a/module/spl/spl-kmem.c +++ b/module/spl/spl-kmem.c @@ -32,6 +32,96 @@ #define DEBUG_SUBSYSTEM S_KMEM +/* + * The minimum amount of memory measured in pages to be free at all + * times on the system. This is similar to Linux's zone->pages_min + * multiplied by the number of zones and is sized based on that. + */ +pgcnt_t minfree = 0; +EXPORT_SYMBOL(minfree); + +/* + * The desired amount of memory measured in pages to be free at all + * times on the system. This is similar to Linux's zone->pages_low + * multiplied by the number of zones and is sized based on that. + * Assuming all zones are being used roughly equally, when we drop + * below this threshold async page reclamation is triggered. + */ +pgcnt_t desfree = 0; +EXPORT_SYMBOL(desfree); + +/* + * When above this amount of memory measured in pages the system is + * determined to have enough free memory. This is similar to Linux's + * zone->pages_high multiplied by the number of zones and is sized based + * on that. Assuming all zones are being used roughly equally, when + * async page reclamation reaches this threshold it stops.
+ */ +pgcnt_t lotsfree = 0; +EXPORT_SYMBOL(lotsfree); + +/* Unused, always 0 in this implementation */ +pgcnt_t needfree = 0; +EXPORT_SYMBOL(needfree); + +pgcnt_t swapfs_desfree = 0; +EXPORT_SYMBOL(swapfs_desfree); + +pgcnt_t swapfs_minfree = 0; +EXPORT_SYMBOL(swapfs_minfree); + +pgcnt_t swapfs_reserve = 0; +EXPORT_SYMBOL(swapfs_reserve); + +pgcnt_t availrmem = 0; +EXPORT_SYMBOL(availrmem); + +vmem_t *heap_arena = NULL; +EXPORT_SYMBOL(heap_arena); + +vmem_t *zio_alloc_arena = NULL; +EXPORT_SYMBOL(zio_alloc_arena); + +vmem_t *zio_arena = NULL; +EXPORT_SYMBOL(zio_arena); + +#ifndef HAVE_FIRST_ONLINE_PGDAT +struct pglist_data *first_online_pgdat(void) +{ + return NODE_DATA(first_online_node); +} +#endif /* HAVE_FIRST_ONLINE_PGDAT */ + +#ifndef HAVE_NEXT_ONLINE_PGDAT +struct pglist_data *next_online_pgdat(struct pglist_data *pgdat) +{ + int nid = next_online_node(pgdat->node_id); + + if (nid == MAX_NUMNODES) + return NULL; + + return NODE_DATA(nid); +} +#endif /* HAVE_NEXT_ONLINE_PGDAT */ + +#ifndef HAVE_NEXT_ZONE +struct zone *next_zone(struct zone *zone) +{ + pg_data_t *pgdat = zone->zone_pgdat; + + if (zone < pgdat->node_zones + MAX_NR_ZONES - 1) + zone++; + else { + pgdat = next_online_pgdat(pgdat); + if (pgdat) + zone = pgdat->node_zones; + else + zone = NULL; + } + return zone; +} +#endif /* HAVE_NEXT_ZONE */ + /* * Memory allocation interfaces and debugging for basic kmem_* * and vmem_* style memory allocation. When DEBUG_KMEM is enable @@ -1601,6 +1691,24 @@ spl_kmem_fini_tracking(struct list_head *list, spinlock_t *lock) #define spl_kmem_fini_tracking(list, lock) #endif /* DEBUG_KMEM && DEBUG_KMEM_TRACKING */ +static void +spl_kmem_init_globals(void) +{ + struct zone *zone; + + /* For now all zones are included, it may be wise to restrict + * this to normal and highmem zones if we see problems.
*/ + for_each_zone(zone) { + + if (!populated_zone(zone)) + continue; + + minfree += zone->pages_min; + desfree += zone->pages_low; + lotsfree += zone->pages_high; + } +} + int spl_kmem_init(void) { @@ -1609,6 +1717,7 @@ spl_kmem_init(void) init_rwsem(&spl_kmem_cache_sem); INIT_LIST_HEAD(&spl_kmem_cache_list); + spl_kmem_init_globals(); #ifdef HAVE_SET_SHRINKER spl_kmem_cache_shrinker = set_shrinker(KMC_DEFAULT_SEEKS, diff --git a/module/spl/spl-module.c b/module/spl/spl-module.c index c1d030f240..4a2be89dab 100644 --- a/module/spl/spl-module.c +++ b/module/spl/spl-module.c @@ -140,6 +140,7 @@ __ddi_create_minor_node(dev_info_t *di, char *name, int spec_type, ASSERT(cb_ops->cb_aread == NULL); ASSERT(cb_ops->cb_awrite == NULL); + snprintf(di->di_name, DDI_MAX_NAME_LEN-1, "/dev/%s", name); di->di_cdev = cdev; di->di_flags = flags; di->di_minor = minor_num; @@ -281,6 +282,30 @@ __mod_install(struct modlinkage *modlp) } EXPORT_SYMBOL(__mod_install); +int +__mod_mknod(char *name, char *type, int major, int minor) +{ + char cmd[] = "/bin/mknod"; + char major_str[8]; + char minor_str[8]; + char *argv[] = { cmd, + name, + type, + major_str, + minor_str, + NULL }; + char *envp[] = { "HOME=/", + "TERM=linux", + "PATH=/sbin:/usr/sbin:/bin:/usr/bin", + NULL }; + + snprintf(major_str, 8, "%d", major); + snprintf(minor_str, 8, "%d", minor); + + return call_usermodehelper(cmd, argv, envp, 1); +} +EXPORT_SYMBOL(__mod_mknod); + int __mod_remove(struct modlinkage *modlp) { diff --git a/module/spl/spl-proc.c b/module/spl/spl-proc.c index bf185c60ec..024118a9f9 100644 --- a/module/spl/spl-proc.c +++ b/module/spl/spl-proc.c @@ -60,6 +60,7 @@ struct proc_dir_entry *proc_spl_kstat = NULL; #define CTL_SPL CTL_UNNUMBERED #define CTL_SPL_DEBUG CTL_UNNUMBERED +#define CTL_SPL_VM CTL_UNNUMBERED #define CTL_SPL_MUTEX CTL_UNNUMBERED #define CTL_SPL_KMEM CTL_UNNUMBERED #define CTL_SPL_KSTAT CTL_UNNUMBERED @@ -85,6 +86,15 @@ struct proc_dir_entry *proc_spl_kstat = NULL; #define 
CTL_CONSOLE_MIN_DELAY_CS CTL_UNNUMBERED /* Init delay skip messages */ #define CTL_CONSOLE_BACKOFF CTL_UNNUMBERED /* Delay increase factor */ +#define CTL_VM_MINFREE CTL_UNNUMBERED /* Minimum free memory */ +#define CTL_VM_DESFREE CTL_UNNUMBERED /* Desired free memory */ +#define CTL_VM_LOTSFREE CTL_UNNUMBERED /* Lots of free memory */ +#define CTL_VM_NEEDFREE CTL_UNNUMBERED /* Need free memory */ +#define CTL_VM_SWAPFS_MINFREE CTL_UNNUMBERED /* Minimum swapfs memory */ +#define CTL_VM_SWAPFS_DESFREE CTL_UNNUMBERED /* Desired swapfs memory */ +#define CTL_VM_SWAPFS_RESERVE CTL_UNNUMBERED /* Reserved swapfs memory */ +#define CTL_VM_AVAILRMEM CTL_UNNUMBERED /* Available reserved memory */ + #ifdef DEBUG_KMEM #define CTL_KMEM_KMEMUSED CTL_UNNUMBERED /* Alloc'd kmem bytes */ #define CTL_KMEM_KMEMMAX CTL_UNNUMBERED /* Max alloc'd by kmem bytes */ @@ -99,44 +109,56 @@ struct proc_dir_entry *proc_spl_kstat = NULL; #else /* HAVE_CTL_UNNUMBERED */ -#define CTL_SPL 0x87 -#define CTL_SPL_DEBUG 0x88 -#define CTL_SPL_MUTEX 0x89 -#define CTL_SPL_KMEM 0x90 -#define CTL_SPL_KSTAT 0x91 +enum { + CTL_SPL = 0x87, + CTL_SPL_DEBUG = 0x88, + CTL_SPL_VM = 0x89, + CTL_SPL_MUTEX = 0x90, + CTL_SPL_KMEM = 0x91, + CTL_SPL_KSTAT = 0x92, +}; enum { - CTL_VERSION = 1, /* Version */ - CTL_HOSTID, /* Host id reported by /usr/bin/hostid */ - CTL_HW_SERIAL, /* Hardware serial number from hostid */ + CTL_VERSION = 1, /* Version */ + CTL_HOSTID, /* Host id reported by /usr/bin/hostid */ + CTL_HW_SERIAL, /* Hardware serial number from hostid */ - CTL_DEBUG_SUBSYS, /* Debug subsystem */ - CTL_DEBUG_MASK, /* Debug mask */ - CTL_DEBUG_PRINTK, /* Force all messages to console */ - CTL_DEBUG_MB, /* Debug buffer size */ - CTL_DEBUG_BINARY, /* Include binary data in buffer */ - CTL_DEBUG_CATASTROPHE, /* Set if we have BUG'd or panic'd */ - CTL_DEBUG_PANIC_ON_BUG, /* Set if we should panic on BUG */ - CTL_DEBUG_PATH, /* Dump log location */ - CTL_DEBUG_DUMP, /* Dump debug buffer to file */ - 
CTL_DEBUG_FORCE_BUG, /* Hook to force a BUG */ - CTL_DEBUG_STACK_SIZE, /* Max observed stack size */ + CTL_DEBUG_SUBSYS, /* Debug subsystem */ + CTL_DEBUG_MASK, /* Debug mask */ + CTL_DEBUG_PRINTK, /* Force all messages to console */ + CTL_DEBUG_MB, /* Debug buffer size */ + CTL_DEBUG_BINARY, /* Include binary data in buffer */ + CTL_DEBUG_CATASTROPHE, /* Set if we have BUG'd or panic'd */ + CTL_DEBUG_PANIC_ON_BUG, /* Set if we should panic on BUG */ + CTL_DEBUG_PATH, /* Dump log location */ + CTL_DEBUG_DUMP, /* Dump debug buffer to file */ + CTL_DEBUG_FORCE_BUG, /* Hook to force a BUG */ + CTL_DEBUG_STACK_SIZE, /* Max observed stack size */ - CTL_CONSOLE_RATELIMIT, /* Ratelimit console messages */ - CTL_CONSOLE_MAX_DELAY_CS, /* Max delay at which we skip messages */ - CTL_CONSOLE_MIN_DELAY_CS, /* Init delay at which we skip messages */ - CTL_CONSOLE_BACKOFF, /* Delay increase factor */ + CTL_CONSOLE_RATELIMIT, /* Ratelimit console messages */ + CTL_CONSOLE_MAX_DELAY_CS, /* Max delay which we skip messages */ + CTL_CONSOLE_MIN_DELAY_CS, /* Init delay which we skip messages */ + CTL_CONSOLE_BACKOFF, /* Delay increase factor */ + + CTL_VM_MINFREE, /* Minimum free memory threshold */ + CTL_VM_DESFREE, /* Desired free memory threshold */ + CTL_VM_LOTSFREE, /* Lots of free memory threshold */ + CTL_VM_NEEDFREE, /* Need free memory deficit */ + CTL_VM_SWAPFS_MINFREE, /* Minimum swapfs memory */ + CTL_VM_SWAPFS_DESFREE, /* Desired swapfs memory */ + CTL_VM_SWAPFS_RESERVE, /* Reserved swapfs memory */ + CTL_VM_AVAILRMEM, /* Available reserved memory */ #ifdef DEBUG_KMEM - CTL_KMEM_KMEMUSED, /* Alloc'd kmem bytes */ - CTL_KMEM_KMEMMAX, /* Max alloc'd by kmem bytes */ - CTL_KMEM_VMEMUSED, /* Alloc'd vmem bytes */ - CTL_KMEM_VMEMMAX, /* Max alloc'd by vmem bytes */ + CTL_KMEM_KMEMUSED, /* Alloc'd kmem bytes */ + CTL_KMEM_KMEMMAX, /* Max alloc'd by kmem bytes */ + CTL_KMEM_VMEMUSED, /* Alloc'd vmem bytes */ + CTL_KMEM_VMEMMAX, /* Max alloc'd by vmem bytes */ #endif - 
CTL_MUTEX_STATS, /* Global mutex statistics */ - CTL_MUTEX_STATS_PER, /* Per mutex statistics */ - CTL_MUTEX_SPIN_MAX, /* Maximum mutex spin iterations */ + CTL_MUTEX_STATS, /* Global mutex statistics */ + CTL_MUTEX_STATS_PER, /* Per mutex statistics */ + CTL_MUTEX_SPIN_MAX, /* Maximum mutex spin iterations */ }; #endif /* HAVE_CTL_UNNUMBERED */ @@ -769,6 +791,74 @@ static struct ctl_table spl_debug_table[] = { {0}, }; +static struct ctl_table spl_vm_table[] = { + { + .ctl_name = CTL_VM_MINFREE, + .procname = "minfree", + .data = &minfree, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_VM_DESFREE, + .procname = "desfree", + .data = &desfree, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_VM_LOTSFREE, + .procname = "lotsfree", + .data = &lotsfree, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_VM_NEEDFREE, + .procname = "needfree", + .data = &needfree, + .maxlen = sizeof(int), + .mode = 0444, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_VM_SWAPFS_MINFREE, + .procname = "swapfs_minfree", + .data = &swapfs_minfree, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_VM_SWAPFS_DESFREE, + .procname = "swapfs_desfree", + .data = &swapfs_desfree, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_VM_SWAPFS_RESERVE, + .procname = "swapfs_reserve", + .data = &swapfs_reserve, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_VM_AVAILRMEM, + .procname = "availrmem", + .data = &availrmem, + .maxlen = sizeof(int), + .mode = 0444, + .proc_handler = &proc_dointvec, + }, + {0}, +}; + #ifdef DEBUG_MUTEX static struct ctl_table spl_mutex_table[] = { { @@ -873,6 +963,12 @@ static struct ctl_table spl_table[] = { .mode = 0555, .child = spl_debug_table, }, + 
{ + .ctl_name = CTL_SPL_VM, + .procname = "vm", + .mode = 0555, + .child = spl_vm_table, + }, #ifdef DEBUG_MUTEX { .ctl_name = CTL_SPL_MUTEX, diff --git a/spl_config.h.in b/spl_config.h.in index a4a8ccd321..2bfb5c4873 100644 --- a/spl_config.h.in +++ b/spl_config.h.in @@ -45,6 +45,9 @@ /* files_fdtable() is available */ #undef HAVE_FILES_FDTABLE +/* first_online_pgdat() is available */ +#undef HAVE_FIRST_ONLINE_PGDAT + /* fls64() is available */ #undef HAVE_FLS64 @@ -66,6 +69,12 @@ /* monotonic_clock() is available */ #undef HAVE_MONOTONIC_CLOCK +/* next_online_pgdat() is available */ +#undef HAVE_NEXT_ONLINE_PGDAT + +/* next_zone() is available */ +#undef HAVE_NEXT_ZONE + /* struct path used in struct nameidata */ #undef HAVE_PATH_IN_NAMEIDATA