More fixes to ensure we get good debug logs even if we're in the
process of destroying the stacks.  Threshold set fairly aggressively
at 80% of stack usage.

git-svn-id: https://outreach.scidac.gov/svn/spl/trunk@82 7e1ea52c-4ff2-0310-8f11-9dd32ca42a1c
commit 7fea96c04f
parent e5bbd245e3
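The diff below tightens the stack check: the soft limit drops from 90% to 80% of
THREAD_SIZE, and crossing it now routes through spl_debug_bug() with DL_SINGLE_CPU
so the log is dumped from the current thread without yielding the CPU. As a rough
illustration of the threshold arithmetic only, here is a small standalone C sketch;
STACK_SIZE, check_stack_usage(), and the sample values are hypothetical stand-ins
for the kernel's THREAD_SIZE and CDEBUG_STACK(), not part of the SPL API.

#include <stdio.h>

/* Illustrative only: the kernel code uses THREAD_SIZE and CDEBUG_STACK(). */
#define STACK_SIZE      (16 * 1024)
#define SOFT_LIMIT      ((8 * STACK_SIZE) / 10)   /* 80%, as set in this commit */

/* Returns 1 when usage crosses the soft limit and sets a new high-water mark. */
static int check_stack_usage(unsigned long used, unsigned long *high_water)
{
        if (used > SOFT_LIMIT && used > *high_water) {
                *high_water = used;
                printf("Error exceeded maximum safe stack size (%lu/%lu)\n",
                       used, (unsigned long)STACK_SIZE);
                return 1;   /* the kernel macro would call spl_debug_bug() here */
        }
        return 0;
}

int main(void)
{
        unsigned long high_water = 0;

        check_stack_usage(12000, &high_water);  /* ~73%: below the 80% threshold */
        check_stack_usage(13500, &high_water);  /* ~82%: triggers the message    */
        return 0;
}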
@@ -64,6 +64,16 @@ extern unsigned int spl_debug_stack;
 #define SPL_DEFAULT_MIN_DELAY ((HZ + 1) / 2)
 #define SPL_DEFAULT_BACKOFF 2
 
+#define DL_NOTHREAD   0x0001 /* Do not create a new thread */
+#define DL_SINGLE_CPU 0x0002 /* Collect pages from this CPU */
+
+typedef struct dumplog_priv {
+        wait_queue_head_t dp_waitq;
+        pid_t dp_pid;
+        int dp_flags;
+        atomic_t dp_done;
+} dumplog_priv_t;
+
 typedef struct {
         unsigned long cdls_next;
         int cdls_count;
@@ -147,7 +157,7 @@ struct page_collection {
         int pc_want_daemon_pages;
 };
 
-#define SBUG() spl_debug_bug(__FILE__, __FUNCTION__, __LINE__);
+#define SBUG() spl_debug_bug(__FILE__, __FUNCTION__, __LINE__, 0);
 
 #ifdef __ia64__
 #define CDEBUG_STACK() (THREAD_SIZE - \
@@ -159,29 +169,24 @@ struct page_collection {
                         (THREAD_SIZE - 1)))
 # endif /* __ia64__ */
 
+/* DL_NOTHREAD and DL_SINGLE_CPU flags are passed to spl_debug_bug()
+ * because we have over run our stack and likely damaged at least one
+ * other unknown threads stack. We must finish generating the needed
+ * debug info within this thread context because once we yeild the CPU
+ * its very likely the system will crash.
+ */
 #define __CHECK_STACK(file, func, line) \
 do { \
         unsigned long _stack = CDEBUG_STACK(); \
-        unsigned long _soft_limit = (9 * THREAD_SIZE) / 10; \
+        unsigned long _soft_limit = (8 * THREAD_SIZE) / 10; \
 \
         if (unlikely(_stack > _soft_limit && _stack > spl_debug_stack)){\
                 spl_debug_stack = _stack; \
-                if (_stack <= THREAD_SIZE) { \
-                        spl_debug_msg(NULL, D_TRACE, D_WARNING, \
-                                      file, func, line, "Warning " \
-                                      "exceeded 90%% of maximum safe " \
-                                      "stack size (%lu/%lu)\n", \
-                                      _stack, THREAD_SIZE); \
-                        spl_debug_dumpstack(NULL); \
-                        spl_debug_dumplog(); \
-                } else { \
-                        spl_debug_msg(NULL, D_TRACE, D_WARNING, \
-                                      file, func, line, "Error " \
-                                      "exceeded maximum safe stack " \
-                                      "size (%lu/%lu)\n", \
-                                      _stack, THREAD_SIZE); \
-                        SBUG(); \
-                } \
+                spl_debug_msg(NULL, D_TRACE, D_WARNING, \
+                              file, func, line, "Error exceeded " \
+                              "maximum safe stack size (%lu/%lu)\n", \
+                              _stack, THREAD_SIZE); \
+                spl_debug_bug(file, func, line, DL_SINGLE_CPU); \
         } \
 } while (0)
 
@@ -213,7 +218,7 @@ do { \
                 spl_debug_msg(NULL, DEBUG_SUBSYSTEM, D_EMERG, \
                               __FILE__, __FUNCTION__, __LINE__, \
                               "ASSERTION(" #cond ") failed\n"); \
-                spl_debug_bug(__FILE__, __FUNCTION__, __LINE__); \
+                SBUG(); \
         } \
 } while (0)
 
@@ -226,7 +231,7 @@ do { \
                               __FILE__, __FUNCTION__, __LINE__, \
                               "ASSERTION(" #cond ") failed:" fmt, \
                               ## a); \
-                spl_debug_bug(__FILE__, __FUNCTION__, __LINE__) \
+                SBUG(); \
         } \
 } while (0)
 
@@ -242,7 +247,7 @@ do { \
                               __FILE__, __FUNCTION__, __LINE__, \
                               "VERIFY3(" FMT " " #OP " " FMT ")\n", \
                               CAST __left, CAST __right); \
-                spl_debug_bug(__FILE__, __FUNCTION__, __LINE__); \
+                SBUG(); \
         } \
 } while (0)
 
@@ -285,7 +290,6 @@ do { \
 #define CDEBUG_LIMIT(mask, format, a...) \
         __CDEBUG_LIMIT(DEBUG_SUBSYSTEM, mask, format, ## a)
 
-#define dprintf(fmt, a...) CDEBUG_LIMIT(D_INFO, fmt, ## a)
 #define CWARN(fmt, a...)  CDEBUG_LIMIT(D_WARNING, fmt, ## a)
 #define CERROR(fmt, a...) CDEBUG_LIMIT(D_ERROR, fmt, ## a)
 #define CEMERG(fmt, a...) CDEBUG_LIMIT(D_EMERG, fmt, ## a)
@@ -329,9 +333,9 @@ extern unsigned long spl_debug_get_subsys(void);
 extern int spl_debug_set_mb(int mb);
 extern int spl_debug_get_mb(void);
 
-extern int spl_debug_dumplog(void);
+extern int spl_debug_dumplog(int flags);
 extern void spl_debug_dumpstack(struct task_struct *tsk);
-extern void spl_debug_bug(char *file, const char *func, const int line);
+extern void spl_debug_bug(char *file, const char *func, const int line, int flags);
 
 extern int spl_debug_clear_buffer(void);
 extern int spl_debug_mark_buffer(char *text);
@@ -102,7 +102,7 @@ struct rw_semaphore trace_sem;
 atomic_t trace_tage_allocated = ATOMIC_INIT(0);
 
 static int panic_notifier(struct notifier_block *, unsigned long, void *);
-static int spl_debug_dump_all_pages(char *);
+static int spl_debug_dump_all_pages(dumplog_priv_t *dp, char *);
 static void trace_fini(void);
 
 
@@ -344,12 +344,6 @@ spl_debug_str2mask(unsigned long *mask, const char *str, int is_subsys)
         return 0;
 }
 
-typedef struct dumplog_priv {
-        wait_queue_head_t dp_waitq;
-        pid_t dp_pid;
-        atomic_t dp_flag;
-} dumplog_priv_t;
-
 static void
 spl_debug_dumplog_internal(dumplog_priv_t *dp)
 {
@@ -362,7 +356,7 @@ spl_debug_dumplog_internal(dumplog_priv_t *dp)
                  "%s.%ld.%ld", spl_debug_file_path,
                  get_seconds(), (long)dp->dp_pid);
         printk(KERN_ALERT "SPL: dumping log to %s\n", spl_debug_file_name);
-        spl_debug_dump_all_pages(spl_debug_file_name);
+        spl_debug_dump_all_pages(dp, spl_debug_file_name);
 
         current->journal_info = journal_info;
 }
@@ -373,29 +367,36 @@ spl_debug_dumplog_thread(void *arg)
         dumplog_priv_t *dp = (dumplog_priv_t *)arg;
 
         spl_debug_dumplog_internal(dp);
-        atomic_set(&dp->dp_flag, 1);
+        atomic_set(&dp->dp_done, 1);
         wake_up(&dp->dp_waitq);
         do_exit(0);
 
         return 0; /* Unreachable */
 }
 
+/* When flag is set do not use a new thread for the debug dump */
 int
-spl_debug_dumplog(void)
+spl_debug_dumplog(int flags)
 {
         struct task_struct *tsk;
         dumplog_priv_t dp;
 
         init_waitqueue_head(&dp.dp_waitq);
         dp.dp_pid = current->pid;
-        atomic_set(&dp.dp_flag, 0);
+        dp.dp_flags = flags;
+        atomic_set(&dp.dp_done, 0);
 
+        if (dp.dp_flags & DL_NOTHREAD) {
+                spl_debug_dumplog_internal(&dp);
+        } else {
+
         tsk = kthread_create(spl_debug_dumplog_thread,(void *)&dp,"spl_debug");
         if (tsk == NULL)
                 return -ENOMEM;
 
         wake_up_process(tsk);
-        wait_event(dp.dp_waitq, atomic_read(&dp.dp_flag));
+        wait_event(dp.dp_waitq, atomic_read(&dp.dp_done));
+        }
 
         return 0;
 }
@@ -849,7 +850,7 @@ EXPORT_SYMBOL(spl_debug_vmsg);
  * some arch, this will have to be implemented separately in each arch.
  */
 static void
-panic_collect_pages(struct page_collection *pc)
+collect_pages_from_single_cpu(struct page_collection *pc)
 {
         struct trace_cpu_data *tcd;
         int i, j;
@@ -876,12 +877,12 @@ collect_pages_on_cpu(void *info)
 }
 
 static void
-collect_pages(struct page_collection *pc)
+collect_pages(dumplog_priv_t *dp, struct page_collection *pc)
 {
         INIT_LIST_HEAD(&pc->pc_pages);
 
-        if (spl_panic_in_progress)
-                panic_collect_pages(pc);
+        if (spl_panic_in_progress || dp->dp_flags & DL_SINGLE_CPU)
+                collect_pages_from_single_cpu(pc);
         else
                 trace_call_on_all_cpus(collect_pages_on_cpu, pc);
 }
@@ -944,7 +945,7 @@ trace_filp_open (const char *name, int flags, int mode, int *err)
 #define trace_filp_poff(f) (&(f)->f_pos)
 
 static int
-spl_debug_dump_all_pages(char *filename)
+spl_debug_dump_all_pages(dumplog_priv_t *dp, char *filename)
 {
         struct page_collection pc;
         struct file *filp;
@@ -965,7 +966,7 @@ spl_debug_dump_all_pages(char *filename)
         }
 
         spin_lock_init(&pc.pc_lock);
-        collect_pages(&pc);
+        collect_pages(dp, &pc);
         if (list_empty(&pc.pc_pages)) {
                 rc = 0;
                 goto close;
@@ -1006,13 +1007,18 @@ spl_debug_dump_all_pages(char *filename)
 static void
 spl_debug_flush_pages(void)
 {
+        dumplog_priv_t dp;
         struct page_collection pc;
         struct trace_page *tage;
         struct trace_page *tmp;
 
         spin_lock_init(&pc.pc_lock);
+        init_waitqueue_head(&dp.dp_waitq);
+        dp.dp_pid = current->pid;
+        dp.dp_flags = 0;
+        atomic_set(&dp.dp_done, 0);
 
-        collect_pages(&pc);
+        collect_pages(&dp, &pc);
         list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
                 __ASSERT_TAGE_INVARIANT(tage);
                 list_del(&tage->linkage);
@@ -1109,7 +1115,7 @@ void spl_debug_dumpstack(struct task_struct *tsk)
 }
 EXPORT_SYMBOL(spl_debug_dumpstack);
 
-void spl_debug_bug(char *file, const char *func, const int line)
+void spl_debug_bug(char *file, const char *func, const int line, int flags)
 {
         spl_debug_catastrophe = 1;
         spl_debug_msg(NULL, 0, D_EMERG, file, func, line, "SBUG\n");
@@ -1124,7 +1130,7 @@ void spl_debug_bug(char *file, const char *func, const int line)
         spl_panic_in_progress = 1;
 
         spl_debug_dumpstack(NULL);
-        spl_debug_dumplog();
+        spl_debug_dumplog(flags);
 
         if (spl_debug_panic_on_bug)
                 panic("SBUG");
@@ -1168,7 +1174,7 @@ panic_notifier(struct notifier_block *self,
                 while (current->lock_depth >= 0)
                         unlock_kernel();
 
-                spl_debug_dumplog_internal((void *)(long)current->pid);
+                spl_debug_dumplog(DL_NOTHREAD | DL_SINGLE_CPU);
         }
 
         return 0;
@@ -179,7 +179,7 @@ proc_dump_kernel(struct ctl_table *table, int write, struct file *filp,
         ENTRY;
 
         if (write) {
-                spl_debug_dumplog();
+                spl_debug_dumplog(0);
                 *ppos += *lenp;
         } else {
                 *lenp = 0;
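The other half of the change, visible in spl_debug_dumplog() above, is the DL_NOTHREAD
path: when the caller cannot afford to yield the CPU (panic, blown stack) the dump runs
inline, otherwise a helper thread does the work and the caller blocks on an atomic
completion flag. A minimal userspace sketch of that hand-off pattern follows, using
pthreads and a condition variable in place of kthread_create()/wait_event(); the names
dump_request, dump_worker, and dump_log are hypothetical and only illustrate the
synchronization, not the SPL interfaces.

#include <pthread.h>
#include <stdio.h>

/* Hypothetical stand-in for dumplog_priv_t: a request plus a done flag. */
typedef struct dump_request {
        pthread_mutex_t lock;
        pthread_cond_t  done_cv;
        int             done;
        int             nothread;   /* mirrors DL_NOTHREAD */
} dump_request_t;

static void do_dump(dump_request_t *req)
{
        printf("dumping debug log (inline=%d)\n", req->nothread);
}

static void *dump_worker(void *arg)
{
        dump_request_t *req = arg;

        do_dump(req);                       /* like spl_debug_dumplog_internal() */
        pthread_mutex_lock(&req->lock);
        req->done = 1;                      /* like atomic_set(&dp->dp_done, 1)  */
        pthread_cond_signal(&req->done_cv); /* like wake_up(&dp->dp_waitq)       */
        pthread_mutex_unlock(&req->lock);
        return NULL;
}

static int dump_log(int nothread)
{
        dump_request_t req = { PTHREAD_MUTEX_INITIALIZER,
                               PTHREAD_COND_INITIALIZER, 0, nothread };
        pthread_t tid;

        if (nothread) {                     /* DL_NOTHREAD: stay on this stack */
                do_dump(&req);
                return 0;
        }

        if (pthread_create(&tid, NULL, dump_worker, &req) != 0)
                return -1;                  /* analogous to returning -ENOMEM */

        pthread_mutex_lock(&req.lock);      /* like wait_event(dp.dp_waitq, ...) */
        while (!req.done)
                pthread_cond_wait(&req.done_cv, &req.lock);
        pthread_mutex_unlock(&req.lock);
        pthread_join(tid, NULL);
        return 0;
}

int main(void)
{
        dump_log(1);   /* inline, e.g. from a panic path */
        dump_log(0);   /* via helper thread */
        return 0;
}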