/* * hwlat_detector.c - A simple Hardware Latency detector. * * Use this module to detect large system latencies induced by the behavior of * certain underlying system hardware or firmware, independent of Linux itself. * The code was developed originally to detect the presence of SMIs on Intel * and AMD systems, although there is no dependency upon x86 herein. * * The classical example usage of this module is in detecting the presence of * SMIs or System Management Interrupts on Intel and AMD systems. An SMI is a * somewhat special form of hardware interrupt spawned from earlier CPU debug * modes in which the (BIOS/EFI/etc.) firmware arranges for the South Bridge * LPC (or other device) to generate a special interrupt under certain * circumstances, for example, upon expiration of a special SMI timer device, * due to certain external thermal readings, on certain I/O address accesses, * and other situations. An SMI hits a special CPU pin, triggers a special * SMI mode (complete with special memory map), and the OS is unaware. * * Although certain hardware-inducing latencies are necessary (for example, * a modern system often requires an SMI handler for correct thermal control * and remote management) they can wreak havoc upon any OS-level performance * guarantees toward low-latency, especially when the OS is not even made * aware of the presence of these interrupts. For this reason, we need a * somewhat brute force mechanism to detect these interrupts. In this case, * we do it by hogging all of the CPU(s) for configurable timer intervals, * sampling the built-in CPU timer, looking for discontiguous readings. * * WARNING: This implementation necessarily introduces latencies. Therefore, * you should NEVER use this module in a production environment * requiring any kind of low-latency performance guarantee(s). * * Copyright (C) 2008-2009 Jon Masters, Red Hat, Inc. 
<jcm@redhat.com>
 *
 * Includes useful feedback from Clark Williams <clark@redhat.com>
 *
 * This file is licensed under the terms of the GNU General Public
 * License version 2. This program is licensed "as is" without any
 * warranty of any kind, whether express or implied.
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/ring_buffer.h>
#include <linux/stop_machine.h>
#include <linux/time.h>
#include <linux/hrtimer.h>
#include <linux/kthread.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/uaccess.h>
#include <linux/version.h>
#include <linux/delay.h>

/* Default ring buffer size; it is the initial value of buf_size below */
#define BUF_SIZE_DEFAULT	262144UL		/* 8K*(sizeof(entry)) */
#define BUF_FLAGS		(RB_FL_OVERWRITE)	/* no block on full */
/* Scratch buffer size used when formatting or parsing a u64 as text */
#define U64STR_SIZE		22			/* 20 digits max */

#define VERSION			"1.0.0"
#define BANNER			"hwlat_detector: "	/* printk prefix */
#define DRVNAME			"hwlat_detector"	/* kthread + debugfs dir name */

/* Defaults for the tunables kept in the global state, in microseconds */
#define DEFAULT_SAMPLE_WINDOW	1000000			/* 1s */
#define DEFAULT_SAMPLE_WIDTH	500000			/* 0.5s */
#define DEFAULT_LAT_THRESHOLD	10			/* 10us */

/* Module metadata */

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jon Masters <jcm@redhat.com>");
MODULE_DESCRIPTION("A simple hardware latency detector");
MODULE_VERSION(VERSION);

/* Module parameters */

static int debug;
static int enabled;
static int threshold;

/* permission 0: the parameters are load-time only, not exported via sysfs */
module_param(debug, int, 0);			/* enable debug */
module_param(enabled, int, 0);			/* enable detector */
module_param(threshold, int, 0);		/* latency threshold */

/* Buffering and sampling */

static struct ring_buffer *ring_buffer;		/* sample buffer */
static DEFINE_MUTEX(ring_buffer_mutex);		/* lock changes */
static unsigned long buf_size = BUF_SIZE_DEFAULT;
static struct task_struct *kthread;		/* sampling thread */

/* DebugFS filesystem entries */

static struct dentry *debug_dir;		/* debugfs directory */
static struct dentry *debug_max;		/* maximum TSC delta */
static struct dentry *debug_count;		/* total detect count */
static struct dentry *debug_sample_width;	/* sample width us */
static struct dentry *debug_sample_window;	/* sample window us */
static struct dentry *debug_sample;		/* raw samples us */
static struct dentry *debug_threshold;		/* threshold us */
static struct dentry *debug_enable;		/* enable/disable */

/* Individual samples and global state */

struct sample;					/* latency sample */
struct data;					/* Global state */

/* Sampling functions */
static int __buffer_add_sample(struct sample *sample);
static struct sample *buffer_get_sample(struct sample *sample);
static int get_sample(void *unused);

/* Threading and state */
static int kthread_fn(void *unused);
static int start_kthread(void);
static int stop_kthread(void);
static void __reset_stats(void);
static int init_stats(void);

/* Debugfs interface */
static ssize_t simple_data_read(struct file *filp, char __user *ubuf,
				size_t cnt, loff_t *ppos, const u64 *entry);
static ssize_t simple_data_write(struct file *filp, const char __user *ubuf,
				 size_t cnt, loff_t *ppos, u64 *entry);
static int debug_sample_fopen(struct inode *inode, struct file *filp);
static ssize_t debug_sample_fread(struct file *filp, char __user *ubuf,
				  size_t cnt, loff_t *ppos);
static int debug_sample_release(struct inode *inode, struct file *filp);
static int debug_enable_fopen(struct inode *inode, struct file *filp);
static ssize_t debug_enable_fread(struct file *filp, char __user *ubuf,
				  size_t cnt, loff_t *ppos);
/* NOTE(review): parameter names here differ from the definition's */
static ssize_t debug_enable_fwrite(struct file *file,
				   const char __user *user_buffer,
				   size_t user_size, loff_t *offset);

/* Initialization functions */
static int init_debugfs(void);
static void free_debugfs(void);
static int detector_init(void);
static void detector_exit(void);

/* Individual latency samples are stored here when detected and packed into
 * the ring_buffer circular buffer, where they are overwritten when
 * more than buf_size/sizeof(sample) samples are received.
 *
 * One record is written per sampling pass whose worst observed gap exceeded
 * the configured threshold; the "sample" debugfs file prints timestamp and
 * duration from it.
 */
struct sample {
	u64		seqnum;		/* unique sequence */
	u64		duration;	/* ktime delta */
	struct timespec	timestamp;	/* wall time */
};

/* keep the global state somewhere. Mostly used under stop_machine.
*/ static struct data { struct mutex lock; /* protect changes */ u64 count; /* total since reset */ u64 max_sample; /* max hardware latency */ u64 threshold; /* sample threshold level */ u64 sample_window; /* total sampling window (on+off) */ u64 sample_width; /* active sampling portion of window */ atomic_t sample_open; /* whether the sample file is open */ wait_queue_head_t wq; /* waitqeue for new sample values */ } data; /** * __buffer_add_sample - add a new latency sample recording to the ring buffer * @sample: The new latency sample value * * This receives a new latency sample and records it in a global ring buffer. * No additional locking is used in this case - suited for stop_machine use. */ static int __buffer_add_sample(struct sample *sample) { return ring_buffer_write(ring_buffer, sizeof(struct sample), sample); } /** * buffer_get_sample - remove a hardware latency sample from the ring buffer * @sample: Pre-allocated storage for the sample * * This retrieves a hardware latency sample from the global circular buffer */ static struct sample *buffer_get_sample(struct sample *sample) { struct ring_buffer_event *e = NULL; struct sample *s = NULL; unsigned int cpu = 0; if (!sample) return NULL; /* ring_buffers are per-cpu but we just want any value */ /* so we'll start with this cpu and try others if not */ /* Steven is planning to add a generic mechanism */ mutex_lock(&ring_buffer_mutex); e = ring_buffer_consume(ring_buffer, smp_processor_id(), NULL); if (!e) { for_each_online_cpu(cpu) { e = ring_buffer_consume(ring_buffer, cpu, NULL); if (e) break; } } if (e) { s = ring_buffer_event_data(e); memcpy(sample, s, sizeof(struct sample)); } else sample = NULL; mutex_unlock(&ring_buffer_mutex); return sample; } /** * get_sample - sample the CPU TSC and look for likely hardware latencies * @unused: This is not used but is a part of the stop_machine API * * Used to repeatedly capture the CPU TSC (or similar), looking for potential * hardware-induced latency. 
Called under stop_machine, with data.lock held.
 */
static int get_sample(void *unused)
{
	ktime_t begin, before, after;
	s64 gap, elapsed = 0;
	u64 worst = 0;

	begin = ktime_get();	/* marks the start of this sampling pass */

	do {
		/* two back-to-back clock reads: any gap is time we lost */
		before = ktime_get();
		after = ktime_get();

		elapsed = ktime_to_us(ktime_sub(after, begin));	/* pass so far */
		gap = ktime_to_us(ktime_sub(after, before));	/* this reading */

		/* a negative gap means the clock itself cannot be trusted */
		if (gap < 0) {
			printk(KERN_ERR BANNER "time running backwards\n");
			return 1;
		}

		/* remember only the largest gap of the pass */
		if (gap > worst)
			worst = gap;

	} while (elapsed <= data.sample_width);

	/* Anything above the threshold counts as a hardware latency */
	if (worst > data.threshold) {
		struct sample rec;

		data.count++;
		rec.seqnum = data.count;
		rec.duration = worst;
		rec.timestamp = CURRENT_TIME;
		__buffer_add_sample(&rec);

		/* Track the largest latency recorded since the last reset */
		if (worst > data.max_sample)
			data.max_sample = worst;
	}

	return 0;
}

/*
 * kthread_fn - The CPU time sampling/hardware latency detection kernel thread
 * @unused: A required part of the kthread API.
 *
 * Used to periodically sample the CPU TSC via a call to get_sample. We
 * use stop_machine, which does (intentionally) introduce latency since we
 * need to ensure nothing else might be running (and thus pre-empting).
 * Obviously this should never be used in production environments.
 *
 * stop_machine will schedule us typically only on CPU0 which is fine for
 * almost every real-world hardware latency situation - but we might later
 * generalize this if we find there are any actual systems with alternate
 * SMI delivery or other non CPU0 hardware latencies.
*/ static int kthread_fn(void *unused) { int err = 0; u64 interval = 0; while (!kthread_should_stop()) { mutex_lock(&data.lock); err = stop_machine(get_sample, unused, 0); if (err) { /* Houston, we have a problem */ mutex_unlock(&data.lock); goto err_out; } wake_up(&data.wq); /* wake up reader(s) */ interval = data.sample_window - data.sample_width; do_div(interval, USEC_PER_MSEC); /* modifies interval value */ mutex_unlock(&data.lock); if (msleep_interruptible(interval)) goto out; } goto out; err_out: printk(KERN_ERR BANNER "could not call stop_machine, disabling\n"); enabled = 0; out: return err; } /** * start_kthread - Kick off the hardware latency sampling/detector kthread * * This starts a kernel thread that will sit and sample the CPU timestamp * counter (TSC or similar) and look for potential hardware latencies. */ static int start_kthread(void) { kthread = kthread_run(kthread_fn, NULL, DRVNAME); if (IS_ERR(kthread)) { printk(KERN_ERR BANNER "could not start sampling thread\n"); enabled = 0; return -ENOMEM; } return 0; } /** * stop_kthread - Inform the hardware latency samping/detector kthread to stop * * This kicks the running hardware latency sampling/detector kernel thread and * tells it to stop sampling now. Use this on unload and at system shutdown. */ static int stop_kthread(void) { int ret; ret = kthread_stop(kthread); return ret; } /** * __reset_stats - Reset statistics for the hardware latency detector * * We use data to store various statistics and global state. We call this * function in order to reset those when "enable" is toggled on or off, and * also at initialization. Should be called with data.lock held. */ static void __reset_stats(void) { data.count = 0; data.max_sample = 0; ring_buffer_reset(ring_buffer); /* flush out old sample entries */ } /** * init_stats - Setup global state statistics for the hardware latency detector * * We use data to store various statistics and global state. 
We also use * a global ring buffer (ring_buffer) to keep raw samples of detected hardware * induced system latencies. This function initializes these structures and * allocates the global ring buffer also. */ static int init_stats(void) { int ret = -ENOMEM; mutex_init(&data.lock); init_waitqueue_head(&data.wq); atomic_set(&data.sample_open, 0); ring_buffer = ring_buffer_alloc(buf_size, BUF_FLAGS); if (WARN(!ring_buffer, KERN_ERR BANNER "failed to allocate ring buffer!\n")) goto out; __reset_stats(); data.threshold = DEFAULT_LAT_THRESHOLD; /* threshold us */ data.sample_window = DEFAULT_SAMPLE_WINDOW; /* window us */ data.sample_width = DEFAULT_SAMPLE_WIDTH; /* width us */ ret = 0; out: return ret; } /* * simple_data_read - Wrapper read function for global state debugfs entries * @filp: The active open file structure for the debugfs "file" * @ubuf: The userspace provided buffer to read value into * @cnt: The maximum number of bytes to read * @ppos: The current "file" position * @entry: The entry to read from * * This function provides a generic read implementation for the global state * "data" structure debugfs filesystem entries. It would be nice to use * simple_attr_read directly, but we need to make sure that the data.lock * spinlock is held during the actual read (even though we likely won't ever * actually race here as the updater runs under a stop_machine context). 
*/ static ssize_t simple_data_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos, const u64 *entry) { char buf[U64STR_SIZE]; u64 val = 0; int len = 0; memset(buf, 0, sizeof(buf)); if (!entry) return -EFAULT; mutex_lock(&data.lock); val = *entry; mutex_unlock(&data.lock); len = snprintf(buf, sizeof(buf), "%llu\n", (unsigned long long)val); return simple_read_from_buffer(ubuf, cnt, ppos, buf, len); } /* * simple_data_write - Wrapper write function for global state debugfs entries * @filp: The active open file structure for the debugfs "file" * @ubuf: The userspace provided buffer to write value from * @cnt: The maximum number of bytes to write * @ppos: The current "file" position * @entry: The entry to write to * * This function provides a generic write implementation for the global state * "data" structure debugfs filesystem entries. It would be nice to use * simple_attr_write directly, but we need to make sure that the data.lock * spinlock is held during the actual write (even though we likely won't ever * actually race here as the updater runs under a stop_machine context). */ static ssize_t simple_data_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos, u64 *entry) { char buf[U64STR_SIZE]; int csize = min(cnt, sizeof(buf)); u64 val = 0; int err = 0; memset(buf, '\0', sizeof(buf)); if (copy_from_user(buf, ubuf, csize)) return -EFAULT; buf[U64STR_SIZE-1] = '\0'; /* just in case */ err = strict_strtoull(buf, 10, &val); if (err) return -EINVAL; mutex_lock(&data.lock); *entry = val; mutex_unlock(&data.lock); return csize; } /** * debug_count_fopen - Open function for "count" debugfs entry * @inode: The in-kernel inode representation of the debugfs "file" * @filp: The active open file structure for the debugfs "file" * * This function provides an open implementation for the "count" debugfs * interface to the hardware latency detector. 
*/ static int debug_count_fopen(struct inode *inode, struct file *filp) { return 0; } /** * debug_count_fread - Read function for "count" debugfs entry * @filp: The active open file structure for the debugfs "file" * @ubuf: The userspace provided buffer to read value into * @cnt: The maximum number of bytes to read * @ppos: The current "file" position * * This function provides a read implementation for the "count" debugfs * interface to the hardware latency detector. Can be used to read the * number of latency readings exceeding the configured threshold since * the detector was last reset (e.g. by writing a zero into "count"). */ static ssize_t debug_count_fread(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { return simple_data_read(filp, ubuf, cnt, ppos, &data.count); } /** * debug_count_fwrite - Write function for "count" debugfs entry * @filp: The active open file structure for the debugfs "file" * @ubuf: The user buffer that contains the value to write * @cnt: The maximum number of bytes to write to "file" * @ppos: The current position in the debugfs "file" * * This function provides a write implementation for the "count" debugfs * interface to the hardware latency detector. Can be used to write a * desired value, especially to zero the total count. */ static ssize_t debug_count_fwrite(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { return simple_data_write(filp, ubuf, cnt, ppos, &data.count); } /** * debug_enable_fopen - Dummy open function for "enable" debugfs interface * @inode: The in-kernel inode representation of the debugfs "file" * @filp: The active open file structure for the debugfs "file" * * This function provides an open implementation for the "enable" debugfs * interface to the hardware latency detector. 
*/
static int debug_enable_fopen(struct inode *inode, struct file *filp)
{
	return 0;
}

/**
 * debug_enable_fread - Read function for "enable" debugfs interface
 * @filp: The active open file structure for the debugfs "file"
 * @ubuf: The userspace provided buffer to read value into
 * @cnt: The maximum number of bytes to read
 * @ppos: The current "file" position
 *
 * This function provides a read implementation for the "enable" debugfs
 * interface to the hardware latency detector. Can be used to determine
 * whether the detector is currently enabled ("0\n" or "1\n" returned).
 *
 * The two bytes are handed to simple_read_from_buffer(), so short user
 * buffers and non-zero file positions are served like any other file
 * instead of being reported as end-of-file.
 */
static ssize_t debug_enable_fread(struct file *filp, char __user *ubuf,
				  size_t cnt, loff_t *ppos)
{
	char buf[2];

	buf[0] = enabled ? '1' : '0';
	buf[1] = '\n';

	return simple_read_from_buffer(ubuf, cnt, ppos, buf, sizeof(buf));
}

/**
 * debug_enable_fwrite - Write function for "enable" debugfs interface
 * @filp: The active open file structure for the debugfs "file"
 * @ubuf: The user buffer that contains the value to write
 * @cnt: The maximum number of bytes to write to "file"
 * @ppos: The current position in the debugfs "file"
 *
 * This function provides a write implementation for the "enable" debugfs
 * interface to the hardware latency detector. Can be used to enable or
 * disable the detector, which will have the side-effect of possibly
 * also resetting the global stats and kicking off the measuring
 * kthread (on an enable) or the converse (upon a disable).
*/ static ssize_t debug_enable_fwrite(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { char buf[4]; int csize = min(cnt, sizeof(buf)); long val = 0; int err = 0; memset(buf, '\0', sizeof(buf)); if (copy_from_user(buf, ubuf, csize)) return -EFAULT; buf[sizeof(buf)-1] = '\0'; /* just in case */ err = strict_strtoul(buf, 10, &val); if (0 != err) return -EINVAL; if (val) { if (enabled) goto unlock; enabled = 1; __reset_stats(); if (start_kthread()) return -EFAULT; } else { if (!enabled) goto unlock; enabled = 0; stop_kthread(); wake_up(&data.wq); /* reader(s) should return */ } unlock: return csize; } /** * debug_max_fopen - Open function for "max" debugfs entry * @inode: The in-kernel inode representation of the debugfs "file" * @filp: The active open file structure for the debugfs "file" * * This function provides an open implementation for the "max" debugfs * interface to the hardware latency detector. */ static int debug_max_fopen(struct inode *inode, struct file *filp) { return 0; } /** * debug_max_fread - Read function for "max" debugfs entry * @filp: The active open file structure for the debugfs "file" * @ubuf: The userspace provided buffer to read value into * @cnt: The maximum number of bytes to read * @ppos: The current "file" position * * This function provides a read implementation for the "max" debugfs * interface to the hardware latency detector. Can be used to determine * the maximum latency value observed since it was last reset. 
*/
static ssize_t debug_max_fread(struct file *filp, char __user *ubuf,
			       size_t cnt, loff_t *ppos)
{
	const u64 *peak = &data.max_sample;

	return simple_data_read(filp, ubuf, cnt, ppos, peak);
}

/**
 * debug_max_fwrite - Write function for "max" debugfs entry
 * @filp: The active open file structure for the debugfs "file"
 * @ubuf: The user buffer that contains the value to write
 * @cnt: The maximum number of bytes to write to "file"
 * @ppos: The current position in the debugfs "file"
 *
 * Stores a user supplied value into data.max_sample through
 * simple_data_write(). Use it to reset the recorded maximum or to seed it
 * with some other value; a later measurement that exceeds the stored value
 * replaces it.
 */
static ssize_t debug_max_fwrite(struct file *filp,
				const char __user *ubuf,
				size_t cnt,
				loff_t *ppos)
{
	u64 *peak = &data.max_sample;

	return simple_data_write(filp, ubuf, cnt, ppos, peak);
}

/**
 * debug_sample_fopen - An open function for "sample" debugfs interface
 * @inode: The in-kernel inode representation of this debugfs "file"
 * @filp: The active open file structure for the debugfs "file"
 *
 * This function handles opening the "sample" file within the hardware
 * latency detector debugfs directory interface. This file is used to read
 * raw samples from the global ring_buffer and allows the user to see a
 * running latency history. Can be opened blocking or non-blocking,
 * affecting whether it behaves as a buffer read pipe, or does not.
 * Implements simple locking to prevent multiple simultaneous use.
*/ static int debug_sample_fopen(struct inode *inode, struct file *filp) { if (!atomic_add_unless(&data.sample_open, 1, 1)) return -EBUSY; else return 0; } /** * debug_sample_fread - A read function for "sample" debugfs interface * @filp: The active open file structure for the debugfs "file" * @ubuf: The user buffer that will contain the samples read * @cnt: The maximum bytes to read from the debugfs "file" * @ppos: The current position in the debugfs "file" * * This function handles reading from the "sample" file within the hardware * latency detector debugfs directory interface. This file is used to read * raw samples from the global ring_buffer and allows the user to see a * running latency history. By default this will block pending a new * value written into the sample buffer, unless there are already a * number of value(s) waiting in the buffer, or the sample file was * previously opened in a non-blocking mode of operation. */ static ssize_t debug_sample_fread(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { int len = 0; char buf[64]; struct sample *sample = NULL; if (!enabled) return 0; sample = kzalloc(sizeof(struct sample), GFP_KERNEL); if (!sample) return -ENOMEM; while (!buffer_get_sample(sample)) { DEFINE_WAIT(wait); if (filp->f_flags & O_NONBLOCK) { len = -EAGAIN; goto out; } prepare_to_wait(&data.wq, &wait, TASK_INTERRUPTIBLE); schedule(); finish_wait(&data.wq, &wait); if (signal_pending(current)) { len = -EINTR; goto out; } if (!enabled) { /* enable was toggled */ len = 0; goto out; } } len = snprintf(buf, sizeof(buf), "%010lu.%010lu\t%llu\n", sample->timestamp.tv_sec, sample->timestamp.tv_nsec, sample->duration); /* handling partial reads is more trouble than it's worth */ if (len > cnt) goto out; if (copy_to_user(ubuf, buf, len)) len = -EFAULT; out: kfree(sample); return len; } /** * debug_sample_release - Release function for "sample" debugfs interface * @inode: The in-kernel inode represenation of the debugfs "file" * @filp: The 
active open file structure for the debugfs "file" * * This function completes the close of the debugfs interface "sample" file. * Frees the sample_open "lock" so that other users may open the interface. */ static int debug_sample_release(struct inode *inode, struct file *filp) { atomic_dec(&data.sample_open); return 0; } /** * debug_threshold_fopen - Open function for "threshold" debugfs entry * @inode: The in-kernel inode representation of the debugfs "file" * @filp: The active open file structure for the debugfs "file" * * This function provides an open implementation for the "threshold" debugfs * interface to the hardware latency detector. */ static int debug_threshold_fopen(struct inode *inode, struct file *filp) { return 0; } /** * debug_threshold_fread - Read function for "threshold" debugfs entry * @filp: The active open file structure for the debugfs "file" * @ubuf: The userspace provided buffer to read value into * @cnt: The maximum number of bytes to read * @ppos: The current "file" position * * This function provides a read implementation for the "threshold" debugfs * interface to the hardware latency detector. It can be used to determine * the current threshold level at which a latency will be recorded in the * global ring buffer, typically on the order of 10us. */ static ssize_t debug_threshold_fread(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { return simple_data_read(filp, ubuf, cnt, ppos, &data.threshold); } /** * debug_threshold_fwrite - Write function for "threshold" debugfs entry * @filp: The active open file structure for the debugfs "file" * @ubuf: The user buffer that contains the value to write * @cnt: The maximum number of bytes to write to "file" * @ppos: The current position in the debugfs "file" * * This function provides a write implementation for the "threshold" debugfs * interface to the hardware latency detector. 
It can be used to configure * the threshold level at which any subsequently detected latencies will * be recorded into the global ring buffer. */ static ssize_t debug_threshold_fwrite(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { int ret; ret = simple_data_write(filp, ubuf, cnt, ppos, &data.threshold); if (enabled) wake_up_process(kthread); return ret; } /** * debug_width_fopen - Open function for "width" debugfs entry * @inode: The in-kernel inode representation of the debugfs "file" * @filp: The active open file structure for the debugfs "file" * * This function provides an open implementation for the "width" debugfs * interface to the hardware latency detector. */ static int debug_width_fopen(struct inode *inode, struct file *filp) { return 0; } /** * debug_width_fread - Read function for "width" debugfs entry * @filp: The active open file structure for the debugfs "file" * @ubuf: The userspace provided buffer to read value into * @cnt: The maximum number of bytes to read * @ppos: The current "file" position * * This function provides a read implementation for the "width" debugfs * interface to the hardware latency detector. It can be used to determine * for how many us of the total window us we will actively sample for any * hardware-induced latecy periods. Obviously, it is not possible to * sample constantly and have the system respond to a sample reader, or, * worse, without having the system appear to have gone out to lunch. 
*/ static ssize_t debug_width_fread(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { return simple_data_read(filp, ubuf, cnt, ppos, &data.sample_width); } /** * debug_width_fwrite - Write function for "width" debugfs entry * @filp: The active open file structure for the debugfs "file" * @ubuf: The user buffer that contains the value to write * @cnt: The maximum number of bytes to write to "file" * @ppos: The current position in the debugfs "file" * * This function provides a write implementation for the "width" debugfs * interface to the hardware latency detector. It can be used to configure * for how many us of the total window us we will actively sample for any * hardware-induced latency periods. Obviously, it is not possible to * sample constantly and have the system respond to a sample reader, or, * worse, without having the system appear to have gone out to lunch. It * is enforced that width is less that the total window size. */ static ssize_t debug_width_fwrite(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { char buf[U64STR_SIZE]; int csize = min(cnt, sizeof(buf)); u64 val = 0; int err = 0; memset(buf, '\0', sizeof(buf)); if (copy_from_user(buf, ubuf, csize)) return -EFAULT; buf[U64STR_SIZE-1] = '\0'; /* just in case */ err = strict_strtoull(buf, 10, &val); if (0 != err) return -EINVAL; mutex_lock(&data.lock); if (val < data.sample_window) data.sample_width = val; else { mutex_unlock(&data.lock); return -EINVAL; } mutex_unlock(&data.lock); if (enabled) wake_up_process(kthread); return csize; } /** * debug_window_fopen - Open function for "window" debugfs entry * @inode: The in-kernel inode representation of the debugfs "file" * @filp: The active open file structure for the debugfs "file" * * This function provides an open implementation for the "window" debugfs * interface to the hardware latency detector. The window is the total time * in us that will be considered one sample period. 
Conceptually, windows * occur back-to-back and contain a sample width period during which * actual sampling occurs. */ static int debug_window_fopen(struct inode *inode, struct file *filp) { return 0; } /** * debug_window_fread - Read function for "window" debugfs entry * @filp: The active open file structure for the debugfs "file" * @ubuf: The userspace provided buffer to read value into * @cnt: The maximum number of bytes to read * @ppos: The current "file" position * * This function provides a read implementation for the "window" debugfs * interface to the hardware latency detector. The window is the total time * in us that will be considered one sample period. Conceptually, windows * occur back-to-back and contain a sample width period during which * actual sampling occurs. Can be used to read the total window size. */ static ssize_t debug_window_fread(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { return simple_data_read(filp, ubuf, cnt, ppos, &data.sample_window); } /** * debug_window_fwrite - Write function for "window" debugfs entry * @filp: The active open file structure for the debugfs "file" * @ubuf: The user buffer that contains the value to write * @cnt: The maximum number of bytes to write to "file" * @ppos: The current position in the debugfs "file" * * This function provides a write implementation for the "window" debufds * interface to the hardware latency detetector. The window is the total time * in us that will be considered one sample period. Conceptually, windows * occur back-to-back and contain a sample width period during which * actual sampling occurs. Can be used to write a new total window size. It * is enfoced that any value written must be greater than the sample width * size, or an error results. 
*/ static ssize_t debug_window_fwrite(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { char buf[U64STR_SIZE]; int csize = min(cnt, sizeof(buf)); u64 val = 0; int err = 0; memset(buf, '\0', sizeof(buf)); if (copy_from_user(buf, ubuf, csize)) return -EFAULT; buf[U64STR_SIZE-1] = '\0'; /* just in case */ err = strict_strtoull(buf, 10, &val); if (0 != err) return -EINVAL; mutex_lock(&data.lock); if (data.sample_width < val) data.sample_window = val; else { mutex_unlock(&data.lock); return -EINVAL; } mutex_unlock(&data.lock); return csize; } /* * Function pointers for the "count" debugfs file operations */ static const struct file_operations count_fops = { .open = debug_count_fopen, .read = debug_count_fread, .write = debug_count_fwrite, .owner = THIS_MODULE, }; /* * Function pointers for the "enable" debugfs file operations */ static const struct file_operations enable_fops = { .open = debug_enable_fopen, .read = debug_enable_fread, .write = debug_enable_fwrite, .owner = THIS_MODULE, }; /* * Function pointers for the "max" debugfs file operations */ static const struct file_operations max_fops = { .open = debug_max_fopen, .read = debug_max_fread, .write = debug_max_fwrite, .owner = THIS_MODULE, }; /* * Function pointers for the "sample" debugfs file operations */ static const struct file_operations sample_fops = { .open = debug_sample_fopen, .read = debug_sample_fread, .release = debug_sample_release, .owner = THIS_MODULE, }; /* * Function pointers for the "threshold" debugfs file operations */ static const struct file_operations threshold_fops = { .open = debug_threshold_fopen, .read = debug_threshold_fread, .write = debug_threshold_fwrite, .owner = THIS_MODULE, }; /* * Function pointers for the "width" debugfs file operations */ static const struct file_operations width_fops = { .open = debug_width_fopen, .read = debug_width_fread, .write = debug_width_fwrite, .owner = THIS_MODULE, }; /* * Function pointers for the "window" debugfs file 
operations */ static const struct file_operations window_fops = { .open = debug_window_fopen, .read = debug_window_fread, .write = debug_window_fwrite, .owner = THIS_MODULE, }; /** * init_debugfs - A function to initialize the debugfs interface files * * This function creates entries in debugfs for "hwlat_detector", including * files to read values from the detector, current samples, and the * maximum sample that has been captured since the hardware latency * dectector was started. */ static int init_debugfs(void) { int ret = -ENOMEM; debug_dir = debugfs_create_dir(DRVNAME, NULL); if (!debug_dir) goto err_debug_dir; debug_sample = debugfs_create_file("sample", 0444, debug_dir, NULL, &sample_fops); if (!debug_sample) goto err_sample; debug_count = debugfs_create_file("count", 0444, debug_dir, NULL, &count_fops); if (!debug_count) goto err_count; debug_max = debugfs_create_file("max", 0444, debug_dir, NULL, &max_fops); if (!debug_max) goto err_max; debug_sample_window = debugfs_create_file("window", 0644, debug_dir, NULL, &window_fops); if (!debug_sample_window) goto err_window; debug_sample_width = debugfs_create_file("width", 0644, debug_dir, NULL, &width_fops); if (!debug_sample_width) goto err_width; debug_threshold = debugfs_create_file("threshold", 0644, debug_dir, NULL, &threshold_fops); if (!debug_threshold) goto err_threshold; debug_enable = debugfs_create_file("enable", 0644, debug_dir, &enabled, &enable_fops); if (!debug_enable) goto err_enable; else { ret = 0; goto out; } err_enable: debugfs_remove(debug_threshold); err_threshold: debugfs_remove(debug_sample_width); err_width: debugfs_remove(debug_sample_window); err_window: debugfs_remove(debug_max); err_max: debugfs_remove(debug_count); err_count: debugfs_remove(debug_sample); err_sample: debugfs_remove(debug_dir); err_debug_dir: out: return ret; } /** * free_debugfs - A function to cleanup the debugfs file interface */ static void free_debugfs(void) { /* could also use a debugfs_remove_recursive */ 
debugfs_remove(debug_enable); debugfs_remove(debug_threshold); debugfs_remove(debug_sample_width); debugfs_remove(debug_sample_window); debugfs_remove(debug_max); debugfs_remove(debug_count); debugfs_remove(debug_sample); debugfs_remove(debug_dir); } /** * detector_init - Standard module initialization code */ static int detector_init(void) { int ret = -ENOMEM; printk(KERN_INFO BANNER "version %s\n", VERSION); ret = init_stats(); if (0 != ret) goto out; ret = init_debugfs(); if (0 != ret) goto err_stats; if (enabled) ret = start_kthread(); goto out; err_stats: ring_buffer_free(ring_buffer); out: return ret; } /** * detector_exit - Standard module cleanup code */ static void detector_exit(void) { if (enabled) { enabled = 0; stop_kthread(); } free_debugfs(); ring_buffer_free(ring_buffer); /* free up the ring buffer */ } module_init(detector_init); module_exit(detector_exit);