/*
 * hwlat_detector.c - A simple Hardware Latency detector.
 *
 * Use this module to detect large system latencies induced by the behavior of
 * certain underlying system hardware or firmware, independent of Linux itself.
 * The code was developed originally to detect the presence of SMIs on Intel
 * and AMD systems, although there is no dependency upon x86 herein.
 *
 * The classical example usage of this module is in detecting the presence of
 * SMIs or System Management Interrupts on Intel and AMD systems. An SMI is a
 * somewhat special form of hardware interrupt spawned from earlier CPU debug
 * modes in which the (BIOS/EFI/etc.) firmware arranges for the South Bridge
 * LPC (or other device) to generate a special interrupt under certain
 * circumstances, for example, upon expiration of a special SMI timer device,
 * due to certain external thermal readings, on certain I/O address accesses,
 * and other situations. An SMI hits a special CPU pin, triggers a special
 * SMI mode (complete with special memory map), and the OS is unaware.
 *
 * Although certain hardware-induced latencies are necessary (for example,
 * a modern system often requires an SMI handler for correct thermal control
 * and remote management) they can wreak havoc upon any OS-level performance
 * guarantees toward low-latency, especially when the OS is not even made
 * aware of the presence of these interrupts. For this reason, we need a
 * somewhat brute force mechanism to detect these interrupts. In this case,
 * we do it by hogging all of the CPU(s) for configurable timer intervals,
 * sampling the built-in CPU timer, looking for discontiguous readings.
 *
 * WARNING: This implementation necessarily introduces latencies. Therefore,
 *          you should NEVER use this module in a production environment
 *          requiring any kind of low-latency performance guarantee(s).
 *
 * Copyright (C) 2008-2009 Jon Masters, Red Hat, Inc. <jcm@redhat.com>
 *
 * Includes useful feedback from Clark Williams <clark@redhat.com>
 *
 * This file is licensed under the terms of the GNU General Public
 * License version 2. This program is licensed "as is" without any
 * warranty of any kind, whether express or implied.
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/ring_buffer.h>
#include <linux/stop_machine.h>
#include <linux/time.h>
#include <linux/hrtimer.h>
#include <linux/kthread.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/uaccess.h>
#include <linux/version.h>
#include <linux/delay.h>

/* Ring buffer sizing/flags and the text buffer size used for u64 values */
#define BUF_SIZE_DEFAULT	262144UL		/* 8K entries; assumes sizeof(struct sample) == 32 - TODO confirm */
#define BUF_FLAGS		(RB_FL_OVERWRITE)	/* no block on full */
#define U64STR_SIZE		22			/* 20 digits max, plus '\n' and NUL */

/* Driver identity strings and default sampling parameters (microseconds) */
#define VERSION			"1.0.0"
#define BANNER			"hwlat_detector: "
#define DRVNAME			"hwlat_detector"
#define DEFAULT_SAMPLE_WINDOW	1000000			/* 1s */
#define DEFAULT_SAMPLE_WIDTH	500000			/* 0.5s */
#define DEFAULT_LAT_THRESHOLD	10			/* 10us */

/* Module metadata: license, author, description and version strings */

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jon Masters <jcm@redhat.com>");
MODULE_DESCRIPTION("A simple hardware latency detector");
MODULE_VERSION(VERSION);

/*
 * Module parameters.
 *
 * "enabled" doubles as the run-time on/off flag toggled through the
 * "enable" debugfs file.
 * NOTE(review): "debug" and "threshold" are not read anywhere in the
 * visible portion of this file - confirm they are consumed in the module
 * init code.
 */

static int debug;
static int enabled;
static int threshold;

module_param(debug, int, 0);			/* enable debug */
module_param(enabled, int, 0);			/* enable detector */
module_param(threshold, int, 0);		/* latency threshold */

/* Buffering and sampling */

static struct ring_buffer *ring_buffer;		/* sample buffer, allocated in init_stats() */
static DEFINE_MUTEX(ring_buffer_mutex);		/* serializes consumers in buffer_get_sample() */
static unsigned long buf_size = BUF_SIZE_DEFAULT;	/* size passed to ring_buffer_alloc() */
static struct task_struct *kthread;		/* sampling thread, set by start_kthread() */

/*
 * DebugFS filesystem entries. debug_dir is created in init_debugfs(); the
 * remaining handles are presumably filled in there as well (that code lies
 * outside the visible portion of this file - TODO confirm).
 */

static struct dentry *debug_dir;		/* debugfs directory */
static struct dentry *debug_max;		/* maximum latency seen (ktime based, not raw TSC) */
static struct dentry *debug_count;		/* total detect count */
static struct dentry *debug_sample_width;	/* sample width us */
static struct dentry *debug_sample_window;	/* sample window us */
static struct dentry *debug_sample;		/* raw samples us */
static struct dentry *debug_threshold;		/* threshold us */
static struct dentry *debug_enable;         	/* enable/disable */

/* Forward declarations of the sample record and global state types */

struct sample;					/* latency sample */
struct data;					/* Global state */

/* Sampling functions: ring buffer producer/consumer and the measurement */
static int __buffer_add_sample(struct sample *sample);
static struct sample *buffer_get_sample(struct sample *sample);
static int get_sample(void *unused);

/* Threading and state: sampling kthread lifecycle and statistics reset */
static int kthread_fn(void *unused);
static int start_kthread(void);
static int stop_kthread(void);
static void __reset_stats(void);
static int init_stats(void);

/* Debugfs interface: generic u64 helpers plus "sample" and "enable" handlers */
static ssize_t simple_data_read(struct file *filp, char __user *ubuf,
				size_t cnt, loff_t *ppos, const u64 *entry);
static ssize_t simple_data_write(struct file *filp, const char __user *ubuf,
				 size_t cnt, loff_t *ppos, u64 *entry);
static int debug_sample_fopen(struct inode *inode, struct file *filp);
static ssize_t debug_sample_fread(struct file *filp, char __user *ubuf,
				  size_t cnt, loff_t *ppos);
static int debug_sample_release(struct inode *inode, struct file *filp);
static int debug_enable_fopen(struct inode *inode, struct file *filp);
static ssize_t debug_enable_fread(struct file *filp, char __user *ubuf,
				  size_t cnt, loff_t *ppos);
static ssize_t debug_enable_fwrite(struct file *file,
				   const char __user *user_buffer,
				   size_t user_size, loff_t *offset);

/* Initialization functions */
static int init_debugfs(void);
static void free_debugfs(void);
static int detector_init(void);
static void detector_exit(void);

/*
 * One recorded hardware latency event.
 *
 * get_sample() fills one of these whenever the largest gap it observed
 * exceeds data.threshold and writes it into the global ring_buffer. The
 * buffer is allocated with RB_FL_OVERWRITE (see BUF_FLAGS), so old entries
 * are overwritten once more than buf_size/sizeof(sample) samples are
 * pending.
 */
struct sample {
	u64		seqnum;		/* unique sequence: data.count at record time */
	u64		duration;	/* ktime delta, in microseconds */
	struct timespec	timestamp;	/* wall time (CURRENT_TIME) when recorded */
};

/*
 * Global detector state. The statistics are updated by get_sample(), which
 * runs under stop_machine with data.lock held by kthread_fn(); the debugfs
 * helpers take data.lock (a mutex) around their accesses.
 */
static struct data {

	struct mutex lock;		/* protect changes */

	u64	count;			/* total since reset */
	u64	max_sample;		/* max hardware latency (us) */
	u64	threshold;		/* sample threshold level (us) */

	u64	sample_window;		/* total sampling window (on+off), us */
	u64	sample_width;		/* active sampling portion of window, us */

	atomic_t sample_open;		/* whether the sample file is open (0 or 1) */

	wait_queue_head_t wq;		/* waitqueue for new sample values */

} data;

/**
 * __buffer_add_sample - record one latency sample in the global ring buffer
 * @sample: The latency sample to store
 *
 * Writes sizeof(struct sample) bytes from @sample into ring_buffer and hands
 * back whatever ring_buffer_write() returned. No lock is taken here; the
 * only caller in this file, get_sample(), runs under stop_machine.
 */
static int __buffer_add_sample(struct sample *sample)
{
	int rc;

	rc = ring_buffer_write(ring_buffer, sizeof(*sample), sample);

	return rc;
}

/**
 * buffer_get_sample - remove a hardware latency sample from the ring buffer
 * @sample: Pre-allocated storage for the sample
 *
 * Consumes one event from the global ring buffer and copies it into @sample.
 * The ring buffer is per-cpu, so the current CPU's buffer is tried first and
 * then every online CPU in turn. Consumers are serialized by
 * ring_buffer_mutex.
 *
 * Returns @sample when an entry was copied, or NULL when @sample is NULL or
 * no CPU buffer held an event.
 */
static struct sample *buffer_get_sample(struct sample *sample)
{
	struct ring_buffer_event *e;
	unsigned int cpu;

	if (!sample)
		return NULL;

	mutex_lock(&ring_buffer_mutex);

	/*
	 * We hold a mutex and are therefore preemptible, so the CPU number is
	 * only a hint for where to look first. Use the raw accessor so that
	 * CONFIG_DEBUG_PREEMPT does not complain about smp_processor_id()
	 * being used in preemptible code.
	 */
	e = ring_buffer_consume(ring_buffer, raw_smp_processor_id(), NULL);
	if (!e) {
		for_each_online_cpu(cpu) {
			e = ring_buffer_consume(ring_buffer, cpu, NULL);
			if (e)
				break;
		}
	}

	if (e)
		memcpy(sample, ring_buffer_event_data(e), sizeof(*sample));
	else
		sample = NULL;

	mutex_unlock(&ring_buffer_mutex);

	return sample;
}

/**
 * get_sample - take back-to-back clock readings and look for large gaps
 * @unused: This is not used but is a part of the stop_machine API
 *
 * Repeatedly reads ktime_get() twice in a row for data.sample_width
 * microseconds and remembers the largest gap between two consecutive
 * readings. If that gap exceeds data.threshold it is counted, written to
 * the ring buffer and folded into data.max_sample. Called under
 * stop_machine, with data.lock held (see kthread_fn).
 *
 * Returns 0 on success, or 1 if a later reading was earlier than the one
 * before it.
 */
static int get_sample(void *unused)
{
	ktime_t start, t1, t2;
	s64 diff, total = 0;
	u64 sample = 0;
	int ret = 1;

	start = ktime_get(); /* start timestamp */

	do {

		t1 = ktime_get();	/* we'll look for a discontinuity */
		t2 = ktime_get();

		total = ktime_to_us(ktime_sub(t2, start)); /* elapsed us so far */
		diff = ktime_to_us(ktime_sub(t2, t1));     /* gap between the pair, us */

		/* This shouldn't happen: bail out with ret still set to 1 */
		if (diff < 0) {
			printk(KERN_ERR BANNER "time running backwards\n");
			goto out;
		}

		if (diff > sample)
			sample = diff; /* only want highest value */

	} while (total <= data.sample_width);

	/* If we exceed the threshold value, we have found a hardware latency */
	if (sample > data.threshold) {
		struct sample s;

		data.count++;
		s.seqnum = data.count;	/* sequence number is the running count */
		s.duration = sample;
		s.timestamp = CURRENT_TIME;
		__buffer_add_sample(&s);	/* return value is not checked */

		/* Keep a running maximum ever recorded hardware latency */
		if (sample > data.max_sample)
			data.max_sample = sample;
	}

	ret = 0;
out:
	return ret;
}

/*
 * kthread_fn - body of the hardware latency sampling kernel thread
 * @unused: A required part of the kthread API; passed through to get_sample.
 *
 * Until asked to stop, each pass takes data.lock, runs get_sample() under
 * stop_machine (which intentionally introduces latency so that nothing else
 * can be running), wakes any reader waiting on data.wq, and then sleeps for
 * the idle part of the window: (sample_window - sample_width) converted from
 * microseconds to milliseconds.
 *
 * The thread ends when kthread_should_stop() is true, when the sleep is cut
 * short (msleep_interruptible() returns non-zero), or when stop_machine
 * returns an error - in that last case a message is logged and the detector
 * is marked disabled. The last stop_machine result is returned.
 */
static int kthread_fn(void *unused)
{
	u64 idle_ms;
	int rc = 0;

	for (;;) {
		if (kthread_should_stop())
			return rc;

		mutex_lock(&data.lock);

		rc = stop_machine(get_sample, unused, 0);
		if (rc) {
			/* Houston, we have a problem */
			mutex_unlock(&data.lock);
			break;
		}

		/* let reader(s) of the sample file look at the buffer */
		wake_up(&data.wq);

		idle_ms = data.sample_window - data.sample_width;
		do_div(idle_ms, USEC_PER_MSEC);	/* divides idle_ms in place */

		mutex_unlock(&data.lock);

		if (msleep_interruptible(idle_ms))
			return rc;
	}

	printk(KERN_ERR BANNER "could not call stop_machine, disabling\n");
	enabled = 0;

	return rc;
}

/**
 * start_kthread - Kick off the hardware latency sampling/detector kthread
 *
 * Creates and starts a kernel thread named DRVNAME that runs kthread_fn().
 * On success the task is stored in the global "kthread" and 0 is returned.
 * On failure an error is logged, the detector is marked disabled, "kthread"
 * is set to NULL (so that no ERR_PTR value is ever left in the global) and
 * the error code reported by kthread_run() is returned.
 */
static int start_kthread(void)
{
	struct task_struct *task;

	task = kthread_run(kthread_fn, NULL, DRVNAME);
	if (IS_ERR(task)) {
		printk(KERN_ERR BANNER "could not start sampling thread\n");
		enabled = 0;
		kthread = NULL;
		return PTR_ERR(task);	/* report the real reason */
	}

	kthread = task;

	return 0;
}

/**
 * stop_kthread - Ask the hardware latency sampling/detector kthread to stop
 *
 * Calls kthread_stop() on the global sampling thread and returns its result
 * unchanged.
 */
static int stop_kthread(void)
{
	return kthread_stop(kthread);
}

/**
 * __reset_stats - Reset statistics for the hardware latency detector
 *
 * Empties the global ring buffer and zeroes the running maximum and the
 * detection count kept in "data". Used at initialization and when the
 * detector is enabled. Should be called with data.lock held.
 */
static void __reset_stats(void)
{
	/* throw away any samples still queued from an earlier run */
	ring_buffer_reset(ring_buffer);

	data.max_sample = 0;
	data.count = 0;
}

/**
 * init_stats - Set up the global state for the hardware latency detector
 *
 * Initializes the lock, wait queue and open-count in "data", allocates the
 * global ring buffer (buf_size bytes, BUF_FLAGS), resets the statistics and
 * loads the default threshold, window and width.
 *
 * Returns 0 on success or -ENOMEM (after a WARN) if the ring buffer could
 * not be allocated.
 */
static int init_stats(void)
{
	atomic_set(&data.sample_open, 0);
	init_waitqueue_head(&data.wq);
	mutex_init(&data.lock);

	ring_buffer = ring_buffer_alloc(buf_size, BUF_FLAGS);
	if (WARN(!ring_buffer, KERN_ERR BANNER
			       "failed to allocate ring buffer!\n"))
		return -ENOMEM;

	__reset_stats();

	/* all three defaults are expressed in microseconds */
	data.sample_width = DEFAULT_SAMPLE_WIDTH;
	data.sample_window = DEFAULT_SAMPLE_WINDOW;
	data.threshold = DEFAULT_LAT_THRESHOLD;

	return 0;
}

/*
 * simple_data_read - Generic read helper for the u64 debugfs entries
 * @filp: The active open file structure for the debugfs "file"
 * @ubuf: The userspace provided buffer to read value into
 * @cnt: The maximum number of bytes to read
 * @ppos: The current "file" position
 * @entry: The u64 field of the global "data" structure to report
 *
 * Takes a copy of *@entry while holding the data.lock mutex, formats it as
 * a decimal number followed by a newline, and lets simple_read_from_buffer()
 * deal with @cnt and @ppos.
 *
 * Returns -EFAULT if @entry is NULL, otherwise the result of
 * simple_read_from_buffer().
 */
static ssize_t simple_data_read(struct file *filp, char __user *ubuf,
				size_t cnt, loff_t *ppos, const u64 *entry)
{
	char text[U64STR_SIZE] = { 0 };
	u64 snapshot;
	int n;

	if (!entry)
		return -EFAULT;

	/* read the value under the same lock the writers use */
	mutex_lock(&data.lock);
	snapshot = *entry;
	mutex_unlock(&data.lock);

	n = snprintf(text, sizeof(text), "%llu\n",
		     (unsigned long long)snapshot);

	return simple_read_from_buffer(ubuf, cnt, ppos, text, n);
}

/*
 * simple_data_write - Generic write helper for the u64 debugfs entries
 * @filp: The active open file structure for the debugfs "file"
 * @ubuf: The userspace provided buffer to write value from
 * @cnt: The maximum number of bytes to write
 * @ppos: The current "file" position
 * @entry: The u64 field of the global "data" structure to update
 *
 * Copies at most U64STR_SIZE bytes from userspace, parses them as a base-10
 * unsigned number and stores the result in *@entry while holding the
 * data.lock mutex.
 *
 * Returns the number of bytes consumed (at most U64STR_SIZE), -EFAULT if the
 * user buffer could not be read, or -EINVAL if the text is not a number.
 */
static ssize_t simple_data_write(struct file *filp, const char __user *ubuf,
				 size_t cnt, loff_t *ppos, u64 *entry)
{
	char text[U64STR_SIZE];
	int nbytes = min(cnt, sizeof(text));
	u64 parsed = 0;

	memset(text, '\0', sizeof(text));
	if (copy_from_user(text, ubuf, nbytes))
		return -EFAULT;

	/* guarantee termination even when the whole buffer was filled */
	text[sizeof(text) - 1] = '\0';

	if (strict_strtoull(text, 10, &parsed))
		return -EINVAL;

	mutex_lock(&data.lock);
	*entry = parsed;
	mutex_unlock(&data.lock);

	return nbytes;
}

/**
 * debug_count_fopen - Open handler for the "count" debugfs entry
 * @inode: The in-kernel inode representation of the debugfs "file"
 * @filp: The active open file structure for the debugfs "file"
 *
 * Nothing needs to be set up per open, so this always succeeds.
 *
 * Returns 0.
 */
static int debug_count_fopen(struct inode *inode, struct file *filp)
{
	/* no per-open state for this entry */
	return 0;
}

/**
 * debug_count_fread - Read handler for the "count" debugfs entry
 * @filp: The active open file structure for the debugfs "file"
 * @ubuf: The userspace provided buffer to read value into
 * @cnt: The maximum number of bytes to read
 * @ppos: The current "file" position
 *
 * Reports data.count, the number of samples that exceeded the threshold
 * since the statistics were last reset, via simple_data_read().
 */
static ssize_t debug_count_fread(struct file *filp, char __user *ubuf,
				     size_t cnt, loff_t *ppos)
{
	const u64 *field = &data.count;

	return simple_data_read(filp, ubuf, cnt, ppos, field);
}

/**
 * debug_count_fwrite - Write handler for the "count" debugfs entry
 * @filp: The active open file structure for the debugfs "file"
 * @ubuf: The user buffer that contains the value to write
 * @cnt: The maximum number of bytes to write to "file"
 * @ppos: The current position in the debugfs "file"
 *
 * Stores the written number into data.count via simple_data_write(); the
 * typical use is writing zero to clear the total.
 */
static ssize_t  debug_count_fwrite(struct file *filp,
				       const char __user *ubuf,
				       size_t cnt,
				       loff_t *ppos)
{
	u64 *field = &data.count;

	return simple_data_write(filp, ubuf, cnt, ppos, field);
}

/**
 * debug_enable_fopen - Open handler for the "enable" debugfs entry
 * @inode: The in-kernel inode representation of the debugfs "file"
 * @filp: The active open file structure for the debugfs "file"
 *
 * Nothing needs to be set up per open, so this always succeeds.
 *
 * Returns 0.
 */
static int debug_enable_fopen(struct inode *inode, struct file *filp)
{
	/* no per-open state for this entry */
	return 0;
}

/**
 * debug_enable_fread - Read function for "enable" debugfs interface
 * @filp: The active open file structure for the debugfs "file"
 * @ubuf: The userspace provided buffer to read value into
 * @cnt: The maximum number of bytes to read
 * @ppos: The current "file" position
 *
 * Reports whether the detector is currently enabled as the two bytes "0\n"
 * or "1\n". Offset and length handling is delegated to
 * simple_read_from_buffer(), the same helper the other entries use, so a
 * caller with a buffer smaller than four bytes still gets its data instead
 * of an immediate end-of-file.
 *
 * Returns the result of simple_read_from_buffer(): the number of bytes
 * copied, 0 once the position is past the data, or a negative error code.
 */
static ssize_t debug_enable_fread(struct file *filp, char __user *ubuf,
				      size_t cnt, loff_t *ppos)
{
	char buf[2];

	buf[0] = enabled ? '1' : '0';
	buf[1] = '\n';

	return simple_read_from_buffer(ubuf, cnt, ppos, buf, sizeof(buf));
}

/**
 * debug_enable_fwrite - Write function for "enable" debugfs interface
 * @filp: The active open file structure for the debugfs "file"
 * @ubuf: The user buffer that contains the value to write
 * @cnt: The maximum number of bytes to write to "file"
 * @ppos: The current position in the debugfs "file"
 *
 * Parses the written text as a base-10 number. A non-zero value enables the
 * detector: the statistics are reset (under data.lock, as __reset_stats()
 * requires) and the sampling kthread is started. Zero disables it: the
 * kthread is stopped and any reader sleeping on data.wq is woken so it can
 * return. Writing the state the detector is already in does nothing.
 *
 * Returns the number of bytes consumed (at most 4), -EFAULT if the user
 * buffer could not be read, -EINVAL if the text is not a number, or the
 * error from start_kthread() if the sampling thread could not be started.
 */
static ssize_t  debug_enable_fwrite(struct file *filp,
					const char __user *ubuf,
					size_t cnt,
					loff_t *ppos)
{
	char buf[4];
	int csize = min(cnt, sizeof(buf));
	unsigned long val = 0;		/* type expected by strict_strtoul() */
	int err = 0;

	memset(buf, '\0', sizeof(buf));
	if (copy_from_user(buf, ubuf, csize))
		return -EFAULT;

	buf[sizeof(buf)-1] = '\0';			/* just in case */
	err = strict_strtoul(buf, 10, &val);
	if (err)
		return -EINVAL;

	if (val) {
		if (enabled)
			goto out;
		enabled = 1;

		/*
		 * The sampling thread is not running at this point, so taking
		 * data.lock here cannot deadlock against it.
		 */
		mutex_lock(&data.lock);
		__reset_stats();
		mutex_unlock(&data.lock);

		err = start_kthread();
		if (err)
			return err;
	} else {
		if (!enabled)
			goto out;
		enabled = 0;
		/*
		 * data.lock must NOT be held here: the thread takes it on
		 * every pass and kthread_stop() waits for the thread to exit.
		 */
		stop_kthread();
		wake_up(&data.wq);		/* reader(s) should return */
	}
out:
	return csize;
}

/**
 * debug_max_fopen - Open handler for the "max" debugfs entry
 * @inode: The in-kernel inode representation of the debugfs "file"
 * @filp: The active open file structure for the debugfs "file"
 *
 * Nothing needs to be set up per open, so this always succeeds.
 *
 * Returns 0.
 */
static int debug_max_fopen(struct inode *inode, struct file *filp)
{
	/* no per-open state for this entry */
	return 0;
}

/**
 * debug_max_fread - Read handler for the "max" debugfs entry
 * @filp: The active open file structure for the debugfs "file"
 * @ubuf: The userspace provided buffer to read value into
 * @cnt: The maximum number of bytes to read
 * @ppos: The current "file" position
 *
 * Reports data.max_sample, the largest latency recorded since the value was
 * last reset, via simple_data_read().
 */
static ssize_t debug_max_fread(struct file *filp, char __user *ubuf,
				   size_t cnt, loff_t *ppos)
{
	const u64 *field = &data.max_sample;

	return simple_data_read(filp, ubuf, cnt, ppos, field);
}

/**
 * debug_max_fwrite - Write handler for the "max" debugfs entry
 * @filp: The active open file structure for the debugfs "file"
 * @ubuf: The user buffer that contains the value to write
 * @cnt: The maximum number of bytes to write to "file"
 * @ppos: The current position in the debugfs "file"
 *
 * Stores the written number into data.max_sample via simple_data_write().
 * This resets the maximum or seeds it with a chosen value; get_sample()
 * raises it again whenever a later measurement is larger.
 */
static ssize_t  debug_max_fwrite(struct file *filp,
				     const char __user *ubuf,
				     size_t cnt,
				     loff_t *ppos)
{
	u64 *field = &data.max_sample;

	return simple_data_write(filp, ubuf, cnt, ppos, field);
}


/**
 * debug_sample_fopen - Open handler for the "sample" debugfs entry
 * @inode: The in-kernel inode representation of this debugfs "file"
 * @filp: The active open file structure for the debugfs "file"
 *
 * The "sample" file hands out raw samples from the global ring_buffer and
 * may be held open by one user at a time. data.sample_open is used as the
 * gate: it is bumped from 0 to 1 here and dropped again on release.
 *
 * Returns 0 on success, or -EBUSY if the file is already open.
 */
static int debug_sample_fopen(struct inode *inode, struct file *filp)
{
	/* the add is refused when the counter already equals 1 */
	if (atomic_add_unless(&data.sample_open, 1, 1))
		return 0;

	return -EBUSY;
}

/**
 * debug_sample_fread - A read function for "sample" debugfs interface
 * @filp: The active open file structure for the debugfs "file"
 * @ubuf: The user buffer that will contain the samples read
 * @cnt: The maximum bytes to read from the debugfs "file"
 * @ppos: The current position in the debugfs "file"
 *
 * Returns one sample from the global ring_buffer per call, formatted as
 * "<seconds>.<nanoseconds>\t<duration>\n". If no sample is pending the call
 * sleeps on data.wq until the sampling thread signals a new pass, unless the
 * file was opened with O_NONBLOCK.
 *
 * Partial reads are not supported: if @cnt is too small for the formatted
 * record, -EINVAL is returned (the sample has already been consumed from
 * the ring buffer at that point and is dropped).
 *
 * Returns the number of bytes copied; 0 if the detector is, or becomes,
 * disabled; -EAGAIN for a non-blocking read with nothing pending; -EINTR if
 * a signal arrived while waiting; -ENOMEM, -EINVAL or -EFAULT on error.
 */
static ssize_t debug_sample_fread(struct file *filp, char __user *ubuf,
					size_t cnt, loff_t *ppos)
{
	int len = 0;
	char buf[64];
	struct sample *sample = NULL;

	if (!enabled)
		return 0;

	sample = kzalloc(sizeof(struct sample), GFP_KERNEL);
	if (!sample)
		return -ENOMEM;

	/*
	 * NOTE(review): the buffer is checked before prepare_to_wait(), so a
	 * wake_up() that lands in between is only noticed at the next one -
	 * confirm whether that delay matters.
	 */
	while (!buffer_get_sample(sample)) {

		DEFINE_WAIT(wait);

		if (filp->f_flags & O_NONBLOCK) {
			len = -EAGAIN;
			goto out;
		}

		prepare_to_wait(&data.wq, &wait, TASK_INTERRUPTIBLE);
		schedule();
		finish_wait(&data.wq, &wait);

		if (signal_pending(current)) {
			len = -EINTR;
			goto out;
		}

		if (!enabled) {			/* enable was toggled */
			len = 0;
			goto out;
		}
	}

	/* casts keep the arguments in step with the format specifiers */
	len = snprintf(buf, sizeof(buf), "%010lu.%010lu\t%llu\n",
		       (unsigned long)sample->timestamp.tv_sec,
		       (unsigned long)sample->timestamp.tv_nsec,
		       (unsigned long long)sample->duration);

	/*
	 * Handling partial reads is more trouble than it's worth, but we must
	 * not claim to have returned more bytes than the caller asked for.
	 */
	if ((size_t)len > cnt) {
		len = -EINVAL;
		goto out;
	}

	if (copy_to_user(ubuf, buf, len))
		len = -EFAULT;

out:
	kfree(sample);
	return len;
}

/**
 * debug_sample_release - Release handler for the "sample" debugfs entry
 * @inode: The in-kernel inode representation of the debugfs "file"
 * @filp: The active open file structure for the debugfs "file"
 *
 * Drops the data.sample_open count taken in debug_sample_fopen() so that
 * another user may open the file.
 *
 * Returns 0.
 */
static int debug_sample_release(struct inode *inode, struct file *filp)
{
	/* give the single-opener slot back */
	atomic_dec(&data.sample_open);
	return 0;
}

/**
 * debug_threshold_fopen - Open handler for the "threshold" debugfs entry
 * @inode: The in-kernel inode representation of the debugfs "file"
 * @filp: The active open file structure for the debugfs "file"
 *
 * Nothing needs to be set up per open, so this always succeeds.
 *
 * Returns 0.
 */
static int debug_threshold_fopen(struct inode *inode, struct file *filp)
{
	/* no per-open state for this entry */
	return 0;
}

/**
 * debug_threshold_fread - Read handler for the "threshold" debugfs entry
 * @filp: The active open file structure for the debugfs "file"
 * @ubuf: The userspace provided buffer to read value into
 * @cnt: The maximum number of bytes to read
 * @ppos: The current "file" position
 *
 * Reports data.threshold, the level in microseconds above which a measured
 * latency is recorded in the ring buffer (10us by default), via
 * simple_data_read().
 */
static ssize_t debug_threshold_fread(struct file *filp, char __user *ubuf,
					 size_t cnt, loff_t *ppos)
{
	const u64 *field = &data.threshold;

	return simple_data_read(filp, ubuf, cnt, ppos, field);
}

/**
 * debug_threshold_fwrite - Write handler for the "threshold" debugfs entry
 * @filp: The active open file structure for the debugfs "file"
 * @ubuf: The user buffer that contains the value to write
 * @cnt: The maximum number of bytes to write to "file"
 * @ppos: The current position in the debugfs "file"
 *
 * Stores the written number into data.threshold via simple_data_write() and,
 * when the detector is enabled, wakes the sampling thread. The wake-up is
 * issued whether or not the write itself succeeded.
 *
 * Returns whatever simple_data_write() returned.
 */
static ssize_t  debug_threshold_fwrite(struct file *filp,
					const char __user *ubuf,
					size_t cnt,
					loff_t *ppos)
{
	int rc = simple_data_write(filp, ubuf, cnt, ppos, &data.threshold);

	if (!enabled)
		return rc;

	wake_up_process(kthread);

	return rc;
}

/**
 * debug_width_fopen - Open handler for the "width" debugfs entry
 * @inode: The in-kernel inode representation of the debugfs "file"
 * @filp: The active open file structure for the debugfs "file"
 *
 * Nothing needs to be set up per open, so this always succeeds.
 *
 * Returns 0.
 */
static int debug_width_fopen(struct inode *inode, struct file *filp)
{
	/* no per-open state for this entry */
	return 0;
}

/**
 * debug_width_fread - Read handler for the "width" debugfs entry
 * @filp: The active open file structure for the debugfs "file"
 * @ubuf: The userspace provided buffer to read value into
 * @cnt: The maximum number of bytes to read
 * @ppos: The current "file" position
 *
 * Reports data.sample_width via simple_data_read(): the number of
 * microseconds out of each window during which the detector actively
 * samples. Sampling cannot be continuous, since the system must still get
 * time to run everything else, including the reader of the samples.
 */
static ssize_t debug_width_fread(struct file *filp, char __user *ubuf,
				     size_t cnt, loff_t *ppos)
{
	const u64 *field = &data.sample_width;

	return simple_data_read(filp, ubuf, cnt, ppos, field);
}

/**
 * debug_width_fwrite - Write handler for the "width" debugfs entry
 * @filp: The active open file structure for the debugfs "file"
 * @ubuf: The user buffer that contains the value to write
 * @cnt: The maximum number of bytes to write to "file"
 * @ppos: The current position in the debugfs "file"
 *
 * Sets data.sample_width, the number of microseconds out of each window
 * during which the detector actively samples. The new width must be
 * strictly less than the current window size. When the detector is enabled
 * the sampling thread is woken after the update.
 *
 * Returns the number of bytes consumed (at most U64STR_SIZE), -EFAULT if the
 * user buffer could not be read, or -EINVAL if the text is not a number or
 * the value is not smaller than the window.
 */
static ssize_t  debug_width_fwrite(struct file *filp,
				       const char __user *ubuf,
				       size_t cnt,
				       loff_t *ppos)
{
	char text[U64STR_SIZE];
	int nbytes = min(cnt, sizeof(text));
	u64 width = 0;

	memset(text, '\0', sizeof(text));
	if (copy_from_user(text, ubuf, nbytes))
		return -EFAULT;

	/* guarantee termination even when the whole buffer was filled */
	text[sizeof(text) - 1] = '\0';

	if (strict_strtoull(text, 10, &width) != 0)
		return -EINVAL;

	mutex_lock(&data.lock);
	if (width >= data.sample_window) {
		mutex_unlock(&data.lock);
		return -EINVAL;
	}
	data.sample_width = width;
	mutex_unlock(&data.lock);

	if (enabled)
		wake_up_process(kthread);

	return nbytes;
}

/**
 * debug_window_fopen - Open handler for the "window" debugfs entry
 * @inode: The in-kernel inode representation of the debugfs "file"
 * @filp: The active open file structure for the debugfs "file"
 *
 * The window is the total time in us that makes up one sample period;
 * windows occur back-to-back and each contains one sample width of active
 * sampling. Nothing needs to be set up per open, so this always succeeds.
 *
 * Returns 0.
 */
static int debug_window_fopen(struct inode *inode, struct file *filp)
{
	/* no per-open state for this entry */
	return 0;
}

/**
 * debug_window_fread - Read handler for the "window" debugfs entry
 * @filp: The active open file structure for the debugfs "file"
 * @ubuf: The userspace provided buffer to read value into
 * @cnt: The maximum number of bytes to read
 * @ppos: The current "file" position
 *
 * Reports data.sample_window via simple_data_read(): the total length in
 * microseconds of one sample period, of which sample_width is spent
 * actively sampling.
 */
static ssize_t debug_window_fread(struct file *filp, char __user *ubuf,
				      size_t cnt, loff_t *ppos)
{
	const u64 *field = &data.sample_window;

	return simple_data_read(filp, ubuf, cnt, ppos, field);
}

/**
 * debug_window_fwrite - Write handler for the "window" debugfs entry
 * @filp: The active open file structure for the debugfs "file"
 * @ubuf: The user buffer that contains the value to write
 * @cnt: The maximum number of bytes to write to "file"
 * @ppos: The current position in the debugfs "file"
 *
 * Sets data.sample_window, the total length in microseconds of one sample
 * period. Windows occur back-to-back and each contains one sample width of
 * active sampling, so the new window must be strictly greater than the
 * current sample width.
 *
 * Returns the number of bytes consumed (at most U64STR_SIZE), -EFAULT if the
 * user buffer could not be read, or -EINVAL if the text is not a number or
 * the value is not larger than the sample width.
 */
static ssize_t  debug_window_fwrite(struct file *filp,
					const char __user *ubuf,
					size_t cnt,
					loff_t *ppos)
{
	char text[U64STR_SIZE];
	int nbytes = min(cnt, sizeof(text));
	u64 window = 0;

	memset(text, '\0', sizeof(text));
	if (copy_from_user(text, ubuf, nbytes))
		return -EFAULT;

	/* guarantee termination even when the whole buffer was filled */
	text[sizeof(text) - 1] = '\0';

	if (strict_strtoull(text, 10, &window) != 0)
		return -EINVAL;

	mutex_lock(&data.lock);
	if (!(data.sample_width < window)) {
		mutex_unlock(&data.lock);
		return -EINVAL;
	}
	data.sample_window = window;
	mutex_unlock(&data.lock);

	return nbytes;
}

/*
 * File operations for the "count" debugfs file: open, read and write are
 * served by the debug_count_* handlers.
 */
static const struct file_operations count_fops = {
	.owner		= THIS_MODULE,
	.open		= debug_count_fopen,
	.read		= debug_count_fread,
	.write		= debug_count_fwrite,
};

/*
 * File operations table for the "enable" debugfs file: open, read and
 * write handlers, owned by this module
 */
static const struct file_operations enable_fops = {
	.owner		= THIS_MODULE,
	.open		= debug_enable_fopen,
	.read		= debug_enable_fread,
	.write		= debug_enable_fwrite,
};

/*
 * File operations table for the "max" debugfs file: open, read and
 * write handlers, owned by this module
 */
static const struct file_operations max_fops = {
	.owner		= THIS_MODULE,
	.open		= debug_max_fopen,
	.read		= debug_max_fread,
	.write		= debug_max_fwrite,
};

/*
 * File operations table for the "sample" debugfs file. Unlike the other
 * tables here it has no write handler, and it is the only one that
 * installs a release handler.
 */
static const struct file_operations sample_fops = {
	.owner		= THIS_MODULE,
	.open		= debug_sample_fopen,
	.read		= debug_sample_fread,
	.release	= debug_sample_release,
};

/*
 * File operations table for the "threshold" debugfs file: open, read and
 * write handlers, owned by this module
 */
static const struct file_operations threshold_fops = {
	.owner		= THIS_MODULE,
	.open		= debug_threshold_fopen,
	.read		= debug_threshold_fread,
	.write		= debug_threshold_fwrite,
};

/*
 * File operations table for the "width" debugfs file: open, read and
 * write handlers, owned by this module
 */
static const struct file_operations width_fops = {
	.owner		= THIS_MODULE,
	.open		= debug_width_fopen,
	.read		= debug_width_fread,
	.write		= debug_width_fwrite,
};

/*
 * File operations table for the "window" debugfs file: open, read and
 * write handlers, owned by this module
 */
static const struct file_operations window_fops = {
	.owner		= THIS_MODULE,
	.open		= debug_window_fopen,
	.read		= debug_window_fread,
	.write		= debug_window_fwrite,
};

/**
 * init_debugfs - A function to initialize the debugfs interface files
 *
 * Creates the DRVNAME directory in debugfs and populates it with the
 * "sample", "count" and "max" files (mode 0444) and the "window", "width",
 * "threshold" and "enable" files (mode 0644). Only the "enable" file is
 * given a data pointer (the address of the enabled flag).
 *
 * If any step fails, every entry created before it is removed again in
 * reverse order of creation, finishing with the directory itself.
 *
 * Returns 0 on success or -ENOMEM if any entry could not be created.
 */
static int init_debugfs(void)
{
	debug_dir = debugfs_create_dir(DRVNAME, NULL);
	if (!debug_dir)
		return -ENOMEM;		/* nothing to unwind yet */

	debug_sample = debugfs_create_file("sample", 0444, debug_dir, NULL,
					   &sample_fops);
	if (!debug_sample)
		goto fail_sample;

	debug_count = debugfs_create_file("count", 0444, debug_dir, NULL,
					  &count_fops);
	if (!debug_count)
		goto fail_count;

	debug_max = debugfs_create_file("max", 0444, debug_dir, NULL,
					&max_fops);
	if (!debug_max)
		goto fail_max;

	debug_sample_window = debugfs_create_file("window", 0644, debug_dir,
						  NULL, &window_fops);
	if (!debug_sample_window)
		goto fail_window;

	debug_sample_width = debugfs_create_file("width", 0644, debug_dir,
						 NULL, &width_fops);
	if (!debug_sample_width)
		goto fail_width;

	debug_threshold = debugfs_create_file("threshold", 0644, debug_dir,
					      NULL, &threshold_fops);
	if (!debug_threshold)
		goto fail_threshold;

	debug_enable = debugfs_create_file("enable", 0644, debug_dir,
					   &enabled, &enable_fops);
	if (!debug_enable)
		goto fail_enable;

	return 0;

	/*
	 * Unwind ladder: each label removes the entry created just before
	 * the step that failed, then falls through to the earlier ones.
	 */
fail_enable:
	debugfs_remove(debug_threshold);
fail_threshold:
	debugfs_remove(debug_sample_width);
fail_width:
	debugfs_remove(debug_sample_window);
fail_window:
	debugfs_remove(debug_max);
fail_max:
	debugfs_remove(debug_count);
fail_count:
	debugfs_remove(debug_sample);
fail_sample:
	debugfs_remove(debug_dir);
	return -ENOMEM;
}

/**
 * free_debugfs - A function to cleanup the debugfs file interface
 *
 * Removes every debugfs entry individually: the files first, in the
 * reverse of the order in which init_debugfs() creates them, and the
 * containing directory last.
 */
static void free_debugfs(void)
{
	/* could also use a debugfs_remove_recursive */
	debugfs_remove(debug_enable);
	debugfs_remove(debug_threshold);
	debugfs_remove(debug_sample_width);
	debugfs_remove(debug_sample_window);
	debugfs_remove(debug_max);
	debugfs_remove(debug_count);
	debugfs_remove(debug_sample);
	/* the directory goes last, once all files inside it are gone */
	debugfs_remove(debug_dir);
}

/**
 * detector_init - Standard module initialization code
 *
 * Prints the version banner, then runs init_stats() and init_debugfs().
 * If the enabled flag is already set at load time, the kthread is started
 * via start_kthread() as well.
 *
 * Every failure path releases what the earlier steps set up: a failing
 * start_kthread() tears down the debugfs entries and frees the ring
 * buffer, and a failing init_debugfs() frees the ring buffer (presumably
 * allocated by init_stats()). Without this, a failed load would leave
 * debugfs files behind that still reference this module's handlers.
 *
 * Returns 0 on success, or the non-zero value returned by the step that
 * failed.
 */
static int detector_init(void)
{
	int ret;

	printk(KERN_INFO BANNER "version %s\n", VERSION);

	ret = init_stats();
	if (0 != ret)
		return ret;

	ret = init_debugfs();
	if (0 != ret)
		goto err_stats;

	if (enabled) {
		ret = start_kthread();
		if (0 != ret)
			goto err_debugfs;
	}

	return 0;

err_debugfs:
	free_debugfs();
err_stats:
	ring_buffer_free(ring_buffer);
	return ret;
}

/**
 * detector_exit - Standard module cleanup code
 *
 * If the enabled flag is set, clears it and calls stop_kthread(). Then
 * removes the debugfs entries and finally frees the ring buffer.
 */
static void detector_exit(void)
{
	/* clear the flag before stop_kthread() is called */
	if (enabled) {
		enabled = 0;
		stop_kthread();
	}

	/* debugfs entries are removed before the ring buffer is freed */
	free_debugfs();
	ring_buffer_free(ring_buffer);	/* free up the ring buffer */

}

/* Register detector_init/detector_exit as the module load/unload hooks */
module_init(detector_init);
module_exit(detector_exit);