Discussion:
[patch 4/4] Handle memory unmap while hardware sampling is running
(too old to reply)
g***@linux.vnet.ibm.com
2010-12-20 13:05:45 UTC
Permalink
From: ***@linux.vnet.ibm.com

During memory unmap, hardware sampling is deactivated.
After all samples have been collected, hardware sampling is reactivated.

Signed-off-by: Mahesh Salgaonkar <***@linux.vnet.ibm.com>
Signed-off-by: Maran Pakkirisamy <***@linux.vnet.ibm.com>
Signed-off-by: Heinz Graalfs <***@linux.vnet.ibm.com>
---
arch/s390/oprofile/hwsampler_files.c | 2 +-
drivers/oprofile/buffer_sync.c | 13 +++++++++++++
2 files changed, 14 insertions(+), 1 deletion(-)

Index: linux-2.6/drivers/oprofile/buffer_sync.c
===================================================================
--- linux-2.6.orig/drivers/oprofile/buffer_sync.c
+++ linux-2.6/drivers/oprofile/buffer_sync.c
@@ -32,6 +32,11 @@
#include <linux/sched.h>
#include <linux/gfp.h>

+#ifdef CONFIG_OPROFILE_HWSAMPLING_MODE
+#include <asm/hwsampler.h>
+extern unsigned long oprofile_hwsampler;
+#endif
+
#include "oprofile_stats.h"
#include "event_buffer.h"
#include "cpu_buffer.h"
@@ -513,6 +518,10 @@ void sync_buffer(int cpu)

mutex_lock(&buffer_mutex);

+#ifdef CONFIG_OPROFILE_HWSAMPLING_MODE
+ if (oprofile_hwsampler)
+ hwsampler_deactivate(cpu);
+#endif
add_cpu_switch(cpu);

op_cpu_buffer_reset(cpu);
@@ -569,6 +578,10 @@ void sync_buffer(int cpu)

mark_done(cpu);

+#ifdef CONFIG_OPROFILE_HWSAMPLING_MODE
+ if (oprofile_hwsampler)
+ hwsampler_activate(cpu);
+#endif
mutex_unlock(&buffer_mutex);
}

Index: linux-2.6/arch/s390/oprofile/hwsampler_files.c
===================================================================
--- linux-2.6.orig/arch/s390/oprofile/hwsampler_files.c
+++ linux-2.6/arch/s390/oprofile/hwsampler_files.c
@@ -22,7 +22,7 @@ unsigned long oprofile_max_interval;
static unsigned long oprofile_sdbt_blocks = DEFAULT_SDBT_BLOCKS;
static unsigned long oprofile_sdb_blocks = DEFAULT_SDB_BLOCKS;

-static unsigned long oprofile_hwsampler;
+unsigned long oprofile_hwsampler;

static int oprofile_hwsampler_start(void)
{
g***@linux.vnet.ibm.com
2010-12-20 13:05:43 UTC
Permalink
From: ***@linux.vnet.ibm.com

OProfile is enhanced to export all files for controlling System z's hardware sampling,
and to invoke hwsampler exported functions to initialize and use System z's hardware sampling.

The patch invokes hwsampler_setup() during oprofile init and exports the following
hwsampler files under oprofilefs if hwsampler's setup succeeded:

A new directory for hardware sampling based files

/dev/oprofile/hwsampling/

The userland daemon must explicitly write to the following files
to disable (or enable) hardware based sampling

/dev/oprofile/hwsampling/hwsampler

to modify the actual sampling rate

/dev/oprofile/hwsampling/hw_interval

to modify the amount of sampling memory (measured in 4K pages)

/dev/oprofile/hwsampling/hw_sdbt_blocks

The following files are read only and show
the possible minimum sampling rate

/dev/oprofile/hwsampling/hw_min_interval

the possible maximum sampling rate

/dev/oprofile/hwsampling/hw_max_interval

The patch splits the oprofile_timer_[init/exit] functions so that they can also be called
from user context (oprofilefs) to avoid a kernel oops.

Signed-off-by: Mahesh Salgaonkar <***@linux.vnet.ibm.com>
Signed-off-by: Maran Pakkirisamy <***@linux.vnet.ibm.com>
Signed-off-by: Heinz Graalfs <***@linux.vnet.ibm.com>
---
arch/s390/oprofile/Makefile | 1
arch/s390/oprofile/hwsampler_files.c | 120 +++++++++++++++++++++++++++++++++++
arch/s390/oprofile/init.c | 35 ++++++++++
drivers/oprofile/oprof.c | 37 ++++++++++
drivers/oprofile/oprof.h | 2
drivers/oprofile/timer_int.c | 16 +++-
include/linux/oprofile.h | 15 ++++
7 files changed, 223 insertions(+), 3 deletions(-)

Index: linux-2.6/arch/s390/oprofile/Makefile
===================================================================
--- linux-2.6.orig/arch/s390/oprofile/Makefile
+++ linux-2.6/arch/s390/oprofile/Makefile
@@ -8,6 +8,7 @@ DRIVER_OBJS = $(addprefix ../../../drive
timer_int.o )

oprofile-y := $(DRIVER_OBJS) init.o backtrace.o
+oprofile-y += $(if $(CONFIG_HWSAMPLER), hwsampler_files.o,)

HW_SAMPLER_DRIVER_OBJS = $(addprefix ../../../drivers/s390/hwsampler/, \
hwsampler-main.o smpctl.o )
Index: linux-2.6/arch/s390/oprofile/hwsampler_files.c
===================================================================
--- /dev/null
+++ linux-2.6/arch/s390/oprofile/hwsampler_files.c
@@ -0,0 +1,120 @@
+/**
+ * arch/s390/oprofile/hwsampler_files.c
+ *
+ * Copyright IBM Corp. 2010
+ * Author: Mahesh Salgaonkar (***@linux.vnet.ibm.com)
+ */
+#include <linux/oprofile.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+
+#include <asm/hwsampler.h>
+
+#define DEFAULT_INTERVAL 4096
+
+#define DEFAULT_SDBT_BLOCKS 1
+#define DEFAULT_SDB_BLOCKS 511
+
+static unsigned long oprofile_hw_interval = DEFAULT_INTERVAL;
+unsigned long oprofile_min_interval;
+unsigned long oprofile_max_interval;
+
+static unsigned long oprofile_sdbt_blocks = DEFAULT_SDBT_BLOCKS;
+static unsigned long oprofile_sdb_blocks = DEFAULT_SDB_BLOCKS;
+
+static unsigned long oprofile_hwsampler;
+
+static int oprofile_hwsampler_start(void)
+{
+ int retval;
+
+ printk(KERN_INFO "oprofile_hwsampler_start\n");
+
+ retval = hwsampler_allocate(oprofile_sdbt_blocks, oprofile_sdb_blocks);
+ if (retval)
+ return retval;
+
+ retval = hwsampler_start_all(oprofile_hw_interval);
+
+ return retval;
+}
+
+static void oprofile_hwsampler_stop(void)
+{
+ printk(KERN_INFO "oprofile_hwsampler_stop\n");
+
+ hwsampler_stop_all();
+ hwsampler_deallocate();
+ return;
+}
+
+int oprofile_arch_set_hwsampler(struct oprofile_operations *ops)
+{
+ printk(KERN_INFO "oprofile: using hardware sampling\n");
+ ops->start = oprofile_hwsampler_start;
+ ops->stop = oprofile_hwsampler_stop;
+ ops->cpu_type = "timer";
+
+ return 0;
+}
+
+static ssize_t hwsampler_read(struct file *file, char __user *buf,
+ size_t count, loff_t *offset)
+{
+ return oprofilefs_ulong_to_user(oprofile_hwsampler, buf, count, offset);
+}
+
+static ssize_t hwsampler_write(struct file *file, char const __user *buf,
+ size_t count, loff_t *offset)
+{
+ unsigned long val;
+ int retval;
+
+ if (*offset)
+ return -EINVAL;
+
+ retval = oprofilefs_ulong_from_user(&val, buf, count);
+ if (retval)
+ return retval;
+
+ if (oprofile_hwsampler == val)
+ return -EINVAL;
+
+ retval = oprofile_set_hwsampler(val);
+
+ if (retval)
+ return retval;
+
+ oprofile_hwsampler = val;
+ return count;
+}
+
+static const struct file_operations hwsampler_fops = {
+ .read = hwsampler_read,
+ .write = hwsampler_write,
+};
+
+int oprofile_create_hwsampling_files(struct super_block *sb,
+ struct dentry *root)
+{
+ struct dentry *hw_dir;
+
+ /* reinitialize default values */
+ oprofile_hwsampler = 1;
+
+ hw_dir = oprofilefs_mkdir(sb, root, "hwsampling");
+ if (!hw_dir)
+ return -EINVAL;
+
+ oprofilefs_create_file(sb, hw_dir, "hwsampler", &hwsampler_fops);
+ oprofilefs_create_ulong(sb, hw_dir, "hw_interval",
+ &oprofile_hw_interval);
+ oprofilefs_create_ro_ulong(sb, hw_dir, "hw_min_interval",
+ &oprofile_min_interval);
+ oprofilefs_create_ro_ulong(sb, hw_dir, "hw_max_interval",
+ &oprofile_max_interval);
+ oprofilefs_create_ulong(sb, hw_dir, "hw_sdbt_blocks",
+ &oprofile_sdbt_blocks);
+
+ return 0;
+}
Index: linux-2.6/drivers/oprofile/oprof.c
===================================================================
--- linux-2.6.orig/drivers/oprofile/oprof.c
+++ linux-2.6/drivers/oprofile/oprof.c
@@ -239,10 +239,43 @@ int oprofile_set_ulong(unsigned long *ad
return err;
}

+#ifdef CONFIG_OPROFILE_HWSAMPLING_MODE
+int oprofile_set_hwsampler(unsigned long val)
+{
+ int err = 0;
+
+ mutex_lock(&start_mutex);
+
+ if (oprofile_started) {
+ err = -EBUSY;
+ goto out;
+ }
+
+ switch (val) {
+ case 1:
+ /* Switch to hardware sampling. */
+ __oprofile_timer_exit();
+ err = oprofile_arch_set_hwsampler(&oprofile_ops);
+ break;
+ case 0:
+ printk(KERN_INFO "oprofile: using timer interrupt.\n");
+ err = __oprofile_timer_init(&oprofile_ops);
+ break;
+ default:
+ err = -EINVAL;
+ }
+
+out:
+ mutex_unlock(&start_mutex);
+ return err;
+}
+#endif /* CONFIG_OPROFILE_HWSAMPLING_MODE */
+
static int __init oprofile_init(void)
{
int err;

+ memset(&oprofile_ops, 0, sizeof(oprofile_ops));
err = oprofile_arch_init(&oprofile_ops);
if (err < 0 || timer) {
printk(KERN_INFO "oprofile: using timer interrupt.\n");
@@ -250,6 +283,10 @@ static int __init oprofile_init(void)
if (err)
return err;
}
+#ifdef CONFIG_OPROFILE_HWSAMPLING_MODE
+ else if (err == 0)
+ oprofile_arch_set_hwsampler(&oprofile_ops);
+#endif
return oprofilefs_register();
}

Index: linux-2.6/drivers/oprofile/oprof.h
===================================================================
--- linux-2.6.orig/drivers/oprofile/oprof.h
+++ linux-2.6/drivers/oprofile/oprof.h
@@ -35,7 +35,9 @@ struct dentry;

void oprofile_create_files(struct super_block *sb, struct dentry *root);
int oprofile_timer_init(struct oprofile_operations *ops);
+int __oprofile_timer_init(struct oprofile_operations *ops);
void oprofile_timer_exit(void);
+void __oprofile_timer_exit(void);

int oprofile_set_ulong(unsigned long *addr, unsigned long val);
int oprofile_set_timeout(unsigned long time);
Index: linux-2.6/drivers/oprofile/timer_int.c
===================================================================
--- linux-2.6.orig/drivers/oprofile/timer_int.c
+++ linux-2.6/drivers/oprofile/timer_int.c
@@ -97,14 +97,13 @@ static struct notifier_block __refdata o
.notifier_call = oprofile_cpu_notify,
};

-int __init oprofile_timer_init(struct oprofile_operations *ops)
+int __oprofile_timer_init(struct oprofile_operations *ops)
{
int rc;

rc = register_hotcpu_notifier(&oprofile_cpu_notifier);
if (rc)
return rc;
- ops->create_files = NULL;
ops->setup = NULL;
ops->shutdown = NULL;
ops->start = oprofile_hrtimer_start;
@@ -113,7 +112,18 @@ int __init oprofile_timer_init(struct op
return 0;
}

-void __exit oprofile_timer_exit(void)
+int __init oprofile_timer_init(struct oprofile_operations *ops)
+{
+ return __oprofile_timer_init(ops);
+}
+
+void __oprofile_timer_exit(void)
{
unregister_hotcpu_notifier(&oprofile_cpu_notifier);
}
+
+void __exit oprofile_timer_exit(void)
+{
+ __oprofile_timer_exit();
+}
+
Index: linux-2.6/include/linux/oprofile.h
===================================================================
--- linux-2.6.orig/include/linux/oprofile.h
+++ linux-2.6/include/linux/oprofile.h
@@ -89,6 +89,21 @@ int oprofile_arch_init(struct oprofile_o
*/
void oprofile_arch_exit(void);

+#ifdef CONFIG_OPROFILE_HWSAMPLING_MODE
+/**
+ * setup hardware sampler for oprofiling.
+ */
+
+int oprofile_set_hwsampler(unsigned long);
+
+/**
+ * hardware sampler module initialization for the s390 arch
+ */
+
+int oprofile_arch_set_hwsampler(struct oprofile_operations *ops);
+
+#endif /* CONFIG_OPROFILE_HWSAMPLING_MODE */
+
/**
* Add a sample. This may be called from any context.
*/
Index: linux-2.6/arch/s390/oprofile/init.c
===================================================================
--- linux-2.6.orig/arch/s390/oprofile/init.c
+++ linux-2.6/arch/s390/oprofile/init.c
@@ -11,16 +11,51 @@
#include <linux/oprofile.h>
#include <linux/init.h>
#include <linux/errno.h>
+#include <linux/fs.h>

+#ifdef CONFIG_OPROFILE_HWSAMPLING_MODE
+#include <asm/hwsampler.h>
+
+extern int oprofile_create_hwsampling_files(struct super_block *sb,
+ struct dentry *root);
+
+extern unsigned long oprofile_min_interval;
+extern unsigned long oprofile_max_interval;
+#endif /* CONFIG_OPROFILE_HWSAMPLING_MODE */

extern void s390_backtrace(struct pt_regs * const regs, unsigned int depth);

int __init oprofile_arch_init(struct oprofile_operations* ops)
{
ops->backtrace = s390_backtrace;
+
+#ifdef CONFIG_OPROFILE_HWSAMPLING_MODE
+ if (hwsampler_setup())
+ return -ENODEV;
+
+ /*
+ * create hwsampler files only if hwsampler_setup() succeeds.
+ */
+ ops->create_files = oprofile_create_hwsampling_files;
+ oprofile_min_interval = hwsampler_query_min_interval();
+ if (oprofile_min_interval < 0) {
+ oprofile_min_interval = 0;
+ return -ENODEV;
+ }
+ oprofile_max_interval = hwsampler_query_max_interval();
+ if (oprofile_max_interval < 0) {
+ oprofile_max_interval = 0;
+ return -ENODEV;
+ }
+ return 0;
+#endif /* CONFIG_OPROFILE_HWSAMPLING_MODE */
+
return -ENODEV;
}

void oprofile_arch_exit(void)
{
+#ifdef CONFIG_OPROFILE_HWSAMPLING_MODE
+ hwsampler_shutdown();
+#endif /* CONFIG_OPROFILE_HWSAMPLING_MODE */
}
Robert Richter
2011-01-03 19:02:03 UTC
Permalink
Post by g***@linux.vnet.ibm.com
OProfile is enhanced to export all files for controlling System z's hardware sampling,
and to invoke hwsampler exported functions to initialize and use System z's hardware sampling.
The patch invokes hwsampler_setup() during oprofile init and exports following
A new directory for hardware sampling based files
/dev/oprofile/hwsampling/
The userland daemon must explicitly write to the following files
to disable (or enable) hardware based sampling
/dev/oprofile/hwsampling/hwsampler
to modify the actual sampling rate
/dev/oprofile/hwsampling/hw_interval
to modify the amount of sampling memory (measured in 4K pages)
/dev/oprofile/hwsampling/hw_sdbt_blocks
The following files are read only and show
the possible minimum sampling rate
/dev/oprofile/hwsampling/hw_min_interval
the possible maximum sampling rate
/dev/oprofile/hwsampling/hw_max_interval
The patch splits the oprofile_timer_[init/exit] function so that it can be also called
through user context (oprofilefs) to avoid kernel oops.
---
arch/s390/oprofile/Makefile | 1
arch/s390/oprofile/hwsampler_files.c | 120 +++++++++++++++++++++++++++++++++++
I would rather see a file hwsampler.c here that contains all oprofile
hwsampler code in it and also sets up a struct oprofile_operations*
ops.

Doing so, most of the global functions and variables below can be made
static.
Post by g***@linux.vnet.ibm.com
arch/s390/oprofile/init.c | 35 ++++++++++
We should find a better solution than changing all those files only
Post by g***@linux.vnet.ibm.com
drivers/oprofile/oprof.c | 37 ++++++++++
drivers/oprofile/oprof.h | 2
drivers/oprofile/timer_int.c | 16 +++-
include/linux/oprofile.h | 15 ++++
I want to see most of this in arch/s390.
Post by g***@linux.vnet.ibm.com
7 files changed, 223 insertions(+), 3 deletions(-)
Index: linux-2.6/arch/s390/oprofile/Makefile
===================================================================
--- linux-2.6.orig/arch/s390/oprofile/Makefile
+++ linux-2.6/arch/s390/oprofile/Makefile
@@ -8,6 +8,7 @@ DRIVER_OBJS = $(addprefix ../../../drive
timer_int.o )
oprofile-y := $(DRIVER_OBJS) init.o backtrace.o
+oprofile-y += $(if $(CONFIG_HWSAMPLER), hwsampler_files.o,)
HW_SAMPLER_DRIVER_OBJS = $(addprefix ../../../drivers/s390/hwsampler/, \
hwsampler-main.o smpctl.o )
Index: linux-2.6/arch/s390/oprofile/hwsampler_files.c
===================================================================
--- /dev/null
+++ linux-2.6/arch/s390/oprofile/hwsampler_files.c
@@ -0,0 +1,120 @@
+/**
+ * arch/s390/oprofile/hwsampler_files.c
+ *
+ * Copyright IBM Corp. 2010
+ */
+#include <linux/oprofile.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+
+#include <asm/hwsampler.h>
+
+#define DEFAULT_INTERVAL 4096
+
+#define DEFAULT_SDBT_BLOCKS 1
+#define DEFAULT_SDB_BLOCKS 511
+
+static unsigned long oprofile_hw_interval = DEFAULT_INTERVAL;
+unsigned long oprofile_min_interval;
+unsigned long oprofile_max_interval;
This could be static.
Post by g***@linux.vnet.ibm.com
+
+static unsigned long oprofile_sdbt_blocks = DEFAULT_SDBT_BLOCKS;
+static unsigned long oprofile_sdb_blocks = DEFAULT_SDB_BLOCKS;
+
+static unsigned long oprofile_hwsampler;
+
+static int oprofile_hwsampler_start(void)
+{
+ int retval;
+
+ printk(KERN_INFO "oprofile_hwsampler_start\n");
This looks like a debug msg.
Post by g***@linux.vnet.ibm.com
+
+ retval = hwsampler_allocate(oprofile_sdbt_blocks, oprofile_sdb_blocks);
+ if (retval)
+ return retval;
+
+ retval = hwsampler_start_all(oprofile_hw_interval);
+
+ return retval;
+}
+
+static void oprofile_hwsampler_stop(void)
+{
+ printk(KERN_INFO "oprofile_hwsampler_stop\n");
Same here.
Post by g***@linux.vnet.ibm.com
+
+ hwsampler_stop_all();
+ hwsampler_deallocate();
+ return;
+}
+
+int oprofile_arch_set_hwsampler(struct oprofile_operations *ops)
+{
+ printk(KERN_INFO "oprofile: using hardware sampling\n");
+ ops->start = oprofile_hwsampler_start;
+ ops->stop = oprofile_hwsampler_stop;
+ ops->cpu_type = "timer";
Wouldn't it be better to have a different cpu_type string here? I
don't think the oprofilefs interface is exactly the same as for timer
mode. How does the daemon distinguish between both modes?
Post by g***@linux.vnet.ibm.com
+
+ return 0;
+}
+
+static ssize_t hwsampler_read(struct file *file, char __user *buf,
+ size_t count, loff_t *offset)
+{
+ return oprofilefs_ulong_to_user(oprofile_hwsampler, buf, count, offset);
+}
+
+static ssize_t hwsampler_write(struct file *file, char const __user *buf,
+ size_t count, loff_t *offset)
+{
+ unsigned long val;
+ int retval;
+
+ if (*offset)
+ return -EINVAL;
+
+ retval = oprofilefs_ulong_from_user(&val, buf, count);
+ if (retval)
+ return retval;
+
+ if (oprofile_hwsampler == val)
+ return -EINVAL;
+
+ retval = oprofile_set_hwsampler(val);
+
+ if (retval)
+ return retval;
+
+ oprofile_hwsampler = val;
+ return count;
+}
+
+static const struct file_operations hwsampler_fops = {
+ .read = hwsampler_read,
+ .write = hwsampler_write,
+};
+
+int oprofile_create_hwsampling_files(struct super_block *sb,
+ struct dentry *root)
This can be made static too.
Post by g***@linux.vnet.ibm.com
+{
+ struct dentry *hw_dir;
+
+ /* reinitialize default values */
+ oprofile_hwsampler = 1;
+
+ hw_dir = oprofilefs_mkdir(sb, root, "hwsampling");
+ if (!hw_dir)
+ return -EINVAL;
+
+ oprofilefs_create_file(sb, hw_dir, "hwsampler", &hwsampler_fops);
+ oprofilefs_create_ulong(sb, hw_dir, "hw_interval",
+ &oprofile_hw_interval);
+ oprofilefs_create_ro_ulong(sb, hw_dir, "hw_min_interval",
+ &oprofile_min_interval);
+ oprofilefs_create_ro_ulong(sb, hw_dir, "hw_max_interval",
+ &oprofile_max_interval);
+ oprofilefs_create_ulong(sb, hw_dir, "hw_sdbt_blocks",
+ &oprofile_sdbt_blocks);
+
+ return 0;
+}
Index: linux-2.6/drivers/oprofile/oprof.c
===================================================================
--- linux-2.6.orig/drivers/oprofile/oprof.c
+++ linux-2.6/drivers/oprofile/oprof.c
@@ -239,10 +239,43 @@ int oprofile_set_ulong(unsigned long *ad
return err;
}
+#ifdef CONFIG_OPROFILE_HWSAMPLING_MODE
+int oprofile_set_hwsampler(unsigned long val)
+{
+ int err = 0;
+
+ mutex_lock(&start_mutex);
+
+ if (oprofile_started) {
+ err = -EBUSY;
+ goto out;
+ }
+
+ switch (val) {
+ /* Switch to hardware sampling. */
+ __oprofile_timer_exit();
+ err = oprofile_arch_set_hwsampler(&oprofile_ops);
+ break;
+ printk(KERN_INFO "oprofile: using timer interrupt.\n");
+ err = __oprofile_timer_init(&oprofile_ops);
+ break;
Is there a use case for switching the mode at runtime? There are
kernel parameters to force timer mode while booting or loading the
module. I don't like exporting all these timer and hwsampler
functions. We could avoid this by making hwsampler architectural code
and leaving the timer code as it is.
Post by g***@linux.vnet.ibm.com
+ err = -EINVAL;
+ }
+
+ mutex_unlock(&start_mutex);
+ return err;
+}
+#endif /* CONFIG_OPROFILE_HWSAMPLING_MODE */
+
static int __init oprofile_init(void)
{
int err;
+ memset(&oprofile_ops, 0, sizeof(oprofile_ops));
The struct is already initialized to 0.
Post by g***@linux.vnet.ibm.com
err = oprofile_arch_init(&oprofile_ops);
if (err < 0 || timer) {
printk(KERN_INFO "oprofile: using timer interrupt.\n");
@@ -250,6 +283,10 @@ static int __init oprofile_init(void)
if (err)
return err;
}
+#ifdef CONFIG_OPROFILE_HWSAMPLING_MODE
+ else if (err == 0)
+ oprofile_arch_set_hwsampler(&oprofile_ops);
I would like to see this in oprofile_arch_init().
Post by g***@linux.vnet.ibm.com
+#endif
return oprofilefs_register();
}
Index: linux-2.6/drivers/oprofile/oprof.h
===================================================================
--- linux-2.6.orig/drivers/oprofile/oprof.h
+++ linux-2.6/drivers/oprofile/oprof.h
@@ -35,7 +35,9 @@ struct dentry;
void oprofile_create_files(struct super_block *sb, struct dentry *root);
int oprofile_timer_init(struct oprofile_operations *ops);
+int __oprofile_timer_init(struct oprofile_operations *ops);
void oprofile_timer_exit(void);
+void __oprofile_timer_exit(void);
See my comments above, I don't want to export this.
Post by g***@linux.vnet.ibm.com
int oprofile_set_ulong(unsigned long *addr, unsigned long val);
int oprofile_set_timeout(unsigned long time);
Index: linux-2.6/drivers/oprofile/timer_int.c
===================================================================
--- linux-2.6.orig/drivers/oprofile/timer_int.c
+++ linux-2.6/drivers/oprofile/timer_int.c
@@ -97,14 +97,13 @@ static struct notifier_block __refdata o
.notifier_call = oprofile_cpu_notify,
};
-int __init oprofile_timer_init(struct oprofile_operations *ops)
+int __oprofile_timer_init(struct oprofile_operations *ops)
{
int rc;
rc = register_hotcpu_notifier(&oprofile_cpu_notifier);
if (rc)
return rc;
- ops->create_files = NULL;
ops->setup = NULL;
ops->shutdown = NULL;
ops->start = oprofile_hrtimer_start;
@@ -113,7 +112,18 @@ int __init oprofile_timer_init(struct op
return 0;
}
-void __exit oprofile_timer_exit(void)
+int __init oprofile_timer_init(struct oprofile_operations *ops)
+{
+ return __oprofile_timer_init(ops);
+}
+
+void __oprofile_timer_exit(void)
{
unregister_hotcpu_notifier(&oprofile_cpu_notifier);
}
+
+void __exit oprofile_timer_exit(void)
+{
+ __oprofile_timer_exit();
+}
+
Index: linux-2.6/include/linux/oprofile.h
===================================================================
--- linux-2.6.orig/include/linux/oprofile.h
+++ linux-2.6/include/linux/oprofile.h
@@ -89,6 +89,21 @@ int oprofile_arch_init(struct oprofile_o
*/
void oprofile_arch_exit(void);
+#ifdef CONFIG_OPROFILE_HWSAMPLING_MODE
+/**
+ * setup hardware sampler for oprofiling.
+ */
+
+int oprofile_set_hwsampler(unsigned long);
+
+/**
+ * hardware sampler module initialization for the s390 arch
+ */
+
+int oprofile_arch_set_hwsampler(struct oprofile_operations *ops);
This is not generic code, there is no other architecture that may
reuse this. We should move this to arch/s390.
Post by g***@linux.vnet.ibm.com
+
+#endif /* CONFIG_OPROFILE_HWSAMPLING_MODE */
+
/**
* Add a sample. This may be called from any context.
*/
Index: linux-2.6/arch/s390/oprofile/init.c
===================================================================
--- linux-2.6.orig/arch/s390/oprofile/init.c
+++ linux-2.6/arch/s390/oprofile/init.c
@@ -11,16 +11,51 @@
#include <linux/oprofile.h>
#include <linux/init.h>
#include <linux/errno.h>
+#include <linux/fs.h>
+#ifdef CONFIG_OPROFILE_HWSAMPLING_MODE
+#include <asm/hwsampler.h>
+
+extern int oprofile_create_hwsampling_files(struct super_block *sb,
+ struct dentry *root);
+
+extern unsigned long oprofile_min_interval;
+extern unsigned long oprofile_max_interval;
This becomes static if we move it to arch/s390/oprofile/hwsampler.c
(see below).
Post by g***@linux.vnet.ibm.com
+#endif /* CONFIG_OPROFILE_HWSAMPLING_MODE */
extern void s390_backtrace(struct pt_regs * const regs, unsigned int depth);
int __init oprofile_arch_init(struct oprofile_operations* ops)
{
ops->backtrace = s390_backtrace;
+
+#ifdef CONFIG_OPROFILE_HWSAMPLING_MODE
+ if (hwsampler_setup())
+ return -ENODEV;
+
+ /*
+ * create hwsampler files only if hwsampler_setup() succeeds.
+ */
+ ops->create_files = oprofile_create_hwsampling_files;
+ oprofile_min_interval = hwsampler_query_min_interval();
+ if (oprofile_min_interval < 0) {
+ oprofile_min_interval = 0;
+ return -ENODEV;
+ }
+ oprofile_max_interval = hwsampler_query_max_interval();
+ if (oprofile_max_interval < 0) {
+ oprofile_max_interval = 0;
+ return -ENODEV;
+ }
+ return 0;
+#endif /* CONFIG_OPROFILE_HWSAMPLING_MODE */
Move all the code for CONFIG_OPROFILE_HWSAMPLING_MODE in this file to

arch/s390/oprofile/hwsampler.c

and only export an oprofile_hwsampler_init() function. This can be an
empty function stub for the !CONFIG_OPROFILE_HWSAMPLING_MODE case.
Post by g***@linux.vnet.ibm.com
+
return -ENODEV;
}
void oprofile_arch_exit(void)
{
+#ifdef CONFIG_OPROFILE_HWSAMPLING_MODE
+ hwsampler_shutdown();
+#endif /* CONFIG_OPROFILE_HWSAMPLING_MODE */
Same here...

-Robert
Post by g***@linux.vnet.ibm.com
}
--
Advanced Micro Devices, Inc.
Operating System Research Center
Heinz Graalfs
2011-01-19 16:55:28 UTC
Permalink
Robert, here is the 2nd part...

Heinz
Post by Robert Richter
Post by g***@linux.vnet.ibm.com
OProfile is enhanced to export all files for controlling System z's hardware sampling,
and to invoke hwsampler exported functions to initialize and use System z's hardware sampling.
The patch invokes hwsampler_setup() during oprofile init and exports following
A new directory for hardware sampling based files
/dev/oprofile/hwsampling/
The userland daemon must explicitly write to the following files
to disable (or enable) hardware based sampling
/dev/oprofile/hwsampling/hwsampler
to modify the actual sampling rate
/dev/oprofile/hwsampling/hw_interval
to modify the amount of sampling memory (measured in 4K pages)
/dev/oprofile/hwsampling/hw_sdbt_blocks
The following files are read only and show
the possible minimum sampling rate
/dev/oprofile/hwsampling/hw_min_interval
the possible maximum sampling rate
/dev/oprofile/hwsampling/hw_max_interval
The patch splits the oprofile_timer_[init/exit] function so that it can be also called
through user context (oprofilefs) to avoid kernel oops.
---
arch/s390/oprofile/Makefile | 1
arch/s390/oprofile/hwsampler_files.c | 120 +++++++++++++++++++++++++++++++++++
I would rather see a file hwsampler.c here that contains all oprofile
hwsampler code in it and also sets up a struct oprofile_operations*
ops.
I added hwsampler.c and also kept the hwsampler_files.c
Post by Robert Richter
Doing so, most of global functions and variables below can be made
static.
Post by g***@linux.vnet.ibm.com
arch/s390/oprofile/init.c | 35 ++++++++++
We should find a better solution than changing all those files only
Post by g***@linux.vnet.ibm.com
drivers/oprofile/oprof.c | 37 ++++++++++
drivers/oprofile/oprof.h | 2
drivers/oprofile/timer_int.c | 16 +++-
include/linux/oprofile.h | 15 ++++
I want to see most of this in arch/s390.
mostly done, but we need the possibility to switch the modes...
see also below
Post by Robert Richter
Post by g***@linux.vnet.ibm.com
7 files changed, 223 insertions(+), 3 deletions(-)
Index: linux-2.6/arch/s390/oprofile/Makefile
===================================================================
--- linux-2.6.orig/arch/s390/oprofile/Makefile
+++ linux-2.6/arch/s390/oprofile/Makefile
@@ -8,6 +8,7 @@ DRIVER_OBJS = $(addprefix ../../../drive
timer_int.o )
oprofile-y := $(DRIVER_OBJS) init.o backtrace.o
+oprofile-y += $(if $(CONFIG_HWSAMPLER), hwsampler_files.o,)
HW_SAMPLER_DRIVER_OBJS = $(addprefix ../../../drivers/s390/hwsampler/, \
hwsampler-main.o smpctl.o )
Index: linux-2.6/arch/s390/oprofile/hwsampler_files.c
===================================================================
--- /dev/null
+++ linux-2.6/arch/s390/oprofile/hwsampler_files.c
@@ -0,0 +1,120 @@
+/**
+ * arch/s390/oprofile/hwsampler_files.c
+ *
+ * Copyright IBM Corp. 2010
+ */
+#include <linux/oprofile.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+
+#include <asm/hwsampler.h>
+
+#define DEFAULT_INTERVAL 4096
+
+#define DEFAULT_SDBT_BLOCKS 1
+#define DEFAULT_SDB_BLOCKS 511
+
+static unsigned long oprofile_hw_interval = DEFAULT_INTERVAL;
+unsigned long oprofile_min_interval;
+unsigned long oprofile_max_interval;
This could be static.
OK, done
Post by Robert Richter
Post by g***@linux.vnet.ibm.com
+
+static unsigned long oprofile_sdbt_blocks = DEFAULT_SDBT_BLOCKS;
+static unsigned long oprofile_sdb_blocks = DEFAULT_SDB_BLOCKS;
+
+static unsigned long oprofile_hwsampler;
+
+static int oprofile_hwsampler_start(void)
+{
+ int retval;
+
+ printk(KERN_INFO "oprofile_hwsampler_start\n");
This looks like a debug msg.
OK, removed
Post by Robert Richter
Post by g***@linux.vnet.ibm.com
+
+ retval = hwsampler_allocate(oprofile_sdbt_blocks, oprofile_sdb_blocks);
+ if (retval)
+ return retval;
+
+ retval = hwsampler_start_all(oprofile_hw_interval);
+
+ return retval;
+}
+
+static void oprofile_hwsampler_stop(void)
+{
+ printk(KERN_INFO "oprofile_hwsampler_stop\n");
Same here.
OK, removed
Post by Robert Richter
Post by g***@linux.vnet.ibm.com
+
+ hwsampler_stop_all();
+ hwsampler_deallocate();
+ return;
+}
+
+int oprofile_arch_set_hwsampler(struct oprofile_operations *ops)
+{
+ printk(KERN_INFO "oprofile: using hardware sampling\n");
+ ops->start = oprofile_hwsampler_start;
+ ops->stop = oprofile_hwsampler_stop;
+ ops->cpu_type = "timer";
Wouldn't it be better to have a different cpu_type string here, I
don't think the oprofilefs interface is exactly the same as for timer
mode. How the daemon distinguishs between both modes?
the user space daemon can live with this unchanged when the kernel is
configured for OProfile and CPUMF hardware sampling.

We plan to update the daemon with new options for hardware sampling also
and to prepare opreport for appropriate message headers.
Post by Robert Richter
Post by g***@linux.vnet.ibm.com
+
+ return 0;
+}
+
+static ssize_t hwsampler_read(struct file *file, char __user *buf,
+ size_t count, loff_t *offset)
+{
+ return oprofilefs_ulong_to_user(oprofile_hwsampler, buf, count, offset);
+}
+
+static ssize_t hwsampler_write(struct file *file, char const __user *buf,
+ size_t count, loff_t *offset)
+{
+ unsigned long val;
+ int retval;
+
+ if (*offset)
+ return -EINVAL;
+
+ retval = oprofilefs_ulong_from_user(&val, buf, count);
+ if (retval)
+ return retval;
+
+ if (oprofile_hwsampler == val)
+ return -EINVAL;
+
+ retval = oprofile_set_hwsampler(val);
+
+ if (retval)
+ return retval;
+
+ oprofile_hwsampler = val;
+ return count;
+}
+
+static const struct file_operations hwsampler_fops = {
+ .read = hwsampler_read,
+ .write = hwsampler_write,
+};
+
+int oprofile_create_hwsampling_files(struct super_block *sb,
+ struct dentry *root)
This can be made static too.
OK, done
Post by Robert Richter
Post by g***@linux.vnet.ibm.com
+{
+ struct dentry *hw_dir;
+
+ /* reinitialize default values */
+ oprofile_hwsampler = 1;
+
+ hw_dir = oprofilefs_mkdir(sb, root, "hwsampling");
+ if (!hw_dir)
+ return -EINVAL;
+
+ oprofilefs_create_file(sb, hw_dir, "hwsampler", &hwsampler_fops);
+ oprofilefs_create_ulong(sb, hw_dir, "hw_interval",
+ &oprofile_hw_interval);
+ oprofilefs_create_ro_ulong(sb, hw_dir, "hw_min_interval",
+ &oprofile_min_interval);
+ oprofilefs_create_ro_ulong(sb, hw_dir, "hw_max_interval",
+ &oprofile_max_interval);
+ oprofilefs_create_ulong(sb, hw_dir, "hw_sdbt_blocks",
+ &oprofile_sdbt_blocks);
+
+ return 0;
+}
Index: linux-2.6/drivers/oprofile/oprof.c
===================================================================
--- linux-2.6.orig/drivers/oprofile/oprof.c
+++ linux-2.6/drivers/oprofile/oprof.c
@@ -239,10 +239,43 @@ int oprofile_set_ulong(unsigned long *ad
return err;
}
+#ifdef CONFIG_OPROFILE_HWSAMPLING_MODE
+int oprofile_set_hwsampler(unsigned long val)
+{
+ int err = 0;
+
+ mutex_lock(&start_mutex);
+
+ if (oprofile_started) {
+ err = -EBUSY;
+ goto out;
+ }
+
+ switch (val) {
+ /* Switch to hardware sampling. */
+ __oprofile_timer_exit();
+ err = oprofile_arch_set_hwsampler(&oprofile_ops);
+ break;
+ printk(KERN_INFO "oprofile: using timer interrupt.\n");
+ err = __oprofile_timer_init(&oprofile_ops);
+ break;
Is there a use case for switching the mode at runtime? There are
kernel parameters to force timer mode while booting or loading the
module. I don't like exporting all these timer and hwsampler
functions. We could avoid this by making hwsampler architectural code
and leaving the timer code as it is.
hardware sampling might have a need for lots of kernel memory, which
might not be available in all circumstances. It would be nice if one could
switch to timer based sampling in such a case.
Post by Robert Richter
Post by g***@linux.vnet.ibm.com
+ err = -EINVAL;
+ }
+
+ mutex_unlock(&start_mutex);
+ return err;
+}
+#endif /* CONFIG_OPROFILE_HWSAMPLING_MODE */
+
static int __init oprofile_init(void)
{
int err;
+ memset(&oprofile_ops, 0, sizeof(oprofile_ops));
The struct is already initialized to 0.
OK, statement removed
Post by Robert Richter
Post by g***@linux.vnet.ibm.com
err = oprofile_arch_init(&oprofile_ops);
if (err < 0 || timer) {
printk(KERN_INFO "oprofile: using timer interrupt.\n");
@@ -250,6 +283,10 @@ static int __init oprofile_init(void)
if (err)
return err;
}
+#ifdef CONFIG_OPROFILE_HWSAMPLING_MODE
+ else if (err == 0)
+ oprofile_arch_set_hwsampler(&oprofile_ops);
I would like to see this in oprofile_arch_init().
OK, done
Post by Robert Richter
Post by g***@linux.vnet.ibm.com
+#endif
return oprofilefs_register();
}
Index: linux-2.6/drivers/oprofile/oprof.h
===================================================================
--- linux-2.6.orig/drivers/oprofile/oprof.h
+++ linux-2.6/drivers/oprofile/oprof.h
@@ -35,7 +35,9 @@ struct dentry;
void oprofile_create_files(struct super_block *sb, struct dentry *root);
int oprofile_timer_init(struct oprofile_operations *ops);
+int __oprofile_timer_init(struct oprofile_operations *ops);
void oprofile_timer_exit(void);
+void __oprofile_timer_exit(void);
See my comments above, I don't want to export this.
Post by g***@linux.vnet.ibm.com
int oprofile_set_ulong(unsigned long *addr, unsigned long val);
int oprofile_set_timeout(unsigned long time);
Index: linux-2.6/drivers/oprofile/timer_int.c
===================================================================
--- linux-2.6.orig/drivers/oprofile/timer_int.c
+++ linux-2.6/drivers/oprofile/timer_int.c
@@ -97,14 +97,13 @@ static struct notifier_block __refdata o
.notifier_call = oprofile_cpu_notify,
};
-int __init oprofile_timer_init(struct oprofile_operations *ops)
+int __oprofile_timer_init(struct oprofile_operations *ops)
{
int rc;
rc = register_hotcpu_notifier(&oprofile_cpu_notifier);
if (rc)
return rc;
- ops->create_files = NULL;
ops->setup = NULL;
ops->shutdown = NULL;
ops->start = oprofile_hrtimer_start;
@@ -113,7 +112,18 @@ int __init oprofile_timer_init(struct op
return 0;
}
-void __exit oprofile_timer_exit(void)
+int __init oprofile_timer_init(struct oprofile_operations *ops)
+{
+ return __oprofile_timer_init(ops);
+}
+
+void __oprofile_timer_exit(void)
{
unregister_hotcpu_notifier(&oprofile_cpu_notifier);
}
+
+void __exit oprofile_timer_exit(void)
+{
+ __oprofile_timer_exit();
+}
+
Index: linux-2.6/include/linux/oprofile.h
===================================================================
--- linux-2.6.orig/include/linux/oprofile.h
+++ linux-2.6/include/linux/oprofile.h
@@ -89,6 +89,21 @@ int oprofile_arch_init(struct oprofile_o
*/
void oprofile_arch_exit(void);
+#ifdef CONFIG_OPROFILE_HWSAMPLING_MODE
+/**
+ * setup hardware sampler for oprofiling.
+ */
+
+int oprofile_set_hwsampler(unsigned long);
+
+/**
+ * hardware sampler module initialization for the s390 arch
+ */
+
+int oprofile_arch_set_hwsampler(struct oprofile_operations *ops);
This is not generic code, there is no other architecture that may
reuse this. We should move this to arch/s390.
Hmm, we need it to be able to switch modes.
Post by Robert Richter
Post by g***@linux.vnet.ibm.com
+
+#endif /* CONFIG_OPROFILE_HWSAMPLING_MODE */
+
/**
* Add a sample. This may be called from any context.
*/
Index: linux-2.6/arch/s390/oprofile/init.c
===================================================================
--- linux-2.6.orig/arch/s390/oprofile/init.c
+++ linux-2.6/arch/s390/oprofile/init.c
@@ -11,16 +11,51 @@
#include <linux/oprofile.h>
#include <linux/init.h>
#include <linux/errno.h>
+#include <linux/fs.h>
+#ifdef CONFIG_OPROFILE_HWSAMPLING_MODE
+#include <asm/hwsampler.h>
+
+extern int oprofile_create_hwsampling_files(struct super_block *sb,
+ struct dentry *root);
+
+extern unsigned long oprofile_min_interval;
+extern unsigned long oprofile_max_interval;
This becomes static if we move it to arch/s390/oprofile/hwsampler.c
(see below).
done
Post by Robert Richter
Post by g***@linux.vnet.ibm.com
+#endif /* CONFIG_OPROFILE_HWSAMPLING_MODE */
extern void s390_backtrace(struct pt_regs * const regs, unsigned int depth);
int __init oprofile_arch_init(struct oprofile_operations* ops)
{
ops->backtrace = s390_backtrace;
+
+#ifdef CONFIG_OPROFILE_HWSAMPLING_MODE
+ if (hwsampler_setup())
+ return -ENODEV;
+
+ /*
+ * create hwsampler files only if hwsampler_setup() succeeds.
+ */
+ ops->create_files = oprofile_create_hwsampling_files;
+ oprofile_min_interval = hwsampler_query_min_interval();
+ if (oprofile_min_interval < 0) {
+ oprofile_min_interval = 0;
+ return -ENODEV;
+ }
+ oprofile_max_interval = hwsampler_query_max_interval();
+ if (oprofile_max_interval < 0) {
+ oprofile_max_interval = 0;
+ return -ENODEV;
+ }
+ return 0;
+#endif /* CONFIG_OPROFILE_HWSAMPLING_MODE */
Move all the code for CONFIG_OPROFILE_HWSAMPLING_MODE in this file to
arch/s390/oprofile/hwsampler.c
and only export an oprofile_hwsampler_init() function. This can be an
empty function stub for the !CONFIG_OPROFILE_HWSAMPLING_MODE case.
done
Post by Robert Richter
Post by g***@linux.vnet.ibm.com
+
return -ENODEV;
}
void oprofile_arch_exit(void)
{
+#ifdef CONFIG_OPROFILE_HWSAMPLING_MODE
+ hwsampler_shutdown();
+#endif /* CONFIG_OPROFILE_HWSAMPLING_MODE */
Same here...
done
Post by Robert Richter
-Robert
Post by g***@linux.vnet.ibm.com
}
g***@linux.vnet.ibm.com
2010-12-20 13:05:42 UTC
Permalink
From: ***@linux.vnet.ibm.com

The CPU Measurement Facility CPUMF is described in the z/Architecture Principles of Operation.

The patch introduces
- a new configuration option OPROFILE_HWSAMPLING_MODE
- a new kernel module hwsampler that controls all hardware-sampling-related operations, such as
- checking whether the hardware sampling feature is available
- i.e. on System z models z10 and up, in LPAR mode only, and authorised during LPAR activation
- allocating memory for the hardware sampling feature
- starting/stopping hardware sampling
The hwsampler module will also depend on CONFIG_OPROFILE and CONFIG_64BIT.

All functions required to start and stop hardware sampling have to be
invoked by the oprofile kernel module as provided by the other patches of this patch set.

If hardware-based sampling cannot be set up, standard timer-based sampling is used by OProfile.

Signed-off-by: Mahesh Salgaonkar <***@linux.vnet.ibm.com>
Signed-off-by: Maran Pakkirisamy <***@linux.vnet.ibm.com>
Signed-off-by: Heinz Graalfs <***@linux.vnet.ibm.com>
---
arch/s390/Kconfig | 22
arch/s390/include/asm/hwsampler.h | 130 +++
arch/s390/oprofile/Makefile | 6
drivers/s390/hwsampler/hwsampler-main.c | 1155 ++++++++++++++++++++++++++++++++
drivers/s390/hwsampler/smpctl.c | 170 ++++
5 files changed, 1483 insertions(+)

Index: linux-2.6/arch/s390/include/asm/hwsampler.h
===================================================================
--- /dev/null
+++ linux-2.6/arch/s390/include/asm/hwsampler.h
@@ -0,0 +1,130 @@
+/*
+ * CPUMF HW sampler structures and prototypes
+ *
+ * Copyright IBM Corp. 2010
+ * Author(s): Heinz Graalfs <***@de.ibm.com>
+ */
+
+#ifndef HWSAMPLER_H_
+#define HWSAMPLER_H_
+
+#include <linux/workqueue.h>
+
+struct qsi_info_block /* QUERY SAMPLING information block */
+{ /* Bit(s) */
+ unsigned int b0_13:14; /* 0-13: zeros */
+ unsigned int as:1; /* 14: sampling authorisation control*/
+ unsigned int b15_21:7; /* 15-21: zeros */
+ unsigned int es:1; /* 22: sampling enable control */
+ unsigned int b23_29:7; /* 23-29: zeros */
+ unsigned int cs:1; /* 30: sampling activation control */
+ unsigned int:1; /* 31: reserved */
+ unsigned int bsdes:16; /* 4-5: size of sampling entry */
+ unsigned int:16; /* 6-7: reserved */
+ unsigned long min_sampl_rate; /* 8-15: minimum sampling interval */
+ unsigned long max_sampl_rate; /* 16-23: maximum sampling interval*/
+ unsigned long tear; /* 24-31: TEAR contents */
+ unsigned long dear; /* 32-39: DEAR contents */
+ unsigned int rsvrd0; /* 40-43: reserved */
+ unsigned int cpu_speed; /* 44-47: CPU speed */
+ unsigned long long rsvrd1; /* 48-55: reserved */
+ unsigned long long rsvrd2; /* 56-63: reserved */
+};
+
+struct ssctl_request_block /* SET SAMPLING CONTROLS req block */
+{ /* bytes 0 - 7 Bit(s) */
+ unsigned int s:1; /* 0: maximum buffer indicator */
+ unsigned int h:1; /* 1: part. level reserved for VM use*/
+ unsigned long b2_53:52; /* 2-53: zeros */
+ unsigned int es:1; /* 54: sampling enable control */
+ unsigned int b55_61:7; /* 55-61: - zeros */
+ unsigned int cs:1; /* 62: sampling activation control */
+ unsigned int b63:1; /* 63: zero */
+ unsigned long interval; /* 8-15: sampling interval */
+ unsigned long tear; /* 16-23: TEAR contents */
+ unsigned long dear; /* 24-31: DEAR contents */
+ /* 32-63: */
+ unsigned long rsvrd1; /* reserved */
+ unsigned long rsvrd2; /* reserved */
+ unsigned long rsvrd3; /* reserved */
+ unsigned long rsvrd4; /* reserved */
+};
+
+typedef void oprf_add_sample_func(unsigned long pc,
+ struct pt_regs * const regs,
+ unsigned long event, int is_kernel,
+ struct task_struct *task);
+
+struct cpu_buffer {
+ unsigned long first_sdbt; /* @ of 1st SDB-Table for this CP*/
+ unsigned long worker_entry;
+ unsigned long sample_overflow; /* taken from SDB ... */
+ struct qsi_info_block qsi;
+ struct ssctl_request_block ssctl;
+ struct work_struct worker;
+ oprf_add_sample_func *add_sample_f;
+ atomic_t ext_params;
+ unsigned long req_alert;
+ unsigned long loss_of_sample_data;
+ unsigned long invalid_entry_address;
+ unsigned long incorrect_sdbt_entry;
+ unsigned long sample_auth_change_alert;
+ unsigned int finish:1;
+ unsigned int oom:1;
+ unsigned int stop_mode:1;
+};
+
+struct data_entry {
+ unsigned int def:16; /* 0-15 Data Entry Format */
+ unsigned int R:4; /* 16-19 reserved */
+ unsigned int U:4; /* 20-23 Number of unique instruct. */
+ unsigned int z:2; /* zeros */
+ unsigned int T:1; /* 26 PSW DAT mode */
+ unsigned int W:1; /* 27 PSW wait state */
+ unsigned int P:1; /* 28 PSW Problem state */
+ unsigned int AS:2; /* 29-30 PSW address-space control */
+ unsigned int I:1; /* 31 entry valid or invalid */
+ unsigned int:16;
+ unsigned int prim_asn:16; /* primary ASN */
+ unsigned long long ia; /* Instruction Address */
+ unsigned long long lpp; /* Logical-Partition Program Param. */
+ unsigned long long vpp; /* Virtual-Machine Program Param. */
+};
+
+struct trailer_entry {
+ unsigned int f:1; /* 0 - Block Full Indicator */
+ unsigned int a:1; /* 1 - Alert request control */
+ unsigned long:62; /* 2 - 63: Reserved */
+ unsigned long overflow; /* 64 - sample Overflow count */
+ unsigned long timestamp; /* 16 - time-stamp */
+ unsigned long timestamp1; /* */
+ unsigned long reserved1; /* 32 -Reserved */
+ unsigned long reserved2; /* */
+ unsigned long progusage1; /* 48 - reserved for programming use */
+ unsigned long progusage2; /* */
+};
+
+int hwsampler_setup(void);
+int hwsampler_shutdown(void);
+int hwsampler_allocate(unsigned long sdbt, unsigned long sdb);
+int hwsampler_deallocate(void);
+long hwsampler_query_min_interval(void);
+long hwsampler_query_max_interval(void);
+int hwsampler_start_all(unsigned long interval);
+int hwsampler_stop_all(void);
+int hwsampler_deactivate(unsigned int cpu);
+int hwsampler_activate(unsigned int cpu);
+unsigned long hwsampler_get_sample_overflow_count(unsigned int cpu);
+
+int smp_ctl_qsi(int);
+int smp_ctl_ssctl_deactivate(int);
+int smp_ctl_ssctl_stop(int);
+int smp_ctl_ssctl_enable_activate(int, unsigned long);
+
+int qsi(void *);
+
+void execute_qsi(void *);
+void execute_ssctl(void *);
+
+#endif /*HWSAMPLER_H_*/
+
Index: linux-2.6/drivers/s390/hwsampler/hwsampler-main.c
===================================================================
--- /dev/null
+++ linux-2.6/drivers/s390/hwsampler/hwsampler-main.c
@@ -0,0 +1,1155 @@
+/**
+ * drivers/s390/hwsampler/hwsampler-main.c
+ *
+ * Copyright IBM Corp. 2010
+ * Author: Heinz Graalfs <***@de.ibm.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/smp.h>
+#include <linux/errno.h>
+#include <linux/workqueue.h>
+#include <linux/interrupt.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/semaphore.h>
+#include <linux/oom.h>
+
+#include <asm/lowcore.h>
+#include <asm/s390_ext.h>
+
+#include "asm/hwsampler.h"
+
+#define MAX_NUM_SDB 511
+#define MIN_NUM_SDB 1
+
+#define ALERT_REQ_MASK 0x4000000000000000ul
+#define BUFFER_FULL_MASK 0x8000000000000000ul
+
+/*
+ * Alert bits tested against the external interrupt parameter.
+ * The constants are unsigned: (1 << 31) would shift into the sign bit of
+ * a signed int, which is undefined behaviour in C.
+ */
+#define EI_IEA		(1U << 31)	/* invalid entry address */
+#define EI_ISE		(1U << 30)	/* incorrect SDBT entry */
+#define EI_PRA		(1U << 29)	/* program request alert */
+#define EI_SACA		(1U << 23)	/* sampler authorization change alert */
+#define EI_LSDA		(1U << 22)	/* loss of sample data alert */
+
+static inline unsigned long *trailer_entry_ptr(unsigned long v)
+{
+ void *ret;
+
+ ret = (void *)v;
+ ret += PAGE_SIZE;
+ ret -= sizeof(struct trailer_entry);
+
+ return (unsigned long *) ret;
+}
+
+DEFINE_PER_CPU(struct cpu_buffer, hws_cpu_buffer);
+EXPORT_PER_CPU_SYMBOL(hws_cpu_buffer);
+
+static struct workqueue_struct *hws_wq;
+
+static DEFINE_MUTEX(hws_sem);
+static DEFINE_MUTEX(hws_sem_oom);
+
+static unsigned char hws_flush_all;
+
+static unsigned int hws_oom;
+
+static unsigned int hws_state;
+enum {
+ HWS_INIT = 1,
+ HWS_DEALLOCATED,
+ HWS_STOPPED,
+ HWS_STARTED,
+ HWS_STOPPING };
+
+/* set to 1 if called by kernel during memory allocation */
+static unsigned char oom_killer_was_active;
+/* size of SDBT and SDB as of allocate API */
+static unsigned long num_sdbt = 100;
+static unsigned long num_sdb = 511;
+/* sampling interval (machine cycles) */
+static unsigned long interval;
+
+static unsigned long min_sampler_rate;
+static unsigned long max_sampler_rate;
+
+/* prototypes for external interrupt handler and worker */
+static void hws_ext_handler(unsigned int ext_int_code,
+ unsigned int param32, unsigned long param64);
+
+static void worker(struct work_struct *work);
+
+static void add_samples_to_oprofile(unsigned cpu, unsigned long *,
+ oprf_add_sample_func *,
+ unsigned long *dear);
+
+static void init_all_cpu_buffers(void)
+{
+ int cpu;
+ struct cpu_buffer *cb;
+
+ for_each_online_cpu(cpu) {
+ cb = &per_cpu(hws_cpu_buffer, cpu);
+ memset(cb, 0, sizeof(struct cpu_buffer));
+ }
+}
+
+static int is_link_entry(unsigned long *s)
+{
+ return *s & 0x1ul ? 1 : 0;
+}
+
+static unsigned long *get_next_sdbt(unsigned long *s)
+{
+ return (unsigned long *) (*s & ~0x1ul);
+}
+
+static int prepare_cpu_buffers(void)
+{
+ int cpu;
+ int rc;
+ struct cpu_buffer *cb;
+
+ rc = 0;
+ for_each_online_cpu(cpu) {
+ cb = &per_cpu(hws_cpu_buffer, cpu);
+ atomic_set(&cb->ext_params, 0);
+ cb->worker_entry = 0;
+ cb->sample_overflow = 0;
+ cb->req_alert = 0;
+ cb->incorrect_sdbt_entry = 0;
+ cb->invalid_entry_address = 0;
+ cb->loss_of_sample_data = 0;
+ cb->sample_auth_change_alert = 0;
+ cb->finish = 0;
+ cb->oom = 0;
+ cb->stop_mode = 0;
+ cb->add_sample_f =
+ __symbol_get("oprofile_add_ext_hw_sample");
+ if (!cb->add_sample_f) {
+ printk(KERN_ERR
+ "hwsampler: failed to get oprofile_add_ext_hw_sample"
+ " function symbol");
+ rc = -EINVAL;
+ }
+ }
+
+ return rc;
+}
+
+/*
+ * allocate_sdbt() - allocate sampler memory
+ * @cpu: the cpu for which sampler memory is allocated
+ *
+ * A 4K page is allocated for each requested SDBT.
+ * A maximum of 511 4K pages are allocated for the SDBs in each of the SDBTs.
+ * Set ALERT_REQ mask in each SDBs trailer.
+ * Returns zero if successful, <0 otherwise.
+ */
+static int allocate_sdbt(int cpu)
+{
+ int j, k, rc;
+ unsigned long *sdbt;
+ unsigned long sdb;
+ unsigned long *tail;
+ unsigned long *trailer;
+ struct cpu_buffer *cb;
+
+ cb = &per_cpu(hws_cpu_buffer, cpu);
+
+ if (cb->first_sdbt)
+ return -EINVAL;
+
+ sdbt = NULL;
+ tail = sdbt;
+
+ for (j = 0; j < num_sdbt; j++) {
+ sdbt = (unsigned long *)get_zeroed_page(GFP_KERNEL);
+
+ mutex_lock(&hws_sem_oom);
+ /* OOM killer might have been activated */
+ barrier();
+ if (oom_killer_was_active || !sdbt) {
+ if (sdbt)
+ free_page((unsigned long)sdbt);
+
+ goto allocate_sdbt_error;
+ }
+ if (cb->first_sdbt == 0)
+ cb->first_sdbt = (unsigned long)sdbt;
+
+ /* link current page to tail of chain */
+ if (tail)
+ *tail = (unsigned long)(void *)sdbt + 1;
+
+ mutex_unlock(&hws_sem_oom);
+
+ for (k = 0; k < num_sdb; k++) {
+ /* get and set SDB page */
+ sdb = get_zeroed_page(GFP_KERNEL);
+
+ mutex_lock(&hws_sem_oom);
+ /* OOM killer might have been activated */
+ barrier();
+ if (oom_killer_was_active || !sdb) {
+ if (sdb)
+ free_page(sdb);
+
+ goto allocate_sdbt_error;
+ }
+ *sdbt = sdb;
+ trailer = trailer_entry_ptr(*sdbt);
+ *trailer = ALERT_REQ_MASK;
+ sdbt++;
+ mutex_unlock(&hws_sem_oom);
+ }
+ tail = sdbt;
+ }
+ mutex_lock(&hws_sem_oom);
+ if (oom_killer_was_active)
+ goto allocate_sdbt_error;
+
+ rc = 0;
+ if (tail)
+ *tail = (unsigned long)
+ ((void *)cb->first_sdbt) + 1;
+
+allocate_sdbt_exit:
+ mutex_unlock(&hws_sem_oom);
+ return rc;
+
+allocate_sdbt_error:
+ rc = -ENOMEM;
+ goto allocate_sdbt_exit;
+}
+
+/*
+ * deallocate_sdbt() - deallocate all sampler memory
+ *
+ * For each online CPU all SDBT trees are deallocated.
+ * Returns the number of freed pages.
+ */
+static int deallocate_sdbt(void)
+{
+ int cpu;
+ int counter;
+
+ counter = 0;
+
+ for_each_online_cpu(cpu) {
+ unsigned long start;
+ unsigned long sdbt;
+ unsigned long *curr;
+ struct cpu_buffer *cb;
+
+ cb = &per_cpu(hws_cpu_buffer, cpu);
+
+ if (!cb->first_sdbt)
+ continue;
+
+ sdbt = cb->first_sdbt;
+ curr = (unsigned long *) sdbt;
+ start = sdbt;
+
+ /* we'll free the SDBT after all SDBs are processed... */
+ while (1) {
+ if (!*curr || !sdbt)
+ break;
+
+ /* watch for link entry reset if found */
+ if (is_link_entry(curr)) {
+ curr = get_next_sdbt(curr);
+ if (sdbt)
+ free_page(sdbt);
+
+ /* we are done if we reach the start */
+ if ((unsigned long) curr == start)
+ break;
+ else
+ sdbt = (unsigned long) curr;
+ } else {
+ /* process SDB pointer */
+ if (*curr) {
+ free_page(*curr);
+ curr++;
+ }
+ }
+ counter++;
+ }
+ cb->first_sdbt = 0;
+ }
+ return counter;
+}
+
+static int start_sampling(int cpu)
+{
+ int rc;
+ struct cpu_buffer *cb;
+
+ cb = &per_cpu(hws_cpu_buffer, cpu);
+ rc = smp_ctl_ssctl_enable_activate(cpu, interval);
+ if (rc) {
+ printk(KERN_INFO "hwsampler: CPU %d ssctl failed.\n", cpu);
+ goto start_exit;
+ }
+
+ rc = -EINVAL;
+ if (!cb->qsi.es) {
+ printk(KERN_INFO "hwsampler: CPU %d ssctl not enabled.\n", cpu);
+ goto start_exit;
+ }
+
+ if (!cb->qsi.cs) {
+ printk(KERN_INFO "hwsampler: CPU %d ssctl not active.\n", cpu);
+ goto start_exit;
+ }
+
+ printk(KERN_INFO
+ "hwsampler: CPU %d, CPUMF Sampling started, interval %lu.\n",
+ cpu, interval);
+
+ rc = 0;
+
+start_exit:
+ return rc;
+}
+
+static int stop_sampling(int cpu)
+{
+ unsigned long v;
+ int rc;
+ struct cpu_buffer *cb;
+
+ rc = smp_ctl_qsi(cpu);
+ WARN_ON(rc);
+
+ cb = &per_cpu(hws_cpu_buffer, cpu);
+ if (!rc && !cb->qsi.es)
+ printk(KERN_INFO "hwsampler: CPU %d, already stopped.\n", cpu);
+
+ rc = smp_ctl_ssctl_stop(cpu);
+ if (rc) {
+ printk(KERN_INFO "hwsampler: CPU %d, ssctl stop error %d.\n",
+ cpu, rc);
+ goto stop_exit;
+ }
+
+ printk(KERN_INFO "hwsampler: CPU %d, CPUMF Sampling stopped.\n", cpu);
+
+stop_exit:
+ v = cb->req_alert;
+ if (v)
+ printk(KERN_ERR "hwsampler: CPU %d CPUMF Request alert,"
+ " count=%lu.\n", cpu, v);
+
+ v = cb->loss_of_sample_data;
+ if (v)
+ printk(KERN_ERR "hwsampler: CPU %d CPUMF Loss of sample data,"
+ " count=%lu.\n", cpu, v);
+
+ v = cb->invalid_entry_address;
+ if (v)
+ printk(KERN_ERR "hwsampler: CPU %d CPUMF Invalid entry address,"
+ " count=%lu.\n", cpu, v);
+
+ v = cb->incorrect_sdbt_entry;
+ if (v)
+ printk(KERN_ERR
+ "hwsampler: CPU %d CPUMF Incorrect SDBT address,"
+ " count=%lu.\n", cpu, v);
+
+ v = cb->sample_auth_change_alert;
+ if (v)
+ printk(KERN_ERR
+ "hwsampler: CPU %d CPUMF Sample authorization change,"
+ " count=%lu.\n", cpu, v);
+
+ return rc;
+}
+
+/*
+ * check_hardware_prerequisites() - test the facility bit needed for sampling
+ *
+ * Copies the first two doublewords of the lowcore STFLE facility list and
+ * tests the bit selected by (1ULL << 59) in the second doubleword
+ * (presumably the CPU-measurement sampling facility - TODO confirm).
+ * Returns 0 if the bit is set, -EOPNOTSUPP otherwise.
+ */
+static int check_hardware_prerequisites(void)
+{
+	unsigned long long facility_bits[2];
+
+	/* copy exactly as much as the local array can hold */
+	memcpy(facility_bits, S390_lowcore.stfle_fac_list,
+	       sizeof(facility_bits));
+	if (!(facility_bits[1] & (1ULL << 59)))
+		return -EOPNOTSUPP;
+
+	return 0;
+}
+/*
+ * hws_oom_callback() - the OOM callback function
+ *
+ * In case the callback is invoked during memory allocation for the
+ * hw sampler, all obtained memory is deallocated and a flag is set
+ * so main sampler memory allocation can exit with a failure code.
+ * In case the callback is invoked during sampling the hw sampler
+ * is deactivated for all CPUs.
+ */
+static int hws_oom_callback(struct notifier_block *nfb,
+	unsigned long dummy, void *parm)
+{
+	unsigned long *freed;
+	int cpu;
+	struct cpu_buffer *cb;
+
+	/* @parm points to the counter of pages freed by the notifier chain */
+	freed = parm;
+
+	mutex_lock(&hws_sem_oom);
+
+	if (hws_state == HWS_DEALLOCATED) {
+		/* during memory allocation */
+		if (oom_killer_was_active == 0) {
+			/* tell allocate_sdbt() to bail out, give back memory */
+			oom_killer_was_active = 1;
+			*freed += deallocate_sdbt();
+		}
+	} else {
+		int i;
+
+		/* get_cpu() disables preemption; balanced by put_cpu() below */
+		cpu = get_cpu();
+		cb = &per_cpu(hws_cpu_buffer, cpu);
+
+		if (!cb->oom) {
+			for_each_online_cpu(i) {
+				smp_ctl_ssctl_deactivate(i);
+				cb->oom = 1;
+			}
+			/* lets the worker of this CPU stop sampling */
+			cb->finish = 1;
+
+			printk(KERN_INFO
+				"hwsampler: CPU %d, OOM notify during CPUMF Sampling.\n",
+				cpu);
+		}
+		put_cpu();
+	}
+
+	mutex_unlock(&hws_sem_oom);
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block hws_oom_notifier = {
+ .notifier_call = hws_oom_callback
+};
+
+static int __cpuinit hws_cpu_callback(struct notifier_block *nfb,
+ unsigned long action, void *hcpu)
+{
+ /* We do not have sampler space available for all possible CPUs.
+ All CPUs should be online when hw sampling is activated. */
+ return NOTIFY_BAD;
+}
+
+static struct notifier_block hws_cpu_notifier = {
+ .notifier_call = hws_cpu_callback
+};
+
+/**
+ * hwsampler_deactivate() - set hardware sampling temporarily inactive
+ * @cpu: specifies the CPU to be set inactive.
+ *
+ * Returns 0 on success, !0 on failure.
+ */
+int hwsampler_deactivate(unsigned int cpu)
+{
+ /*
+ * Deactivate hw sampling temporarily and flush the buffer
+ * by pushing all the pending samples to oprofile buffer.
+ *
+ * This function can be called under one of the following conditions:
+ * Memory unmap, task is exiting.
+ */
+ int rc;
+ struct cpu_buffer *cb;
+
+ rc = 0;
+ mutex_lock(&hws_sem);
+
+ cb = &per_cpu(hws_cpu_buffer, cpu);
+ if (hws_state == HWS_STARTED) {
+ rc = smp_ctl_qsi(cpu);
+ WARN_ON(rc);
+ if (cb->qsi.cs) {
+ rc = smp_ctl_ssctl_deactivate(cpu);
+ if (rc) {
+ printk(KERN_INFO
+ "hwsampler: CPU %d, CPUMF Deactivation failed.\n", cpu);
+ cb->finish = 1;
+ hws_state = HWS_STOPPING;
+ } else {
+ hws_flush_all = 1;
+ /* Add work to queue to read pending samples.*/
+ queue_work_on(cpu, hws_wq, &cb->worker);
+ }
+ }
+ }
+ mutex_unlock(&hws_sem);
+
+ if (hws_wq)
+ flush_workqueue(hws_wq);
+
+ return rc;
+}
+EXPORT_SYMBOL(hwsampler_deactivate);
+
+/**
+ * hwsampler_activate() - activate/resume hardware sampling which was deactivated
+ * @cpu: specifies the CPU to be set active.
+ *
+ * Returns 0 on success, !0 on failure.
+ */
+int hwsampler_activate(unsigned int cpu)
+{
+ /*
+ * Re-activate hw sampling. This should be called in pair with
+ * hwsampler_deactivate().
+ */
+ int rc;
+ struct cpu_buffer *cb;
+
+ rc = 0;
+ mutex_lock(&hws_sem);
+
+ cb = &per_cpu(hws_cpu_buffer, cpu);
+ if (hws_state == HWS_STARTED) {
+ rc = smp_ctl_qsi(cpu);
+ WARN_ON(rc);
+ if (!cb->qsi.cs) {
+ hws_flush_all = 0;
+ rc = smp_ctl_ssctl_enable_activate(cpu, interval);
+ if (rc) {
+ printk(KERN_ERR
+ "CPU %d, CPUMF activate sampling failed.\n",
+ cpu);
+ }
+ }
+ }
+
+ mutex_unlock(&hws_sem);
+
+ return rc;
+}
+EXPORT_SYMBOL(hwsampler_activate);
+
+static void hws_ext_handler(unsigned int ext_int_code,
+ unsigned int param32, unsigned long param64)
+{
+ int cpu;
+ struct cpu_buffer *cb;
+
+ cpu = smp_processor_id();
+ cb = &per_cpu(hws_cpu_buffer, cpu);
+
+ atomic_xchg(
+ &cb->ext_params,
+ atomic_read(&cb->ext_params)
+ | S390_lowcore.ext_params);
+
+ if (hws_wq)
+ queue_work(hws_wq, &cb->worker);
+}
+
+static int check_qsi_on_setup(void)
+{
+ int rc;
+ unsigned int cpu;
+ struct cpu_buffer *cb;
+
+ for_each_online_cpu(cpu) {
+ cb = &per_cpu(hws_cpu_buffer, cpu);
+ rc = smp_ctl_qsi(cpu);
+ WARN_ON(rc);
+ if (rc)
+ return -EOPNOTSUPP;
+
+ if (!cb->qsi.as) {
+ printk(KERN_INFO "hwsampler: CPUMF sampling is not authorized.\n");
+ return -EINVAL;
+ }
+
+ if (cb->qsi.es) {
+ printk(KERN_WARNING "hwsampler: CPUMF is still enabled.\n");
+ rc = smp_ctl_ssctl_stop(cpu);
+ if (rc)
+ return -EINVAL;
+
+ printk(KERN_INFO
+ "CPU %d, CPUMF Sampling stopped now.\n", cpu);
+ }
+ }
+ return 0;
+}
+
+static int check_qsi_on_start(void)
+{
+ unsigned int cpu;
+ int rc;
+ struct cpu_buffer *cb;
+
+ for_each_online_cpu(cpu) {
+ cb = &per_cpu(hws_cpu_buffer, cpu);
+ rc = smp_ctl_qsi(cpu);
+ WARN_ON(rc);
+
+ if (!cb->qsi.as)
+ return -EINVAL;
+
+ if (cb->qsi.es)
+ return -EINVAL;
+
+ if (cb->qsi.cs)
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static void worker_on_start(unsigned int cpu)
+{
+ struct cpu_buffer *cb;
+
+ cb = &per_cpu(hws_cpu_buffer, cpu);
+ cb->worker_entry = cb->first_sdbt;
+}
+
+/*
+ * worker_check_error() - account the alert bits of an external interrupt
+ * @cpu: CPU whose sampler buffer is examined
+ * @ext_params: external interrupt parameter bits collected for that CPU
+ *
+ * Increments the matching per-CPU counter for every EI_* bit set in
+ * @ext_params.
+ * Returns -EINVAL if the worker has no valid SDBT entry to work on, or if
+ * an invalid entry address, an incorrect SDBT entry or a sampler
+ * authorization change was signalled; 0 otherwise.
+ */
+static int worker_check_error(unsigned int cpu, int ext_params)
+{
+	int rc;
+	unsigned long *sdbt;
+	struct cpu_buffer *cb;
+
+	rc = 0;
+	cb = &per_cpu(hws_cpu_buffer, cpu);
+	sdbt = (unsigned long *) cb->worker_entry;
+
+	if (!sdbt || !*sdbt)
+		return -EINVAL;
+
+	/* program request alert: counted only, not an error */
+	if (ext_params & EI_PRA)
+		cb->req_alert++;
+
+	/* loss of sample data: counted only, not an error */
+	if (ext_params & EI_LSDA)
+		cb->loss_of_sample_data++;
+
+	if (ext_params & EI_IEA) {
+		cb->invalid_entry_address++;
+		rc = -EINVAL;
+	}
+
+	if (ext_params & EI_ISE) {
+		cb->incorrect_sdbt_entry++;
+		rc = -EINVAL;
+	}
+
+	if (ext_params & EI_SACA) {
+		cb->sample_auth_change_alert++;
+		rc = -EINVAL;
+	}
+
+	return rc;
+}
+
+/*
+ * worker_on_finish() - stop sampling on @cpu and pass the request on
+ * @cpu: CPU the worker is handling
+ *
+ * Does nothing unless the finish flag of @cpu's buffer is set. If
+ * sampling is still enabled on @cpu it is stopped, and every other online
+ * CPU whose finish flag is not yet set gets the flag set and its own
+ * worker queued, so that it stops as well.
+ */
+static void worker_on_finish(unsigned int cpu)
+{
+	int rc, i;
+	struct cpu_buffer *cb;
+
+	cb = &per_cpu(hws_cpu_buffer, cpu);
+
+	if (!cb->finish)
+		return;
+
+	/* refresh cb->qsi to learn whether sampling is still enabled */
+	rc = smp_ctl_qsi(cpu);
+	WARN_ON(rc);
+	if (!cb->qsi.es)
+		return;
+
+	printk(KERN_INFO
+		"hwsampler: CPU %d, CPUMF Stop/Deactivate sampling.\n",
+		cpu);
+	rc = smp_ctl_ssctl_stop(cpu);
+	if (rc)
+		printk(KERN_INFO
+			"hwsampler: CPU %d, CPUMF Deactivation failed.\n",
+			cpu);
+
+	for_each_online_cpu(i) {
+		struct cpu_buffer *other;
+
+		if (i == cpu)
+			continue;
+
+		/* use the buffer of CPU i, not the one of the current CPU */
+		other = &per_cpu(hws_cpu_buffer, i);
+		if (!other->finish) {
+			other->finish = 1;
+			queue_work_on(i, hws_wq, &other->worker);
+		}
+	}
+}
+
+/*
+ * worker_on_interrupt() - hand the samples of filled SDBs to oprofile
+ * @cpu: CPU whose sampler buffer is processed
+ *
+ * Starts at the SDBT slot remembered in cb->worker_entry and walks the
+ * SDBs for as long as their trailer has BUFFER_FULL_MASK set. The first
+ * SDB that is not marked full ends the walk; it is still processed once
+ * if hws_flush_all is set. The slot following the last processed SDB is
+ * stored back into cb->worker_entry.
+ */
+static void worker_on_interrupt(unsigned int cpu)
+{
+ unsigned long *sdbt;
+ unsigned char done;
+ struct cpu_buffer *cb;
+
+ cb = &per_cpu(hws_cpu_buffer, cpu);
+
+ sdbt = (unsigned long *) cb->worker_entry;
+
+ done = 0;
+ /* do not proceed if stop was entered,
+ * forget the buffers not yet processed */
+ while (!done && !cb->stop_mode) {
+ unsigned long *trailer;
+ struct trailer_entry *te;
+ /* always NULL here, so the whole SDB up to its trailer is scanned */
+ unsigned long *dear = 0;
+
+ trailer = trailer_entry_ptr(*sdbt);
+ /* leave loop if no more work to do */
+ if (!(*trailer & BUFFER_FULL_MASK)) {
+ done = 1;
+ /* with done set, "continue" re-tests the loop condition and exits */
+ if (!hws_flush_all)
+ continue;
+ }
+
+ te = (struct trailer_entry *)trailer;
+ cb->sample_overflow += te->overflow;
+
+ add_samples_to_oprofile(cpu, sdbt,
+ cb->add_sample_f, dear);
+
+ /*
+ * reset trailer: overwrite its first byte with 0x40
+ * NOTE(review): presumably this clears the block-full bit and
+ * re-arms the alert request bit (compare ALERT_REQ_MASK) - confirm
+ */
+ xchg((unsigned char *) te, 0x40);
+
+ /* advance to next sdb slot in current sdbt */
+ sdbt++;
+ /* in case link bit is set use address w/o link bit */
+ if (is_link_entry(sdbt))
+ sdbt = get_next_sdbt(sdbt);
+
+ cb->worker_entry = (unsigned long)sdbt;
+ }
+}
+
+static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt,
+ oprf_add_sample_func *add_sample_f,
+ unsigned long *dear)
+{
+ struct data_entry *sample_data_ptr;
+ unsigned long *trailer;
+
+ trailer = trailer_entry_ptr(*sdbt);
+ if (dear) {
+ if (dear > trailer)
+ return;
+ trailer = dear;
+ }
+
+ sample_data_ptr = (struct data_entry *)(*sdbt);
+
+ while ((unsigned long *)sample_data_ptr < trailer) {
+ struct pt_regs *regs = NULL;
+ struct task_struct *tsk = NULL;
+
+ /*
+ * Check sampling mode, 1 indicates basic (=customer) sampling
+ * mode.
+ */
+ if (sample_data_ptr->def != 1) {
+ /* sample slot is not yet written */
+ break;
+ } else {
+ /* make sure we don't use it twice,
+ * the next time the sampler will set it again */
+ sample_data_ptr->def = 0;
+ }
+
+ /* Get pt_regs. */
+ if (sample_data_ptr->P == 1) {
+ /* userspace sample */
+ unsigned int pid = sample_data_ptr->prim_asn;
+ rcu_read_lock();
+ tsk = pid_task(find_vpid(pid), PIDTYPE_PID);
+ if (tsk)
+ regs = task_pt_regs(tsk);
+ rcu_read_unlock();
+ } else {
+ /* kernelspace sample */
+ regs = task_pt_regs(current);
+ }
+
+ mutex_lock(&hws_sem);
+ (*add_sample_f)(sample_data_ptr->ia, regs, 0,
+ !sample_data_ptr->P, tsk);
+ mutex_unlock(&hws_sem);
+
+ sample_data_ptr++;
+ }
+}
+
+static void worker(struct work_struct *work)
+{
+ unsigned int cpu;
+ int ext_params;
+ struct cpu_buffer *cb;
+
+ cb = container_of(work, struct cpu_buffer, worker);
+ cpu = smp_processor_id();
+ ext_params = atomic_xchg(&cb->ext_params, 0);
+
+ if (!cb->worker_entry)
+ worker_on_start(cpu);
+
+ if (worker_check_error(cpu, ext_params))
+ return;
+
+ if (!cb->finish)
+ worker_on_interrupt(cpu);
+
+ if (cb->finish)
+ worker_on_finish(cpu);
+}
+
+/**
+ * hwsampler_allocate() - allocate memory for the hardware sampler
+ * @sdbt: number of SDBTs per online CPU (must be > 0)
+ * @sdb: number of SDBs per SDBT (minimum 1, maximum 511)
+ *
+ * Returns 0 on success, !0 on failure.
+ */
+int hwsampler_allocate(unsigned long sdbt, unsigned long sdb)
+{
+ int cpu, rc;
+ mutex_lock(&hws_sem);
+
+ rc = -EINVAL;
+ if (hws_state != HWS_DEALLOCATED)
+ goto allocate_exit;
+
+ if (sdbt < 1)
+ goto allocate_exit;
+
+ if (sdb > MAX_NUM_SDB || sdb < MIN_NUM_SDB)
+ goto allocate_exit;
+
+ num_sdbt = sdbt;
+ num_sdb = sdb;
+
+ oom_killer_was_active = 0;
+ register_oom_notifier(&hws_oom_notifier);
+
+ for_each_online_cpu(cpu) {
+ if (allocate_sdbt(cpu)) {
+ unregister_oom_notifier(&hws_oom_notifier);
+ goto allocate_error;
+ }
+ }
+ unregister_oom_notifier(&hws_oom_notifier);
+ if (oom_killer_was_active)
+ goto allocate_error;
+
+ hws_state = HWS_STOPPED;
+ rc = 0;
+
+allocate_exit:
+ mutex_unlock(&hws_sem);
+ return rc;
+
+allocate_error:
+ rc = -ENOMEM;
+ printk(KERN_ERR "hwsampler: CPUMF Memory allocation failed.\n");
+ goto allocate_exit;
+}
+EXPORT_SYMBOL(hwsampler_allocate);
+
+/**
+ * hwsampler_deallocate() - deallocate hardware sampler memory
+ *
+ * Returns 0 on success, !0 on failure.
+ */
+int hwsampler_deallocate()
+{
+ int rc;
+
+ mutex_lock(&hws_sem);
+
+ rc = -EINVAL;
+ if (hws_state != HWS_STOPPED)
+ goto deallocate_exit;
+
+ smp_ctl_clear_bit(0, 5); /* set bit 58 CR0 off */
+ deallocate_sdbt();
+
+ hws_state = HWS_DEALLOCATED;
+ rc = 0;
+
+deallocate_exit:
+ mutex_unlock(&hws_sem);
+
+ return rc;
+}
+EXPORT_SYMBOL(hwsampler_deallocate);
+
+long hwsampler_query_min_interval(void)
+{
+ if (min_sampler_rate)
+ return min_sampler_rate;
+ else
+ return -EINVAL;
+}
+EXPORT_SYMBOL(hwsampler_query_min_interval);
+
+long hwsampler_query_max_interval(void)
+{
+ if (max_sampler_rate)
+ return max_sampler_rate;
+ else
+ return -EINVAL;
+}
+EXPORT_SYMBOL(hwsampler_query_max_interval);
+
+unsigned long hwsampler_get_sample_overflow_count(unsigned int cpu)
+{
+ struct cpu_buffer *cb;
+
+ cb = &per_cpu(hws_cpu_buffer, cpu);
+
+ return cb->sample_overflow;
+}
+EXPORT_SYMBOL(hwsampler_get_sample_overflow_count);
+
+/**
+ * hwsampler_setup() - prepare the hardware sampler for use
+ *
+ * Only valid in state HWS_INIT. Clears the per-CPU buffers, checks the
+ * hardware prerequisites and the sampling authorization, creates the
+ * workqueue, determines the common minimum/maximum sampling interval of
+ * all online CPUs, registers the 0x1407 external interrupt handler and
+ * takes a reference on this module. On success the state changes to
+ * HWS_DEALLOCATED.
+ *
+ * Returns 0 on success, !0 on failure.
+ */
+int hwsampler_setup(void)
+{
+	int rc;
+	int cpu;
+	struct cpu_buffer *cb;
+
+	mutex_lock(&hws_sem);
+
+	rc = -EINVAL;
+	if (hws_state != HWS_INIT)
+		goto setup_exit;
+
+	init_all_cpu_buffers();
+
+	rc = check_hardware_prerequisites();
+	if (rc)
+		goto setup_exit;
+
+	rc = check_qsi_on_setup();
+	if (rc)
+		goto setup_exit;
+
+	rc = -EINVAL;
+	hws_wq = create_workqueue("hwsampler");
+	if (!hws_wq)
+		goto setup_exit;
+
+	for_each_online_cpu(cpu) {
+		cb = &per_cpu(hws_cpu_buffer, cpu);
+		INIT_WORK(&cb->worker, worker);
+		rc = smp_ctl_qsi(cpu);
+		WARN_ON(rc);
+		/* keep the largest minimum interval reported by any CPU */
+		if (min_sampler_rate != cb->qsi.min_sampl_rate) {
+			if (min_sampler_rate) {
+				printk(KERN_WARNING
+					"hwsampler: different min sampler rate values.\n");
+				if (min_sampler_rate < cb->qsi.min_sampl_rate)
+					min_sampler_rate =
+						cb->qsi.min_sampl_rate;
+			} else
+				min_sampler_rate = cb->qsi.min_sampl_rate;
+		}
+		/* keep the smallest maximum interval reported by any CPU */
+		if (max_sampler_rate != cb->qsi.max_sampl_rate) {
+			if (max_sampler_rate) {
+				printk(KERN_WARNING
+					"hwsampler: different max sampler rate values.\n");
+				if (max_sampler_rate > cb->qsi.max_sampl_rate)
+					max_sampler_rate =
+						cb->qsi.max_sampl_rate;
+			} else
+				max_sampler_rate = cb->qsi.max_sampl_rate;
+		}
+	}
+	register_external_interrupt(0x1407, hws_ext_handler);
+
+	if (!try_module_get(THIS_MODULE))
+		goto setup_error;
+
+	hws_state = HWS_DEALLOCATED;
+	rc = 0;
+
+setup_exit:
+	mutex_unlock(&hws_sem);
+	return rc;
+
+setup_error:
+	/*
+	 * rc still holds the result of the last smp_ctl_qsi() call and may
+	 * be 0, so set an error explicitly. Remove the handler before the
+	 * workqueue it queues work on goes away.
+	 */
+	rc = -EINVAL;
+	unregister_external_interrupt(0x1407, hws_ext_handler);
+	destroy_workqueue(hws_wq);
+	hws_wq = NULL;
+	goto setup_exit;
+}
+EXPORT_SYMBOL(hwsampler_setup);
+
+int hwsampler_shutdown()
+{
+ int rc;
+
+ mutex_lock(&hws_sem);
+
+ rc = -EINVAL;
+ if (hws_state == HWS_DEALLOCATED || hws_state == HWS_STOPPED) {
+ mutex_unlock(&hws_sem);
+
+ if (hws_wq)
+ flush_workqueue(hws_wq);
+
+ mutex_lock(&hws_sem);
+
+ if (hws_state == HWS_STOPPED) {
+ smp_ctl_clear_bit(0, 5); /* set bit 58 CR0 off */
+ deallocate_sdbt();
+ }
+ if (hws_wq) {
+ destroy_workqueue(hws_wq);
+ hws_wq = NULL;
+ }
+
+ unregister_external_interrupt(0x1407, hws_ext_handler);
+ module_put(THIS_MODULE);
+ hws_state = HWS_INIT;
+ rc = 0;
+ }
+ mutex_unlock(&hws_sem);
+
+ return rc;
+}
+EXPORT_SYMBOL(hwsampler_shutdown);
+
+/**
+ * hwsampler_start_all() - start hardware sampling on all online CPUs
+ * @rate: specifies the used interval when samples are taken
+ *
+ * Returns 0 on success, !0 on failure.
+ */
+int hwsampler_start_all(unsigned long rate)
+{
+ int rc, cpu;
+
+ mutex_lock(&hws_sem);
+
+ hws_oom = 0;
+
+ rc = -EINVAL;
+ if (hws_state != HWS_STOPPED)
+ goto start_all_exit;
+
+ interval = rate;
+
+ /* fail if rate is not valid */
+ if (interval < min_sampler_rate || interval > max_sampler_rate)
+ goto start_all_exit;
+
+ rc = check_qsi_on_start();
+ if (rc)
+ goto start_all_exit;
+
+ rc = prepare_cpu_buffers();
+ if (rc)
+ goto start_all_exit;
+
+ for_each_online_cpu(cpu) {
+ rc = start_sampling(cpu);
+ if (rc)
+ break;
+ }
+ if (rc) {
+ for_each_online_cpu(cpu) {
+ stop_sampling(cpu);
+ }
+ goto start_all_exit;
+ }
+ hws_state = HWS_STARTED;
+ rc = 0;
+
+start_all_exit:
+ mutex_unlock(&hws_sem);
+
+ if (rc)
+ return rc;
+
+ register_oom_notifier(&hws_oom_notifier);
+ hws_oom = 1;
+ hws_flush_all = 0;
+ /* now let them in, 1407 CPUMF external interrupts */
+ smp_ctl_set_bit(0, 5); /* set CR0 bit 58 */
+
+ return 0;
+}
+EXPORT_SYMBOL(hwsampler_start_all);
+
+/**
+ * hwsampler_stop_all() - stop hardware sampling on all online CPUs
+ *
+ * Returns 0 on success, !0 on failure.
+ */
+int hwsampler_stop_all()
+{
+ int tmp_rc, rc, cpu;
+ struct cpu_buffer *cb;
+
+ mutex_lock(&hws_sem);
+
+ rc = 0;
+ if (hws_state == HWS_INIT) {
+ mutex_unlock(&hws_sem);
+ return rc;
+ }
+ hws_state = HWS_STOPPING;
+ mutex_unlock(&hws_sem);
+
+ for_each_online_cpu(cpu) {
+ cb = &per_cpu(hws_cpu_buffer, cpu);
+ cb->stop_mode = 1;
+ tmp_rc = stop_sampling(cpu);
+ if (tmp_rc)
+ rc = tmp_rc;
+ }
+
+ if (hws_wq)
+ flush_workqueue(hws_wq);
+
+ for_each_online_cpu(cpu) {
+ cb = &per_cpu(hws_cpu_buffer, cpu);
+ mutex_lock(&hws_sem);
+ if (cb->add_sample_f) {
+ symbol_put_addr(cb->add_sample_f);
+ cb->add_sample_f = NULL;
+ }
+ mutex_unlock(&hws_sem);
+ }
+
+ mutex_lock(&hws_sem);
+ if (hws_oom) {
+ unregister_oom_notifier(&hws_oom_notifier);
+ hws_oom = 0;
+ }
+ hws_state = HWS_STOPPED;
+ mutex_unlock(&hws_sem);
+
+ return rc;
+}
+EXPORT_SYMBOL(hwsampler_stop_all);
+
+static int __init hwsampler_init(void)
+{
+ hws_state = HWS_INIT;
+ register_cpu_notifier(&hws_cpu_notifier);
+ return 0;
+}
+
+static void __exit hwsampler_exit(void)
+{
+ unregister_cpu_notifier(&hws_cpu_notifier);
+}
+
+module_init(hwsampler_init);
+module_exit(hwsampler_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Heinz Graalfs <***@de.ibm.com>");
+MODULE_DESCRIPTION("IBM CPUMF Customer Mode Sampling Kernel Module");
Index: linux-2.6/drivers/s390/hwsampler/smpctl.c
===================================================================
--- /dev/null
+++ linux-2.6/drivers/s390/hwsampler/smpctl.c
@@ -0,0 +1,170 @@
+/**
+ * drivers/s390/hwsampler/smpctl.c
+ *
+ * Copyright IBM Corp. 2010
+ * Author: Heinz Graalfs <***@de.ibm.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/version.h>
+#include <linux/smp.h>
+#include <linux/errno.h>
+
+#include "asm/hwsampler.h"
+
+DECLARE_PER_CPU(struct cpu_buffer, hws_cpu_buffer);
+
+struct hws_execute_parms { /* argument bundle handed to execute_qsi()/execute_ssctl() */
+ void *buffer; /* in: block passed on to qsi() or ssctl() */
+ signed int rc; /* out: return value of qsi() or ssctl() */
+};
+
+int ssctl(void *buffer) /* returns 0 when the resulting cc is 0, -EINVAL otherwise */
+{
+ int cc;
+
+ /* set in order to detect a program check */
+ cc = 1;
+
+ asm volatile(
+ "0: .insn s,0xB2870000,0(%1)\n" /* instruction operates on the block addressed by buffer (%1) */
+ "1: ipm %0\n" /* fetch the condition code into cc ... */
+ " srl %0,28\n" /* ... and shift it down into the low-order bits */
+ "2:\n"
+ EX_TABLE(0b, 2b) EX_TABLE(1b, 2b) /* a fault at 0b/1b resumes at 2b, so cc keeps its preset 1 */
+ : "+d" (cc), "+a" (buffer) /* "+d": the preset value of cc is an input as well as an output */
+ : "m" (*((struct ssctl_request_block *)buffer))
+ : "cc", "memory");
+
+ return cc ? -EINVAL : 0 ;
+}
+
+/*
+ * qsi() - execute the 0xB286 instruction on the block at @buffer
+ * @buffer: block the instruction operates on (cast to struct qsi_info_block)
+ *
+ * Returns 0 if the instruction completed, -EINVAL if it raised an
+ * exception that was caught through the exception table.
+ */
+int qsi(void *buffer)
+{
+	/*
+	 * Preset to 1 so that a faulting instruction, which skips the
+	 * "lhi %0,0" below, is reported as a failure.
+	 */
+	int cc = 1;
+
+	/*
+	 * cc must be a read-write ("+d") operand: with a write-only "=d"
+	 * the compiler is not required to load the preset value into the
+	 * register, so the fault path could return an undefined value.
+	 */
+	asm volatile(
+		"0: .insn s,0xB2860000,0(%1)\n"
+		"1: lhi %0,0\n"
+		"2:\n"
+		EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
+		: "+d" (cc), "+a" (buffer)
+		: "m" (*((struct qsi_info_block *)buffer))
+		: "cc", "memory");
+
+	return cc ? -EINVAL : 0;
+}
+
+/*
+ * Cross-call handler: run qsi() on the buffer supplied in @parms
+ * (a struct hws_execute_parms) and store the result in its rc field.
+ */
+void execute_qsi(void *parms)
+{
+	struct hws_execute_parms *req = parms;
+	int result = qsi(req->buffer);
+
+	req->rc = result;
+}
+
+/*
+ * Cross-call handler: run ssctl() on the buffer supplied in @parms
+ * (a struct hws_execute_parms) and store the result in its rc field.
+ */
+void execute_ssctl(void *parms)
+{
+	struct hws_execute_parms *req = parms;
+	int result = ssctl(req->buffer);
+
+	req->rc = result;
+}
+
+/*
+ * smp_ctl_ssctl_stop() - clear the es and cs sampling controls on @cpu
+ * @cpu: CPU whose per-CPU hws_cpu_buffer request block is used
+ *
+ * Runs ssctl() on @cpu with es = 0 and cs = 0, then runs qsi() on @cpu and
+ * complains loudly if the queried block still shows es or cs set.
+ *
+ * Returns 0 on success, or the error of the SSCTL cross-call / ssctl().
+ */
+int smp_ctl_ssctl_stop(int cpu)
+{
+	int rc;
+	struct hws_execute_parms ep;
+	struct cpu_buffer *cb;
+
+	cb = &per_cpu(hws_cpu_buffer, cpu);
+
+	cb->ssctl.es = 0;
+	cb->ssctl.cs = 0;
+
+	ep.buffer = &cb->ssctl;
+	/* ep.rc is only written if the handler really ran on @cpu */
+	rc = smp_call_function_single(cpu, execute_ssctl, &ep, 1);
+	if (!rc)
+		rc = ep.rc;
+	if (rc) {
+		printk(KERN_ERR "hwsampler: CPU %d CPUMF SSCTL failed.\n", cpu);
+		dump_stack();
+	}
+
+	ep.buffer = &cb->qsi;
+	smp_call_function_single(cpu, execute_qsi, &ep, 1);
+
+	if (cb->qsi.es || cb->qsi.cs) {
+		printk(KERN_EMERG "CPUMF sampling did not stop properly.\n");
+		dump_stack();
+	}
+
+	return rc;
+}
+
+/*
+ * smp_ctl_ssctl_deactivate() - keep es set but clear the cs control on @cpu
+ * @cpu: CPU whose per-CPU hws_cpu_buffer request block is used
+ *
+ * Runs ssctl() on @cpu with es = 1 and cs = 0, then runs qsi() on @cpu and
+ * reports if the queried block still shows cs set.
+ *
+ * Returns 0 on success, or the error of the SSCTL cross-call / ssctl().
+ */
+int smp_ctl_ssctl_deactivate(int cpu)
+{
+	int rc;
+	struct hws_execute_parms ep;
+	struct cpu_buffer *cb;
+
+	cb = &per_cpu(hws_cpu_buffer, cpu);
+
+	cb->ssctl.es = 1;
+	cb->ssctl.cs = 0;
+
+	ep.buffer = &cb->ssctl;
+	/* ep.rc is only written if the handler really ran on @cpu */
+	rc = smp_call_function_single(cpu, execute_ssctl, &ep, 1);
+	if (!rc)
+		rc = ep.rc;
+	if (rc)
+		printk(KERN_ERR "hwsampler: CPU %d CPUMF SSCTL failed.\n", cpu);
+
+	ep.buffer = &cb->qsi;
+	smp_call_function_single(cpu, execute_qsi, &ep, 1);
+
+	if (cb->qsi.cs)
+		printk(KERN_EMERG "CPUMF sampling was not set inactive.\n");
+
+	return rc;
+}
+
+/*
+ * smp_ctl_ssctl_enable_activate() - set the es and cs sampling controls on @cpu
+ * @cpu: CPU whose per-CPU hws_cpu_buffer request block is used
+ * @interval: value stored in the request block's interval field
+ *
+ * Fills the request block (h, tear, dear, interval, es = 1, cs = 1), runs
+ * ssctl() on @cpu and afterwards runs qsi() on @cpu. A failing QSI is only
+ * logged; it does not change the return value.
+ *
+ * Returns 0 on success, or the error of the SSCTL cross-call / ssctl().
+ */
+int smp_ctl_ssctl_enable_activate(int cpu, unsigned long interval)
+{
+	int rc;
+	struct hws_execute_parms ep;
+	struct cpu_buffer *cb;
+
+	cb = &per_cpu(hws_cpu_buffer, cpu);
+
+	cb->ssctl.h = 1;
+	cb->ssctl.tear = cb->first_sdbt;
+	/* dear is loaded from the first entry found at first_sdbt */
+	cb->ssctl.dear = *(unsigned long *) cb->first_sdbt;
+	cb->ssctl.interval = interval;
+	cb->ssctl.es = 1;
+	cb->ssctl.cs = 1;
+
+	ep.buffer = &cb->ssctl;
+	/* ep.rc is only written if the handler really ran on @cpu */
+	rc = smp_call_function_single(cpu, execute_ssctl, &ep, 1);
+	if (!rc)
+		rc = ep.rc;
+	if (rc)
+		printk(KERN_ERR "hwsampler: CPU %d CPUMF SSCTL failed.\n", cpu);
+
+	ep.buffer = &cb->qsi;
+	/* only trust ep.rc when the QSI cross-call itself succeeded */
+	if (smp_call_function_single(cpu, execute_qsi, &ep, 1) || ep.rc)
+		printk(KERN_ERR "hwsampler: CPU %d CPUMF QSI failed.\n", cpu);
+
+	return rc;
+}
+
+/*
+ * smp_ctl_qsi() - run qsi() on @cpu, filling that CPU's qsi block
+ * @cpu: CPU whose per-CPU hws_cpu_buffer qsi block is filled
+ *
+ * Returns 0 on success, the error of smp_call_function_single() if the
+ * handler could not be run on @cpu, or the error returned by qsi().
+ */
+int smp_ctl_qsi(int cpu)
+{
+	int rc;
+	struct hws_execute_parms ep;
+	struct cpu_buffer *cb;
+
+	cb = &per_cpu(hws_cpu_buffer, cpu);
+
+	ep.buffer = &cb->qsi;
+	/* ep.rc is only written if the handler really ran on @cpu */
+	rc = smp_call_function_single(cpu, execute_qsi, &ep, 1);
+
+	return rc ? rc : ep.rc;
+}
Index: linux-2.6/arch/s390/Kconfig
===================================================================
--- linux-2.6.orig/arch/s390/Kconfig
+++ linux-2.6/arch/s390/Kconfig
@@ -127,6 +127,7 @@ config S390
select ARCH_INLINE_WRITE_UNLOCK_BH
select ARCH_INLINE_WRITE_UNLOCK_IRQ
select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE
+ select HAVE_HWSAMPLER

config SCHED_OMIT_FRAME_POINTER
bool
@@ -618,6 +619,27 @@ config SECCOMP

If unsure, say Y.

+config HWSAMPLER
+ tristate "Exploit CPUMF hardware sampling with OProfile"
+ depends on OPROFILE
+ depends on HAVE_HWSAMPLER
+ depends on 64BIT
+ select OPROFILE_HWSAMPLING_MODE
+ help
+ Hardware (HW) sampling is a feature provided by z processor.
+ The sampling process is implemented in hardware and millicode
+ and thus does not affect the operating system being observed,
+ apart from the required buffer memory that Linux kernel must
+ provide.
+
+ If unsure, say N.
+
+config HAVE_HWSAMPLER
+ bool
+
+config OPROFILE_HWSAMPLING_MODE
+ bool
+
endmenu

menu "Power Management"
Index: linux-2.6/arch/s390/oprofile/Makefile
===================================================================
--- linux-2.6.orig/arch/s390/oprofile/Makefile
+++ linux-2.6/arch/s390/oprofile/Makefile
@@ -1,3 +1,4 @@
+obj-$(CONFIG_HWSAMPLER) += hwsampler.o
obj-$(CONFIG_OPROFILE) += oprofile.o

DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
@@ -7,3 +8,8 @@ DRIVER_OBJS = $(addprefix ../../../drive
timer_int.o )

oprofile-y := $(DRIVER_OBJS) init.o backtrace.o
+
+HW_SAMPLER_DRIVER_OBJS = $(addprefix ../../../drivers/s390/hwsampler/, \
+ hwsampler-main.o smpctl.o )
+
+hwsampler-y := $(HW_SAMPLER_DRIVER_OBJS)
Robert Richter
2011-01-03 17:06:17 UTC
Permalink
This should include the real name like in your Signed-off-by tag.

You can fix this by reconfiguring git and recommitting your patches
(git rebase -i ..., git commit --amend --reset-author).
Post by g***@linux.vnet.ibm.com
The CPU Measurement Facility CPUMF is described in the z/Architecture Principles of Operation.
The patch introduces
- a new configuration option OPROFILE_HWSAMPLING_MODE
- a new kernel module hwsampler that controls all hardware sampling related operations as
- checking if hardware sampling feature is available
- ie: on System z models z10 and up, in LPAR mode only, and authorised during LPAR activation
- allocating memory for the hardware sampling feature
- starting/stopping hardware sampling
The hwsampler module will also depend on CONFIG_OPROFILE and CONFIG_64BIT.
All functions required to start and stop hardware sampling have to be
invoked by the oprofile kernel module as provided by the other patches of this patch set.
In case hardware based sampling cannot be setup standard timer based sampling is used by OProfile.
---
arch/s390/Kconfig | 22
arch/s390/include/asm/hwsampler.h | 130 +++
arch/s390/oprofile/Makefile | 6
drivers/s390/hwsampler/hwsampler-main.c | 1155 ++++++++++++++++++++++++++++++++
drivers/s390/hwsampler/smpctl.c | 170 ++++
Is there a reason for splitting the code into two files? If we
merged hwsampler-main.c and smpctl.c, we could make a lot of functions
static, which simplifies the interface. We could also drop the
hwsampler/ directory and put everything in drivers/s390/hwsampler.c.

Another thing is, wouldn't all of this better be part of cpu
initialization code? This is not really a driver, it only registers a
cpu notifier. Do you need to build this as module? I leave this
decision to the s390 maintainers.
Post by g***@linux.vnet.ibm.com
5 files changed, 1483 insertions(+)
Index: linux-2.6/arch/s390/include/asm/hwsampler.h
===================================================================
--- /dev/null
+++ linux-2.6/arch/s390/include/asm/hwsampler.h
This file should only contain definitions for the public interface.
All structs should be private, defined in something like

drivers/s390/hwsampler.h

or so. All of them are only used in hwsampler-main.c or smpctl.c.

To avoid namespace collisions, add a prefix like hws_ to all symbols.
Post by g***@linux.vnet.ibm.com
@@ -0,0 +1,130 @@
+/*
+ * CPUMF HW sampler structures and prototypes
+ *
+ * Copyright IBM Corp. 2010
+ */
+
+#ifndef HWSAMPLER_H_
+#define HWSAMPLER_H_
+
+#include <linux/workqueue.h>
+
+struct qsi_info_block /* QUERY SAMPLING information block */
+{ /* Bit(s) */
+ unsigned int b0_13:14; /* 0-13: zeros */
+ unsigned int as:1; /* 14: sampling authorisation control*/
+ unsigned int b15_21:7; /* 15-21: zeros */
+ unsigned int es:1; /* 22: sampling enable control */
+ unsigned int b23_29:7; /* 23-29: zeros */
+ unsigned int cs:1; /* 30: sampling activation control */
+ unsigned int:1; /* 31: reserved */
+ unsigned int bsdes:16; /* 4-5: size of sampling entry */
+ unsigned int:16; /* 6-7: reserved */
+ unsigned long min_sampl_rate; /* 8-15: minimum sampling interval */
+ unsigned long max_sampl_rate; /* 16-23: maximum sampling interval*/
+ unsigned long tear; /* 24-31: TEAR contents */
+ unsigned long dear; /* 32-39: DEAR contents */
+ unsigned int rsvrd0; /* 40-43: reserved */
+ unsigned int cpu_speed; /* 44-47: CPU speed */
+ unsigned long long rsvrd1; /* 48-55: reserved */
+ unsigned long long rsvrd2; /* 56-63: reserved */
+};
+
+struct ssctl_request_block /* SET SAMPLING CONTROLS req block */
+{ /* bytes 0 - 7 Bit(s) */
+ unsigned int s:1; /* 0: maximum buffer indicator */
+ unsigned int h:1; /* 1: part. level reserved for VM use*/
+ unsigned long b2_53:52; /* 2-53: zeros */
+ unsigned int es:1; /* 54: sampling enable control */
+ unsigned int b55_61:7; /* 55-61: - zeros */
+ unsigned int cs:1; /* 62: sampling activation control */
+ unsigned int b63:1; /* 63: zero */
+ unsigned long interval; /* 8-15: sampling interval */
+ unsigned long tear; /* 16-23: TEAR contents */
+ unsigned long dear; /* 24-31: DEAR contents */
+ /* 32-63: */
+ unsigned long rsvrd1; /* reserved */
+ unsigned long rsvrd2; /* reserved */
+ unsigned long rsvrd3; /* reserved */
+ unsigned long rsvrd4; /* reserved */
+};
+
+typedef void oprf_add_sample_func(unsigned long pc,
+ struct pt_regs * const regs,
+ unsigned long event, int is_kernel,
+ struct task_struct *task);
Don't use typedefs.
Post by g***@linux.vnet.ibm.com
+
+struct cpu_buffer {
+ unsigned long worker_entry;
+ unsigned long sample_overflow; /* taken from SDB ... */
+ struct qsi_info_block qsi;
+ struct ssctl_request_block ssctl;
+ struct work_struct worker;
+ oprf_add_sample_func *add_sample_f;
+ atomic_t ext_params;
+ unsigned long req_alert;
+ unsigned long loss_of_sample_data;
+ unsigned long invalid_entry_address;
+ unsigned long incorrect_sdbt_entry;
+ unsigned long sample_auth_change_alert;
+ unsigned int finish:1;
+ unsigned int oom:1;
+ unsigned int stop_mode:1;
+};
+
+struct data_entry {
+ unsigned int def:16; /* 0-15 Data Entry Format */
+ unsigned int R:4; /* 16-19 reserved */
+ unsigned int U:4; /* 20-23 Number of unique instruct. */
+ unsigned int z:2; /* zeros */
+ unsigned int T:1; /* 26 PSW DAT mode */
+ unsigned int W:1; /* 27 PSW wait state */
+ unsigned int P:1; /* 28 PSW Problem state */
+ unsigned int AS:2; /* 29-30 PSW address-space control */
+ unsigned int I:1; /* 31 entry valid or invalid */
+ unsigned int:16;
+ unsigned int prim_asn:16; /* primary ASN */
+ unsigned long long ia; /* Instruction Address */
+ unsigned long long lpp; /* Logical-Partition Program Param. */
+ unsigned long long vpp; /* Virtual-Machine Program Param. */
+};
+
+struct trailer_entry {
+ unsigned int f:1; /* 0 - Block Full Indicator */
+ unsigned int a:1; /* 1 - Alert request control */
+ unsigned long:62; /* 2 - 63: Reserved */
+ unsigned long overflow; /* 64 - sample Overflow count */
+ unsigned long timestamp; /* 16 - time-stamp */
+ unsigned long timestamp1; /* */
+ unsigned long reserved1; /* 32 -Reserved */
+ unsigned long reserved2; /* */
+ unsigned long progusage1; /* 48 - reserved for programming use */
+ unsigned long progusage2; /* */
+};
+
+int hwsampler_setup(void);
+int hwsampler_shutdown(void);
+int hwsampler_allocate(unsigned long sdbt, unsigned long sdb);
+int hwsampler_deallocate(void);
+long hwsampler_query_min_interval(void);
+long hwsampler_query_max_interval(void);
+int hwsampler_start_all(unsigned long interval);
+int hwsampler_stop_all(void);
+int hwsampler_deactivate(unsigned int cpu);
+int hwsampler_activate(unsigned int cpu);
+unsigned long hwsampler_get_sample_overflow_count(unsigned int cpu);
+
+int smp_ctl_qsi(int);
+int smp_ctl_ssctl_deactivate(int);
+int smp_ctl_ssctl_stop(int);
+int smp_ctl_ssctl_enable_activate(int, unsigned long);
+
+int qsi(void *);
+
+void execute_qsi(void *);
+void execute_ssctl(void *);
Many functions above are for internal use only, these should be
removed from this interface and made static.
Post by g***@linux.vnet.ibm.com
+
+#endif /*HWSAMPLER_H_*/
+
Index: linux-2.6/drivers/s390/hwsampler/hwsampler-main.c
===================================================================
--- /dev/null
+++ linux-2.6/drivers/s390/hwsampler/hwsampler-main.c
+static int __cpuinit hws_cpu_callback(struct notifier_block *nfb,
+ unsigned long action, void *hcpu)
+{
+ /* We do not have sampler space available for all possible CPUs.
+ All CPUs should be online when hw sampling is activated. */
+ return NOTIFY_BAD;
Is this to prevent bringing cpus on-/offline?
Post by g***@linux.vnet.ibm.com
+}
[...]
Post by g***@linux.vnet.ibm.com
+static int __init hwsampler_init(void)
+{
+ hws_state = HWS_INIT;
+ register_cpu_notifier(&hws_cpu_notifier);
+ return 0;
+}
+
+static void __exit hwsampler_exit(void)
+{
+ unregister_cpu_notifier(&hws_cpu_notifier);
+}
+
+module_init(hwsampler_init);
+module_exit(hwsampler_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("IBM CPUMF Customer Mode Sampling Kernel Module");
[...]
Post by g***@linux.vnet.ibm.com
Index: linux-2.6/arch/s390/Kconfig
===================================================================
--- linux-2.6.orig/arch/s390/Kconfig
+++ linux-2.6/arch/s390/Kconfig
@@ -127,6 +127,7 @@ config S390
select ARCH_INLINE_WRITE_UNLOCK_BH
select ARCH_INLINE_WRITE_UNLOCK_IRQ
select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE
+ select HAVE_HWSAMPLER
config SCHED_OMIT_FRAME_POINTER
bool
@@ -618,6 +619,27 @@ config SECCOMP
If unsure, say Y.
+config HWSAMPLER
+ tristate "Exploit CPUMF hardware sampling with OProfile"
+ depends on OPROFILE
+ depends on HAVE_HWSAMPLER
+ depends on 64BIT
+ select OPROFILE_HWSAMPLING_MODE
+ help
+ Hardware (HW) sampling is a feature provided by z processor.
+ The sampling process is implemented in hardware and millicode
+ and thus does not affect the operating system being observed,
+ apart from the required buffer memory that Linux kernel must
+ provide.
+
+ If unsure, say N.
+
+config HAVE_HWSAMPLER
+ bool
+
+config OPROFILE_HWSAMPLING_MODE
+ bool
+
endmenu
menu "Power Management"
Index: linux-2.6/arch/s390/oprofile/Makefile
===================================================================
--- linux-2.6.orig/arch/s390/oprofile/Makefile
+++ linux-2.6/arch/s390/oprofile/Makefile
@@ -1,3 +1,4 @@
+obj-$(CONFIG_HWSAMPLER) += hwsampler.o
obj-$(CONFIG_OPROFILE) += oprofile.o
DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
@@ -7,3 +8,8 @@ DRIVER_OBJS = $(addprefix ../../../drive
timer_int.o )
oprofile-y := $(DRIVER_OBJS) init.o backtrace.o
+
+HW_SAMPLER_DRIVER_OBJS = $(addprefix ../../../drivers/s390/hwsampler/, \
+ hwsampler-main.o smpctl.o )
+
+hwsampler-y := $(HW_SAMPLER_DRIVER_OBJS)
Have you tried building this as a module? I am not really sure, but I think it should be

hwsampler-$(CONFIG_HWSAMPLER) := ...

See also my statement above about putting this to cpu init code
instead of having a driver for it.

-Robert
--
Advanced Micro Devices, Inc.
Operating System Research Center
Heinz Graalfs
2011-01-19 16:54:44 UTC
Permalink
Hello Robert,

please find below (and in the following 2 mails) my answers to your
comments.

The patch-set will follow soon.

Heinz
Post by Robert Richter
This should include the real name like in your Signed-off-by tag.
You can fix this by reconfiguring git and recommitting your patches
(git rebase -i ..., git commit --amend --reset-author).
Post by g***@linux.vnet.ibm.com
The CPU Measurement Facility CPUMF is described in the z/Architecture Principles of Operation.
The patch introduces
- a new configuration option OPROFILE_HWSAMPLING_MODE
- a new kernel module hwsampler that controls all hardware sampling related operations as
- checking if hardware sampling feature is available
- ie: on System z models z10 and up, in LPAR mode only, and authorised during LPAR activation
- allocating memory for the hardware sampling feature
- starting/stopping hardware sampling
The hwsampler module will also depend on CONFIG_OPROFILE and CONFIG_64BIT.
All functions required to start and stop hardware sampling have to be
invoked by the oprofile kernel module as provided by the other patches of this patch set.
In case hardware based sampling cannot be setup standard timer based sampling is used by OProfile.
---
arch/s390/Kconfig | 22
arch/s390/include/asm/hwsampler.h | 130 +++
arch/s390/oprofile/Makefile | 6
drivers/s390/hwsampler/hwsampler-main.c | 1155 ++++++++++++++++++++++++++++++++
drivers/s390/hwsampler/smpctl.c | 170 ++++
Is there a reason for splitting the code into two files? If we would
merge hwsampler-main.c and smpctl.c we could make a lot functions
static which simplifies the interface. We could also drop the
hwsampler/ directory and put all in drivers/s390/hwsampler.c.
I merged smpctl.c contents into new hwsampler.c, hwsampler.c is now
located in arch/s390/oprofile.

As you proposed I integrated everything in the oprofile kernel module.
Post by Robert Richter
Another thing is, wouldn't all of this better be part of cpu
initialization code? This is not really a driver, it only registers a
cpu notifier. Do you need to build this as module? I leave this
decision to the s390 maintainers.
Post by g***@linux.vnet.ibm.com
5 files changed, 1483 insertions(+)
Index: linux-2.6/arch/s390/include/asm/hwsampler.h
===================================================================
--- /dev/null
+++ linux-2.6/arch/s390/include/asm/hwsampler.h
This file should only contain definitions for the public interface.
All structs should be private, defined in something like
drivers/s390/hwsampler.h
I moved the structs to drivers/s390/hwsampler.h
Post by Robert Richter
or so. All of them are only used in hwsampler-main.c or smpctl.c.
To avoid namespace collisions, add a prefix like hws_ to all symbols.
OK, done
Post by Robert Richter
Post by g***@linux.vnet.ibm.com
@@ -0,0 +1,130 @@
+/*
+ * CPUMF HW sampler structures and prototypes
+ *
+ * Copyright IBM Corp. 2010
+ */
+
+#ifndef HWSAMPLER_H_
+#define HWSAMPLER_H_
+
+#include <linux/workqueue.h>
+
+struct qsi_info_block /* QUERY SAMPLING information block */
+{ /* Bit(s) */
+ unsigned int b0_13:14; /* 0-13: zeros */
+ unsigned int as:1; /* 14: sampling authorisation control*/
+ unsigned int b15_21:7; /* 15-21: zeros */
+ unsigned int es:1; /* 22: sampling enable control */
+ unsigned int b23_29:7; /* 23-29: zeros */
+ unsigned int cs:1; /* 30: sampling activation control */
+ unsigned int:1; /* 31: reserved */
+ unsigned int bsdes:16; /* 4-5: size of sampling entry */
+ unsigned int:16; /* 6-7: reserved */
+ unsigned long min_sampl_rate; /* 8-15: minimum sampling interval */
+ unsigned long max_sampl_rate; /* 16-23: maximum sampling interval*/
+ unsigned long tear; /* 24-31: TEAR contents */
+ unsigned long dear; /* 32-39: DEAR contents */
+ unsigned int rsvrd0; /* 40-43: reserved */
+ unsigned int cpu_speed; /* 44-47: CPU speed */
+ unsigned long long rsvrd1; /* 48-55: reserved */
+ unsigned long long rsvrd2; /* 56-63: reserved */
+};
+
+struct ssctl_request_block /* SET SAMPLING CONTROLS req block */
+{ /* bytes 0 - 7 Bit(s) */
+ unsigned int s:1; /* 0: maximum buffer indicator */
+ unsigned int h:1; /* 1: part. level reserved for VM use*/
+ unsigned long b2_53:52; /* 2-53: zeros */
+ unsigned int es:1; /* 54: sampling enable control */
+ unsigned int b55_61:7; /* 55-61: - zeros */
+ unsigned int cs:1; /* 62: sampling activation control */
+ unsigned int b63:1; /* 63: zero */
+ unsigned long interval; /* 8-15: sampling interval */
+ unsigned long tear; /* 16-23: TEAR contents */
+ unsigned long dear; /* 24-31: DEAR contents */
+ /* 32-63: */
+ unsigned long rsvrd1; /* reserved */
+ unsigned long rsvrd2; /* reserved */
+ unsigned long rsvrd3; /* reserved */
+ unsigned long rsvrd4; /* reserved */
+};
+
+typedef void oprf_add_sample_func(unsigned long pc,
+ struct pt_regs * const regs,
+ unsigned long event, int is_kernel,
+ struct task_struct *task);
Don't use typedefs.
OK, done
Post by Robert Richter
Post by g***@linux.vnet.ibm.com
+
+struct cpu_buffer {
+ unsigned long worker_entry;
+ unsigned long sample_overflow; /* taken from SDB ... */
+ struct qsi_info_block qsi;
+ struct ssctl_request_block ssctl;
+ struct work_struct worker;
+ oprf_add_sample_func *add_sample_f;
+ atomic_t ext_params;
+ unsigned long req_alert;
+ unsigned long loss_of_sample_data;
+ unsigned long invalid_entry_address;
+ unsigned long incorrect_sdbt_entry;
+ unsigned long sample_auth_change_alert;
+ unsigned int finish:1;
+ unsigned int oom:1;
+ unsigned int stop_mode:1;
+};
+
+struct data_entry {
+ unsigned int def:16; /* 0-15 Data Entry Format */
+ unsigned int R:4; /* 16-19 reserved */
+ unsigned int U:4; /* 20-23 Number of unique instruct. */
+ unsigned int z:2; /* zeros */
+ unsigned int T:1; /* 26 PSW DAT mode */
+ unsigned int W:1; /* 27 PSW wait state */
+ unsigned int P:1; /* 28 PSW Problem state */
+ unsigned int AS:2; /* 29-30 PSW address-space control */
+ unsigned int I:1; /* 31 entry valid or invalid */
+ unsigned int:16;
+ unsigned int prim_asn:16; /* primary ASN */
+ unsigned long long ia; /* Instruction Address */
+ unsigned long long lpp; /* Logical-Partition Program Param. */
+ unsigned long long vpp; /* Virtual-Machine Program Param. */
+};
+
+struct trailer_entry {
+ unsigned int f:1; /* 0 - Block Full Indicator */
+ unsigned int a:1; /* 1 - Alert request control */
+ unsigned long:62; /* 2 - 63: Reserved */
+ unsigned long overflow; /* 64 - sample Overflow count */
+ unsigned long timestamp; /* 16 - time-stamp */
+ unsigned long timestamp1; /* */
+ unsigned long reserved1; /* 32 -Reserved */
+ unsigned long reserved2; /* */
+ unsigned long progusage1; /* 48 - reserved for programming use */
+ unsigned long progusage2; /* */
+};
+
+int hwsampler_setup(void);
+int hwsampler_shutdown(void);
+int hwsampler_allocate(unsigned long sdbt, unsigned long sdb);
+int hwsampler_deallocate(void);
+long hwsampler_query_min_interval(void);
+long hwsampler_query_max_interval(void);
+int hwsampler_start_all(unsigned long interval);
+int hwsampler_stop_all(void);
+int hwsampler_deactivate(unsigned int cpu);
+int hwsampler_activate(unsigned int cpu);
+unsigned long hwsampler_get_sample_overflow_count(unsigned int cpu);
+
all smp_ functions are now static in hwsampler.c
Post by Robert Richter
Post by g***@linux.vnet.ibm.com
+int smp_ctl_qsi(int);
+int smp_ctl_ssctl_deactivate(int);
+int smp_ctl_ssctl_stop(int);
+int smp_ctl_ssctl_enable_activate(int, unsigned long);
+
+int qsi(void *);
+
+void execute_qsi(void *);
+void execute_ssctl(void *);
Many functions above are for internal use only, these should be
removed from this interface and made static.
all structs moved to arch/s390/oprofile/hwsampler.h
Post by Robert Richter
Post by g***@linux.vnet.ibm.com
+
+#endif /*HWSAMPLER_H_*/
+
Index: linux-2.6/drivers/s390/hwsampler/hwsampler-main.c
===================================================================
--- /dev/null
+++ linux-2.6/drivers/s390/hwsampler/hwsampler-main.c
+static int __cpuinit hws_cpu_callback(struct notifier_block *nfb,
+ unsigned long action, void *hcpu)
+{
+ /* We do not have sampler space available for all possible CPUs.
+ All CPUs should be online when hw sampling is activated. */
+ return NOTIFY_BAD;
Is this to prevent bringing cpus on-/offline?
yes, we don't allow cpu varyon/off during hardware sampling
Post by Robert Richter
Post by g***@linux.vnet.ibm.com
+}
[...]
Post by g***@linux.vnet.ibm.com
+static int __init hwsampler_init(void)
+{
+ hws_state = HWS_INIT;
+ register_cpu_notifier(&hws_cpu_notifier);
+ return 0;
+}
+
+static void __exit hwsampler_exit(void)
+{
+ unregister_cpu_notifier(&hws_cpu_notifier);
+}
+
+module_init(hwsampler_init);
+module_exit(hwsampler_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("IBM CPUMF Customer Mode Sampling Kernel Module");
[...]
Post by g***@linux.vnet.ibm.com
Index: linux-2.6/arch/s390/Kconfig
===================================================================
--- linux-2.6.orig/arch/s390/Kconfig
+++ linux-2.6/arch/s390/Kconfig
@@ -127,6 +127,7 @@ config S390
select ARCH_INLINE_WRITE_UNLOCK_BH
select ARCH_INLINE_WRITE_UNLOCK_IRQ
select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE
+ select HAVE_HWSAMPLER
config SCHED_OMIT_FRAME_POINTER
bool
@@ -618,6 +619,27 @@ config SECCOMP
If unsure, say Y.
+config HWSAMPLER
+ tristate "Exploit CPUMF hardware sampling with OProfile"
+ depends on OPROFILE
+ depends on HAVE_HWSAMPLER
+ depends on 64BIT
+ select OPROFILE_HWSAMPLING_MODE
+ help
+ Hardware (HW) sampling is a feature provided by z processor.
+ The sampling process is implemented in hardware and millicode
+ and thus does not affect the operating system being observed,
+ apart from the required buffer memory that Linux kernel must
+ provide.
+
+ If unsure, say N.
+
+config HAVE_HWSAMPLER
+ bool
+
+config OPROFILE_HWSAMPLING_MODE
+ bool
+
endmenu
menu "Power Management"
Index: linux-2.6/arch/s390/oprofile/Makefile
===================================================================
--- linux-2.6.orig/arch/s390/oprofile/Makefile
+++ linux-2.6/arch/s390/oprofile/Makefile
@@ -1,3 +1,4 @@
+obj-$(CONFIG_HWSAMPLER) += hwsampler.o
obj-$(CONFIG_OPROFILE) += oprofile.o
DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
@@ -7,3 +8,8 @@ DRIVER_OBJS = $(addprefix ../../../drive
timer_int.o )
oprofile-y := $(DRIVER_OBJS) init.o backtrace.o
+
+HW_SAMPLER_DRIVER_OBJS = $(addprefix ../../../drivers/s390/hwsampler/, \
+ hwsampler-main.o smpctl.o )
+
+hwsampler-y := $(HW_SAMPLER_DRIVER_OBJS)
Have you tried building this as a module. Not really sure, but I think it should be
see above
Post by Robert Richter
hwsampler-$(CONFIG_HWSAMPLER) := ...
See also my statement above about putting this to cpu init code
instead of having a driver for it.
-Robert
g***@linux.vnet.ibm.com
2010-12-20 13:05:44 UTC
Permalink
From: ***@linux.vnet.ibm.com

This patch introduces a new oprofile sample add function (oprofile_add_ext_hw_sample)
that can also take task_struct as an argument, which is used by the hwsampler kernel module
when copying hardware samples to OProfile buffers.

Signed-off-by: Mahesh Salgaonkar <***@linux.vnet.ibm.com>
Signed-off-by: Maran Pakkirisamy <***@linux.vnet.ibm.com>
Signed-off-by: Heinz Graalfs <***@linux.vnet.ibm.com>
---
drivers/oprofile/cpu_buffer.c | 26 +++++++++++++++++++-------
1 file changed, 19 insertions(+), 7 deletions(-)

Index: linux-2.6/drivers/oprofile/cpu_buffer.c
===================================================================
--- linux-2.6.orig/drivers/oprofile/cpu_buffer.c
+++ linux-2.6/drivers/oprofile/cpu_buffer.c
@@ -22,6 +22,7 @@
#include <linux/sched.h>
#include <linux/oprofile.h>
#include <linux/errno.h>
+#include <linux/module.h>

#include "event_buffer.h"
#include "cpu_buffer.h"
@@ -258,8 +259,10 @@ op_add_sample(struct oprofile_cpu_buffer
*/
static int
log_sample(struct oprofile_cpu_buffer *cpu_buf, unsigned long pc,
- unsigned long backtrace, int is_kernel, unsigned long event)
+ unsigned long backtrace, int is_kernel, unsigned long event,
+ struct task_struct *task)
{
+ struct task_struct *tsk = task ? task : current;
cpu_buf->sample_received++;

if (pc == ESCAPE_CODE) {
@@ -267,7 +270,7 @@ log_sample(struct oprofile_cpu_buffer *c
return 0;
}

- if (op_add_code(cpu_buf, backtrace, is_kernel, current))
+ if (op_add_code(cpu_buf, backtrace, is_kernel, tsk))
goto fail;

if (op_add_sample(cpu_buf, pc, event))
@@ -292,7 +295,8 @@ static inline void oprofile_end_trace(st

static inline void
__oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs,
- unsigned long event, int is_kernel)
+ unsigned long event, int is_kernel,
+ struct task_struct *task)
{
struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(op_cpu_buffer);
unsigned long backtrace = oprofile_backtrace_depth;
@@ -301,7 +305,7 @@ __oprofile_add_ext_sample(unsigned long
* if log_sample() fail we can't backtrace since we lost the
* source of this event
*/
- if (!log_sample(cpu_buf, pc, backtrace, is_kernel, event))
+ if (!log_sample(cpu_buf, pc, backtrace, is_kernel, event, task))
/* failed */
return;

@@ -316,9 +320,17 @@ __oprofile_add_ext_sample(unsigned long
void oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs,
unsigned long event, int is_kernel)
{
- __oprofile_add_ext_sample(pc, regs, event, is_kernel);
+ __oprofile_add_ext_sample(pc, regs, event, is_kernel, NULL);
}

+void oprofile_add_ext_hw_sample(unsigned long pc, struct pt_regs * const regs,
+ unsigned long event, int is_kernel,
+ struct task_struct *task)
+{
+ __oprofile_add_ext_sample(pc, regs, event, is_kernel, task);
+}
+EXPORT_SYMBOL_GPL(oprofile_add_ext_hw_sample);
+
void oprofile_add_sample(struct pt_regs * const regs, unsigned long event)
{
int is_kernel;
@@ -332,7 +344,7 @@ void oprofile_add_sample(struct pt_regs
pc = ESCAPE_CODE; /* as this causes an early return. */
}

- __oprofile_add_ext_sample(pc, regs, event, is_kernel);
+ __oprofile_add_ext_sample(pc, regs, event, is_kernel, NULL);
}

/*
@@ -403,7 +415,7 @@ int oprofile_write_commit(struct op_entr
void oprofile_add_pc(unsigned long pc, int is_kernel, unsigned long event)
{
struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(op_cpu_buffer);
- log_sample(cpu_buf, pc, 0, is_kernel, event);
+ log_sample(cpu_buf, pc, 0, is_kernel, event, NULL);
}

void oprofile_add_trace(unsigned long pc)
Robert Richter
2011-01-04 15:34:11 UTC
Permalink
Post by g***@linux.vnet.ibm.com
This patch introduces a new oprofile sample add function (oprofile_add_ext_hw_sample)
that can also take task_struct as an argument, which is used by the hwsampler kernel module
when copying hardware samples to OProfile buffers.
---
drivers/oprofile/cpu_buffer.c | 26 +++++++++++++++++++-------
1 file changed, 19 insertions(+), 7 deletions(-)
+void oprofile_add_ext_hw_sample(unsigned long pc, struct pt_regs * const regs,
+ unsigned long event, int is_kernel,
+ struct task_struct *task)
+{
+ __oprofile_add_ext_sample(pc, regs, event, is_kernel, task);
+}
+EXPORT_SYMBOL_GPL(oprofile_add_ext_hw_sample);
Hmm, I am not convinced of this new interface between the hwsampler
module and oprofile. It is asymmetric and bloats the function's
parameters list. A first simplification would be to not implement
hwsampler as module and integrate this in oprofile. Then, we can look
for a better way to add samples to the oprofile buffer. What do you
think?

-Robert
--
Advanced Micro Devices, Inc.
Operating System Research Center
Heinz Graalfs
2011-01-19 16:56:46 UTC
Permalink
Robert,

here is the 3rd part.

Thanks a lot for your comments!

Heinz
Post by Robert Richter
Post by g***@linux.vnet.ibm.com
This patch introduces a new oprofile sample add function (oprofile_add_ext_hw_sample)
that can also take task_struct as an argument, which is used by the hwsampler kernel module
when copying hardware samples to OProfile buffers.
---
drivers/oprofile/cpu_buffer.c | 26 +++++++++++++++++++-------
1 file changed, 19 insertions(+), 7 deletions(-)
+void oprofile_add_ext_hw_sample(unsigned long pc, struct pt_regs * const regs,
+ unsigned long event, int is_kernel,
+ struct task_struct *task)
+{
+ __oprofile_add_ext_sample(pc, regs, event, is_kernel, task);
+}
+EXPORT_SYMBOL_GPL(oprofile_add_ext_hw_sample);
Hmm, I am not convinced of this new interface between the hwsampler
module and oprofile. It is asymmetric and bloats the function's
parameters list. A first simplification would be to not implement
hwsampler as module and integrate this in oprofile. Then, we can look
for a better way to add samples to the oprofile buffer. What do you
think?
OK, as you suggested, hwsampler is now integrated into oprofile and the
EXPORT is gone. Instead I added it to oprofile.h, which I fear you will
not accept, but I will try.
Post by Robert Richter
-Robert
Robert Richter
2011-01-03 20:39:09 UTC
Permalink
Post by g***@linux.vnet.ibm.com
During memory unmap hardware sampling is deactivated.
After all samples have been collected hardware sampling is reactivated again.
Do you mean you switch off the profiler for each buffer sync? This
happens by default every 100ms. Why is this needed? Which memory is
unmapped?

These hooks in sync_buffer() are ugly. We must try to find a better
solution here.

There is oprofile_put_buff(), which allows writing directly to the
event buffer and is used in powerpc/cell.

-Robert
Post by g***@linux.vnet.ibm.com
---
arch/s390/oprofile/hwsampler_files.c | 2 +-
drivers/oprofile/buffer_sync.c | 13 +++++++++++++
2 files changed, 14 insertions(+), 1 deletion(-)
Index: linux-2.6/drivers/oprofile/buffer_sync.c
===================================================================
--- linux-2.6.orig/drivers/oprofile/buffer_sync.c
+++ linux-2.6/drivers/oprofile/buffer_sync.c
@@ -32,6 +32,11 @@
#include <linux/sched.h>
#include <linux/gfp.h>
+#ifdef CONFIG_OPROFILE_HWSAMPLING_MODE
+#include <asm/hwsampler.h>
+extern unsigned long oprofile_hwsampler;
+#endif
+
#include "oprofile_stats.h"
#include "event_buffer.h"
#include "cpu_buffer.h"
@@ -513,6 +518,10 @@ void sync_buffer(int cpu)
mutex_lock(&buffer_mutex);
+#ifdef CONFIG_OPROFILE_HWSAMPLING_MODE
+ if (oprofile_hwsampler)
+ hwsampler_deactivate(cpu);
+#endif
add_cpu_switch(cpu);
op_cpu_buffer_reset(cpu);
@@ -569,6 +578,10 @@ void sync_buffer(int cpu)
mark_done(cpu);
+#ifdef CONFIG_OPROFILE_HWSAMPLING_MODE
+ if (oprofile_hwsampler)
+ hwsampler_activate(cpu);
+#endif
mutex_unlock(&buffer_mutex);
}
Index: linux-2.6/arch/s390/oprofile/hwsampler_files.c
===================================================================
--- linux-2.6.orig/arch/s390/oprofile/hwsampler_files.c
+++ linux-2.6/arch/s390/oprofile/hwsampler_files.c
@@ -22,7 +22,7 @@ unsigned long oprofile_max_interval;
static unsigned long oprofile_sdbt_blocks = DEFAULT_SDBT_BLOCKS;
static unsigned long oprofile_sdb_blocks = DEFAULT_SDB_BLOCKS;
-static unsigned long oprofile_hwsampler;
+unsigned long oprofile_hwsampler;
static int oprofile_hwsampler_start(void)
{
--
Advanced Micro Devices, Inc.
Operating System Research Center
Continue reading on narkive:
Loading...