Date: Wed, 24 May 2000 17:29:56 -0400
From: John Baboval <baboval@mclinux.com>
To: linux-kernel@vger.rutgers.edu, torvalds@transmeta.com
Subject: [patch] profile enhancements, please read!
--dDRMvlgZJXvWKvBx
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
The attached patch makes the following enhancements to the kernel profiler:
- Dynamic allocation of the profile buffer.
This allows for profiling on a production system without the continuous
performance hit that the profiler causes. You can turn it on, get your
data, and then turn it off again.
I submitted this as a patch to 2.3.99pre8, but nobody commented, and it
didn't get applied. I'm resubmitting because the further enhancements
are impractical without this. Control of the buffer is via a
/proc/sys/kernel entry 'prof_shift'. You can write the same value to
this file as you would pass to the profile= parameter at boot time. To
turn off profiling, write a zero to the file. I have a second
implementation of this feature as a system call which I feel is
cleaner, though less practical. Any comments on this matter would be
appreciated.
- Selective profiling by context
This allows the kernel profiler to only be activated when the context
matches the constraints specified in the new files
/proc/sys/kernel/prof_pid and prof_name. The profiler is triggered when
the value in prof_pid equals the pid, or when the value in prof_name
matches the comm entry in the task_struct. If prof_name is empty
(default) and prof_pid is -1 (default) the profiler acts as usual.
The benefit of this is being able to see what section of the kernel is
affecting the particular task you are trying to optimize.
I am also interested in hearing whether people would like the opposite of
this, i.e. all processes except one specified.... This would be a
simple addition. This would be useful for testing with a client and
server running on the same machine (if this is all you have to work
with).
- Percentage readouts for readprofile
This isn't included in this patch, as it's not directly kernel
related. If anyone wants it, they can e-mail me. Basically this adds
a column to the readprofile output that indicates the percentage of
time spent in each area.
Please review and test/apply this patch.
--
-John
<baboval@missioncriticallinux.com>
--dDRMvlgZJXvWKvBx
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="patch-2.3.99pre9-profile"
diff -u -b --recursive --new-file 2.3.99pre9_clean/arch/alpha/kernel/irq_impl.h linux/arch/alpha/kernel/irq_impl.h
--- 2.3.99pre9_clean/arch/alpha/kernel/irq_impl.h Sun Feb 27 16:28:01 2000
+++ linux/arch/alpha/kernel/irq_impl.h Wed May 24 16:35:56 2000
@@ -41,6 +41,8 @@
extern void handle_irq(int irq, struct pt_regs * regs);
extern unsigned long prof_cpu_mask;
+extern char prof_name[16];
+extern int prof_pid;
static inline void
alpha_do_profile(unsigned long pc)
@@ -49,6 +51,11 @@
if (!prof_buffer)
return;
+
+ if(prof_pid != -1 || prof_name[0] != 0)
+ if((current->pid != prof_pid) &&
+ (strcmp(current->comm, prof_name) != 0))
+ return;
/*
* Only measure the CPUs specified by /proc/irq/prof_cpu_mask.
diff -u -b --recursive --new-file 2.3.99pre9_clean/fs/proc/proc_misc.c linux/fs/proc/proc_misc.c
--- 2.3.99pre9_clean/fs/proc/proc_misc.c Fri May 12 14:36:27 2000
+++ linux/fs/proc/proc_misc.c Wed May 24 16:40:59 2000
@@ -590,7 +590,7 @@
return count;
}
-static struct file_operations proc_profile_operations = {
+struct file_operations proc_profile_operations = {
read: read_profile,
write: write_profile,
};
diff -u -b --recursive --new-file 2.3.99pre9_clean/include/asm-i386/hw_irq.h linux/include/asm-i386/hw_irq.h
--- 2.3.99pre9_clean/include/asm-i386/hw_irq.h Tue May 23 17:18:47 2000
+++ linux/include/asm-i386/hw_irq.h Wed May 24 16:33:43 2000
@@ -181,6 +181,9 @@
extern unsigned int * prof_buffer;
extern unsigned long prof_len;
extern unsigned long prof_shift;
+extern char prof_name[16];
+extern int prof_pid;
+
/*
* x86 profiling function, SMP safe. We might want to do this in
@@ -190,6 +193,11 @@
{
if (!prof_buffer)
return;
+
+ if(prof_pid != -1 || prof_name[0] != 0)
+ if((current->pid != prof_pid) &&
+ (strcmp(current->comm, prof_name) != 0))
+ return;
/*
* Only measure the CPUs specified by /proc/irq/prof_cpu_mask.
diff -u -b --recursive --new-file 2.3.99pre9_clean/include/linux/sysctl.h linux/include/linux/sysctl.h
--- 2.3.99pre9_clean/include/linux/sysctl.h Fri May 12 14:36:27 2000
+++ linux/include/linux/sysctl.h Wed May 24 16:42:39 2000
@@ -112,6 +112,9 @@
KERN_OVERFLOWUID=46, /* int: overflow UID */
KERN_OVERFLOWGID=47, /* int: overflow GID */
KERN_SHMPATH=48, /* string: path to shm fs */
+ KERN_PROFSWITCH=49, /* int: sets the profile shift value */
+ KERN_PROF_PID=50, /* int: sets the pid to profile */
+ KERN_PROF_NAME=51, /* string: sets the process name to profile */
};
diff -u -b --recursive --new-file 2.3.99pre9_clean/kernel/Makefile linux/kernel/Makefile
--- 2.3.99pre9_clean/kernel/Makefile Wed Mar 22 12:39:11 2000
+++ linux/kernel/Makefile Wed May 24 16:43:09 2000
@@ -10,7 +10,7 @@
O_TARGET := kernel.o
O_OBJS = sched.o dma.o fork.o exec_domain.o panic.o printk.o sys.o \
module.o exit.o itimer.o info.o time.o softirq.o resource.o \
- sysctl.o acct.o capability.o ptrace.o timer.o
+ sysctl.o acct.o capability.o ptrace.o timer.o profile.o
OX_OBJS += signal.o
diff -u -b --recursive --new-file 2.3.99pre9_clean/kernel/profile.c linux/kernel/profile.c
--- 2.3.99pre9_clean/kernel/profile.c Wed Dec 31 19:00:00 1969
+++ linux/kernel/profile.c Wed May 24 16:46:25 2000
@@ -0,0 +1,142 @@
+/*
+ * Runtime enabler/disabler for the kernel profiler.
+ *
+ * prof_switch() allocates or releases the profile buffer while the system
+ * is running, so profiling no longer has to be chosen at boot time.
+ */
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/malloc.h>
+#include <linux/vmalloc.h>
+#include <linux/proc_fs.h>
+#include <asm/page.h>
+
+#if defined(CONFIG_PROC_FS)
+
+extern unsigned int * prof_buffer;
+extern unsigned long prof_len;
+extern unsigned long prof_shift;
+/* How prof_buffer was obtained: 1 = kmalloc, 2 = vmalloc, 0 = not by us. */
+unsigned int prof_type = 0;
+extern struct proc_dir_entry proc_root;
+
+extern char _stext, _etext;
+extern struct file_operations proc_profile_operations;
+
+/* Floor of log base 2 of x (index of its highest set bit); 0 when x is 0. */
+static unsigned int prof_log2(unsigned long x)
+{
+	unsigned int bit = 0;
+
+	while (x >>= 1)
+		bit++;
+	return bit;
+}
+
+/*
+ * Turn profiling off.  Returns 0 when no buffer remains, or 3 when the
+ * buffer was not allocated by prof_switch() and therefore was left alone.
+ */
+static long prof_disable(void)
+{
+	void *buffer = prof_buffer;
+
+	if (!buffer)
+		return 0;
+	if (prof_type != 1 && prof_type != 2) {
+		/* This is a boottime profile buffer... */
+		return 3;
+	}
+
+	remove_proc_entry("profile", &proc_root);
+	/* Clear the global pointer before the memory is handed back. */
+	prof_buffer = NULL;
+	if (prof_type == 1)
+		kfree(buffer);
+	else
+		vfree(buffer);
+	prof_type = 0;
+	return 0;
+}
+
+/*
+ * prof_switch - enable, resize or disable the kernel profiler at runtime.
+ * @shift: profile shift value; zero or negative disables profiling.
+ *
+ * return values:
+ *  0 - Profiling is disabled (as requested, or no memory for the buffer)
+ *  1 - Profiling successfully enabled; buffer was allocated with kmalloc
+ *  2 - Profiling successfully enabled; buffer was allocated with vmalloc
+ *  3 - bootmem profile buffer was left untouched
+ */
+long prof_switch(long shift)
+{
+	struct proc_dir_entry *entry;
+	unsigned long text_len, max_shift, new_shift, new_len;
+	unsigned int *buffer = NULL;
+	unsigned int size, type = 2;
+
+	if (shift <= 0)
+		return prof_disable();
+
+	/* only text is profiled */
+	text_len = (unsigned long) &_etext - (unsigned long) &_stext;
+
+	/*
+	 * A shift above log base 2 of the text size would leave no counters
+	 * at all, so clamp the request to that value.
+	 */
+	max_shift = prof_log2(text_len);
+	new_shift = shift;
+	if (new_shift > max_shift)
+		new_shift = max_shift;
+
+	/*
+	 * Don't create a new profile buffer/entry if one already exists,
+	 * but still honor the request in case the resolution has changed.
+	 */
+	if (prof_buffer != NULL) {
+		if (new_shift == prof_shift)
+			return prof_type ? prof_type : 3;
+		/* A buffer this file did not allocate is left in place. */
+		if (prof_disable() == 3)
+			return 3;
+	}
+
+	new_len = text_len >> new_shift;
+	size = new_len * sizeof(unsigned int) + PAGE_SIZE-1;
+
+	/* Use kmalloc if size < 512 * PAGE_SIZE, if it's bigger use vmalloc */
+	if (size < 512 * PAGE_SIZE) {
+		buffer = kmalloc(size, GFP_KERNEL);
+		if (buffer)
+			type = 1;
+	}
+	if (!buffer)
+		buffer = vmalloc(size);
+	if (!buffer) {
+		/* Bail... Can't get enough memory; profiling stays off. */
+		return 0;
+	}
+
+	/* Zero the buffer while it is still private to this function. */
+	memset(buffer, 0, size);
+
+	/* prof_buffer is stored last, after the values that describe it. */
+	prof_shift = new_shift;
+	prof_len = new_len;
+	prof_type = type;
+	prof_buffer = buffer;
+
+	/* Register the /proc/profile entry */
+	entry = create_proc_entry("profile", S_IWUSR | S_IRUGO, NULL);
+	if (entry) {
+		entry->proc_fops = &proc_profile_operations;
+		entry->size = (1+prof_len) * sizeof(unsigned int);
+	}
+	return prof_type;
+}
+#endif /* CONFIG_PROC_FS */
diff -u -b --recursive --new-file 2.3.99pre9_clean/kernel/sysctl.c linux/kernel/sysctl.c
--- 2.3.99pre9_clean/kernel/sysctl.c Fri May 12 14:21:20 2000
+++ linux/kernel/sysctl.c Wed May 24 16:53:18 2000
@@ -127,6 +127,12 @@
extern int inodes_stat[];
extern int dentry_stat[];
+static int profile;	/* value exposed as /proc/sys/kernel/prof_shift */
+extern unsigned long prof_shift;
+extern long prof_switch(long prof_shift);
+extern int prof_pid;
+extern char prof_name[16];
+int proc_prof_switch(ctl_table *table, int write, struct file *filp, void *buffer, size_t *lenp);
/* The default sysctl tables: */
static ctl_table root_table[] = {
@@ -222,6 +228,14 @@
{KERN_OVERFLOWGID, "overflowgid", &overflowgid, sizeof(int), 0644, NULL,
&proc_dointvec_minmax, &sysctl_intvec, NULL,
&minolduid, &maxolduid},
+ {KERN_PROFSWITCH, "prof_shift", &profile, sizeof(int), 0644, NULL,
+ &proc_prof_switch},
+ {KERN_PROF_PID, "prof_pid", &prof_pid, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {KERN_PROF_NAME, "prof_name",
+ prof_name,
+ 16, 0644, NULL, &proc_doutsstring,
+ &sysctl_string},
{0}
};
@@ -796,6 +810,37 @@
return do_proc_dointvec(table,write,filp,buffer,lenp,1,OP_SET);
}
+/*
+ * sysctl handler for /proc/sys/kernel/prof_shift.
+ *
+ * A read reports the shift currently in effect (0 while no profile buffer
+ * exists).  A write hands the new value to prof_switch().  Returns 0 on
+ * success or a negative errno; the status code from prof_switch() is never
+ * returned as the handler result.
+ */
+int proc_prof_switch(ctl_table *table, int write, struct file *filp,
+		void *buffer, size_t *lenp)
+{
+	extern unsigned int *prof_buffer;
+	int err;
+
+	/* Refresh the exported value so a read never shows a stale shift. */
+	if (!write)
+		profile = prof_buffer ? prof_shift : 0;
+
+	err = do_proc_dointvec(table,write,filp,buffer,lenp,1,OP_SET);
+	if (err || !write)
+		return err;
+
+	prof_switch((long)profile);
+	/* A positive shift with no buffer afterwards means allocation failed. */
+	if (profile > 0 && !prof_buffer) {
+		profile = 0;
+		return -ENOMEM;
+	}
+	profile = prof_buffer ? prof_shift : 0;
+	return 0;
+}
/*
* init may raise the set.
*/
diff -u -b --recursive --new-file 2.3.99pre9_clean/kernel/timer.c linux/kernel/timer.c
--- 2.3.99pre9_clean/kernel/timer.c Fri May 12 14:21:20 2000
+++ linux/kernel/timer.c Wed May 24 16:40:01 2000
@@ -71,6 +71,8 @@
unsigned int * prof_buffer;
unsigned long prof_len;
unsigned long prof_shift;
+int prof_pid = -1;
+char prof_name[16];
/*
* Event timer code
--dDRMvlgZJXvWKvBx--
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at http://www.tux.org/lkml/