• Jiri Pirko's avatar
    getrusage: fill ru_maxrss value · 1f10206c
    Jiri Pirko authored
    Fill the ->ru_maxrss value in struct rusage according to the rss hiwater
    mark.  This struct is filled in as a parameter to the getrusage syscall.
    The ->ru_maxrss value is reported in KBs, which is the way it is done in
    BSD systems.  The /usr/bin/time (GNU time) application converts
    ->ru_maxrss to KBs, which seems to be incorrect behavior.  The maintainer
    of this utility has been notified by me, with a patch that corrects it,
    and cc'ed.
    
    To make this happen we extend struct signal_struct by two fields.  The
    first one is ->maxrss which we use to store rss hiwater of the task.  The
    second one is ->cmaxrss, which we use to store the highest rss hiwater of
    all the task's children.  These values are used in k_getrusage() to actually fill
    ->ru_maxrss.  k_getrusage() uses current rss hiwater value directly if mm
    struct exists.
    
    Note:
    exec() clears mm->hiwater_rss, but doesn't clear sig->maxrss.
    This behavior is intentional: on *BSD, getrusage values are inherited across exec().
    
    test programs
    ========================================================
    
    getrusage.c
    ===========
     #include <stdio.h>
     #include <stdlib.h>
     #include <string.h>
     #include <sys/types.h>
     #include <sys/time.h>
     #include <sys/resource.h>
     #include <sys/types.h>
     #include <sys/wait.h>
     #include <unistd.h>
     #include <signal.h>
     #include <sys/mman.h>
    
     #include "common.h"
    
     #define err(str) perror(str), exit(1)
    
    /*
     * Fork through __fork() and terminate the test if the fork failed.
     *
     * Without this check a -1 return would be treated as "we are the parent"
     * and the test case would print results for a child that never existed.
     */
    static pid_t fork_or_die(void)
    {
    	pid_t pid = __fork();
    
    	if (pid < 0) {
    		perror("fork");
    		exit(1);
    	}
    	return pid;
    }
    
    /*
     * Driver for the ru_maxrss test cases.
     *
     * Each test case prints what is expected, shows the rusage values of this
     * process ("initial"), and then forks a child that either reports its own
     * values or runs ./child to allocate memory.  The last test case replaces
     * this process with ./child via execl().
     *
     * Returns 0 on success; returns 1 if the final execl() fails.
     */
    int main(int argc, char** argv)
    {
    	int status;
    
    	/*
    	 * Children leave through _exit() and testcase7 calls execl(); neither
    	 * flushes stdio.  Line-buffer stdout so that every completed line is
    	 * written immediately even when stdout is a pipe or a file.
    	 */
    	setvbuf(stdout, NULL, _IOLBF, 0);
    
    	printf("allocate 100MB\n");
    	consume(100);
    
    	printf("testcase1: fork inherit? \n");
    	printf("  expect: initial.self ~= child.self\n");
    	show_rusage("initial");
    	if (fork_or_die()) {
    		wait(&status);
    	} else {
    		show_rusage("fork child");
    		_exit(0);
    	}
    	printf("\n");
    
    	printf("testcase2: fork inherit? (cont.) \n");
    	printf("  expect: initial.children ~= 100MB, but child.children = 0\n");
    	show_rusage("initial");
    	if (fork_or_die()) {
    		wait(&status);
    	} else {
    		show_rusage("child");
    		_exit(0);
    	}
    	printf("\n");
    
    	printf("testcase3: fork + malloc \n");
    	printf("  expect: child.self ~= initial.self + 50MB\n");
    	show_rusage("initial");
    	if (fork_or_die()) {
    		wait(&status);
    	} else {
    		printf("allocate +50MB\n");
    		consume(50);
    		show_rusage("fork child");
    		_exit(0);
    	}
    	printf("\n");
    
    	printf("testcase4: grandchild maxrss\n");
    	printf("  expect: post_wait.children ~= 300MB\n");
    	show_rusage("initial");
    	if (fork_or_die()) {
    		wait(&status);
    		show_rusage("post_wait");
    	} else {
    		system("./child -n 0 -g 300");
    		_exit(0);
    	}
    	printf("\n");
    
    	printf("testcase5: zombie\n");
    	printf("  expect: pre_wait ~= initial, IOW the zombie process is not accounted.\n");
    	printf("          post_wait ~= 400MB, IOW wait() collect child's max_rss. \n");
    	show_rusage("initial");
    	if (fork_or_die()) {
    		sleep(1); /* children become zombie */
    		show_rusage("pre_wait");
    		wait(&status);
    		show_rusage("post_wait");
    	} else {
    		system("./child -n 400");
    		_exit(0);
    	}
    	printf("\n");
    
    	printf("testcase6: SIG_IGN\n");
    	printf("  expect: initial ~= after_zombie (child's 500MB alloc should be ignored).\n");
    	show_rusage("initial");
    	/* With SIGCHLD ignored the child is never waited for in this case. */
    	signal(SIGCHLD, SIG_IGN);
    	if (fork_or_die()) {
    		sleep(1); /* children become zombie */
    		show_rusage("after_zombie");
    	} else {
    		system("./child -n 500");
    		_exit(0);
    	}
    	printf("\n");
    	signal(SIGCHLD, SIG_DFL);
    
    	printf("testcase7: exec (without fork) \n");
    	printf("  expect: initial ~= exec \n");
    	show_rusage("initial");
    	execl("./child", "child", "-v", NULL);
    
    	/* execl() only returns on failure; do not report success in that case. */
    	perror("execl");
    	return 1;
    }
    
    child.c
    =======
     #include <sys/types.h>
     #include <unistd.h>
     #include <sys/types.h>
     #include <sys/wait.h>
     #include <stdio.h>
     #include <stdlib.h>
     #include <string.h>
     #include <sys/types.h>
     #include <sys/time.h>
     #include <sys/resource.h>
    
     #include "common.h"
    
    /*
     * Helper program run by the getrusage test.
     *
     * Options:
     *   -n <MB>  allocate <MB> megabytes in this process
     *   -g <MB>  fork a grandchild that allocates <MB> megabytes, and wait for it
     *   -v       print this process's rusage values (labelled "exec") first
     *
     * Unknown options are ignored.  Returns 0 on success, 1 if fork() fails.
     */
    int main(int argc, char** argv)
    {
    	int status;
    	int c;
    	long consume_size = 0;
    	long grandchild_consume_size = 0;
    	int show = 0;
    
    	while ((c = getopt(argc, argv, "n:g:v")) != -1) {
    		switch (c) {
    		case 'n':
    			consume_size = atol(optarg);
    			break;
    		case 'v':
    			show = 1;
    			break;
    		case 'g':
    			grandchild_consume_size = atol(optarg);
    			break;
    		default:
    			break;
    		}
    	}
    
    	if (show)
    		show_rusage("exec");
    
    	if (consume_size) {
    		printf("child alloc %ldMB\n", consume_size);
    		consume(consume_size);
    	}
    
    	if (grandchild_consume_size) {
    		pid_t pid;
    
    		/*
    		 * Flush before forking: anything still sitting in the stdio
    		 * buffer would otherwise be copied into the grandchild and
    		 * written a second time when it calls exit().
    		 */
    		fflush(stdout);
    
    		pid = fork();
    		if (pid < 0) {
    			perror("fork");
    			return 1;
    		}
    
    		if (pid) {
    			wait(&status);
    		} else {
    			printf("grandchild alloc %ldMB\n", grandchild_consume_size);
    			consume(grandchild_consume_size);
    
    			exit(0);
    		}
    	}
    
    	return 0;
    }
    
    common.c
    ========
     #include <stdio.h>
     #include <stdlib.h>
     #include <string.h>
     #include <sys/types.h>
     #include <sys/time.h>
     #include <sys/resource.h>
     #include <sys/types.h>
     #include <sys/wait.h>
     #include <unistd.h>
     #include <signal.h>
     #include <sys/mman.h>
    
     #include "common.h"
     #define err(str) perror(str), exit(1)
    
    /*
     * Print one line of the form "<prefix>: self <n> children <n> ".
     *
     * <n> is the ru_maxrss field returned by getrusage() for RUSAGE_SELF and
     * RUSAGE_CHILDREN respectively.  A field whose getrusage() call fails is
     * simply left out of the line.
     */
    void show_rusage(char *prefix)
    {
    	struct rusage self_usage;
    	struct rusage child_usage;
    
    	printf("%s: ", prefix);
    
    	if (getrusage(RUSAGE_SELF, &self_usage) == 0)
    		printf("self %ld ", self_usage.ru_maxrss);
    
    	if (getrusage(RUSAGE_CHILDREN, &child_usage) == 0)
    		printf("children %ld ", child_usage.ru_maxrss);
    
    	printf("\n");
    }
    
    /*
     * Map one anonymous private page, write zeroes to it and unmap it again,
     * 1000 times in a row.  Exits through err() if a mapping cannot be created.
     *
     * According to the original author some buggy OSes need this otherwise
     * worthless CPU waste.
     * NOTE(review): presumably it forces the rss accounting to be refreshed
     * before it is sampled -- confirm.
     */
    void make_pagefault(void)
    {
    	const int page_bytes = getpagesize();
    	int remaining = 1000;
    
    	while (remaining-- > 0) {
    		void *page = mmap(NULL, page_bytes, PROT_READ | PROT_WRITE,
    				  MAP_PRIVATE | MAP_ANON, -1, 0);
    
    		if (page == MAP_FAILED)
    			err("make_pagefault");
    		memset(page, 0, page_bytes);
    		munmap(page, page_bytes);
    	}
    }
    
    /*
     * Allocate `mega` megabytes, write zeroes over the whole allocation, then
     * call make_pagefault().
     *
     * The allocation is deliberately never freed: the memory has to stay in
     * place for the rest of the process lifetime.  If the allocation fails
     * (including for a negative `mega`, which wraps to an enormous request)
     * the process prints an error and exits with status 1.
     */
    void consume(int mega)
    {
    	/* Widen before multiplying: int arithmetic overflows for mega >= 2048. */
    	size_t sz = (size_t)mega * 1024 * 1024;
    	void *ptr;
    
    	ptr = malloc(sz);
    	if (ptr == NULL && sz != 0) {
    		perror("consume: malloc");
    		exit(1);
    	}
    
    	/* malloc(0) may legitimately return NULL; never hand that to memset(). */
    	if (ptr != NULL)
    		memset(ptr, 0, sz);
    
    	make_pagefault();
    }
    
    /*
     * fork() wrapper: after forking, both the parent and the child call
     * make_pagefault() before the fork() result is handed back.
     *
     * Returns whatever fork() returned, including -1 on failure.
     */
    pid_t __fork(void)
    {
    	const pid_t fork_result = fork();
    
    	make_pagefault();
    	return fork_result;
    }
    
    common.h
    ========
    /* Print "<prefix>: self <ru_maxrss> children <ru_maxrss>" for this process. */
    void show_rusage(char *prefix);
    /* Map, zero and unmap a single anonymous page 1000 times. */
    void make_pagefault(void);
    /* Allocate and zero-fill `mega` megabytes, then call make_pagefault(). */
    void consume(int mega);
    /* fork(), then call make_pagefault() in both processes; returns fork()'s result. */
    pid_t __fork(void);
    
    FreeBSD result (expected result)
    ========================================================
    allocate 100MB
    testcase1: fork inherit?
      expect: initial.self ~= child.self
    initial: self 103492 children 0
    fork child: self 103540 children 0
    
    testcase2: fork inherit? (cont.)
      expect: initial.children ~= 100MB, but child.children = 0
    initial: self 103540 children 103540
    child: self 103564 children 0
    
    testcase3: fork + malloc
      expect: child.self ~= initial.self + 50MB
    initial: self 103564 children 103564
    allocate +50MB
    fork child: self 154860 children 0
    
    testcase4: grandchild maxrss
      expect: post_wait.children ~= 300MB
    initial: self 103564 children 154860
    grandchild alloc 300MB
    post_wait: self 103564 children 308720
    
    testcase5: zombie
      expect: pre_wait ~= initial, IOW the zombie process is not accounted.
              post_wait ~= 400MB, IOW wait() collect child's max_rss.
    initial: self 103564 children 308720
    child alloc 400MB
    pre_wait: self 103564 children 308720
    post_wait: self 103564 children 411312
    
    testcase6: SIG_IGN
      expect: initial ~= after_zombie (child's 500MB alloc should be ignored).
    initial: self 103564 children 411312
    child alloc 500MB
    after_zombie: self 103624 children 411312
    
    testcase7: exec (without fork)
      expect: initial ~= exec
    initial: self 103624 children 411312
    exec: self 103624 children 411312
    
    Linux result (actual test result)
    ========================================================
    allocate 100MB
    testcase1: fork inherit?
      expect: initial.self ~= child.self
    initial: self 102848 children 0
    fork child: self 102572 children 0
    
    testcase2: fork inherit? (cont.)
      expect: initial.children ~= 100MB, but child.children = 0
    initial: self 102876 children 102644
    child: self 102572 children 0
    
    testcase3: fork + malloc
      expect: child.self ~= initial.self + 50MB
    initial: self 102876 children 102644
    allocate +50MB
    fork child: self 153804 children 0
    
    testcase4: grandchild maxrss
      expect: post_wait.children ~= 300MB
    initial: self 102876 children 153864
    grandchild alloc 300MB
    post_wait: self 102876 children 307536
    
    testcase5: zombie
      expect: pre_wait ~= initial, IOW the zombie process is not accounted.
              post_wait ~= 400MB, IOW wait() collect child's max_rss.
    initial: self 102876 children 307536
    child alloc 400MB
    pre_wait: self 102876 children 307536
    post_wait: self 102876 children 410076
    
    testcase6: SIG_IGN
      expect: initial ~= after_zombie (child's 500MB alloc should be ignored).
    initial: self 102876 children 410076
    child alloc 500MB
    after_zombie: self 102880 children 410076
    
    testcase7: exec (without fork)
      expect: initial ~= exec
    initial: self 102880 children 410076
    exec: self 102880 children 410076
    Signed-off-by: Jiri Pirko <jpirko@redhat.com>
    Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
    Cc: Oleg Nesterov <oleg@redhat.com>
    Cc: Hugh Dickins <hugh.dickins@tiscali.co.uk>
    Cc: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
    Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
    1f10206c
sys.c 36.5 KB