Commit cae0f5da authored by Ben Blum, committed by James Toy

Add an rwsem that lives in a threadgroup's sighand_struct (next to the
sighand's atomic count, to piggyback on its cacheline), and two functions
in kernel/cgroup.c (for now) for easily and safely obtaining and releasing it.

If another part of the kernel later wants to use such a locking mechanism,
the CONFIG_CGROUPS ifdefs should be changed to a higher-up flag that
CGROUPS and the other system would both depend on, and the lock/unlock
functions could be moved to sched.c or so.
Signed-off-by: Ben Blum <bblum@google.com>
Signed-off-by: Paul Menage <menage@google.com>
Acked-by: Li Zefan <lizf@cn.fujitsu.com>
Cc: Matt Helsley <matthltc@us.ibm.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent 3fc37ef7
...@@ -30,10 +30,12 @@ extern int cgroup_init(void); ...@@ -30,10 +30,12 @@ extern int cgroup_init(void);
extern void cgroup_lock(void); extern void cgroup_lock(void);
extern bool cgroup_lock_live_group(struct cgroup *cgrp); extern bool cgroup_lock_live_group(struct cgroup *cgrp);
extern void cgroup_unlock(void); extern void cgroup_unlock(void);
extern void cgroup_fork(struct task_struct *p); extern void cgroup_fork(struct task_struct *p, unsigned long clone_flags);
extern void cgroup_fork_callbacks(struct task_struct *p); extern void cgroup_fork_callbacks(struct task_struct *p);
extern void cgroup_post_fork(struct task_struct *p); extern void cgroup_post_fork(struct task_struct *p, unsigned long clone_flags);
extern void cgroup_exit(struct task_struct *p, int run_callbacks); extern void cgroup_exit(struct task_struct *p, int run_callbacks);
extern void cgroup_fork_failed(struct task_struct *p, int run_callbacks,
unsigned long clone_flags);
extern int cgroupstats_build(struct cgroupstats *stats, extern int cgroupstats_build(struct cgroupstats *stats,
struct dentry *dentry); struct dentry *dentry);
...@@ -568,10 +570,14 @@ unsigned short css_depth(struct cgroup_subsys_state *css); ...@@ -568,10 +570,14 @@ unsigned short css_depth(struct cgroup_subsys_state *css);
static inline int cgroup_init_early(void) { return 0; } static inline int cgroup_init_early(void) { return 0; }
static inline int cgroup_init(void) { return 0; } static inline int cgroup_init(void) { return 0; }
static inline void cgroup_fork(struct task_struct *p) {} static inline void cgroup_fork(struct task_struct *p,
unsigned long clone_flags) {}
static inline void cgroup_fork_callbacks(struct task_struct *p) {} static inline void cgroup_fork_callbacks(struct task_struct *p) {}
static inline void cgroup_post_fork(struct task_struct *p) {} static inline void cgroup_post_fork(struct task_struct *p,
unsigned long clone_flags) {}
static inline void cgroup_exit(struct task_struct *p, int callbacks) {} static inline void cgroup_exit(struct task_struct *p, int callbacks) {}
static inline void cgroup_fork_failed(struct task_struct *p, int callbacks,
unsigned long clone_flags) {}
static inline void cgroup_lock(void) {} static inline void cgroup_lock(void) {}
static inline void cgroup_unlock(void) {} static inline void cgroup_unlock(void) {}
......
...@@ -41,7 +41,16 @@ extern struct nsproxy init_nsproxy; ...@@ -41,7 +41,16 @@ extern struct nsproxy init_nsproxy;
INIT_IPC_NS(ipc_ns) \ INIT_IPC_NS(ipc_ns) \
} }
/*
 * INIT_THREADGROUP_FORK_LOCK(sighand) - static initializer fragment for the
 * threadgroup_fork_lock member of a sighand_struct.
 *
 * With CONFIG_CGROUPS it expands to a designated initializer (including the
 * trailing comma) built from __RWSEM_INITIALIZER; without CONFIG_CGROUPS the
 * member does not exist, so the macro expands to nothing.
 */
#ifdef CONFIG_CGROUPS
# define INIT_THREADGROUP_FORK_LOCK(sighand) \
.threadgroup_fork_lock = \
__RWSEM_INITIALIZER(sighand.threadgroup_fork_lock),
#else
# define INIT_THREADGROUP_FORK_LOCK(sighand)
#endif
#define INIT_SIGHAND(sighand) { \ #define INIT_SIGHAND(sighand) { \
INIT_THREADGROUP_FORK_LOCK(sighand) \
.count = ATOMIC_INIT(1), \ .count = ATOMIC_INIT(1), \
.action = { { { .sa_handler = NULL, } }, }, \ .action = { { { .sa_handler = NULL, } }, }, \
.siglock = __SPIN_LOCK_UNLOCKED(sighand.siglock), \ .siglock = __SPIN_LOCK_UNLOCKED(sighand.siglock), \
......
...@@ -482,6 +482,21 @@ extern int get_dumpable(struct mm_struct *mm); ...@@ -482,6 +482,21 @@ extern int get_dumpable(struct mm_struct *mm);
#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK) #define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK)
struct sighand_struct { struct sighand_struct {
#ifdef CONFIG_CGROUPS
/*
* The threadgroup_fork_lock is used to prevent any threads in a
* threadgroup from forking with CLONE_THREAD while held for writing,
* used for threadgroup-wide operations that are fork-sensitive. It
* lives here next to sighand.count as a cacheline optimization.
*
* TODO: if anybody besides cgroups uses this lock, change the
* CONFIG_CGROUPS to a higher-up CONFIG_* that the other user and
* cgroups would both depend upon. Also, they'll want to move where
* the readlock happens - it currently lives in kernel/cgroup.c in
* cgroup_{fork,post_fork,fork_failed}().
*/
struct rw_semaphore threadgroup_fork_lock;
#endif
atomic_t count; atomic_t count;
struct k_sigaction action[_NSIG]; struct k_sigaction action[_NSIG];
spinlock_t siglock; spinlock_t siglock;
......
...@@ -1528,6 +1528,65 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen) ...@@ -1528,6 +1528,65 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
return 0; return 0;
} }
/**
 * threadgroup_fork_lock - block all CLONE_THREAD forks in the threadgroup
 * @tsk: the task whose threadgroup should be locked
 *
 * Takes the threadgroup_fork_lock rwsem in the threadgroup's sighand_struct
 * for writing. If @tsk has already cleared its own sighand pointer, the
 * threadgroup list is searched for another thread whose sighand pointer is
 * still set.
 *
 * Returns the sighand_struct, with an extra reference taken on its count,
 * that should be given to threadgroup_fork_unlock(); or NULL if all threads
 * in the group are exiting and have cleared their sighand pointers, in which
 * case no lock is held.
 */
struct sighand_struct *threadgroup_fork_lock(struct task_struct *tsk)
{
	struct sighand_struct *sighand;
	struct task_struct *p;

	/* tasklist lock protects sighand_struct's disappearance in exit(). */
	read_lock(&tasklist_lock);
	if (likely(tsk->sighand)) {
		/* simple case - check the thread we were given first */
		sighand = tsk->sighand;
	} else {
		sighand = NULL;
		/*
		 * tsk is exiting; try to find another thread in the group
		 * whose sighand pointer is still alive.
		 */
		rcu_read_lock();
		list_for_each_entry_rcu(p, &tsk->thread_group, thread_group) {
			if (p->sighand) {
				/*
				 * Take the sighand from the live thread we
				 * found; tsk->sighand is NULL on this path.
				 */
				sighand = p->sighand;
				break;
			}
		}
		rcu_read_unlock();
	}

	/* prevent sighand from vanishing before we let go of tasklist_lock */
	if (likely(sighand))
		atomic_inc(&sighand->count);

	/* done searching. */
	read_unlock(&tasklist_lock);

	/* the rwsem may sleep, so take it only after dropping tasklist_lock */
	if (likely(sighand))
		down_write(&sighand->threadgroup_fork_lock);
	return sighand;
}
/**
 * threadgroup_fork_unlock - let threadgroup resume CLONE_THREAD forks.
 * @sighand: the threadgroup's sighand that threadgroup_fork_lock gave back
 *
 * Lets go of the write hold on threadgroup_fork_lock, then drops the sighand
 * reference through __cleanup_sighand(). @sighand is dereferenced
 * unconditionally, so the NULL that threadgroup_fork_lock() can return must
 * not be passed here.
 */
void threadgroup_fork_unlock(struct sighand_struct *sighand)
{
/* release the lock before giving up the reference that keeps it alive */
up_write(&sighand->threadgroup_fork_lock);
__cleanup_sighand(sighand);
}
/** /**
* cgroup_attach_task - attach task 'tsk' to cgroup 'cgrp' * cgroup_attach_task - attach task 'tsk' to cgroup 'cgrp'
* @cgrp: the cgroup the task is attaching to * @cgrp: the cgroup the task is attaching to
...@@ -3421,8 +3480,10 @@ static struct file_operations proc_cgroupstats_operations = { ...@@ -3421,8 +3480,10 @@ static struct file_operations proc_cgroupstats_operations = {
* At the point that cgroup_fork() is called, 'current' is the parent * At the point that cgroup_fork() is called, 'current' is the parent
* task, and the passed argument 'child' points to the child task. * task, and the passed argument 'child' points to the child task.
*/ */
void cgroup_fork(struct task_struct *child) void cgroup_fork(struct task_struct *child, unsigned long clone_flags)
{ {
if (clone_flags & CLONE_THREAD)
down_read(&current->sighand->threadgroup_fork_lock);
task_lock(current); task_lock(current);
child->cgroups = current->cgroups; child->cgroups = current->cgroups;
get_css_set(child->cgroups); get_css_set(child->cgroups);
...@@ -3459,7 +3520,7 @@ void cgroup_fork_callbacks(struct task_struct *child) ...@@ -3459,7 +3520,7 @@ void cgroup_fork_callbacks(struct task_struct *child)
* with the first call to cgroup_iter_start() - to guarantee that the * with the first call to cgroup_iter_start() - to guarantee that the
* new task ends up on its list. * new task ends up on its list.
*/ */
void cgroup_post_fork(struct task_struct *child) void cgroup_post_fork(struct task_struct *child, unsigned long clone_flags)
{ {
if (use_task_css_set_links) { if (use_task_css_set_links) {
write_lock(&css_set_lock); write_lock(&css_set_lock);
...@@ -3469,6 +3530,8 @@ void cgroup_post_fork(struct task_struct *child) ...@@ -3469,6 +3530,8 @@ void cgroup_post_fork(struct task_struct *child)
task_unlock(child); task_unlock(child);
write_unlock(&css_set_lock); write_unlock(&css_set_lock);
} }
if (clone_flags & CLONE_THREAD)
up_read(&current->sighand->threadgroup_fork_lock);
} }
/** /**
* cgroup_exit - detach cgroup from exiting task * cgroup_exit - detach cgroup from exiting task
...@@ -3539,6 +3602,26 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks) ...@@ -3539,6 +3602,26 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks)
put_css_set_taskexit(cg); put_css_set_taskexit(cg);
} }
/**
 * cgroup_fork_failed - undo operations for fork failure
 * @tsk: pointer to task_struct of the child task whose fork failed
 * @run_callbacks: run exit callbacks? Passed through to cgroup_exit().
 * @clone_flags: clone flags of the failed fork; the read lock is only
 *	released when CLONE_THREAD is set
 *
 * Description: Undo cgroup operations after cgroup_fork in fork failure.
 *
 * We release the read lock that was taken in cgroup_fork(), since it is
 * supposed to be dropped in cgroup_post_fork in the success case. The other
 * thing that wants to be done is detaching the failed child task from the
 * cgroup, so we wrap cgroup_exit.
 */
void cgroup_fork_failed(struct task_struct *tsk, int run_callbacks,
unsigned long clone_flags)
{
/* mirrors the CLONE_THREAD-conditional down_read() in cgroup_fork() */
if (clone_flags & CLONE_THREAD)
up_read(&current->sighand->threadgroup_fork_lock);
cgroup_exit(tsk, run_callbacks);
}
/** /**
* cgroup_clone - clone the cgroup the given subsystem is attached to * cgroup_clone - clone the cgroup the given subsystem is attached to
* @tsk: the task to be moved * @tsk: the task to be moved
......
...@@ -787,6 +787,9 @@ static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk) ...@@ -787,6 +787,9 @@ static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk)
return -ENOMEM; return -ENOMEM;
atomic_set(&sig->count, 1); atomic_set(&sig->count, 1);
memcpy(sig->action, current->sighand->action, sizeof(sig->action)); memcpy(sig->action, current->sighand->action, sizeof(sig->action));
#ifdef CONFIG_CGROUPS
init_rwsem(&sig->threadgroup_fork_lock);
#endif
return 0; return 0;
} }
...@@ -1055,7 +1058,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, ...@@ -1055,7 +1058,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
monotonic_to_bootbased(&p->real_start_time); monotonic_to_bootbased(&p->real_start_time);
p->io_context = NULL; p->io_context = NULL;
p->audit_context = NULL; p->audit_context = NULL;
cgroup_fork(p); cgroup_fork(p, clone_flags);
#ifdef CONFIG_NUMA #ifdef CONFIG_NUMA
p->mempolicy = mpol_dup(p->mempolicy); p->mempolicy = mpol_dup(p->mempolicy);
if (IS_ERR(p->mempolicy)) { if (IS_ERR(p->mempolicy)) {
...@@ -1275,7 +1278,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, ...@@ -1275,7 +1278,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
spin_unlock(&current->sighand->siglock); spin_unlock(&current->sighand->siglock);
write_unlock_irq(&tasklist_lock); write_unlock_irq(&tasklist_lock);
proc_fork_connector(p); proc_fork_connector(p);
cgroup_post_fork(p); cgroup_post_fork(p, clone_flags);
perf_counter_fork(p); perf_counter_fork(p);
return p; return p;
...@@ -1308,7 +1311,7 @@ bad_fork_cleanup_policy: ...@@ -1308,7 +1311,7 @@ bad_fork_cleanup_policy:
mpol_put(p->mempolicy); mpol_put(p->mempolicy);
bad_fork_cleanup_cgroup: bad_fork_cleanup_cgroup:
#endif #endif
cgroup_exit(p, cgroup_callbacks_done); cgroup_fork_failed(p, cgroup_callbacks_done, clone_flags);
delayacct_tsk_free(p); delayacct_tsk_free(p);
if (p->binfmt) if (p->binfmt)
module_put(p->binfmt->module); module_put(p->binfmt->module);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment