Commit a8ddac7e authored by Nick Piggin, committed by Linus Torvalds

mutex: speed up generic mutex implementations

- Atomic operations that both modify the variable and return a value imply
  full smp memory barriers before and after the memory operations involved
  (a failing atomic_cmpxchg, atomic_add_unless, etc. don't imply a barrier,
  because they don't modify the target). See Documentation/atomic_ops.txt.
  So remove the extra barriers and branches; a short illustration of this
  rule follows below.

- All architectures support atomic_cmpxchg. This has no relation to
  __HAVE_ARCH_CMPXCHG, so we can simply take the atomic_cmpxchg path
  unconditionally.
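
As a quick illustration of the barrier rule the first point relies on (a
sketch added for this write-up, not part of the patch; the function name and
the variable v are made up, and header locations vary between kernel
versions), the ordering guarantees look like this in code:

	#include <asm/atomic.h>		/* atomic_t and the atomic_*() ops */
	#include <asm/system.h>		/* smp_mb() on kernels of this era */

	static atomic_t v = ATOMIC_INIT(1);

	static void barrier_rule_sketch(void)
	{
		int old;

		atomic_dec(&v);			/* returns nothing: no barrier implied */

		old = atomic_dec_return(&v);	/* returns a value: behaves as a full smp
						 * memory barrier before and after the op */

		old = atomic_cmpxchg(&v, 1, 0);	/* implies the barriers only when it
						 * actually modifies the target, i.e.
						 * when the compare succeeds */

		/*
		 * So the old generic mutex fastpath,
		 *
		 *	if (unlikely(atomic_dec_return(count) < 0))
		 *		fail_fn(count);
		 *	else
		 *		smp_mb();
		 *
		 * paid for an extra branch and an smp_mb() that the
		 * atomic_dec_return() already provides.
		 */
	}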

This reduces a simple single-threaded fastpath lock+unlock test from 590 cycles
to 203 cycles on a ppc970 system.
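
For context on how a figure like this could be measured (purely a hypothetical
sketch, not the benchmark behind the numbers above; the module name, iteration
count and use of get_cycles() are all assumptions, and on powerpc get_cycles()
reads the timebase rather than raw CPU cycles):

	#include <linux/module.h>
	#include <linux/init.h>
	#include <linux/kernel.h>
	#include <linux/mutex.h>
	#include <linux/timex.h>	/* get_cycles(), cycles_t */

	static DEFINE_MUTEX(test_mutex);

	static int __init mutex_cycles_init(void)
	{
		const unsigned long iters = 1000000;
		cycles_t start, end;
		unsigned long i;

		start = get_cycles();
		for (i = 0; i < iters; i++) {
			mutex_lock(&test_mutex);	/* uncontended: stays on the fastpath */
			mutex_unlock(&test_mutex);
		}
		end = get_cycles();

		printk(KERN_INFO "mutex fastpath: ~%lu cycles per lock+unlock\n",
		       (unsigned long)(end - start) / iters);
		return 0;
	}

	static void __exit mutex_cycles_exit(void)
	{
	}

	module_init(mutex_cycles_init);
	module_exit(mutex_cycles_exit);
	MODULE_LICENSE("GPL");
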
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 5a439c56
@@ -22,8 +22,6 @@ __mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *))
 {
 	if (unlikely(atomic_dec_return(count) < 0))
 		fail_fn(count);
-	else
-		smp_mb();
 }
 
 /**
@@ -41,10 +39,7 @@ __mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *))
 {
 	if (unlikely(atomic_dec_return(count) < 0))
 		return fail_fn(count);
-	else {
-		smp_mb();
-		return 0;
-	}
+	return 0;
 }
 
 /**
@@ -63,7 +58,6 @@ __mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *))
 static inline void
 __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
 {
-	smp_mb();
 	if (unlikely(atomic_inc_return(count) <= 0))
 		fail_fn(count);
 }
@@ -88,25 +82,9 @@ __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
 static inline int
 __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
 {
-	/*
-	 * We have two variants here. The cmpxchg based one is the best one
-	 * because it never induce a false contention state. It is included
-	 * here because architectures using the inc/dec algorithms over the
-	 * xchg ones are much more likely to support cmpxchg natively.
-	 *
-	 * If not we fall back to the spinlock based variant - that is
-	 * just as efficient (and simpler) as a 'destructive' probing of
-	 * the mutex state would be.
-	 */
-#ifdef __HAVE_ARCH_CMPXCHG
-	if (likely(atomic_cmpxchg(count, 1, 0) == 1)) {
-		smp_mb();
+	if (likely(atomic_cmpxchg(count, 1, 0) == 1))
 		return 1;
-	}
 	return 0;
-#else
-	return fail_fn(count);
-#endif
 }
 
 #endif
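
Taken together, the hunks above (the dec-based generic variant) leave a
trylock fastpath that, reconstructed from the diff, reads as follows; the
comment is added here for explanation and is not in the file:

	static inline int
	__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
	{
		/*
		 * atomic_cmpxchg() exists on every architecture and, when it
		 * succeeds, already implies the needed barriers, so the
		 * __HAVE_ARCH_CMPXCHG #ifdef, the fail_fn() fallback and the
		 * explicit smp_mb() are all gone.
		 */
		if (likely(atomic_cmpxchg(count, 1, 0) == 1))
			return 1;
		return 0;
	}

The remaining hunks apply the same treatment to the xchg-based generic
variant.
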
@@ -27,8 +27,6 @@ __mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *))
 {
 	if (unlikely(atomic_xchg(count, 0) != 1))
 		fail_fn(count);
-	else
-		smp_mb();
 }
 
 /**
@@ -46,10 +44,7 @@ __mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *))
 {
 	if (unlikely(atomic_xchg(count, 0) != 1))
 		return fail_fn(count);
-	else {
-		smp_mb();
-		return 0;
-	}
+	return 0;
 }
 
 /**
@@ -67,7 +62,6 @@ __mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *))
 static inline void
 __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
 {
-	smp_mb();
 	if (unlikely(atomic_xchg(count, 1) != 0))
 		fail_fn(count);
 }
@@ -110,7 +104,6 @@ __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
 		if (prev < 0)
 			prev = 0;
 	}
-	smp_mb();
 
 	return prev;
 }
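
Likewise for the xchg-based variant: atomic_xchg() returns the previous value,
so it already carries the full barrier semantics, and the unlock fastpath
(reconstructed from the hunk above, with an explanatory comment added)
becomes:

	static inline void
	__mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
	{
		/* atomic_xchg() returns a value, so it implies the full barriers itself */
		if (unlikely(atomic_xchg(count, 1) != 0))
			fail_fn(count);
	}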