static void *ls_next(struct seq_file *m, void *v, loff_t *pos)
{
(*pos)++;
return ls_start(m, pos);
}
static void ls_stop(struct seq_file *m, void *v)
{
}
static int ls_show(struct seq_file *m, void *v)
{
if (v == SEQ_START_TOKEN)
seq_header(m);
else
seq_stats(m, v);
return 0;
}
static const struct seq_operations lockstat_ops = {
.start = ls_start,
.next = ls_next,
.stop = ls_stop,
.show = ls_show,
};
static int lock_stat_open(struct inode *inode, struct file *file)
{
int res;
struct lock_class *class;
struct lock_stat_seq *data = vmalloc(sizeof(struct lock_stat_seq));
if (!data)
return -ENOMEM;
res = seq_open(file, &lockstat_ops);
if (!res) {
struct lock_stat_data *iter = data->stats;
struct seq_file *m = file->private_data;
list_for_each_entry(class, &all_lock_classes, lock_entry) {
iter->class = class;
iter->stats = lock_stats(class);
iter++;
}
data->iter_end = iter;
sort(data->stats, data->iter_end - data->stats,
sizeof(struct lock_stat_data),
lock_stat_cmp, NULL);
m->private = data;
} else
vfree(data);
return res;
}
static ssize_t lock_stat_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
struct lock_class *class;
char c;
if (count) {
if (get_user(c, buf))
return -EFAULT;
if (c != '0')
return count;
list_for_each_entry(class, &all_lock_classes, lock_entry)
clear_lock_stats(class);
}
return count;
}
static int lock_stat_release(struct inode *inode, struct file *file)
{
struct seq_file *seq = file->private_data;
vfree(seq->private);
return seq_release(inode, file);
}
static const struct file_operations proc_lock_stat_operations = {
.open = lock_stat_open,
.write = lock_stat_write,
.read = seq_read,
.llseek = seq_lseek,
.release = lock_stat_release,
};
#endif /* CONFIG_LOCK_STAT */
static int __init lockdep_proc_init(void)
{
proc_create("lockdep", S_IRUSR, NULL, &proc_lockdep_operations);
#ifdef CONFIG_PROVE_LOCKING
proc_create("lockdep_chains", S_IRUSR, NULL,
&proc_lockdep_chains_operations);
#endif
proc_create("lockdep_stats", S_IRUSR, NULL,
&proc_lockdep_stats_operations);
#ifdef CONFIG_LOCK_STAT
proc_create("lock_stat", S_IRUSR | S_IWUSR, NULL,
&proc_lock_stat_operations);
#endif
return 0;
}
__initcall(lockdep_proc_init);
C:\Users\Admin\Desktop\linux-4.2.y-new\linux-4.2.y\kernel\/locking/lockdep_states.h
/*
* Lockdep states,
*
* please update XXX_LOCK_USAGE_STATES in include/linux/lockdep.h whenever
* you add one, or come up with a nice dynamic solution.
*/
LOCKDEP_STATE(HARDIRQ)
LOCKDEP_STATE(SOFTIRQ)
LOCKDEP_STATE(RECLAIM_FS)
C:\Users\Admin\Desktop\linux-4.2.y-new\linux-4.2.y\kernel\/locking/locktorture.c
/*
* Module-based torture test facility for locking
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* Copyright (C) IBM Corporation, 2014
*
* Author: Paul E. McKenney <paulmck@us.ibm.com>
* Based on kernel/rcu/torture.c.
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/spinlock.h>
#include <linux/rwlock.h>
#include <linux/mutex.h>
#include <linux/rwsem.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <linux/atomic.h>
#include <linux/moduleparam.h>
#include <linux/delay.h>
#include <linux/slab.h>
#include <linux/torture.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com>");
torture_param(int, nwriters_stress, -1,
"Number of write-locking stress-test threads");
torture_param(int, nreaders_stress, -1,
"Number of read-locking stress-test threads");
torture_param(int, onoff_holdoff, 0, "Time after boot before CPU hotplugs (s)");
torture_param(int, onoff_interval, 0,
"Time between CPU hotplugs (s), 0=disable");
torture_param(int, shuffle_interval, 3,
"Number of jiffies between shuffles, 0=disable");
torture_param(int, shutdown_secs, 0, "Shutdown time (j), <= zero to disable.");
torture_param(int, stat_interval, 60,
"Number of seconds between stats printk()s");
torture_param(int, stutter, 5, "Number of jiffies to run/halt test, 0=disable");
torture_param(bool, verbose, true,
"Enable verbose debugging printk()s");
static char *torture_type = "spin_lock";
module_param(torture_type, charp, 0444);
MODULE_PARM_DESC(torture_type,
"Type of lock to torture (spin_lock, spin_lock_irq, mutex_lock, ...)");
static struct task_struct *stats_task;
static struct task_struct **writer_tasks;
static struct task_struct **reader_tasks;
static bool lock_is_write_held;
static bool lock_is_read_held;
struct lock_stress_stats {
long n_lock_fail;
long n_lock_acquired;
};
#if defined(MODULE)
#define LOCKTORTURE_RUNNABLE_INIT 1
#else
#define LOCKTORTURE_RUNNABLE_INIT 0
#endif
int torture_runnable = LOCKTORTURE_RUNNABLE_INIT;
module_param(torture_runnable, int, 0444);
MODULE_PARM_DESC(torture_runnable, "Start locktorture at module init");
/* Forward reference. */
static void lock_torture_cleanup(void);
/*
* Operations vector for selecting different types of tests.
*/
struct lock_torture_ops {
void (*init)(void);
int (*writelock)(void);
void (*write_delay)(struct torture_random_state *trsp);
void (*writeunlock)(void);
int (*readlock)(void);
void (*read_delay)(struct torture_random_state *trsp);
void (*readunlock)(void);
unsigned long flags;
const char *name;
};
struct lock_torture_cxt {
int nrealwriters_stress;
int nrealreaders_stress;
bool debug_lock;
atomic_t n_lock_torture_errors;
struct lock_torture_ops *cur_ops;
struct lock_stress_stats *lwsa; /* writer statistics */
struct lock_stress_stats *lrsa; /* reader statistics */
};
static struct lock_torture_cxt cxt = { 0, 0, false,
ATOMIC_INIT(0),
NULL, NULL};
/*
* Definitions for lock torture testing.
*/
static int torture_lock_busted_write_lock(void)
{
return 0; /* BUGGY, do not use in real life!!! */
}
static void torture_lock_busted_write_delay(struct torture_random_state *trsp)
{
const unsigned long longdelay_ms = 100;
/* We want a long delay occasionally to force massive contention. */
if (!(torture_random(trsp) %
(cxt.nrealwriters_stress * 2000 * longdelay_ms)))
mdelay(longdelay_ms);
#ifdef CONFIG_PREEMPT
if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000)))
preempt_schedule(); /* Allow test to be preempted. */
#endif
}
static void torture_lock_busted_write_unlock(void)
{
/* BUGGY, do not use in real life!!! */
}
static struct lock_torture_ops lock_busted_ops = {
.writelock = torture_lock_busted_write_lock,
.write_delay = torture_lock_busted_write_delay,
.writeunlock = torture_lock_busted_write_unlock,
.readlock = NULL,
.read_delay = NULL,
.readunlock = NULL,
.name = "lock_busted"
};
static DEFINE_SPINLOCK(torture_spinlock);
static int torture_spin_lock_write_lock(void) __acquires(torture_spinlock)
{
spin_lock(&torture_spinlock);
return 0;
}
static void torture_spin_lock_write_delay(struct torture_random_state *trsp)
{
const unsigned long shortdelay_us = 2;
const unsigned long longdelay_ms = 100;
/* We want a short delay mostly to emulate likely code, and
* we want a long delay occasionally to force massive contention.
*/
if (!(torture_random(trsp) %
(cxt.nrealwriters_stress * 2000 * longdelay_ms)))
mdelay(longdelay_ms);
if (!(torture_random(trsp) %
(cxt.nrealwriters_stress * 2 * shortdelay_us)))
udelay(shortdelay_us);
#ifdef CONFIG_PREEMPT
if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000)))
preempt_schedule(); /* Allow test to be preempted. */
#endif
}
static void torture_spin_lock_write_unlock(void) __releases(torture_spinlock)
{
spin_unlock(&torture_spinlock);
}
static struct lock_torture_ops spin_lock_ops = {
.writelock = torture_spin_lock_write_lock,
.write_delay = torture_spin_lock_write_delay,
.writeunlock = torture_spin_lock_write_unlock,
.readlock = NULL,
.read_delay = NULL,
.readunlock = NULL,
.name = "spin_lock"
};
static int torture_spin_lock_write_lock_irq(void)
__acquires(torture_spinlock)
{
unsigned long flags;
spin_lock_irqsave(&torture_spinlock, flags);
cxt.cur_ops->flags = flags;
return 0;
}
static void torture_lock_spin_write_unlock_irq(void)
__releases(torture_spinlock)
{
spin_unlock_irqrestore(&torture_spinlock, cxt.cur_ops->flags);
}
static struct lock_torture_ops spin_lock_irq_ops = {
.writelock = torture_spin_lock_write_lock_irq,
.write_delay = torture_spin_lock_write_delay,
.writeunlock = torture_lock_spin_write_unlock_irq,
.readlock = NULL,
.read_delay = NULL,
.readunlock = NULL,
.name = "spin_lock_irq"
};
static DEFINE_RWLOCK(torture_rwlock);
static int torture_rwlock_write_lock(void) __acquires(torture_rwlock)
{
write_lock(&torture_rwlock);
return 0;
}
static void torture_rwlock_write_delay(struct torture_random_state *trsp)
{
const unsigned long shortdelay_us = 2;
const unsigned long longdelay_ms = 100;
/* We want a short delay mostly to emulate likely code, and
* we want a long delay occasionally to force massive contention.
*/
if (!(torture_random(trsp) %
(cxt.nrealwriters_stress * 2000 * longdelay_ms)))
mdelay(longdelay_ms);
else
udelay(shortdelay_us);
}
static void torture_rwlock_write_unlock(void) __releases(torture_rwlock)
{
write_unlock(&torture_rwlock);
}
static int torture_rwlock_read_lock(void) __acquires(torture_rwlock)
{
read_lock(&torture_rwlock);
return 0;
}
static void torture_rwlock_read_delay(struct torture_random_state *trsp)
{
const unsigned long shortdelay_us = 10;
const unsigned long longdelay_ms = 100;
/* We want a short delay mostly to emulate likely code, and
* we want a long delay occasionally to force massive contention.
*/
if (!(torture_random(trsp) %
(cxt.nrealreaders_stress * 2000 * longdelay_ms)))
mdelay(longdelay_ms);
else
udelay(shortdelay_us);
}
static void torture_rwlock_read_unlock(void) __releases(torture_rwlock)
{
read_unlock(&torture_rwlock);
}
static struct lock_torture_ops rw_lock_ops = {
.writelock = torture_rwlock_write_lock,
.write_delay = torture_rwlock_write_delay,
.writeunlock = torture_rwlock_write_unlock,
.readlock = torture_rwlock_read_lock,
.read_delay = torture_rwlock_read_delay,
.readunlock = torture_rwlock_read_unlock,
.name = "rw_lock"
};
static int torture_rwlock_write_lock_irq(void) __acquires(torture_rwlock)
{
unsigned long flags;
write_lock_irqsave(&torture_rwlock, flags);
cxt.cur_ops->flags = flags;
return 0;
}
static void torture_rwlock_write_unlock_irq(void)
__releases(torture_rwlock)
{
write_unlock_irqrestore(&torture_rwlock, cxt.cur_ops->flags);
}
static int torture_rwlock_read_lock_irq(void) __acquires(torture_rwlock)
{
unsigned long flags;
read_lock_irqsave(&torture_rwlock, flags);
cxt.cur_ops->flags = flags;
return 0;
}
static void torture_rwlock_read_unlock_irq(void)
__releases(torture_rwlock)
{
read_unlock_irqrestore(&torture_rwlock, cxt.cur_ops->flags);
}
static struct lock_torture_ops rw_lock_irq_ops = {
.writelock = torture_rwlock_write_lock_irq,
.write_delay = torture_rwlock_write_delay,
.writeunlock = torture_rwlock_write_unlock_irq,
.readlock = torture_rwlock_read_lock_irq,
.read_delay = torture_rwlock_read_delay,
.readunlock = torture_rwlock_read_unlock_irq,
.name = "rw_lock_irq"
};
static DEFINE_MUTEX(torture_mutex);
static int torture_mutex_lock(void) __acquires(torture_mutex)
{
mutex_lock(&torture_mutex);
return 0;
}
static void torture_mutex_delay(struct torture_random_state *trsp)
{
const unsigned long longdelay_ms = 100;
/* We want a long delay occasionally to force massive contention. */
if (!(torture_random(trsp) %
(cxt.nrealwriters_stress * 2000 * longdelay_ms)))
mdelay(longdelay_ms * 5);
else
mdelay(longdelay_ms / 5);
#ifdef CONFIG_PREEMPT
if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000)))
preempt_schedule(); /* Allow test to be preempted. */
#endif
}
static void torture_mutex_unlock(void) __releases(torture_mutex)
{
mutex_unlock(&torture_mutex);
}
static struct lock_torture_ops mutex_lock_ops = {
.writelock = torture_mutex_lock,
.write_delay = torture_mutex_delay,
.writeunlock = torture_mutex_unlock,
.readlock = NULL,
.read_delay = NULL,
.readunlock = NULL,
.name = "mutex_lock"
};
static DECLARE_RWSEM(torture_rwsem);
static int torture_rwsem_down_write(void) __acquires(torture_rwsem)
{
down_write(&torture_rwsem);
return 0;
}
static void torture_rwsem_write_delay(struct torture_random_state *trsp)
{
const unsigned long longdelay_ms = 100;
/* We want a long delay occasionally to force massive contention. */
if (!(torture_random(trsp) %
(cxt.nrealwriters_stress * 2000 * longdelay_ms)))
mdelay(longdelay_ms * 10);
else
mdelay(longdelay_ms / 10);
#ifdef CONFIG_PREEMPT
if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000)))
preempt_schedule(); /* Allow test to be preempted. */
#endif
}
static void torture_rwsem_up_write(void) __releases(torture_rwsem)
{
up_write(&torture_rwsem);
}
static int torture_rwsem_down_read(void) __acquires(torture_rwsem)
{
down_read(&torture_rwsem);
return 0;
}
static void torture_rwsem_read_delay(struct torture_random_state *trsp)
{
const unsigned long longdelay_ms = 100;
/* We want a long delay occasionally to force massive contention. */
if (!(torture_random(trsp) %
(cxt.nrealwriters_stress * 2000 * longdelay_ms)))
mdelay(longdelay_ms * 2);
else
mdelay(longdelay_ms / 2);
#ifdef CONFIG_PREEMPT
if (!(torture_random(trsp) % (cxt.nrealreaders_stress * 20000)))
preempt_schedule(); /* Allow test to be preempted. */
#endif
}
static void torture_rwsem_up_read(void) __releases(torture_rwsem)
{
up_read(&torture_rwsem);
}
static struct lock_torture_ops rwsem_lock_ops = {
.writelock = torture_rwsem_down_write,
.write_delay = torture_rwsem_write_delay,
.writeunlock = torture_rwsem_up_write,
.readlock = torture_rwsem_down_read,
.read_delay = torture_rwsem_read_delay,
.readunlock = torture_rwsem_up_read,
.name = "rwsem_lock"
};
/*
* Lock torture writer kthread. Repeatedly acquires and releases
* the lock, checking for duplicate acquisitions.
*/
static int lock_torture_writer(void *arg)
{
struct lock_stress_stats *lwsp = arg;
static DEFINE_TORTURE_RANDOM(rand);
VERBOSE_TOROUT_STRING("lock_torture_writer task started");
set_user_nice(current, MAX_NICE);
do {
if ((torture_random(&rand) & 0xfffff) == 0)
schedule_timeout_uninterruptible(1);
cxt.cur_ops->writelock();
if (WARN_ON_ONCE(lock_is_write_held))
lwsp->n_lock_fail++;
lock_is_write_held = 1;
if (WARN_ON_ONCE(lock_is_read_held))
lwsp->n_lock_fail++; /* rare, but... */
lwsp->n_lock_acquired++;
cxt.cur_ops->write_delay(&rand);
lock_is_write_held = 0;
cxt.cur_ops->writeunlock();
stutter_wait("lock_torture_writer");
} while (!torture_must_stop());
torture_kthread_stopping("lock_torture_writer");
return 0;
}
/*
* Lock torture reader kthread. Repeatedly acquires and releases
* the reader lock.
*/
static int lock_torture_reader(void *arg)
{
struct lock_stress_stats *lrsp = arg;
static DEFINE_TORTURE_RANDOM(rand);
VERBOSE_TOROUT_STRING("lock_torture_reader task started");
set_user_nice(current, MAX_NICE);
do {
if ((torture_random(&rand) & 0xfffff) == 0)
schedule_timeout_uninterruptible(1);
cxt.cur_ops->readlock();
lock_is_read_held = 1;
if (WARN_ON_ONCE(lock_is_write_held))
lrsp->n_lock_fail++; /* rare, but... */
lrsp->n_lock_acquired++;
cxt.cur_ops->read_delay(&rand);
lock_is_read_held = 0;
cxt.cur_ops->readunlock();
stutter_wait("lock_torture_reader");
} while (!torture_must_stop());
torture_kthread_stopping("lock_torture_reader");
return 0;
}
/*
* Create an lock-torture-statistics message in the specified buffer.
*/
static void __torture_print_stats(char *page,
struct lock_stress_stats *statp, bool write)
{
bool fail = 0;
int i, n_stress;
long max = 0;
long min = statp[0].n_lock_acquired;
long long sum = 0;
n_stress = write ? cxt.nrealwriters_stress : cxt.nrealreaders_stress;
for (i = 0; i < n_stress; i++) {
if (statp[i].n_lock_fail)
fail = true;
sum += statp[i].n_lock_acquired;
if (max < statp[i].n_lock_fail)
max = statp[i].n_lock_fail;
if (min > statp[i].n_lock_fail)
min = statp[i].n_lock_fail;
}
page += sprintf(page,
"%s: Total: %lld Max/Min: %ld/%ld %s Fail: %d %s\n",
write ? "Writes" : "Reads ",
sum, max, min, max / 2 > min ? "???" : "",
fail, fail ? "!!!" : "");
if (fail)
atomic_inc(&cxt.n_lock_torture_errors);
}
/*
* Print torture statistics. Caller must ensure that there is only one
* call to this function at a given time!!! This is normally accomplished
* by relying on the module system to only have one copy of the module
* loaded, and then by giving the lock_torture_stats kthread full control
* (or the init/cleanup functions when lock_torture_stats thread is not
* running).
*/
static void lock_torture_stats_print(void)
{
int size = cxt.nrealwriters_stress * 200 + 8192;
char *buf;
if (cxt.cur_ops->readlock)
size += cxt.nrealreaders_stress * 200 + 8192;
buf = kmalloc(size, GFP_KERNEL);
if (!buf) {
pr_err("lock_torture_stats_print: Out of memory, need: %d",
size);
return;
}
__torture_print_stats(buf, cxt.lwsa, true);
pr_alert("%s", buf);
kfree(buf);
if (cxt.cur_ops->readlock) {
buf = kmalloc(size, GFP_KERNEL);
if (!buf) {
pr_err("lock_torture_stats_print: Out of memory, need: %d",
size);
return;
}
__torture_print_stats(buf, cxt.lrsa, false);
pr_alert("%s", buf);
kfree(buf);
}
}
/*
* Periodically prints torture statistics, if periodic statistics printing
* was specified via the stat_interval module parameter.
*
* No need to worry about fullstop here, since this one doesn't reference
* volatile state or register callbacks.
*/
static int lock_torture_stats(void *arg)
{
VERBOSE_TOROUT_STRING("lock_torture_stats task started");
do {
schedule_timeout_interruptible(stat_interval * HZ);
lock_torture_stats_print();
torture_shutdown_absorb("lock_torture_stats");
} while (!torture_must_stop());
torture_kthread_stopping("lock_torture_stats");
return 0;
}
static inline void
lock_torture_print_module_parms(struct lock_torture_ops *cur_ops,
const char *tag)
{
pr_alert("%s" TORTURE_FLAG
"--- %s%s: nwriters_stress=%d nreaders_stress=%d stat_interval=%d verbose=%d shuffle_interval=%d stutter=%d shutdown_secs=%d onoff_interval=%d onoff_holdoff=%d\n",
torture_type, tag, cxt.debug_lock ? " [debug]": "",
cxt.nrealwriters_stress, cxt.nrealreaders_stress, stat_interval,
verbose, shuffle_interval, stutter, shutdown_secs,
onoff_interval, onoff_holdoff);
}
static void lock_torture_cleanup(void)
{
int i;
if (torture_cleanup_begin())
return;
if (writer_tasks) {
for (i = 0; i < cxt.nrealwriters_stress; i++)
torture_stop_kthread(lock_torture_writer,
writer_tasks[i]);
kfree(writer_tasks);
writer_tasks = NULL;
}
if (reader_tasks) {
for (i = 0; i < cxt.nrealreaders_stress; i++)
torture_stop_kthread(lock_torture_reader,
reader_tasks[i]);
kfree(reader_tasks);
reader_tasks = NULL;
}
torture_stop_kthread(lock_torture_stats, stats_task);
lock_torture_stats_print(); /* -After- the stats thread is stopped! */
if (atomic_read(&cxt.n_lock_torture_errors))
lock_torture_print_module_parms(cxt.cur_ops,
"End of test: FAILURE");
else if (torture_onoff_failures())
lock_torture_print_module_parms(cxt.cur_ops,
"End of test: LOCK_HOTPLUG");
else
lock_torture_print_module_parms(cxt.cur_ops,
"End of test: SUCCESS");
torture_cleanup_end();
}
static int __init lock_torture_init(void)
{
int i, j;
int firsterr = 0;
static struct lock_torture_ops *torture_ops[] = {
&lock_busted_ops,
&spin_lock_ops, &spin_lock_irq_ops,
&rw_lock_ops, &rw_lock_irq_ops,
&mutex_lock_ops,
&rwsem_lock_ops,
};
if (!torture_init_begin(torture_type, verbose, &torture_runnable))
return -EBUSY;
/* Process args and tell the world that the torturer is on the job. */
for (i = 0; i < ARRAY_SIZE(torture_ops); i++) {
cxt.cur_ops = torture_ops[i];
if (strcmp(torture_type, cxt.cur_ops->name) == 0)
break;
}
if (i == ARRAY_SIZE(torture_ops)) {
pr_alert("lock-torture: invalid torture type: \"%s\"\n",
torture_type);
pr_alert("lock-torture types:");
for (i = 0; i < ARRAY_SIZE(torture_ops); i++)
pr_alert(" %s", torture_ops[i]->name);
pr_alert("\n");
torture_init_end();
return -EINVAL;
}
if (cxt.cur_ops->init)
cxt.cur_ops->init(); /* no "goto unwind" prior to this point!!! */
if (nwriters_stress >= 0)
cxt.nrealwriters_stress = nwriters_stress;
else
cxt.nrealwriters_stress = 2 * num_online_cpus();
#ifdef CONFIG_DEBUG_MUTEXES
if (strncmp(torture_type, "mutex", 5) == 0)
cxt.debug_lock = true;
#endif
#ifdef CONFIG_DEBUG_SPINLOCK
if ((strncmp(torture_type, "spin", 4) == 0) ||
(strncmp(torture_type, "rw_lock", 7) == 0))
cxt.debug_lock = true;
#endif
/* Initialize the statistics so that each run gets its own numbers. */
lock_is_write_held = 0;
cxt.lwsa = kmalloc(sizeof(*cxt.lwsa) * cxt.nrealwriters_stress, GFP_KERNEL);
if (cxt.lwsa == NULL) {
VERBOSE_TOROUT_STRING("cxt.lwsa: Out of memory");
firsterr = -ENOMEM;
goto unwind;
}
for (i = 0; i < cxt.nrealwriters_stress; i++) {
cxt.lwsa[i].n_lock_fail = 0;
cxt.lwsa[i].n_lock_acquired = 0;
}
if (cxt.cur_ops->readlock) {
if (nreaders_stress >= 0)
cxt.nrealreaders_stress = nreaders_stress;
else {
/*
* By default distribute evenly the number of
* readers and writers. We still run the same number
* of threads as the writer-only locks default.
*/
if (nwriters_stress < 0) /* user doesn't care */
cxt.nrealwriters_stress = num_online_cpus();
cxt.nrealreaders_stress = cxt.nrealwriters_stress;
}
lock_is_read_held = 0;
cxt.lrsa = kmalloc(sizeof(*cxt.lrsa) * cxt.nrealreaders_stress, GFP_KERNEL);
if (cxt.lrsa == NULL) {
VERBOSE_TOROUT_STRING("cxt.lrsa: Out of memory");
firsterr = -ENOMEM;
kfree(cxt.lwsa);
goto unwind;
}
for (i = 0; i < cxt.nrealreaders_stress; i++) {
cxt.lrsa[i].n_lock_fail = 0;
cxt.lrsa[i].n_lock_acquired = 0;
}
}
lock_torture_print_module_parms(cxt.cur_ops, "Start of test");
/* Prepare torture context. */
if (onoff_interval > 0) {
firsterr = torture_onoff_init(onoff_holdoff * HZ,
onoff_interval * HZ);
if (firsterr)
goto unwind;
}
if (shuffle_interval > 0) {
firsterr = torture_shuffle_init(shuffle_interval);
if (firsterr)
goto unwind;
}
if (shutdown_secs > 0) {
firsterr = torture_shutdown_init(shutdown_secs,
lock_torture_cleanup);
if (firsterr)
goto unwind;
}
if (stutter > 0) {
firsterr = torture_stutter_init(stutter);
if (firsterr)
goto unwind;
}
writer_tasks = kzalloc(cxt.nrealwriters_stress * sizeof(writer_tasks[0]),
GFP_KERNEL);
if (writer_tasks == NULL) {
VERBOSE_TOROUT_ERRSTRING("writer_tasks: Out of memory");
firsterr = -ENOMEM;
goto unwind;
}
if (cxt.cur_ops->readlock) {
reader_tasks = kzalloc(cxt.nrealreaders_stress * sizeof(reader_tasks[0]),
GFP_KERNEL);
if (reader_tasks == NULL) {
VERBOSE_TOROUT_ERRSTRING("reader_tasks: Out of memory");
firsterr = -ENOMEM;
goto unwind;
}
}
/*
* Create the kthreads and start torturing (oh, those poor little locks).
*
* TODO: Note that we interleave writers with readers, giving writers a
* slight advantage, by creating its kthread first. This can be modified
* for very specific needs, or even let the user choose the policy, if
* ever wanted.
*/
for (i = 0, j = 0; i < cxt.nrealwriters_stress ||
j < cxt.nrealreaders_stress; i++, j++) {
if (i >= cxt.nrealwriters_stress)
goto create_reader;
/* Create writer. */
firsterr = torture_create_kthread(lock_torture_writer, &cxt.lwsa[i],
writer_tasks[i]);
if (firsterr)
goto unwind;
create_reader:
if (cxt.cur_ops->readlock == NULL || (j >= cxt.nrealreaders_stress))
continue;
/* Create reader. */
firsterr = torture_create_kthread(lock_torture_reader, &cxt.lrsa[j],
reader_tasks[j]);
if (firsterr)
goto unwind;
}
if (stat_interval > 0) {
firsterr = torture_create_kthread(lock_torture_stats, NULL,
stats_task);
if (firsterr)
goto unwind;
}
torture_init_end();
return 0;
unwind:
torture_init_end();
lock_torture_cleanup();
return firsterr;
}
module_init(lock_torture_init);
module_exit(lock_torture_cleanup);
C:\Users\Admin\Desktop\linux-4.2.y-new\linux-4.2.y\kernel\/locking/Makefile
obj-y += mutex.o semaphore.o rwsem.o
ifdef CONFIG_FUNCTION_TRACER
CFLAGS_REMOVE_lockdep.o = $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_lockdep_proc.o = $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_mutex-debug.o = $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_rtmutex-debug.o = $(CC_FLAGS_FTRACE)
endif
obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o
obj-$(CONFIG_LOCKDEP) += lockdep.o
ifeq ($(CONFIG_PROC_FS),y)
obj-$(CONFIG_LOCKDEP) += lockdep_proc.o
endif
obj-$(CONFIG_SMP) += spinlock.o
obj-$(CONFIG_LOCK_SPIN_ON_OWNER) += osq_lock.o
obj-$(CONFIG_SMP) += lglock.o
obj-$(CONFIG_PROVE_LOCKING) += spinlock.o
obj-$(CONFIG_QUEUED_SPINLOCKS) += qspinlock.o
obj-$(CONFIG_RT_MUTEXES) += rtmutex.o
obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o
obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o
obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o
obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o
obj-$(CONFIG_PERCPU_RWSEM) += percpu-rwsem.o
obj-$(CONFIG_QUEUED_RWLOCKS) += qrwlock.o
obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o
C:\Users\Admin\Desktop\linux-4.2.y-new\linux-4.2.y\kernel\/locking/mcs_spinlock.h
/*
* MCS lock defines
*
* This file contains the main data structure and API definitions of MCS lock.
*
* The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spin-lock
* with the desirable properties of being fair, and with each cpu trying
* to acquire the lock spinning on a local variable.
* It avoids expensive cache bouncings that common test-and-set spin-lock
* implementations incur.
*/
#ifndef __LINUX_MCS_SPINLOCK_H
#define __LINUX_MCS_SPINLOCK_H
#include <asm/mcs_spinlock.h>
struct mcs_spinlock {
struct mcs_spinlock *next;
int locked; /* 1 if lock acquired */
int count; /* nesting count, see qspinlock.c */
};
#ifndef arch_mcs_spin_lock_contended
/*
* Using smp_load_acquire() provides a memory barrier that ensures
* subsequent operations happen after the lock is acquired.
*/
#define arch_mcs_spin_lock_contended(l) \
do { \
while (!(smp_load_acquire(l))) \
cpu_relax_lowlatency(); \
} while (0)
#endif
#ifndef arch_mcs_spin_unlock_contended
/*
* smp_store_release() provides a memory barrier to ensure all
* operations in the critical section has been completed before
* unlocking.
*/
#define arch_mcs_spin_unlock_contended(l) \
smp_store_release((l), 1)
#endif
/*
* Note: the smp_load_acquire/smp_store_release pair is not
* sufficient to form a full memory barrier across
* cpus for many architectures (except x86) for mcs_unlock and mcs_lock.
* For applications that need a full barrier across multiple cpus
* with mcs_unlock and mcs_lock pair, smp_mb__after_unlock_lock() should be
* used after mcs_lock.
*/
/*
* In order to acquire the lock, the caller should declare a local node and
* pass a reference of the node to this function in addition to the lock.
* If the lock has already been acquired, then this will proceed to spin
* on this node->locked until the previous lock holder sets the node->locked
* in mcs_spin_unlock().
*/
static inline
void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
{
struct mcs_spinlock *prev;
/* Init node */
node->locked = 0;
node->next = NULL;
prev = xchg(lock, node);
if (likely(prev == NULL)) {
/*
* Lock acquired, don't need to set node->locked to 1. Threads
* only spin on its own node->locked value for lock acquisition.
* However, since this thread can immediately acquire the lock
* and does not proceed to spin on its own node->locked, this
* value won't be used. If a debug mode is needed to
* audit lock status, then set node->locked value here.
*/
return;
}
WRITE_ONCE(prev->next, node);
/* Wait until the lock holder passes the lock down. */
arch_mcs_spin_lock_contended(&node->locked);
}
/*
* Releases the lock. The caller should pass in the corresponding node that
* was used to acquire the lock.
*/
static inline
void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
{
struct mcs_spinlock *next = READ_ONCE(node->next);
if (likely(!next)) {
/*
* Release the lock by setting it to NULL
*/
if (likely(cmpxchg(lock, node, NULL) == node))
return;
/* Wait until the next pointer is set */
while (!(next = READ_ONCE(node->next)))
cpu_relax_lowlatency();
}
/* Pass lock to next waiter. */
arch_mcs_spin_unlock_contended(&next->locked);
}
#endif /* __LINUX_MCS_SPINLOCK_H */
C:\Users\Admin\Desktop\linux-4.2.y-new\linux-4.2.y\kernel\/locking/mutex-debug.c
/*
* kernel/mutex-debug.c
*
* Debugging code for mutexes
*
* Started by Ingo Molnar:
*
* Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
*
* lock debugging, locking tree, deadlock detection started by:
*
* Copyright (C) 2004, LynuxWorks, Inc., Igor Manyilov, Bill Huey
* Released under the General Public License (GPL).
*/
#include <linux/mutex.h>
#include <linux/delay.h>
#include <linux/export.h>
#include <linux/poison.h>
#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/kallsyms.h>
#include <linux/interrupt.h>
#include <linux/debug_locks.h>
#include "mutex-debug.h"
/*
* Must be called with lock->wait_lock held.
*/
void debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter)
{
memset(waiter, MUTEX_DEBUG_INIT, sizeof(*waiter));
waiter->magic = waiter;
INIT_LIST_HEAD(&waiter->list);
}
void debug_mutex_wake_waiter(struct mutex *lock, struct mutex_waiter *waiter)
{
SMP_DEBUG_LOCKS_WARN_ON(!spin_is_locked(&lock->wait_lock));
DEBUG_LOCKS_WARN_ON(list_empty(&lock->wait_list));
DEBUG_LOCKS_WARN_ON(waiter->magic != waiter);
DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list));
}
void debug_mutex_free_waiter(struct mutex_waiter *waiter)
{
DEBUG_LOCKS_WARN_ON(!list_empty(&waiter->list));
memset(waiter, MUTEX_DEBUG_FREE, sizeof(*waiter));
}
void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter,
struct thread_info *ti)
{
SMP_DEBUG_LOCKS_WARN_ON(!spin_is_locked(&lock->wait_lock));
/* Mark the current thread as blocked on the lock: */
ti->task->blocked_on = waiter;
}
void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
struct thread_info *ti)
{
DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list));
DEBUG_LOCKS_WARN_ON(waiter->task != ti->task);
DEBUG_LOCKS_WARN_ON(ti->task->blocked_on != waiter);
ti->task->blocked_on = NULL;
list_del_init(&waiter->list);
waiter->task = NULL;
}
void debug_mutex_unlock(struct mutex *lock)
{
if (likely(debug_locks)) {
DEBUG_LOCKS_WARN_ON(lock->magic != lock);
if (!lock->owner)
DEBUG_LOCKS_WARN_ON(!lock->owner);
else
DEBUG_LOCKS_WARN_ON(lock->owner != current);
DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next);
}
/*
* __mutex_slowpath_needs_to_unlock() is explicitly 0 for debug
* mutexes so that we can do it here after we've verified state.
*/
mutex_clear_owner(lock);
atomic_set(&lock->count, 1);
}
void debug_mutex_init(struct mutex *lock, const char *name,
struct lock_class_key *key)
{
#ifdef CONFIG_DEBUG_LOCK_ALLOC
/*
* Make sure we are not reinitializing a held lock:
*/
debug_check_no_locks_freed((void *)lock, sizeof(*lock));
lockdep_init_map(&lock->dep_map, name, key, 0);
#endif
lock->magic = lock;
}
/***
* mutex_destroy - mark a mutex unusable
* @lock: the mutex to be destroyed
*
* This function marks the mutex uninitialized, and any subsequent
* use of the mutex is forbidden. The mutex must not be locked when
* this function is called.
*/
void mutex_destroy(struct mutex *lock)
{
DEBUG_LOCKS_WARN_ON(mutex_is_locked(lock));
lock->magic = NULL;
}
EXPORT_SYMBOL_GPL(mutex_destroy);
C:\Users\Admin\Desktop\linux-4.2.y-new\linux-4.2.y\kernel\/locking/mutex-debug.h
/*
* Mutexes: blocking mutual exclusion locks
*
* started by Ingo Molnar:
*
* Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
*
* This file contains mutex debugging related internal declarations,
* prototypes and inline functions, for the CONFIG_DEBUG_MUTEXES case.
* More details are in kernel/mutex-debug.c.
*/
/*
* This must be called with lock->wait_lock held.
*/
extern void debug_mutex_lock_common(struct mutex *lock,
struct mutex_waiter *waiter);
extern void debug_mutex_wake_waiter(struct mutex *lock,
struct mutex_waiter *waiter);
extern void debug_mutex_free_waiter(struct mutex_waiter *waiter);
extern void debug_mutex_add_waiter(struct mutex *lock,
struct mutex_waiter *waiter,
struct thread_info *ti);
extern void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
struct thread_info *ti);
extern void debug_mutex_unlock(struct mutex *lock);
extern void debug_mutex_init(struct mutex *lock, const char *name,
struct lock_class_key *key);
static inline void mutex_set_owner(struct mutex *lock)
{
lock->owner = current;
}
static inline void mutex_clear_owner(struct mutex *lock)
{
lock->owner = NULL;
}
#define spin_lock_mutex(lock, flags) \
do { \
struct mutex *l = container_of(lock, struct mutex, wait_lock); \
\
DEBUG_LOCKS_WARN_ON(in_interrupt()); \
local_irq_save(flags); \
arch_spin_lock(&(lock)->rlock.raw_lock);\
DEBUG_LOCKS_WARN_ON(l->magic != l); \
} while (0)
#define spin_unlock_mutex(lock, flags) \
do { \
arch_spin_unlock(&(lock)->rlock.raw_lock); \
local_irq_restore(flags); \
preempt_check_resched(); \
} while (0)
C:\Users\Admin\Desktop\linux-4.2.y-new\linux-4.2.y\kernel\/locking/mutex.c
/*
* kernel/locking/mutex.c
*
* Mutexes: blocking mutual exclusion locks
*
* Started by Ingo Molnar:
*
* Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
*
* Many thanks to Arjan van de Ven, Thomas Gleixner, Steven Rostedt and
* David Howells for suggestions and improvements.
*
* - Adaptive spinning for mutexes by Peter Zijlstra. (Ported to mainline
* from the -rt tree, where it was originally implemented for rtmutexes
* by Steven Rostedt, based on work by Gregory Haskins, Peter Morreale
* and Sven Dietrich.
*
* Also see Documentation/locking/mutex-design.txt.
*/
#include <linux/mutex.h>
#include <linux/ww_mutex.h>
#include <linux/sched.h>
#include <linux/sched/rt.h>
#include <linux/export.h>
#include <linux/spinlock.h>
#include <linux/interrupt.h>
#include <linux/debug_locks.h>
#include <linux/osq_lock.h>
/*
* In the DEBUG case we are using the "NULL fastpath" for mutexes,
* which forces all calls into the slowpath:
*/
#ifdef CONFIG_DEBUG_MUTEXES
# include "mutex-debug.h"
# include <asm-generic/mutex-null.h>
/*
* Must be 0 for the debug case so we do not do the unlock outside of the
* wait_lock region. debug_mutex_unlock() will do the actual unlock in this
* case.
*/
# undef __mutex_slowpath_needs_to_unlock
# define __mutex_slowpath_needs_to_unlock() 0
#else
# include "mutex.h"
# include <asm/mutex.h>
#endif
void
__mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
{
atomic_set(&lock->count, 1);
spin_lock_init(&lock->wait_lock);
INIT_LIST_HEAD(&lock->wait_list);
mutex_clear_owner(lock);
#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
osq_lock_init(&lock->osq);
#endif
debug_mutex_init(lock, name, key);
}
EXPORT_SYMBOL(__mutex_init);
#ifndef CONFIG_DEBUG_LOCK_ALLOC
/*
* We split the mutex lock/unlock logic into separate fastpath and
* slowpath functions, to reduce the register pressure on the fastpath.
* We also put the fastpath first in the kernel image, to make sure the
* branch is predicted by the CPU as default-untaken.
*/
__visible void __sched __mutex_lock_slowpath(atomic_t *lock_count);
/**
* mutex_lock - acquire the mutex
* @lock: the mutex to be acquired
*
* Lock the mutex exclusively for this task. If the mutex is not
* available right now, it will sleep until it can get it.
*
* The mutex must later on be released by the same task that
* acquired it. Recursive locking is not allowed. The task
* may not exit without first unlocking the mutex. Also, kernel
* memory where the mutex resides must not be freed with
* the mutex still locked. The mutex must first be initialized
* (or statically defined) before it can be locked. memset()-ing
* the mutex to 0 is not allowed.
*
* ( The CONFIG_DEBUG_MUTEXES .config option turns on debugging
* checks that will enforce the restrictions and will also do
* deadlock debugging. )
*
* This function is similar to (but not equivalent to) down().
*/
void __sched mutex_lock(struct mutex *lock)
{
might_sleep();
/*
* The locking fastpath is the 1->0 transition from
* 'unlocked' into 'locked' state.
*/
__mutex_fastpath_lock(&lock->count, __mutex_lock_slowpath);
mutex_set_owner(lock);
}
EXPORT_SYMBOL(mutex_lock);
#endif
static __always_inline void ww_mutex_lock_acquired(struct ww_mutex *ww,
struct ww_acquire_ctx *ww_ctx)
{
#ifdef CONFIG_DEBUG_MUTEXES
/*
* If this WARN_ON triggers, you used ww_mutex_lock to acquire,
* but released with a normal mutex_unlock in this call.
*
* This should never happen, always use ww_mutex_unlock.
*/
DEBUG_LOCKS_WARN_ON(ww->ctx);
/*
* Not quite done after calling ww_acquire_done() ?
*/
DEBUG_LOCKS_WARN_ON(ww_ctx->done_acquire);
if (ww_ctx->contending_lock) {
/*
* After -EDEADLK you tried to
* acquire a different ww_mutex? Bad!
*/
DEBUG_LOCKS_WARN_ON(ww_ctx->contending_lock != ww);
/*
* You called ww_mutex_lock after receiving -EDEADLK,
* but 'forgot' to unlock everything else first?
*/
DEBUG_LOCKS_WARN_ON(ww_ctx->acquired > 0);
ww_ctx->contending_lock = NULL;
}
/*
* Naughty, using a different class will lead to undefined behavior!
*/
DEBUG_LOCKS_WARN_ON(ww_ctx->ww_class != ww->ww_class);
#endif
ww_ctx->acquired++;
}
/*
* After acquiring lock with fastpath or when we lost out in contested
* slowpath, set ctx and wake up any waiters so they can recheck.
*
* This function is never called when CONFIG_DEBUG_LOCK_ALLOC is set,
* as the fastpath and opportunistic spinning are disabled in that case.
*/
static __always_inline void
ww_mutex_set_context_fastpath(struct ww_mutex *lock,
struct ww_acquire_ctx *ctx)
{
unsigned long flags;
struct mutex_waiter *cur;
ww_mutex_lock_acquired(lock, ctx);
lock->ctx = ctx;
/*
* The lock->ctx update should be visible on all cores before
* the atomic read is done, otherwise contended waiters might be
* missed. The contended waiters will either see ww_ctx == NULL
* and keep spinning, or it will acquire wait_lock, add itself
* to waiter list and sleep.
*/
smp_mb(); /* ^^^ */
/*
* Check if lock is contended, if not there is nobody to wake up
*/
if (likely(atomic_read(&lock->base.count) == 0))
return;
/*
* Uh oh, we raced in fastpath, wake up everyone in this case,
* so they can see the new lock->ctx.
*/
spin_lock_mutex(&lock->base.wait_lock, flags);
list_for_each_entry(cur, &lock->base.wait_list, list) {
debug_mutex_wake_waiter(&lock->base, cur);
wake_up_process(cur->task);
}
spin_unlock_mutex(&lock->base.wait_lock, flags);
}
/*
* After acquiring lock in the slowpath set ctx and wake up any
* waiters so they can recheck.
*
* Callers must hold the mutex wait_lock.
*/
static __always_inline void
ww_mutex_set_context_slowpath(struct ww_mutex *lock,
struct ww_acquire_ctx *ctx)
{
struct mutex_waiter *cur;
ww_mutex_lock_acquired(lock, ctx);
lock->ctx = ctx;
/*
* Give any possible sleeping processes the chance to wake up,
* so they can recheck if they have to back off.
*/
list_for_each_entry(cur, &lock->base.wait_list, list) {
debug_mutex_wake_waiter(&lock->base, cur);
wake_up_process(cur->task);
}
}
#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
/*
* Look out! "owner" is an entirely speculative pointer
* access and not reliable.
*/
static noinline
bool mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner)
{
bool ret = true;
rcu_read_lock();
while (lock->owner == owner) {
/*
* Ensure we emit the owner->on_cpu, dereference _after_
* checking lock->owner still matches owner. If that fails,
* owner might point to freed memory. If it still matches,
* the rcu_read_lock() ensures the memory stays valid.
*/
barrier();
if (!owner->on_cpu || need_resched()) {
ret = false;
break;
}
cpu_relax_lowlatency();
}
rcu_read_unlock();
return ret;
}
/*
* Initial check for entering the mutex spinning loop
*/
static inline int mutex_can_spin_on_owner(struct mutex *lock)
{
struct task_struct *owner;
int retval = 1;
if (need_resched())
return 0;
rcu_read_lock();
owner = READ_ONCE(lock->owner);
if (owner)
retval = owner->on_cpu;
rcu_read_unlock();
/*
* if lock->owner is not set, the mutex owner may have just acquired
* it and not set the owner yet or the mutex has been released.
*/
return retval;
}
/*
* Atomically try to take the lock when it is available
*/
static inline bool mutex_try_to_acquire(struct mutex *lock)
{
return !mutex_is_locked(lock) &&
(atomic_cmpxchg(&lock->count, 1, 0) == 1);
}
/*
* Optimistic spinning.
*
* We try to spin for acquisition when we find that the lock owner
* is currently running on a (different) CPU and while we don't
* need to reschedule. The rationale is that if the lock owner is
* running, it is likely to release the lock soon.
*
* Since this needs the lock owner, and this mutex implementation
* doesn't track the owner atomically in the lock field, we need to
* track it non-atomically.
*
* We can't do this for DEBUG_MUTEXES because that relies on wait_lock
* to serialize everything.
*
* The mutex spinners are queued up using MCS lock so that only one
* spinner can compete for the mutex. However, if mutex spinning isn't
* going to happen, there is no point in going through the lock/unlock
* overhead.
*
* Returns true when the lock was taken, otherwise false, indicating
* that we need to jump to the slowpath and sleep.
*/
static bool mutex_optimistic_spin(struct mutex *lock,
struct ww_acquire_ctx *ww_ctx, const bool use_ww_ctx)
{
struct task_struct *task = current;
if (!mutex_can_spin_on_owner(lock))
goto done;
/*
* In order to avoid a stampede of mutex spinners trying to
* acquire the mutex all at once, the spinners need to take a
* MCS (queued) lock first before spinning on the owner field.
*/
if (!osq_lock(&lock->osq))
goto done;
while (true) {
struct task_struct *owner;
if (use_ww_ctx && ww_ctx->acquired > 0) {
struct ww_mutex *ww;
ww = container_of(lock, struct ww_mutex, base);
/*
* If ww->ctx is set the contents are undefined, only
* by acquiring wait_lock there is a guarantee that
* they are not invalid when reading.
*
* As such, when deadlock detection needs to be
* performed the optimistic spinning cannot be done.
*/
if (READ_ONCE(ww->ctx))
break;
}
/*
* If there's an owner, wait for it to either
* release the lock or go to sleep.
*/
owner = READ_ONCE(lock->owner);
if (owner && !mutex_spin_on_owner(lock, owner))
break;
/* Try to acquire the mutex if it is unlocked. */
if (mutex_try_to_acquire(lock)) {
lock_acquired(&lock->dep_map, ip);
if (use_ww_ctx) {
struct ww_mutex *ww;
ww = container_of(lock, struct ww_mutex, base);
ww_mutex_set_context_fastpath(ww, ww_ctx);
}
mutex_set_owner(lock);
osq_unlock(&lock->osq);
return true;
}
/*
* When there's no owner, we might have preempted between the
* owner acquiring the lock and setting the owner field. If
* we're an RT task that will live-lock because we won't let
* the owner complete.
*/
if (!owner && (need_resched() || rt_task(task)))
break;
/*
* The cpu_relax() call is a compiler barrier which forces
* everything in this loop to be re-loaded. We don't need
* memory barriers as we'll eventually observe the right
* values at the cost of a few extra spins.
*/
cpu_relax_lowlatency();
}
osq_unlock(&lock->osq);
done:
/*
* If we fell out of the spin path because of need_resched(),
* reschedule now, before we try-lock the mutex. This avoids getting
* scheduled out right after we obtained the mutex.
*/
if (need_resched()) {
/*
* We _should_ have TASK_RUNNING here, but just in case
* we do not, make it so, otherwise we might get stuck.
*/
__set_current_state(TASK_RUNNING);
schedule_preempt_disabled();
}
return false;
}
#else
static bool mutex_optimistic_spin(struct mutex *lock,
struct ww_acquire_ctx *ww_ctx, const bool use_ww_ctx)
{
return false;
}
#endif
__visible __used noinline
void __sched __mutex_unlock_slowpath(atomic_t *lock_count);
/**
* mutex_unlock - release the mutex
* @lock: the mutex to be released
*
* Unlock a mutex that has been locked by this task previously.
*
* This function must not be used in interrupt context. Unlocking
* of a not locked mutex is not allowed.
*
* This function is similar to (but not equivalent to) up().
*/
void __sched mutex_unlock(struct mutex *lock)
{
/*
* The unlocking fastpath is the 0->1 transition from 'locked'
* into 'unlocked' state:
*/
#ifndef CONFIG_DEBUG_MUTEXES
/*
* When debugging is enabled we must not clear the owner before time,
* the slow path will always be taken, and that clears the owner field
* after verifying that it was indeed current.
*/
mutex_clear_owner(lock);
#endif
__mutex_fastpath_unlock(&lock->count, __mutex_unlock_slowpath);
}
EXPORT_SYMBOL(mutex_unlock);
/**
* ww_mutex_unlock - release the w/w mutex
* @lock: the mutex to be released
*
* Unlock a mutex that has been locked by this task previously with any of the
* ww_mutex_lock* functions (with or without an acquire context). It is
* forbidden to release the locks after releasing the acquire context.
*
* This function must not be used in interrupt context. Unlocking
* of a unlocked mutex is not allowed.
*/
void __sched ww_mutex_unlock(struct ww_mutex *lock)
{
/*
* The unlocking fastpath is the 0->1 transition from 'locked'
* into 'unlocked' state:
*/
if (lock->ctx) {
#ifdef CONFIG_DEBUG_MUTEXES
DEBUG_LOCKS_WARN_ON(!lock->ctx->acquired);
#endif
if (lock->ctx->acquired > 0)
lock->ctx->acquired--;
lock->ctx = NULL;
}
#ifndef CONFIG_DEBUG_MUTEXES
/*
* When debugging is enabled we must not clear the owner before time,
* the slow path will always be taken, and that clears the owner field
* after verifying that it was indeed current.
*/
mutex_clear_owner(&lock->base);
#endif
__mutex_fastpath_unlock(&lock->base.count, __mutex_unlock_slowpath);
}
EXPORT_SYMBOL(ww_mutex_unlock);
static inline int __sched
__ww_mutex_lock_check_stamp(struct mutex *lock, struct ww_acquire_ctx *ctx)
{
struct ww_mutex *ww = container_of(lock, struct ww_mutex, base);
struct ww_acquire_ctx *hold_ctx = READ_ONCE(ww->ctx);
if (!hold_ctx)
return 0;
if (unlikely(ctx == hold_ctx))
return -EALREADY;
if (ctx->stamp - hold_ctx->stamp <= LONG_MAX &&
(ctx->stamp != hold_ctx->stamp || ctx > hold_ctx)) {
#ifdef CONFIG_DEBUG_MUTEXES
DEBUG_LOCKS_WARN_ON(ctx->contending_lock);
ctx->contending_lock = ww;
#endif
return -EDEADLK;
}
return 0;
}
/*
* Lock a mutex (possibly interruptible), slowpath:
*/
static __always_inline int __sched
__mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
struct lockdep_map *nest_lock, unsigned long ip,
struct ww_acquire_ctx *ww_ctx, const bool use_ww_ctx)
{
struct task_struct *task = current;
struct mutex_waiter waiter;
unsigned long flags;
int ret;
preempt_disable();
mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip);
if (mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx)) {
/* got the lock, yay! */
preempt_enable();
return 0;
}
spin_lock_mutex(&lock->wait_lock, flags);
/*
* Once more, try to acquire the lock. Only try-lock the mutex if
* it is unlocked to reduce unnecessary xchg() operations.
*/
if (!mutex_is_locked(lock) && (atomic_xchg(&lock->count, 0) == 1))
goto skip_wait;
debug_mutex_lock_common(lock, &waiter);
debug_mutex_add_waiter(lock, &waiter, task_thread_info(task));
/* add waiting tasks to the end of the waitqueue (FIFO): */
list_add_tail(&waiter.list, &lock->wait_list);
waiter.task = task;
lock_contended(&lock->dep_map, ip);
for (;;) {
/*
* Lets try to take the lock again - this is needed even if
* we get here for the first time (shortly after failing to
* acquire the lock), to make sure that we get a wakeup once
* it's unlocked. Later on, if we sleep, this is the
* operation that gives us the lock. We xchg it to -1, so
* that when we release the lock, we properly wake up the
* other waiters. We only attempt the xchg if the count is
* non-negative in order to avoid unnecessary xchg operations:
*/
if (atomic_read(&lock->count) >= 0 &&
(atomic_xchg(&lock->count, -1) == 1))
break;
/*
* got a signal? (This code gets eliminated in the
* TASK_UNINTERRUPTIBLE case.)
*/
if (unlikely(signal_pending_state(state, task))) {
ret = -EINTR;
goto err;
}
if (use_ww_ctx && ww_ctx->acquired > 0) {
ret = __ww_mutex_lock_check_stamp(lock, ww_ctx);
if (ret)
goto err;
}
__set_task_state(task, state);
/* didn't get the lock, go to sleep: */
spin_unlock_mutex(&lock->wait_lock, flags);
schedule_preempt_disabled();
spin_lock_mutex(&lock->wait_lock, flags);
}
__set_task_state(task, TASK_RUNNING);
mutex_remove_waiter(lock, &waiter, current_thread_info());
/* set it to 0 if there are no waiters left: */
if (likely(list_empty(&lock->wait_list)))
atomic_set(&lock->count, 0);
debug_mutex_free_waiter(&waiter);
skip_wait:
/* got the lock - cleanup and rejoice! */
lock_acquired(&lock->dep_map, ip);
mutex_set_owner(lock);
if (use_ww_ctx) {
struct ww_mutex *ww = container_of(lock, struct ww_mutex, base);
ww_mutex_set_context_slowpath(ww, ww_ctx);
}
spin_unlock_mutex(&lock->wait_lock, flags);
preempt_enable();
return 0;
err:
mutex_remove_waiter(lock, &waiter, task_thread_info(task));
spin_unlock_mutex(&lock->wait_lock, flags);
debug_mutex_free_waiter(&waiter);
mutex_release(&lock->dep_map, 1, ip);
preempt_enable();
return ret;
}
#ifdef CONFIG_DEBUG_LOCK_ALLOC
void __sched
mutex_lock_nested(struct mutex *lock, unsigned int subclass)
{
might_sleep();
__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE,
subclass, NULL, _RET_IP_, NULL, 0);
}
EXPORT_SYMBOL_GPL(mutex_lock_nested);
void __sched
_mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest)
{
might_sleep();
__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE,
0, nest, _RET_IP_, NULL, 0);
}
EXPORT_SYMBOL_GPL(_mutex_lock_nest_lock);
int __sched
mutex_lock_killable_nested(struct mutex *lock, unsigned int subclass)
{
might_sleep();
return __mutex_lock_common(lock, TASK_KILLABLE,
subclass, NULL, _RET_IP_, NULL, 0);
}
EXPORT_SYMBOL_GPL(mutex_lock_killable_nested);
int __sched
mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass)
{
might_sleep();
return __mutex_lock_common(lock, TASK_INTERRUPTIBLE,
subclass, NULL, _RET_IP_, NULL, 0);
}
EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested);
static inline int
ww_mutex_deadlock_injection(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
{
#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH
unsigned tmp;
if (ctx->deadlock_inject_countdown-- == 0) {
tmp = ctx->deadlock_inject_interval;
if (tmp > UINT_MAX/4)
tmp = UINT_MAX;
else
tmp = tmp*2 + tmp + tmp/2;
ctx->deadlock_inject_interval = tmp;
ctx->deadlock_inject_countdown = tmp;
ctx->contending_lock = lock;
ww_mutex_unlock(lock);
return -EDEADLK;
}
#endif
return 0;
}
int __sched
__ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
{
int ret;
might_sleep();
ret = __mutex_lock_common(&lock->base, TASK_UNINTERRUPTIBLE,
0, &ctx->dep_map, _RET_IP_, ctx, 1);
if (!ret && ctx->acquired > 1)
return ww_mutex_deadlock_injection(lock, ctx);
return ret;
}
EXPORT_SYMBOL_GPL(__ww_mutex_lock);
int __sched
__ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
{
int ret;
might_sleep();
ret = __mutex_lock_common(&lock->base, TASK_INTERRUPTIBLE,
0, &ctx->dep_map, _RET_IP_, ctx, 1);
if (!ret && ctx->acquired > 1)
return ww_mutex_deadlock_injection(lock, ctx);
return ret;
}
EXPORT_SYMBOL_GPL(__ww_mutex_lock_interruptible);
#endif
/*
* Release the lock, slowpath:
*/
static inline void
__mutex_unlock_common_slowpath(struct mutex *lock, int nested)
{
unsigned long flags;
/*
* As a performance measurement, release the lock before doing other
* wakeup related duties to follow. This allows other tasks to acquire
* the lock sooner, while still handling cleanups in past unlock calls.
* This can be done as we do not enforce strict equivalence between the
* mutex counter and wait_list.
*
*
* Some architectures leave the lock unlocked in the fastpath failure
* case, others need to leave it locked. In the later case we have to
* unlock it here - as the lock counter is currently 0 or negative.
*/
if (__mutex_slowpath_needs_to_unlock())
atomic_set(&lock->count, 1);
spin_lock_mutex(&lock->wait_lock, flags);
mutex_release(&lock->dep_map, nested, _RET_IP_);
debug_mutex_unlock(lock);
if (!list_empty(&lock->wait_list)) {
/* get the first entry from the wait-list: */
struct mutex_waiter *waiter =
list_entry(lock->wait_list.next,
struct mutex_waiter, list);
debug_mutex_wake_waiter(lock, waiter);
wake_up_process(waiter->task);
}
spin_unlock_mutex(&lock->wait_lock, flags);
}
/*
* Release the lock, slowpath:
*/
__visible void
__mutex_unlock_slowpath(atomic_t *lock_count)
{
struct mutex *lock = container_of(lock_count, struct mutex, count);
__mutex_unlock_common_slowpath(lock, 1);
}
#ifndef CONFIG_DEBUG_LOCK_ALLOC
/*
* Here come the less common (and hence less performance-critical) APIs:
* mutex_lock_interruptible() and mutex_trylock().
*/
static noinline int __sched
__mutex_lock_killable_slowpath(struct mutex *lock);
static noinline int __sched
__mutex_lock_interruptible_slowpath(struct mutex *lock);
/**
* mutex_lock_interruptible - acquire the mutex, interruptible
* @lock: the mutex to be acquired
*
* Lock the mutex like mutex_lock(), and return 0 if the mutex has
* been acquired or sleep until the mutex becomes available. If a
* signal arrives while waiting for the lock then this function
* returns -EINTR.
*
* This function is similar to (but not equivalent to) down_interruptible().
*/
int __sched mutex_lock_interruptible(struct mutex *lock)
{
int ret;
might_sleep();
ret = __mutex_fastpath_lock_retval(&lock->count);
if (likely(!ret)) {
mutex_set_owner(lock);
return 0;
} else
return __mutex_lock_interruptible_slowpath(lock);
}
EXPORT_SYMBOL(mutex_lock_interruptible);
int __sched mutex_lock_killable(struct mutex *lock)
{
int ret;
might_sleep();
ret = __mutex_fastpath_lock_retval(&lock->count);
if (likely(!ret)) {
mutex_set_owner(lock);
return 0;
} else
return __mutex_lock_killable_slowpath(lock);
}
EXPORT_SYMBOL(mutex_lock_killable);
__visible void __sched
__mutex_lock_slowpath(atomic_t *lock_count)
{
struct mutex *lock = container_of(lock_count, struct mutex, count);
__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0,
NULL, _RET_IP_, NULL, 0);
}
static noinline int __sched
__mutex_lock_killable_slowpath(struct mutex *lock)
{
return __mutex_lock_common(lock, TASK_KILLABLE, 0,
NULL, _RET_IP_, NULL, 0);
}
static noinline int __sched
__mutex_lock_interruptible_slowpath(struct mutex *lock)
{
return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0,
NULL, _RET_IP_, NULL, 0);
}
static noinline int __sched
__ww_mutex_lock_slowpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
{
return __mutex_lock_common(&lock->base, TASK_UNINTERRUPTIBLE, 0,
NULL, _RET_IP_, ctx, 1);
}
static noinline int __sched
__ww_mutex_lock_interruptible_slowpath(struct ww_mutex *lock,
struct ww_acquire_ctx *ctx)
{
return __mutex_lock_common(&lock->base, TASK_INTERRUPTIBLE, 0,
NULL, _RET_IP_, ctx, 1);
}
#endif
/*
* Spinlock based trylock, we take the spinlock and check whether we
* can get the lock:
*/
static inline int __mutex_trylock_slowpath(atomic_t *lock_count)
{
struct mutex *lock = container_of(lock_count, struct mutex, count);
unsigned long flags;
int prev;
/* No need to trylock if the mutex is locked. */
if (mutex_is_locked(lock))
return 0;
spin_lock_mutex(&lock->wait_lock, flags);
prev = atomic_xchg(&lock->count, -1);
if (likely(prev == 1)) {
mutex_set_owner(lock);
mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_);
}
/* Set it back to 0 if there are no waiters: */
if (likely(list_empty(&lock->wait_list)))
atomic_set(&lock->count, 0);
spin_unlock_mutex(&lock->wait_lock, flags);
return prev == 1;
}
/**
* mutex_trylock - try to acquire the mutex, without waiting
* @lock: the mutex to be acquired
*
* Try to acquire the mutex atomically. Returns 1 if the mutex
* has been acquired successfully, and 0 on contention.
*
* NOTE: this function follows the spin_trylock() convention, so
* it is negated from the down_trylock() return values! Be careful
* about this when converting semaphore users to mutexes.
*
* This function must not be used in interrupt context. The
* mutex must be released by the same task that acquired it.
*/
int __sched mutex_trylock(struct mutex *lock)
{
int ret;
ret = __mutex_fastpath_trylock(&lock->count, __mutex_trylock_slowpath);
if (ret)
mutex_set_owner(lock);
return ret;
}
EXPORT_SYMBOL(mutex_trylock);
#ifndef CONFIG_DEBUG_LOCK_ALLOC
int __sched
__ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
{
int ret;
might_sleep();
ret = __mutex_fastpath_lock_retval(&lock->base.count);
if (likely(!ret)) {
ww_mutex_set_context_fastpath(lock, ctx);
mutex_set_owner(&lock->base);
} else
ret = __ww_mutex_lock_slowpath(lock, ctx);
return ret;
}
EXPORT_SYMBOL(__ww_mutex_lock);
int __sched
__ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
{
int ret;
might_sleep();
ret = __mutex_fastpath_lock_retval(&lock->base.count);
if (likely(!ret)) {
ww_mutex_set_context_fastpath(lock, ctx);
mutex_set_owner(&lock->base);
} else
ret = __ww_mutex_lock_interruptible_slowpath(lock, ctx);
return ret;
}
EXPORT_SYMBOL(__ww_mutex_lock_interruptible);
#endif
/**
* atomic_dec_and_mutex_lock - return holding mutex if we dec to 0
* @cnt: the atomic which we are to dec
* @lock: the mutex to return holding if we dec to 0
*
* return true and hold lock if we dec to 0, return false otherwise
*/
int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock)
{
/* dec if we can't possibly hit 0 */
if (atomic_add_unless(cnt, -1, 1))
return 0;
/* we might hit 0, so take the lock */
mutex_lock(lock);
if (!atomic_dec_and_test(cnt)) {
/* when we actually did the dec, we didn't hit 0 */
mutex_unlock(lock);
return 0;
}
/* we hit 0, and we hold the lock */
return 1;
}
EXPORT_SYMBOL(atomic_dec_and_mutex_lock);
C:\Users\Admin\Desktop\linux-4.2.y-new\linux-4.2.y\kernel\/locking/mutex.h
/*
* Mutexes: blocking mutual exclusion locks
*
* started by Ingo Molnar:
*
* Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
*
* This file contains mutex debugging related internal prototypes, for the
* !CONFIG_DEBUG_MUTEXES case. Most of them are NOPs:
*/
#define spin_lock_mutex(lock, flags) \
do { spin_lock(lock); (void)(flags); } while (0)
#define spin_unlock_mutex(lock, flags) \
do { spin_unlock(lock); (void)(flags); } while (0)
#define mutex_remove_waiter(lock, waiter, ti) \
__list_del((waiter)->list.prev, (waiter)->list.next)
#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
static inline void mutex_set_owner(struct mutex *lock)
{
lock->owner = current;
}
static inline void mutex_clear_owner(struct mutex *lock)
{
lock->owner = NULL;
}
#else
static inline void mutex_set_owner(struct mutex *lock)
{
}
static inline void mutex_clear_owner(struct mutex *lock)
{
}
#endif
#define debug_mutex_wake_waiter(lock, waiter) do { } while (0)
#define debug_mutex_free_waiter(waiter) do { } while (0)
#define debug_mutex_add_waiter(lock, waiter, ti) do { } while (0)
#define debug_mutex_unlock(lock) do { } while (0)
#define debug_mutex_init(lock, name, key) do { } while (0)
static inline void
debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter)
{
}
C:\Users\Admin\Desktop\linux-4.2.y-new\linux-4.2.y\kernel\/locking/osq_lock.c
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/osq_lock.h>
/*
* An MCS like lock especially tailored for optimistic spinning for sleeping
* lock implementations (mutex, rwsem, etc).
*
* Using a single mcs node per CPU is safe because sleeping locks should not be
* called from interrupt context and we have preemption disabled while
* spinning.
*/
static DEFINE_PER_CPU_SHARED_ALIGNED(struct optimistic_spin_node, osq_node);
/*
* We use the value 0 to represent "no CPU", thus the encoded value
* will be the CPU number incremented by 1.
*/
static inline int encode_cpu(int cpu_nr)
{
return cpu_nr + 1;
}
static inline struct optimistic_spin_node *decode_cpu(int encoded_cpu_val)
{
int cpu_nr = encoded_cpu_val - 1;
return per_cpu_ptr(&osq_node, cpu_nr);
}
/*
* Get a stable @node->next pointer, either for unlock() or unqueue() purposes.
* Can return NULL in case we were the last queued and we updated @lock instead.
*/
static inline struct optimistic_spin_node *
osq_wait_next(struct optimistic_spin_queue *lock,
struct optimistic_spin_node *node,
struct optimistic_spin_node *prev)
{
struct optimistic_spin_node *next = NULL;
int curr = encode_cpu(smp_processor_id());
int old;
/*
* If there is a prev node in queue, then the 'old' value will be
* the prev node's CPU #, else it's set to OSQ_UNLOCKED_VAL since if
* we're currently last in queue, then the queue will then become empty.
*/
old = prev ? prev->cpu : OSQ_UNLOCKED_VAL;
for (;;) {
if (atomic_read(&lock->tail) == curr &&
atomic_cmpxchg(&lock->tail, curr, old) == curr) {
/*
* We were the last queued, we moved @lock back. @prev
* will now observe @lock and will complete its
* unlock()/unqueue().
*/
break;
}
/*
* We must xchg() the @node->next value, because if we were to
* leave it in, a concurrent unlock()/unqueue() from
* @node->next might complete Step-A and think its @prev is
* still valid.
*
* If the concurrent unlock()/unqueue() wins the race, we'll
* wait for either @lock to point to us, through its Step-B, or
* wait for a new @node->next from its Step-C.
*/
if (node->next) {
next = xchg(&node->next, NULL);
if (next)
break;
}
cpu_relax_lowlatency();
}
return next;
}
bool osq_lock(struct optimistic_spin_queue *lock)
{
struct optimistic_spin_node *node = this_cpu_ptr(&osq_node);
struct optimistic_spin_node *prev, *next;
int curr = encode_cpu(smp_processor_id());
int old;
node->locked = 0;
node->next = NULL;
node->cpu = curr;
old = atomic_xchg(&lock->tail, curr);
if (old == OSQ_UNLOCKED_VAL)
return true;
prev = decode_cpu(old);
node->prev = prev;
WRITE_ONCE(prev->next, node);
/*
* Normally @prev is untouchable after the above store; because at that
* moment unlock can proceed and wipe the node element from stack.
*
* However, since our nodes are static per-cpu storage, we're
* guaranteed their existence -- this allows us to apply
* cmpxchg in an attempt to undo our queueing.
*/
while (!READ_ONCE(node->locked)) {
/*
* If we need to reschedule bail... so we can block.
*/
if (need_resched())
goto unqueue;
cpu_relax_lowlatency();
}
return true;
unqueue:
/*
* Step - A -- stabilize @prev
*
* Undo our @prev->next assignment; this will make @prev's
* unlock()/unqueue() wait for a next pointer since @lock points to us
* (or later).
*/
for (;;) {
if (prev->next == node &&
cmpxchg(&prev->next, node, NULL) == node)
break;
/*
* We can only fail the cmpxchg() racing against an unlock(),
* in which case we should observe @node->locked becomming
* true.
*/
if (smp_load_acquire(&node->locked))
return true;
cpu_relax_lowlatency();
/*
* Or we race against a concurrent unqueue()'s step-B, in which
* case its step-C will write us a new @node->prev pointer.
*/
prev = READ_ONCE(node->prev);
}
/*
* Step - B -- stabilize @next
*
* Similar to unlock(), wait for @node->next or move @lock from @node
* back to @prev.
*/
next = osq_wait_next(lock, node, prev);
if (!next)
return false;
/*
* Step - C -- unlink
*
* @prev is stable because its still waiting for a new @prev->next
* pointer, @next is stable because our @node->next pointer is NULL and
* it will wait in Step-A.
*/
WRITE_ONCE(next->prev, prev);
WRITE_ONCE(prev->next, next);
return false;
}
void osq_unlock(struct optimistic_spin_queue *lock)
{
struct optimistic_spin_node *node, *next;
int curr = encode_cpu(smp_processor_id());
/*
* Fast path for the uncontended case.
*/
if (likely(atomic_cmpxchg(&lock->tail, curr, OSQ_UNLOCKED_VAL) == curr))
return;
/*
* Second most likely case.
*/
node = this_cpu_ptr(&osq_node);
next = xchg(&node->next, NULL);
if (next) {
WRITE_ONCE(next->locked, 1);
return;
}
next = osq_wait_next(lock, node, NULL);
if (next)
WRITE_ONCE(next->locked, 1);
}
C:\Users\Admin\Desktop\linux-4.2.y-new\linux-4.2.y\kernel\/locking/percpu-rwsem.c
#include <linux/atomic.h>
#include <linux/rwsem.h>
#include <linux/percpu.h>
#include <linux/wait.h>
#include <linux/lockdep.h>
#include <linux/percpu-rwsem.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/errno.h>
int __percpu_init_rwsem(struct percpu_rw_semaphore *brw,
const char *name, struct lock_class_key *rwsem_key)
{
brw->fast_read_ctr = alloc_percpu(int);
if (unlikely(!brw->fast_read_ctr))
return -ENOMEM;
/* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */
__init_rwsem(&brw->rw_sem, name, rwsem_key);
atomic_set(&brw->write_ctr, 0);
atomic_set(&brw->slow_read_ctr, 0);
init_waitqueue_head(&brw->write_waitq);
return 0;
}
void percpu_free_rwsem(struct percpu_rw_semaphore *brw)
{
free_percpu(brw->fast_read_ctr);
brw->fast_read_ctr = NULL; /* catch use after free bugs */
}
/*
* This is the fast-path for down_read/up_read, it only needs to ensure
* there is no pending writer (atomic_read(write_ctr) == 0) and inc/dec the
* fast per-cpu counter. The writer uses synchronize_sched_expedited() to
* serialize with the preempt-disabled section below.
*
* The nontrivial part is that we should guarantee acquire/release semantics
* in case when
*
* R_W: down_write() comes after up_read(), the writer should see all
* changes done by the reader
* or
* W_R: down_read() comes after up_write(), the reader should see all
* changes done by the writer
*
* If this helper fails the callers rely on the normal rw_semaphore and
* atomic_dec_and_test(), so in this case we have the necessary barriers.
*
* But if it succeeds we do not have any barriers, atomic_read(write_ctr) or
* __this_cpu_add() below can be reordered with any LOAD/STORE done by the
* reader inside the critical section. See the comments in down_write and
* up_write below.
*/
static bool update_fast_ctr(struct percpu_rw_semaphore *brw, unsigned int val)
{
bool success = false;
preempt_disable();
if (likely(!atomic_read(&brw->write_ctr))) {
__this_cpu_add(*brw->fast_read_ctr, val);
success = true;
}
preempt_enable();
return success;
}
/*
* Like the normal down_read() this is not recursive, the writer can
* come after the first percpu_down_read() and create the deadlock.
*
* Note: returns with lock_is_held(brw->rw_sem) == T for lockdep,
* percpu_up_read() does rwsem_release(). This pairs with the usage
* of ->rw_sem in percpu_down/up_write().
*/
void percpu_down_read(struct percpu_rw_semaphore *brw)
{
might_sleep();
if (likely(update_fast_ctr(brw, +1))) {
rwsem_acquire_read(&brw->rw_sem.dep_map, 0, 0, _RET_IP_);
return;
}
down_read(&brw->rw_sem);
atomic_inc(&brw->slow_read_ctr);
/* avoid up_read()->rwsem_release() */
__up_read(&brw->rw_sem);
}
void percpu_up_read(struct percpu_rw_semaphore *brw)
{
rwsem_release(&brw->rw_sem.dep_map, 1, _RET_IP_);
if (likely(update_fast_ctr(brw, -1)))
return;
/* false-positive is possible but harmless */
if (atomic_dec_and_test(&brw->slow_read_ctr))
wake_up_all(&brw->write_waitq);
}
static int clear_fast_ctr(struct percpu_rw_semaphore *brw)
{
unsigned int sum = 0;
int cpu;
for_each_possible_cpu(cpu) {
sum += per_cpu(*brw->fast_read_ctr, cpu);
per_cpu(*brw->fast_read_ctr, cpu) = 0;
}
return sum;
}
/*
* A writer increments ->write_ctr to force the readers to switch to the
* slow mode, note the atomic_read() check in update_fast_ctr().
*
* After that the readers can only inc/dec the slow ->slow_read_ctr counter,
* ->fast_read_ctr is stable. Once the writer moves its sum into the slow
* counter it represents the number of active readers.
*
* Finally the writer takes ->rw_sem for writing and blocks the new readers,
* then waits until the slow counter becomes zero.
*/
void percpu_down_write(struct percpu_rw_semaphore *brw)
{
/* tell update_fast_ctr() there is a pending writer */
atomic_inc(&brw->write_ctr);
/*
* 1. Ensures that write_ctr != 0 is visible to any down_read/up_read
* so that update_fast_ctr() can't succeed.
*
* 2. Ensures we see the result of every previous this_cpu_add() in
* update_fast_ctr().
*
* 3. Ensures that if any reader has exited its critical section via
* fast-path, it executes a full memory barrier before we return.
* See R_W case in the comment above update_fast_ctr().
*/
synchronize_sched_expedited();
/* exclude other writers, and block the new readers completely */
down_write(&brw->rw_sem);
/* nobody can use fast_read_ctr, move its sum into slow_read_ctr */
atomic_add(clear_fast_ctr(brw), &brw->slow_read_ctr);
/* wait for all readers to complete their percpu_up_read() */
wait_event(brw->write_waitq, !atomic_read(&brw->slow_read_ctr));
}
void percpu_up_write(struct percpu_rw_semaphore *brw)
{
/* release the lock, but the readers can't use the fast-path */
up_write(&brw->rw_sem);
/*
* Insert the barrier before the next fast-path in down_read,
* see W_R case in the comment above update_fast_ctr().
*/
synchronize_sched_expedited();
/* the last writer unblocks update_fast_ctr() */
atomic_dec(&brw->write_ctr);
}
C:\Users\Admin\Desktop\linux-4.2.y-new\linux-4.2.y\kernel\/locking/qrwlock.c
/*
* Queued read/write locks
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* (C) Copyright 2013-2014 Hewlett-Packard Development Company, L.P.
*
* Authors: Waiman Long <waiman.long@hp.com>
*/
#include <linux/smp.h>
#include <linux/bug.h>
#include <linux/cpumask.h>
#include <linux/percpu.h>
#include <linux/hardirq.h>
#include <asm/qrwlock.h>
/*
* This internal data structure is used for optimizing access to some of
* the subfields within the atomic_t cnts.
*/
struct __qrwlock {
union {
atomic_t cnts;
struct {
#ifdef __LITTLE_ENDIAN
u8 wmode; /* Writer mode */
u8 rcnts[3]; /* Reader counts */
#else
u8 rcnts[3]; /* Reader counts */
u8 wmode; /* Writer mode */
#endif
};
};
arch_spinlock_t lock;
};
/**
* rspin_until_writer_unlock - inc reader count & spin until writer is gone
* @lock : Pointer to queue rwlock structure
* @writer: Current queue rwlock writer status byte
*
* In interrupt context or at the head of the queue, the reader will just
* increment the reader count & wait until the writer releases the lock.
*/
static __always_inline void
rspin_until_writer_unlock(struct qrwlock *lock, u32 cnts)
{
while ((cnts & _QW_WMASK) == _QW_LOCKED) {
cpu_relax_lowlatency();
cnts = smp_load_acquire((u32 *)&lock->cnts);
}
}
/**
* queue_read_lock_slowpath - acquire read lock of a queue rwlock
* @lock: Pointer to queue rwlock structure
*/
void queue_read_lock_slowpath(struct qrwlock *lock)
{
u32 cnts;
/*
* Readers come here when they cannot get the lock without waiting
*/
if (unlikely(in_interrupt())) {
/*
* Readers in interrupt context will spin until the lock is
* available without waiting in the queue.
*/
cnts = smp_load_acquire((u32 *)&lock->cnts);
rspin_until_writer_unlock(lock, cnts);
return;
}
atomic_sub(_QR_BIAS, &lock->cnts);
/*
* Put the reader into the wait queue
*/
arch_spin_lock(&lock->lock);
/*
* At the head of the wait queue now, wait until the writer state
* goes to 0 and then try to increment the reader count and get
* the lock. It is possible that an incoming writer may steal the
* lock in the interim, so it is necessary to check the writer byte
* to make sure that the write lock isn't taken.
*/
while (atomic_read(&lock->cnts) & _QW_WMASK)
cpu_relax_lowlatency();
cnts = atomic_add_return(_QR_BIAS, &lock->cnts) - _QR_BIAS;
rspin_until_writer_unlock(lock, cnts);
/*
* Signal the next one in queue to become queue head
*/
arch_spin_unlock(&lock->lock);
}
EXPORT_SYMBOL(queue_read_lock_slowpath);
/**
* queue_write_lock_slowpath - acquire write lock of a queue rwlock
* @lock : Pointer to queue rwlock structure
*/
void queue_write_lock_slowpath(struct qrwlock *lock)
{
u32 cnts;
/* Put the writer into the wait queue */
arch_spin_lock(&lock->lock);
/* Try to acquire the lock directly if no reader is present */
if (!atomic_read(&lock->cnts) &&
(atomic_cmpxchg(&lock->cnts, 0, _QW_LOCKED) == 0))
goto unlock;
/*
* Set the waiting flag to notify readers that a writer is pending,
* or wait for a previous writer to go away.
*/
for (;;) {
struct __qrwlock *l = (struct __qrwlock *)lock;
if (!READ_ONCE(l->wmode) &&
(cmpxchg(&l->wmode, 0, _QW_WAITING) == 0))
break;
cpu_relax_lowlatency();
}
/* When no more readers, set the locked flag */
for (;;) {
cnts = atomic_read(&lock->cnts);
if ((cnts == _QW_WAITING) &&
(atomic_cmpxchg(&lock->cnts, _QW_WAITING,
_QW_LOCKED) == _QW_WAITING))
break;
cpu_relax_lowlatency();
}
unlock:
arch_spin_unlock(&lock->lock);
}
EXPORT_SYMBOL(queue_write_lock_slowpath);
C:\Users\Admin\Desktop\linux-4.2.y-new\linux-4.2.y\kernel\/locking/qspinlock.c
/*
* Queued spinlock
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* (C) Copyright 2013-2015 Hewlett-Packard Development Company, L.P.
* (C) Copyright 2013-2014 Red Hat, Inc.
* (C) Copyright 2015 Intel Corp.
*
* Authors: Waiman Long <waiman.long@hp.com>
* Peter Zijlstra <peterz@infradead.org>
*/
#ifndef _GEN_PV_LOCK_SLOWPATH
#include <linux/smp.h>
#include <linux/bug.h>
#include <linux/cpumask.h>
#include <linux/percpu.h>
#include <linux/hardirq.h>
#include <linux/mutex.h>
#include <asm/byteorder.h>
#include <asm/qspinlock.h>
/*
* The basic principle of a queue-based spinlock can best be understood
* by studying a classic queue-based spinlock implementation called the
* MCS lock. The paper below provides a good description for this kind
* of lock.
*
* http://www.cise.ufl.edu/tr/DOC/REP-1992-71.pdf
*
* This queued spinlock implementation is based on the MCS lock, however to make
* it fit the 4 bytes we assume spinlock_t to be, and preserve its existing
* API, we must modify it somehow.
*
* In particular; where the traditional MCS lock consists of a tail pointer
* (8 bytes) and needs the next pointer (another 8 bytes) of its own node to
* unlock the next pending (next->locked), we compress both these: {tail,
* next->locked} into a single u32 value.
*
* Since a spinlock disables recursion of its own context and there is a limit
* to the contexts that can nest; namely: task, softirq, hardirq, nmi. As there
* are at most 4 nesting levels, it can be encoded by a 2-bit number. Now
* we can encode the tail by combining the 2-bit nesting level with the cpu
* number. With one byte for the lock value and 3 bytes for the tail, only a
* 32-bit word is now needed. Even though we only need 1 bit for the lock,
* we extend it to a full byte to achieve better performance for architectures
* that support atomic byte write.
*
* We also change the first spinner to spin on the lock bit instead of its
* node; whereby avoiding the need to carry a node from lock to unlock, and
* preserving existing lock API. This also makes the unlock code simpler and
* faster.
*
* N.B. The current implementation only supports architectures that allow
* atomic operations on smaller 8-bit and 16-bit data types.
*
*/
#include "mcs_spinlock.h"
#ifdef CONFIG_PARAVIRT_SPINLOCKS
#define MAX_NODES 8
#else
#define MAX_NODES 4
#endif
/*
* Per-CPU queue node structures; we can never have more than 4 nested
* contexts: task, softirq, hardirq, nmi.
*
* Exactly fits one 64-byte cacheline on a 64-bit architecture.
*
* PV doubles the storage and uses the second cacheline for PV state.
*/
static DEFINE_PER_CPU_ALIGNED(struct mcs_spinlock, mcs_nodes[MAX_NODES]);
/*
* We must be able to distinguish between no-tail and the tail at 0:0,
* therefore increment the cpu number by one.
*/
static inline u32 encode_tail(int cpu, int idx)
{
u32 tail;
#ifdef CONFIG_DEBUG_SPINLOCK
BUG_ON(idx > 3);
#endif
tail = (cpu + 1) << _Q_TAIL_CPU_OFFSET;
tail |= idx << _Q_TAIL_IDX_OFFSET; /* assume < 4 */
return tail;
}
static inline struct mcs_spinlock *decode_tail(u32 tail)
{
int cpu = (tail >> _Q_TAIL_CPU_OFFSET) - 1;
int idx = (tail & _Q_TAIL_IDX_MASK) >> _Q_TAIL_IDX_OFFSET;
return per_cpu_ptr(&mcs_nodes[idx], cpu);
}
#define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK)
/*
* By using the whole 2nd least significant byte for the pending bit, we
* can allow better optimization of the lock acquisition for the pending
* bit holder.
*
* This internal structure is also used by the set_locked function which
* is not restricted to _Q_PENDING_BITS == 8.
*/
struct __qspinlock {
union {
atomic_t val;
#ifdef __LITTLE_ENDIAN
struct {
u8 locked;
u8 pending;
};
struct {
u16 locked_pending;
u16 tail;
};
#else
struct {
u16 tail;
u16 locked_pending;
};
struct {
u8 reserved[2];
u8 pending;
u8 locked;
};
#endif
};
};
#if _Q_PENDING_BITS == 8
/**
* clear_pending_set_locked - take ownership and clear the pending bit.
* @lock: Pointer to queued spinlock structure
*
* *,1,0 -> *,0,1
*
* Lock stealing is not allowed if this function is used.
*/
static __always_inline void clear_pending_set_locked(struct qspinlock *lock)
{
struct __qspinlock *l = (void *)lock;
WRITE_ONCE(l->locked_pending, _Q_LOCKED_VAL);
}
/*
* xchg_tail - Put in the new queue tail code word & retrieve previous one
* @lock : Pointer to queued spinlock structure
* @tail : The new queue tail code word
* Return: The previous queue tail code word
*
* xchg(lock, tail)
*
* p,*,* -> n,*,* ; prev = xchg(lock, node)
*/
static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
{
struct __qspinlock *l = (void *)lock;
return (u32)xchg(&l->tail, tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET;
}
#else /* _Q_PENDING_BITS == 8 */
/**
* clear_pending_set_locked - take ownership and clear the pending bit.
* @lock: Pointer to queued spinlock structure
*
* *,1,0 -> *,0,1
*/
static __always_inline void clear_pending_set_locked(struct qspinlock *lock)
{
atomic_add(-_Q_PENDING_VAL + _Q_LOCKED_VAL, &lock->val);
}
/**
* xchg_tail - Put in the new queue tail code word & retrieve previous one
* @lock : Pointer to queued spinlock structure
* @tail : The new queue tail code word
* Return: The previous queue tail code word
*
* xchg(lock, tail)
*
* p,*,* -> n,*,* ; prev = xchg(lock, node)
*/
static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
{
u32 old, new, val = atomic_read(&lock->val);
for (;;) {
new = (val & _Q_LOCKED_PENDING_MASK) | tail;
old = atomic_cmpxchg(&lock->val, val, new);
if (old == val)
break;
val = old;
}
return old;
}
#endif /* _Q_PENDING_BITS == 8 */
/**
* set_locked - Set the lock bit and own the lock
* @lock: Pointer to queued spinlock structure
*
* *,*,0 -> *,0,1
*/
static __always_inline void set_locked(struct qspinlock *lock)
{
struct __qspinlock *l = (void *)lock;
WRITE_ONCE(l->locked, _Q_LOCKED_VAL);
}
/*
* Generate the native code for queued_spin_unlock_slowpath(); provide NOPs for
* all the PV callbacks.
*/
static __always_inline void __pv_init_node(struct mcs_spinlock *node) { }
static __always_inline void __pv_wait_node(struct mcs_spinlock *node) { }
static __always_inline void __pv_kick_node(struct mcs_spinlock *node) { }
static __always_inline void __pv_wait_head(struct qspinlock *lock,
struct mcs_spinlock *node) { }
#define pv_enabled() false
#define pv_init_node __pv_init_node
#define pv_wait_node __pv_wait_node
#define pv_kick_node __pv_kick_node
#define pv_wait_head __pv_wait_head
#ifdef CONFIG_PARAVIRT_SPINLOCKS
#define queued_spin_lock_slowpath native_queued_spin_lock_slowpath
#endif
#endif /* _GEN_PV_LOCK_SLOWPATH */
/**
* queued_spin_lock_slowpath - acquire the queued spinlock
* @lock: Pointer to queued spinlock structure
* @val: Current value of the queued spinlock 32-bit word
*
* (queue tail, pending bit, lock value)
*
* fast : slow : unlock
* : :
* uncontended (0,0,0) -:--> (0,0,1) ------------------------------:--> (*,*,0)
* : | ^--------.------. / :
* : v \ \ | :
* pending : (0,1,1) +--> (0,1,0) \ | :
* : | ^--' | | :
* : v | | :
* uncontended : (n,x,y) +--> (n,0,0) --' | :
* queue : | ^--' | :
* : v | :
* contended : (*,x,y) +--> (*,0,0) ---> (*,0,1) -' :
* queue : ^--' :
*/
void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
{
struct mcs_spinlock *prev, *next, *node;
u32 new, old, tail;
int idx;
BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));
if (pv_enabled())
goto queue;
if (virt_spin_lock(lock))
return;
/*
* wait for in-progress pending->locked hand-overs
*
* 0,1,0 -> 0,0,1
*/
if (val == _Q_PENDING_VAL) {
while ((val = atomic_read(&lock->val)) == _Q_PENDING_VAL)
cpu_relax();
}
/*
* trylock || pending
*
* 0,0,0 -> 0,0,1 ; trylock
* 0,0,1 -> 0,1,1 ; pending
*/
for (;;) {
/*
* If we observe any contention; queue.
*/
if (val & ~_Q_LOCKED_MASK)
goto queue;
new = _Q_LOCKED_VAL;
if (val == new)
new |= _Q_PENDING_VAL;
old = atomic_cmpxchg(&lock->val, val, new);
if (old == val)
break;
val = old;
}
/*
* we won the trylock
*/
if (new == _Q_LOCKED_VAL)
return;
/*
* we're pending, wait for the owner to go away.
*
* *,1,1 -> *,1,0
*
* this wait loop must be a load-acquire such that we match the
* store-release that clears the locked bit and create lock
* sequentiality; this is because not all clear_pending_set_locked()
* implementations imply full barriers.
*/
while ((val = smp_load_acquire(&lock->val.counter)) & _Q_LOCKED_MASK)
cpu_relax();
/*
* take ownership and clear the pending bit.
*
* *,1,0 -> *,0,1
*/
clear_pending_set_locked(lock);
return;
/*
* End of pending bit optimistic spinning and beginning of MCS
* queuing.
*/
queue:
node = this_cpu_ptr(&mcs_nodes[0]);
idx = node->count++;
tail = encode_tail(smp_processor_id(), idx);
node += idx;
node->locked = 0;
node->next = NULL;
pv_init_node(node);
/*
* We touched a (possibly) cold cacheline in the per-cpu queue node;
* attempt the trylock once more in the hope someone let go while we
* weren't watching.
*/
if (queued_spin_trylock(lock))
goto release;
/*
* We have already touched the queueing cacheline; don't bother with
* pending stuff.
*
* p,*,* -> n,*,*
*/
old = xchg_tail(lock, tail);
/*
* if there was a previous node; link it and wait until reaching the
* head of the waitqueue.
*/
if (old & _Q_TAIL_MASK) {
prev = decode_tail(old);
WRITE_ONCE(prev->next, node);
pv_wait_node(node);
arch_mcs_spin_lock_contended(&node->locked);
}
/*
* we're at the head of the waitqueue, wait for the owner & pending to
* go away.
*
* *,x,y -> *,0,0
*
* this wait loop must use a load-acquire such that we match the
* store-release that clears the locked bit and create lock
* sequentiality; this is because the set_locked() function below
* does not imply a full barrier.
*
*/
pv_wait_head(lock, node);
while ((val = smp_load_acquire(&lock->val.counter)) & _Q_LOCKED_PENDING_MASK)
cpu_relax();
/*
* claim the lock:
*
* n,0,0 -> 0,0,1 : lock, uncontended
* *,0,0 -> *,0,1 : lock, contended
*
* If the queue head is the only one in the queue (lock value == tail),
* clear the tail code and grab the lock. Otherwise, we only need
* to grab the lock.
*/
for (;;) {
if (val != tail) {
set_locked(lock);
break;
}
old = atomic_cmpxchg(&lock->val, val, _Q_LOCKED_VAL);
if (old == val)
goto release; /* No contention */
val = old;
}
/*
* contended path; wait for next, release.
*/
while (!(next = READ_ONCE(node->next)))
cpu_relax();
arch_mcs_spin_unlock_contended(&next->locked);
pv_kick_node(next);
release:
/*
* release the node
*/
this_cpu_dec(mcs_nodes[0].count);
}
EXPORT_SYMBOL(queued_spin_lock_slowpath);
/*
* Generate the paravirt code for queued_spin_unlock_slowpath().
*/
#if !defined(_GEN_PV_LOCK_SLOWPATH) && defined(CONFIG_PARAVIRT_SPINLOCKS)
#define _GEN_PV_LOCK_SLOWPATH
#undef pv_enabled
#define pv_enabled() true
#undef pv_init_node
#undef pv_wait_node
#undef pv_kick_node
#undef pv_wait_head
#undef queued_spin_lock_slowpath
#define queued_spin_lock_slowpath __pv_queued_spin_lock_slowpath
#include "qspinlock_paravirt.h"
#include "qspinlock.c"
#endif
C:\Users\Admin\Desktop\linux-4.2.y-new\linux-4.2.y\kernel\/locking/qspinlock_paravirt.h
#ifndef _GEN_PV_LOCK_SLOWPATH
#error "do not include this file"
#endif
#include <linux/hash.h>
#include <linux/bootmem.h>
#include <linux/debug_locks.h>
/*
* Implement paravirt qspinlocks; the general idea is to halt the vcpus instead
* of spinning them.
*
* This relies on the architecture to provide two paravirt hypercalls:
*
* pv_wait(u8 *ptr, u8 val) -- suspends the vcpu if *ptr == val
* pv_kick(cpu) -- wakes a suspended vcpu
*
* Using these we implement __pv_queued_spin_lock_slowpath() and
* __pv_queued_spin_unlock() to replace native_queued_spin_lock_slowpath() and
* native_queued_spin_unlock().
*/
#define _Q_SLOW_VAL (3U << _Q_LOCKED_OFFSET)
enum vcpu_state {
vcpu_running = 0,
vcpu_halted,
};
struct pv_node {
struct mcs_spinlock mcs;
struct mcs_spinlock __res[3];
int cpu;
u8 state;
};
/*
* Lock and MCS node addresses hash table for fast lookup
*
* Hashing is done on a per-cacheline basis to minimize the need to access
* more than one cacheline.
*
* Dynamically allocate a hash table big enough to hold at least 4X the
* number of possible cpus in the system. Allocation is done on page
* granularity. So the minimum number of hash buckets should be at least
* 256 (64-bit) or 512 (32-bit) to fully utilize a 4k page.
*
* Since we should not be holding locks from NMI context (very rare indeed) the
* max load factor is 0.75, which is around the point where open addressing
* breaks down.
*
*/
struct pv_hash_entry {
struct qspinlock *lock;
struct pv_node *node;
};
#define PV_HE_PER_LINE (SMP_CACHE_BYTES / sizeof(struct pv_hash_entry))
#define PV_HE_MIN (PAGE_SIZE / sizeof(struct pv_hash_entry))
static struct pv_hash_entry *pv_lock_hash;
static unsigned int pv_lock_hash_bits __read_mostly;
/*
* Allocate memory for the PV qspinlock hash buckets
*
* This function should be called from the paravirt spinlock initialization
* routine.
*/
void __init __pv_init_lock_hash(void)
{
int pv_hash_size = ALIGN(4 * num_possible_cpus(), PV_HE_PER_LINE);
if (pv_hash_size < PV_HE_MIN)
pv_hash_size = PV_HE_MIN;
/*
* Allocate space from bootmem which should be page-size aligned
* and hence cacheline aligned.
*/
pv_lock_hash = alloc_large_system_hash("PV qspinlock",
sizeof(struct pv_hash_entry),
pv_hash_size, 0, HASH_EARLY,
&pv_lock_hash_bits, NULL,
pv_hash_size, pv_hash_size);
}
#define for_each_hash_entry(he, offset, hash) \
for (hash &= ~(PV_HE_PER_LINE - 1), he = &pv_lock_hash[hash], offset = 0; \
offset < (1 << pv_lock_hash_bits); \
offset++, he = &pv_lock_hash[(hash + offset) & ((1 << pv_lock_hash_bits) - 1)])
static struct qspinlock **pv_hash(struct qspinlock *lock, struct pv_node *node)
{
unsigned long offset, hash = hash_ptr(lock, pv_lock_hash_bits);
struct pv_hash_entry *he;
for_each_hash_entry(he, offset, hash) {
if (!cmpxchg(&he->lock, NULL, lock)) {
WRITE_ONCE(he->node, node);
return &he->lock;
}
}
/*
* Hard assume there is a free entry for us.
*
* This is guaranteed by ensuring every blocked lock only ever consumes
* a single entry, and since we only have 4 nesting levels per CPU
* and allocated 4*nr_possible_cpus(), this must be so.
*
* The single entry is guaranteed by having the lock owner unhash
* before it releases.
*/
BUG();
}
static struct pv_node *pv_unhash(struct qspinlock *lock)
{
unsigned long offset, hash = hash_ptr(lock, pv_lock_hash_bits);
struct pv_hash_entry *he;
struct pv_node *node;
for_each_hash_entry(he, offset, hash) {
if (READ_ONCE(he->lock) == lock) {
node = READ_ONCE(he->node);
WRITE_ONCE(he->lock, NULL);
return node;
}
}
/*
* Hard assume we'll find an entry.
*
* This guarantees a limited lookup time and is itself guaranteed by
* having the lock owner do the unhash -- IFF the unlock sees the
* SLOW flag, there MUST be a hash entry.
*/
BUG();
}
/*
* Initialize the PV part of the mcs_spinlock node.
*/
static void pv_init_node(struct mcs_spinlock *node)
{
struct pv_node *pn = (struct pv_node *)node;
BUILD_BUG_ON(sizeof(struct pv_node) > 5*sizeof(struct mcs_spinlock));
pn->cpu = smp_processor_id();
pn->state = vcpu_running;
}
/*
* Wait for node->locked to become true, halt the vcpu after a short spin.
* pv_kick_node() is used to wake the vcpu again.
*/
static void pv_wait_node(struct mcs_spinlock *node)
{
struct pv_node *pn = (struct pv_node *)node;
int loop;
for (;;) {
for (loop = SPIN_THRESHOLD; loop; loop--) {
if (READ_ONCE(node->locked))
return;
cpu_relax();
}
/*
* Order pn->state vs pn->locked thusly:
*
* [S] pn->state = vcpu_halted [S] next->locked = 1
* MB MB
* [L] pn->locked [RmW] pn->state = vcpu_running
*
* Matches the xchg() from pv_kick_node().
*/
smp_store_mb(pn->state, vcpu_halted);
if (!READ_ONCE(node->locked))
pv_wait(&pn->state, vcpu_halted);
/*
* Reset the vCPU state to avoid unncessary CPU kicking
*/
WRITE_ONCE(pn->state, vcpu_running);
/*
* If the locked flag is still not set after wakeup, it is a
* spurious wakeup and the vCPU should wait again. However,
* there is a pretty high overhead for CPU halting and kicking.
* So it is better to spin for a while in the hope that the
* MCS lock will be released soon.
*/
}
/*
* By now our node->locked should be 1 and our caller will not actually
* spin-wait for it. We do however rely on our caller to do a
* load-acquire for us.
*/
}
/*
* Called after setting next->locked = 1, used to wake those stuck in
* pv_wait_node().
*/
static void pv_kick_node(struct mcs_spinlock *node)
{
struct pv_node *pn = (struct pv_node *)node;
/*
* Note that because node->locked is already set, this actual
* mcs_spinlock entry could be re-used already.
*
* This should be fine however, kicking people for no reason is
* harmless.
*
* See the comment in pv_wait_node().
*/
if (xchg(&pn->state, vcpu_running) == vcpu_halted)
pv_kick(pn->cpu);
}
/*
* Wait for l->locked to become clear; halt the vcpu after a short spin.
* __pv_queued_spin_unlock() will wake us.
*/
static void pv_wait_head(struct qspinlock *lock, struct mcs_spinlock *node)
{
struct pv_node *pn = (struct pv_node *)node;
struct __qspinlock *l = (void *)lock;
struct qspinlock **lp = NULL;
int loop;
for (;;) {
for (loop = SPIN_THRESHOLD; loop; loop--) {
if (!READ_ONCE(l->locked))
return;
cpu_relax();
}
WRITE_ONCE(pn->state, vcpu_halted);
if (!lp) { /* ONCE */
lp = pv_hash(lock, pn);
/*
* lp must be set before setting _Q_SLOW_VAL
*
* [S] lp = lock [RmW] l = l->locked = 0
* MB MB
* [S] l->locked = _Q_SLOW_VAL [L] lp
*
* Matches the cmpxchg() in __pv_queued_spin_unlock().
*/
if (!cmpxchg(&l->locked, _Q_LOCKED_VAL, _Q_SLOW_VAL)) {
/*
* The lock is free and _Q_SLOW_VAL has never
* been set. Therefore we need to unhash before
* getting the lock.
*/
WRITE_ONCE(*lp, NULL);
return;
}
}
pv_wait(&l->locked, _Q_SLOW_VAL);
/*
* The unlocker should have freed the lock before kicking the
* CPU. So if the lock is still not free, it is a spurious
* wakeup and so the vCPU should wait again after spinning for
* a while.
*/
}
/*
* Lock is unlocked now; the caller will acquire it without waiting.
* As with pv_wait_node() we rely on the caller to do a load-acquire
* for us.
*/
}
/*
* PV version of the unlock function to be used in stead of
* queued_spin_unlock().
*/
__visible void __pv_queued_spin_unlock(struct qspinlock *lock)
{
struct __qspinlock *l = (void *)lock;
struct pv_node *node;
u8 lockval = cmpxchg(&l->locked, _Q_LOCKED_VAL, 0);
/*
* We must not unlock if SLOW, because in that case we must first
* unhash. Otherwise it would be possible to have multiple @lock
* entries, which would be BAD.
*/
if (likely(lockval == _Q_LOCKED_VAL))
return;
if (unlikely(lockval != _Q_SLOW_VAL)) {
if (debug_locks_silent)
return;
WARN(1, "pvqspinlock: lock %p has corrupted value 0x%x!\n", lock, atomic_read(&lock->val));
return;
}
/*
* Since the above failed to release, this must be the SLOW path.
* Therefore start by looking up the blocked node and unhashing it.
*/
node = pv_unhash(lock);
/*
* Now that we have a reference to the (likely) blocked pv_node,
* release the lock.
*/
smp_store_release(&l->locked, 0);
/*
* At this point the memory pointed at by lock can be freed/reused,
* however we can still use the pv_node to kick the CPU.
*/
if (READ_ONCE(node->state) == vcpu_halted)
pv_kick(node->cpu);
}
/*
* Include the architecture specific callee-save thunk of the
* __pv_queued_spin_unlock(). This thunk is put together with
* __pv_queued_spin_unlock() near the top of the file to make sure
* that the callee-save thunk and the real unlock function are close
* to each other sharing consecutive instruction cachelines.
*/
#include <asm/qspinlock_paravirt.h>
C:\Users\Admin\Desktop\linux-4.2.y-new\linux-4.2.y\kernel\/locking/rtmutex-debug.c
/*
* RT-Mutexes: blocking mutual exclusion locks with PI support
*
* started by Ingo Molnar and Thomas Gleixner:
*
* Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
* Copyright (C) 2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
*
* This code is based on the rt.c implementation in the preempt-rt tree.
* Portions of said code are
*
* Copyright (C) 2004 LynuxWorks, Inc., Igor Manyilov, Bill Huey
* Copyright (C) 2006 Esben Nielsen
* Copyright (C) 2006 Kihon Technologies Inc.,
* Steven Rostedt <rostedt@goodmis.org>
*
* See rt.c in preempt-rt for proper credits and further information
*/
#include <linux/sched.h>
#include <linux/sched/rt.h>
#include <linux/delay.h>
#include <linux/export.h>
#include <linux/spinlock.h>
#include <linux/kallsyms.h>
#include <linux/syscalls.h>
#include <linux/interrupt.h>
#include <linux/rbtree.h>
#include <linux/fs.h>
#include <linux/debug_locks.h>
#include "rtmutex_common.h"
static void printk_task(struct task_struct *p)
{
if (p)
printk("%16s:%5d [%p, %3d]", p->comm, task_pid_nr(p), p, p->prio);
else
printk("<none>");
}
static void printk_lock(struct rt_mutex *lock, int print_owner)
{
if (lock->name)
printk(" [%p] {%s}\n",
lock, lock->name);
else
printk(" [%p] {%s:%d}\n",
lock, lock->file, lock->line);
if (print_owner && rt_mutex_owner(lock)) {
printk(".. ->owner: %p\n", lock->owner);
printk(".. held by: ");
printk_task(rt_mutex_owner(lock));
printk("\n");
}
}
void rt_mutex_debug_task_free(struct task_struct *task)
{
DEBUG_LOCKS_WARN_ON(!RB_EMPTY_ROOT(&task->pi_waiters));
DEBUG_LOCKS_WARN_ON(task->pi_blocked_on);
}
/*
* We fill out the fields in the waiter to store the information about
* the deadlock. We print when we return. act_waiter can be NULL in
* case of a remove waiter operation.
*/
void debug_rt_mutex_deadlock(enum rtmutex_chainwalk chwalk,
struct rt_mutex_waiter *act_waiter,
struct rt_mutex *lock)
{
struct task_struct *task;
if (!debug_locks || chwalk == RT_MUTEX_FULL_CHAINWALK || !act_waiter)
return;
task = rt_mutex_owner(act_waiter->lock);
if (task && task != current) {
act_waiter->deadlock_task_pid = get_pid(task_pid(task));
act_waiter->deadlock_lock = lock;
}
}
void debug_rt_mutex_print_deadlock(struct rt_mutex_waiter *waiter)
{
struct task_struct *task;
if (!waiter->deadlock_lock || !debug_locks)
return;
rcu_read_lock();
task = pid_task(waiter->deadlock_task_pid, PIDTYPE_PID);
if (!task) {
rcu_read_unlock();
return;
}
if (!debug_locks_off()) {
rcu_read_unlock();
return;
}
printk("\n============================================\n");
printk( "[ BUG: circular locking deadlock detected! ]\n");
printk("%s\n", print_tainted());
printk( "--------------------------------------------\n");
printk("%s/%d is deadlocking current task %s/%d\n\n",
task->comm, task_pid_nr(task),
current->comm, task_pid_nr(current));
printk("\n1) %s/%d is trying to acquire this lock:\n",
current->comm, task_pid_nr(current));
printk_lock(waiter->lock, 1);
printk("\n2) %s/%d is blocked on this lock:\n",
task->comm, task_pid_nr(task));
printk_lock(waiter->deadlock_lock, 1);
debug_show_held_locks(current);
debug_show_held_locks(task);
printk("\n%s/%d's [blocked] stackdump:\n\n",
task->comm, task_pid_nr(task));
show_stack(task, NULL);
printk("\n%s/%d's [current] stackdump:\n\n",
current->comm, task_pid_nr(current));
dump_stack();
debug_show_all_locks();
rcu_read_unlock();
printk("[ turning off deadlock detection."
"Please report this trace. ]\n\n");
}
void debug_rt_mutex_lock(struct rt_mutex *lock)
{
}
void debug_rt_mutex_unlock(struct rt_mutex *lock)
{
DEBUG_LOCKS_WARN_ON(rt_mutex_owner(lock) != current);
}
void
debug_rt_mutex_proxy_lock(struct rt_mutex *lock, struct task_struct *powner)
{
}
void debug_rt_mutex_proxy_unlock(struct rt_mutex *lock)
{
DEBUG_LOCKS_WARN_ON(!rt_mutex_owner(lock));
}
void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter)
{
memset(waiter, 0x11, sizeof(*waiter));
waiter->deadlock_task_pid = NULL;
}
void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter)
{
put_pid(waiter->deadlock_task_pid);
memset(waiter, 0x22, sizeof(*waiter));
}
void debug_rt_mutex_init(struct rt_mutex *lock, const char *name)
{
/*
* Make sure we are not reinitializing a held lock:
*/
debug_check_no_locks_freed((void *)lock, sizeof(*lock));
lock->name = name;
}
void
rt_mutex_deadlock_account_lock(struct rt_mutex *lock, struct task_struct *task)
{
}
void rt_mutex_deadlock_account_unlock(struct task_struct *task)
{
}
C:\Users\Admin\Desktop\linux-4.2.y-new\linux-4.2.y\kernel\/locking/rtmutex-debug.h
/*
* RT-Mutexes: blocking mutual exclusion locks with PI support
*
* started by Ingo Molnar and Thomas Gleixner:
*
* Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
* Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com>
*
* This file contains macros used solely by rtmutex.c. Debug version.
*/
extern void
rt_mutex_deadlock_account_lock(struct rt_mutex *lock, struct task_struct *task);
extern void rt_mutex_deadlock_account_unlock(struct task_struct *task);
extern void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter);
extern void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter);
extern void debug_rt_mutex_init(struct rt_mutex *lock, const char *name);
extern void debug_rt_mutex_lock(struct rt_mutex *lock);
extern void debug_rt_mutex_unlock(struct rt_mutex *lock);
extern void debug_rt_mutex_proxy_lock(struct rt_mutex *lock,
struct task_struct *powner);
extern void debug_rt_mutex_proxy_unlock(struct rt_mutex *lock);
extern void debug_rt_mutex_deadlock(enum rtmutex_chainwalk chwalk,
struct rt_mutex_waiter *waiter,
struct rt_mutex *lock);
extern void debug_rt_mutex_print_deadlock(struct rt_mutex_waiter *waiter);
# define debug_rt_mutex_reset_waiter(w) \
do { (w)->deadlock_lock = NULL; } while (0)
static inline bool debug_rt_mutex_detect_deadlock(struct rt_mutex_waiter *waiter,
enum rtmutex_chainwalk walk)
{
return (waiter != NULL);
}
static inline void rt_mutex_print_deadlock(struct rt_mutex_waiter *w)
{
debug_rt_mutex_print_deadlock(w);
}
C:\Users\Admin\Desktop\linux-4.2.y-new\linux-4.2.y\kernel\/locking/rtmutex-tester.c
/*
* RT-Mutex-tester: scriptable tester for rt mutexes
*
* started by Thomas Gleixner:
*
* Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com>
*
*/
#include <linux/device.h>
#include <linux/kthread.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/sched/rt.h>
#include <linux/spinlock.h>
#include <linux/timer.h>
#include <linux/freezer.h>
#include <linux/stat.h>
#include "rtmutex.h"
#define MAX_RT_TEST_THREADS 8
#define MAX_RT_TEST_MUTEXES 8
static spinlock_t rttest_lock;
static atomic_t rttest_event;
struct test_thread_data {
int opcode;
int opdata;
int mutexes[MAX_RT_TEST_MUTEXES];
int event;
struct device dev;
};
static struct test_thread_data thread_data[MAX_RT_TEST_THREADS];
static struct task_struct *threads[MAX_RT_TEST_THREADS];
static struct rt_mutex mutexes[MAX_RT_TEST_MUTEXES];
enum test_opcodes {
RTTEST_NOP = 0,
RTTEST_SCHEDOT, /* 1 Sched other, data = nice */
RTTEST_SCHEDRT, /* 2 Sched fifo, data = prio */
RTTEST_LOCK, /* 3 Lock uninterruptible, data = lockindex */
RTTEST_LOCKNOWAIT, /* 4 Lock uninterruptible no wait in wakeup, data = lockindex */
RTTEST_LOCKINT, /* 5 Lock interruptible, data = lockindex */
RTTEST_LOCKINTNOWAIT, /* 6 Lock interruptible no wait in wakeup, data = lockindex */
RTTEST_LOCKCONT, /* 7 Continue locking after the wakeup delay */
RTTEST_UNLOCK, /* 8 Unlock, data = lockindex */
/* 9, 10 - reserved for BKL commemoration */
RTTEST_SIGNAL = 11, /* 11 Signal other test thread, data = thread id */
RTTEST_RESETEVENT = 98, /* 98 Reset event counter */
RTTEST_RESET = 99, /* 99 Reset all pending operations */
};
static int handle_op(struct test_thread_data *td, int lockwakeup)
{
int i, id, ret = -EINVAL;
switch(td->opcode) {
case RTTEST_NOP:
return 0;
case RTTEST_LOCKCONT:
td->mutexes[td->opdata] = 1;
td->event = atomic_add_return(1, &rttest_event);
return 0;
case RTTEST_RESET:
for (i = 0; i < MAX_RT_TEST_MUTEXES; i++) {
if (td->mutexes[i] == 4) {
rt_mutex_unlock(&mutexes[i]);
td->mutexes[i] = 0;
}
}
return 0;
case RTTEST_RESETEVENT:
atomic_set(&rttest_event, 0);
return 0;
default:
if (lockwakeup)
return ret;
}
switch(td->opcode) {
case RTTEST_LOCK:
case RTTEST_LOCKNOWAIT:
id = td->opdata;
if (id < 0 || id >= MAX_RT_TEST_MUTEXES)
return ret;
td->mutexes[id] = 1;
td->event = atomic_add_return(1, &rttest_event);
rt_mutex_lock(&mutexes[id]);
td->event = atomic_add_return(1, &rttest_event);
td->mutexes[id] = 4;
return 0;
case RTTEST_LOCKINT:
case RTTEST_LOCKINTNOWAIT:
id = td->opdata;
if (id < 0 || id >= MAX_RT_TEST_MUTEXES)
return ret;
td->mutexes[id] = 1;
td->event = atomic_add_return(1, &rttest_event);
ret = rt_mutex_lock_interruptible(&mutexes[id], 0);
td->event = atomic_add_return(1, &rttest_event);
td->mutexes[id] = ret ? 0 : 4;
return ret ? -EINTR : 0;
case RTTEST_UNLOCK:
id = td->opdata;
if (id < 0 || id >= MAX_RT_TEST_MUTEXES || td->mutexes[id] != 4)
return ret;
td->event = atomic_add_return(1, &rttest_event);
rt_mutex_unlock(&mutexes[id]);
td->event = atomic_add_return(1, &rttest_event);
td->mutexes[id] = 0;
return 0;
default:
break;
}
return ret;
}
/*
* Schedule replacement for rtsem_down(). Only called for threads with
* PF_MUTEX_TESTER set.
*
* This allows us to have finegrained control over the event flow.
*
*/
void schedule_rt_mutex_test(struct rt_mutex *mutex)
{
int tid, op, dat;
struct test_thread_data *td;
/* We have to lookup the task */
for (tid = 0; tid < MAX_RT_TEST_THREADS; tid++) {
if (threads[tid] == current)
break;
}
BUG_ON(tid == MAX_RT_TEST_THREADS);
td = &thread_data[tid];
op = td->opcode;
dat = td->opdata;
switch (op) {
case RTTEST_LOCK:
case RTTEST_LOCKINT:
case RTTEST_LOCKNOWAIT:
case RTTEST_LOCKINTNOWAIT:
if (mutex != &mutexes[dat])
break;
if (td->mutexes[dat] != 1)
break;
td->mutexes[dat] = 2;
td->event = atomic_add_return(1, &rttest_event);
break;
default:
break;
}
schedule();
switch (op) {
case RTTEST_LOCK:
case RTTEST_LOCKINT:
if (mutex != &mutexes[dat])
return;
if (td->mutexes[dat] != 2)
return;
td->mutexes[dat] = 3;
td->event = atomic_add_return(1, &rttest_event);
break;
case RTTEST_LOCKNOWAIT:
case RTTEST_LOCKINTNOWAIT:
if (mutex != &mutexes[dat])
return;
if (td->mutexes[dat] != 2)
return;
td->mutexes[dat] = 1;
td->event = atomic_add_return(1, &rttest_event);
return;
default:
return;
}
td->opcode = 0;
for (;;) {
set_current_state(TASK_INTERRUPTIBLE);
if (td->opcode > 0) {
int ret;
set_current_state(TASK_RUNNING);
ret = handle_op(td, 1);
set_current_state(TASK_INTERRUPTIBLE);
if (td->opcode == RTTEST_LOCKCONT)
break;
td->opcode = ret;
}
/* Wait for the next command to be executed */
schedule();
}
/* Restore previous command and data */
td->opcode = op;
td->opdata = dat;
}
static int test_func(void *data)
{
struct test_thread_data *td = data;
int ret;
current->flags |= PF_MUTEX_TESTER;
set_freezable();
allow_signal(SIGHUP);
for(;;) {
set_current_state(TASK_INTERRUPTIBLE);
if (td->opcode > 0) {
set_current_state(TASK_RUNNING);
ret = handle_op(td, 0);
set_current_state(TASK_INTERRUPTIBLE);
td->opcode = ret;
}
/* Wait for the next command to be executed */
schedule();
try_to_freeze();
if (signal_pending(current))
flush_signals(current);
if(kthread_should_stop())
break;
}
return 0;
}
/**
* sysfs_test_command - interface for test commands
* @dev: thread reference
* @buf: command for actual step
* @count: length of buffer
*
* command syntax:
*
* opcode:data
*/
static ssize_t sysfs_test_command(struct device *dev, struct device_attribute *attr,
const char *buf, size_t count)
{
struct sched_param schedpar;
struct test_thread_data *td;
char cmdbuf[32];
int op, dat, tid, ret;
td = container_of(dev, struct test_thread_data, dev);
tid = td->dev.id;
/* strings from sysfs write are not 0 terminated! */
if (count >= sizeof(cmdbuf))
return -EINVAL;
/* strip of \n: */
if (buf[count-1] == '\n')
count--;
if (count < 1)
return -EINVAL;
memcpy(cmdbuf, buf, count);
cmdbuf[count] = 0;
if (sscanf(cmdbuf, "%d:%d", &op, &dat) != 2)
return -EINVAL;
switch (op) {
case RTTEST_SCHEDOT:
schedpar.sched_priority = 0;
ret = sched_setscheduler(threads[tid], SCHED_NORMAL, &schedpar);
if (ret)
return ret;
set_user_nice(current, 0);
break;
case RTTEST_SCHEDRT:
schedpar.sched_priority = dat;
ret = sched_setscheduler(threads[tid], SCHED_FIFO, &schedpar);
if (ret)
return ret;
break;
case RTTEST_SIGNAL:
send_sig(SIGHUP, threads[tid], 0);
break;
default:
if (td->opcode > 0)
return -EBUSY;
td->opdata = dat;
td->opcode = op;
wake_up_process(threads[tid]);
}
return count;
}
/**
* sysfs_test_status - sysfs interface for rt tester
* @dev: thread to query
* @buf: char buffer to be filled with thread status info
*/
static ssize_t sysfs_test_status(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct test_thread_data *td;
struct task_struct *tsk;
char *curr = buf;
int i;
td = container_of(dev, struct test_thread_data, dev);
tsk = threads[td->dev.id];
spin_lock(&rttest_lock);
curr += sprintf(curr,
"O: %4d, E:%8d, S: 0x%08lx, P: %4d, N: %4d, B: %p, M:",
td->opcode, td->event, tsk->state,
(MAX_RT_PRIO - 1) - tsk->prio,
(MAX_RT_PRIO - 1) - tsk->normal_prio,
tsk->pi_blocked_on);
for (i = MAX_RT_TEST_MUTEXES - 1; i >=0 ; i--)
curr += sprintf(curr, "%d", td->mutexes[i]);
spin_unlock(&rttest_lock);
curr += sprintf(curr, ", T: %p, R: %p\n", tsk,
mutexes[td->dev.id].owner);
return curr - buf;
}
static DEVICE_ATTR(status, S_IRUSR, sysfs_test_status, NULL);
static DEVICE_ATTR(command, S_IWUSR, NULL, sysfs_test_command);
static struct bus_type rttest_subsys = {
.name = "rttest",
.dev_name = "rttest",
};
static int init_test_thread(int id)
{
thread_data[id].dev.bus = &rttest_subsys;
thread_data[id].dev.id = id;
threads[id] = kthread_run(test_func, &thread_data[id], "rt-test-%d", id);
if (IS_ERR(threads[id]))
return PTR_ERR(threads[id]);
return device_register(&thread_data[id].dev);
}
static int init_rttest(void)
{
int ret, i;
spin_lock_init(&rttest_lock);
for (i = 0; i < MAX_RT_TEST_MUTEXES; i++)
rt_mutex_init(&mutexes[i]);
ret = subsys_system_register(&rttest_subsys, NULL);
if (ret)
return ret;
for (i = 0; i < MAX_RT_TEST_THREADS; i++) {
ret = init_test_thread(i);
if (ret)
break;
ret = device_create_file(&thread_data[i].dev, &dev_attr_status);
if (ret)
break;
ret = device_create_file(&thread_data[i].dev, &dev_attr_command);
if (ret)
break;
}
printk("Initializing RT-Tester: %s\n", ret ? "Failed" : "OK" );
return ret;
}
device_initcall(init_rttest);
C:\Users\Admin\Desktop\linux-4.2.y-new\linux-4.2.y\kernel\/locking/rtmutex.c
/*
* RT-Mutexes: simple blocking mutual exclusion locks with PI support
*
* started by Ingo Molnar and Thomas Gleixner.
*
* Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
* Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
* Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
* Copyright (C) 2006 Esben Nielsen
*
* See Documentation/locking/rt-mutex-design.txt for details.
*/
#include <linux/spinlock.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/sched/rt.h>
#include <linux/sched/deadline.h>
#include <linux/timer.h>
#include "rtmutex_common.h"
/*
* lock->owner state tracking:
*
* lock->owner holds the task_struct pointer of the owner. Bit 0
* is used to keep track of the "lock has waiters" state.
*
* owner bit0
* NULL 0 lock is free (fast acquire possible)
* NULL 1 lock is free and has waiters and the top waiter
* is going to take the lock*
* taskpointer 0 lock is held (fast release possible)
* taskpointer 1 lock is held and has waiters**
*
* The fast atomic compare exchange based acquire and release is only
* possible when bit 0 of lock->owner is 0.
*
* (*) It also can be a transitional state when grabbing the lock
* with ->wait_lock is held. To prevent any fast path cmpxchg to the lock,
* we need to set the bit0 before looking at the lock, and the owner may be
* NULL in this small time, hence this can be a transitional state.
*
* (**) There is a small time when bit 0 is set but there are no
* waiters. This can happen when grabbing the lock in the slow path.
* To prevent a cmpxchg of the owner releasing the lock, we need to
* set this bit before looking at the lock.
*/
static void
rt_mutex_set_owner(struct rt_mutex *lock, struct task_struct *owner)
{
unsigned long val = (unsigned long)owner;
if (rt_mutex_has_waiters(lock))
val |= RT_MUTEX_HAS_WAITERS;
lock->owner = (struct task_struct *)val;
}
static inline void clear_rt_mutex_waiters(struct rt_mutex *lock)
{
lock->owner = (struct task_struct *)
((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS);
}
static void fixup_rt_mutex_waiters(struct rt_mutex *lock)
{
if (!rt_mutex_has_waiters(lock))
clear_rt_mutex_waiters(lock);
}
/*
* We can speed up the acquire/release, if there's no debugging state to be
* set up.
*/
#ifndef CONFIG_DEBUG_RT_MUTEXES
# define rt_mutex_cmpxchg(l,c,n) (cmpxchg(&l->owner, c, n) == c)
static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
{
unsigned long owner, *p = (unsigned long *) &lock->owner;
do {
owner = *p;
} while (cmpxchg(p, owner, owner | RT_MUTEX_HAS_WAITERS) != owner);
}
/*
* Safe fastpath aware unlock:
* 1) Clear the waiters bit
* 2) Drop lock->wait_lock
* 3) Try to unlock the lock with cmpxchg
*/
static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock)
__releases(lock->wait_lock)
{
struct task_struct *owner = rt_mutex_owner(lock);
clear_rt_mutex_waiters(lock);
raw_spin_unlock(&lock->wait_lock);
/*
* If a new waiter comes in between the unlock and the cmpxchg
* we have two situations:
*
* unlock(wait_lock);
* lock(wait_lock);
* cmpxchg(p, owner, 0) == owner
* mark_rt_mutex_waiters(lock);
* acquire(lock);
* or:
*
* unlock(wait_lock);
* lock(wait_lock);
* mark_rt_mutex_waiters(lock);
*
* cmpxchg(p, owner, 0) != owner
* enqueue_waiter();
* unlock(wait_lock);
* lock(wait_lock);
* wake waiter();
* unlock(wait_lock);
* lock(wait_lock);
* acquire(lock);
*/
return rt_mutex_cmpxchg(lock, owner, NULL);
}
#else
# define rt_mutex_cmpxchg(l,c,n) (0)
static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
{
lock->owner = (struct task_struct *)
((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS);
}
/*
* Simple slow path only version: lock->owner is protected by lock->wait_lock.
*/
static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock)
__releases(lock->wait_lock)
{
lock->owner = NULL;
raw_spin_unlock(&lock->wait_lock);
return true;
}
#endif
static inline int
rt_mutex_waiter_less(struct rt_mutex_waiter *left,
struct rt_mutex_waiter *right)
{
if (left->prio < right->prio)
return 1;
/*
* If both waiters have dl_prio(), we check the deadlines of the
* associated tasks.
* If left waiter has a dl_prio(), and we didn't return 1 above,
* then right waiter has a dl_prio() too.
*/
if (dl_prio(left->prio))
return (left->task->dl.deadline < right->task->dl.deadline);
return 0;
}
static void
rt_mutex_enqueue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter)
{
struct rb_node **link = &lock->waiters.rb_node;
struct rb_node *parent = NULL;
struct rt_mutex_waiter *entry;
int leftmost = 1;
while (*link) {
parent = *link;
entry = rb_entry(parent, struct rt_mutex_waiter, tree_entry);
if (rt_mutex_waiter_less(waiter, entry)) {
link = &parent->rb_left;
} else {
link = &parent->rb_right;
leftmost = 0;
}
}
if (leftmost)
lock->waiters_leftmost = &waiter->tree_entry;
rb_link_node(&waiter->tree_entry, parent, link);
rb_insert_color(&waiter->tree_entry, &lock->waiters);
}
static void
rt_mutex_dequeue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter)
{
if (RB_EMPTY_NODE(&waiter->tree_entry))
return;
if (lock->waiters_leftmost == &waiter->tree_entry)
lock->waiters_leftmost = rb_next(&waiter->tree_entry);
rb_erase(&waiter->tree_entry, &lock->waiters);
RB_CLEAR_NODE(&waiter->tree_entry);
}
static void
rt_mutex_enqueue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
{
struct rb_node **link = &task->pi_waiters.rb_node;
struct rb_node *parent = NULL;
struct rt_mutex_waiter *entry;
int leftmost = 1;
while (*link) {
parent = *link;
entry = rb_entry(parent, struct rt_mutex_waiter, pi_tree_entry);
if (rt_mutex_waiter_less(waiter, entry)) {
link = &parent->rb_left;
} else {
link = &parent->rb_right;
leftmost = 0;
}
}
if (leftmost)
task->pi_waiters_leftmost = &waiter->pi_tree_entry;
rb_link_node(&waiter->pi_tree_entry, parent, link);
rb_insert_color(&waiter->pi_tree_entry, &task->pi_waiters);
}
static void
rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
{
if (RB_EMPTY_NODE(&waiter->pi_tree_entry))
return;
if (task->pi_waiters_leftmost == &waiter->pi_tree_entry)
task->pi_waiters_leftmost = rb_next(&waiter->pi_tree_entry);
rb_erase(&waiter->pi_tree_entry, &task->pi_waiters);
RB_CLEAR_NODE(&waiter->pi_tree_entry);
}
/*
* Calculate task priority from the waiter tree priority
*
* Return task->normal_prio when the waiter tree is empty or when
* the waiter is not allowed to do priority boosting
*/
int rt_mutex_getprio(struct task_struct *task)
{
if (likely(!task_has_pi_waiters(task)))
return task->normal_prio;
return min(task_top_pi_waiter(task)->prio,
task->normal_prio);
}
struct task_struct *rt_mutex_get_top_task(struct task_struct *task)
{
if (likely(!task_has_pi_waiters(task)))
return NULL;
return task_top_pi_waiter(task)->task;
}
/*
* Called by sched_setscheduler() to get the priority which will be
* effective after the change.
*/
int rt_mutex_get_effective_prio(struct task_struct *task, int newprio)
{
if (!task_has_pi_waiters(task))
return newprio;
if (task_top_pi_waiter(task)->task->prio <= newprio)
return task_top_pi_waiter(task)->task->prio;
return newprio;
}
/*
* Adjust the priority of a task, after its pi_waiters got modified.
*
* This can be both boosting and unboosting. task->pi_lock must be held.
*/
static void __rt_mutex_adjust_prio(struct task_struct *task)
{
int prio = rt_mutex_getprio(task);
if (task->prio != prio || dl_prio(prio))
rt_mutex_setprio(task, prio);
}
/*
* Adjust task priority (undo boosting). Called from the exit path of
* rt_mutex_slowunlock() and rt_mutex_slowlock().
*
* (Note: We do this outside of the protection of lock->wait_lock to
* allow the lock to be taken while or before we readjust the priority
* of task. We do not use the spin_xx_mutex() variants here as we are
* outside of the debug path.)
*/
void rt_mutex_adjust_prio(struct task_struct *task)
{
unsigned long flags;
raw_spin_lock_irqsave(&task->pi_lock, flags);
__rt_mutex_adjust_prio(task);
raw_spin_unlock_irqrestore(&task->pi_lock, flags);
}
/*
* Deadlock detection is conditional:
*
* If CONFIG_DEBUG_RT_MUTEXES=n, deadlock detection is only conducted
* if the detect argument is == RT_MUTEX_FULL_CHAINWALK.
*
* If CONFIG_DEBUG_RT_MUTEXES=y, deadlock detection is always
* conducted independent of the detect argument.
*
* If the waiter argument is NULL this indicates the deboost path and
* deadlock detection is disabled independent of the detect argument
* and the config settings.
*/
static bool rt_mutex_cond_detect_deadlock(struct rt_mutex_waiter *waiter,
enum rtmutex_chainwalk chwalk)
{
/*
* This is just a wrapper function for the following call,
* because debug_rt_mutex_detect_deadlock() smells like a magic
* debug feature and I wanted to keep the cond function in the
* main source file along with the comments instead of having
* two of the same in the headers.
*/
return debug_rt_mutex_detect_deadlock(waiter, chwalk);
}
/*
* Max number of times we'll walk the boosting chain:
*/
int max_lock_depth = 1024;
static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p)
{
return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL;
}
/*
* Adjust the priority chain. Also used for deadlock detection.
* Decreases task's usage by one - may thus free the task.
*
* @task: the task owning the mutex (owner) for which a chain walk is
* probably needed
* @chwalk: do we have to carry out deadlock detection?
* @orig_lock: the mutex (can be NULL if we are walking the chain to recheck
* things for a task that has just got its priority adjusted, and
* is waiting on a mutex)
* @next_lock: the mutex on which the owner of @orig_lock was blocked before
* we dropped its pi_lock. Is never dereferenced, only used for
* comparison to detect lock chain changes.
* @orig_waiter: rt_mutex_waiter struct for the task that has just donated
* its priority to the mutex owner (can be NULL in the case
* depicted above or if the top waiter is gone away and we are
* actually deboosting the owner)
* @top_task: the current top waiter
*
* Returns 0 or -EDEADLK.
*
* Chain walk basics and protection scope
*
* [R] refcount on task
* [P] task->pi_lock held
* [L] rtmutex->wait_lock held
*
* Step Description Protected by
* function arguments:
* @task [R]
* @orig_lock if != NULL @top_task is blocked on it
* @next_lock Unprotected. Cannot be
* dereferenced. Only used for
* comparison.
* @orig_waiter if != NULL @top_task is blocked on it
* @top_task current, or in case of proxy
* locking protected by calling
* code
* again:
* loop_sanity_check();
* retry:
* [1] lock(task->pi_lock); [R] acquire [P]
* [2] waiter = task->pi_blocked_on; [P]
* [3] check_exit_conditions_1(); [P]
* [4] lock = waiter->lock; [P]
* [5] if (!try_lock(lock->wait_lock)) { [P] try to acquire [L]
* unlock(task->pi_lock); release [P]
* goto retry;
* }
* [6] check_exit_conditions_2(); [P] + [L]
* [7] requeue_lock_waiter(lock, waiter); [P] + [L]
* [8] unlock(task->pi_lock); release [P]
* put_task_struct(task); release [R]
* [9] check_exit_conditions_3(); [L]
* [10] task = owner(lock); [L]
* get_task_struct(task); [L] acquire [R]
* lock(task->pi_lock); [L] acquire [P]
* [11] requeue_pi_waiter(tsk, waiters(lock));[P] + [L]
* [12] check_exit_conditions_4(); [P] + [L]
* [13] unlock(task->pi_lock); release [P]
* unlock(lock->wait_lock); release [L]
* goto again;
*/
static int rt_mutex_adjust_prio_chain(struct task_struct *task,
enum rtmutex_chainwalk chwalk,
struct rt_mutex *orig_lock,
struct rt_mutex *next_lock,
struct rt_mutex_waiter *orig_waiter,
struct task_struct *top_task)
{
struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter;
struct rt_mutex_waiter *prerequeue_top_waiter;
int ret = 0, depth = 0;
struct rt_mutex *lock;
bool detect_deadlock;
unsigned long flags;
bool requeue = true;
detect_deadlock = rt_mutex_cond_detect_deadlock(orig_waiter, chwalk);
/*
* The (de)boosting is a step by step approach with a lot of
* pitfalls. We want this to be preemptible and we want hold a
* maximum of two locks per step. So we have to check
* carefully whether things change under us.
*/
again:
/*
* We limit the lock chain length for each invocation.
*/
if (++depth > max_lock_depth) {
static int prev_max;
/*
* Print this only once. If the admin changes the limit,
* print a new message when reaching the limit again.
*/
if (prev_max != max_lock_depth) {
prev_max = max_lock_depth;
printk(KERN_WARNING "Maximum lock depth %d reached "
"task: %s (%d)\n", max_lock_depth,
top_task->comm, task_pid_nr(top_task));
}
put_task_struct(task);
return -EDEADLK;
}
/*
* We are fully preemptible here and only hold the refcount on
* @task. So everything can have changed under us since the
* caller or our own code below (goto retry/again) dropped all
* locks.
*/
retry:
/*
* [1] Task cannot go away as we did a get_task() before !
*/
raw_spin_lock_irqsave(&task->pi_lock, flags);
/*
* [2] Get the waiter on which @task is blocked on.
*/
waiter = task->pi_blocked_on;
/*
* [3] check_exit_conditions_1() protected by task->pi_lock.
*/
/*
* Check whether the end of the boosting chain has been
* reached or the state of the chain has changed while we
* dropped the locks.
*/
if (!waiter)
goto out_unlock_pi;
/*
* Check the orig_waiter state. After we dropped the locks,
* the previous owner of the lock might have released the lock.
*/
if (orig_waiter && !rt_mutex_owner(orig_lock))
goto out_unlock_pi;
/*
* We dropped all locks after taking a refcount on @task, so
* the task might have moved on in the lock chain or even left
* the chain completely and blocks now on an unrelated lock or
* on @orig_lock.
*
* We stored the lock on which @task was blocked in @next_lock,
* so we can detect the chain change.
*/
if (next_lock != waiter->lock)
goto out_unlock_pi;
/*
* Drop out, when the task has no waiters. Note,
* top_waiter can be NULL, when we are in the deboosting
* mode!
*/
if (top_waiter) {
if (!task_has_pi_waiters(task))
goto out_unlock_pi;
/*
* If deadlock detection is off, we stop here if we
* are not the top pi waiter of the task. If deadlock
* detection is enabled we continue, but stop the
* requeueing in the chain walk.
*/
if (top_waiter != task_top_pi_waiter(task)) {
if (!detect_deadlock)
goto out_unlock_pi;
else
requeue = false;
}
}
/*
* If the waiter priority is the same as the task priority
* then there is no further priority adjustment necessary. If
* deadlock detection is off, we stop the chain walk. If its
* enabled we continue, but stop the requeueing in the chain
* walk.
*/
if (waiter->prio == task->prio) {
if (!detect_deadlock)
goto out_unlock_pi;
else
requeue = false;
}
/*
* [4] Get the next lock
*/
lock = waiter->lock;
/*
* [5] We need to trylock here as we are holding task->pi_lock,
* which is the reverse lock order versus the other rtmutex
* operations.
*/
if (!raw_spin_trylock(&lock->wait_lock)) {
raw_spin_unlock_irqrestore(&task->pi_lock, flags);
cpu_relax();
goto retry;
}
/*
* [6] check_exit_conditions_2() protected by task->pi_lock and
* lock->wait_lock.
*
* Deadlock detection. If the lock is the same as the original
* lock which caused us to walk the lock chain or if the
* current lock is owned by the task which initiated the chain
* walk, we detected a deadlock.
*/
if (lock == orig_lock || rt_mutex_owner(lock) == top_task) {
debug_rt_mutex_deadlock(chwalk, orig_waiter, lock);
raw_spin_unlock(&lock->wait_lock);
ret = -EDEADLK;
goto out_unlock_pi;
}
/*
* If we just follow the lock chain for deadlock detection, no
* need to do all the requeue operations. To avoid a truckload
* of conditionals around the various places below, just do the
* minimum chain walk checks.
*/
if (!requeue) {
/*
* No requeue[7] here. Just release @task [8]
*/
raw_spin_unlock_irqrestore(&task->pi_lock, flags);
put_task_struct(task);
/*
* [9] check_exit_conditions_3 protected by lock->wait_lock.
* If there is no owner of the lock, end of chain.
*/
if (!rt_mutex_owner(lock)) {
raw_spin_unlock(&lock->wait_lock);
return 0;
}
/* [10] Grab the next task, i.e. owner of @lock */
task = rt_mutex_owner(lock);
get_task_struct(task);
raw_spin_lock_irqsave(&task->pi_lock, flags);
/*
* No requeue [11] here. We just do deadlock detection.
*
* [12] Store whether owner is blocked
* itself. Decision is made after dropping the locks
*/
next_lock = task_blocked_on_lock(task);
/*
* Get the top waiter for the next iteration
*/
top_waiter = rt_mutex_top_waiter(lock);
/* [13] Drop locks */
raw_spin_unlock_irqrestore(&task->pi_lock, flags);
raw_spin_unlock(&lock->wait_lock);
/* If owner is not blocked, end of chain. */
if (!next_lock)
goto out_put_task;
goto again;
}
/*
* Store the current top waiter before doing the requeue
* operation on @lock. We need it for the boost/deboost
* decision below.
*/
prerequeue_top_waiter = rt_mutex_top_waiter(lock);
/* [7] Requeue the waiter in the lock waiter tree. */
rt_mutex_dequeue(lock, waiter);
waiter->prio = task->prio;
rt_mutex_enqueue(lock, waiter);
/* [8] Release the task */
raw_spin_unlock_irqrestore(&task->pi_lock, flags);
put_task_struct(task);
/*
* [9] check_exit_conditions_3 protected by lock->wait_lock.
*
* We must abort the chain walk if there is no lock owner even
* in the dead lock detection case, as we have nothing to
* follow here. This is the end of the chain we are walking.
*/
if (!rt_mutex_owner(lock)) {
/*
* If the requeue [7] above changed the top waiter,
* then we need to wake the new top waiter up to try
* to get the lock.
*/
if (prerequeue_top_waiter != rt_mutex_top_waiter(lock))
wake_up_process(rt_mutex_top_waiter(lock)->task);
raw_spin_unlock(&lock->wait_lock);
return 0;
}
/* [10] Grab the next task, i.e. the owner of @lock */
task = rt_mutex_owner(lock);
get_task_struct(task);
raw_spin_lock_irqsave(&task->pi_lock, flags);
/* [11] requeue the pi waiters if necessary */
if (waiter == rt_mutex_top_waiter(lock)) {
/*
* The waiter became the new top (highest priority)
* waiter on the lock. Replace the previous top waiter
* in the owner tasks pi waiters tree with this waiter
* and adjust the priority of the owner.
*/
rt_mutex_dequeue_pi(task, prerequeue_top_waiter);
rt_mutex_enqueue_pi(task, waiter);
__rt_mutex_adjust_prio(task);
} else if (prerequeue_top_waiter == waiter) {
/*
* The waiter was the top waiter on the lock, but is
* no longer the top prority waiter. Replace waiter in
* the owner tasks pi waiters tree with the new top
* (highest priority) waiter and adjust the priority
* of the owner.
* The new top waiter is stored in @waiter so that
* @waiter == @top_waiter evaluates to true below and
* we continue to deboost the rest of the chain.
*/
rt_mutex_dequeue_pi(task, waiter);
waiter = rt_mutex_top_waiter(lock);
rt_mutex_enqueue_pi(task, waiter);
__rt_mutex_adjust_prio(task);
} else {
/*
* Nothing changed. No need to do any priority
* adjustment.
*/
}
/*
* [12] check_exit_conditions_4() protected by task->pi_lock
* and lock->wait_lock. The actual decisions are made after we
* dropped the locks.
*
* Check whether the task which owns the current lock is pi
* blocked itself. If yes we store a pointer to the lock for
* the lock chain change detection above. After we dropped
* task->pi_lock next_lock cannot be dereferenced anymore.
*/
next_lock = task_blocked_on_lock(task);
/*
* Store the top waiter of @lock for the end of chain walk
* decision below.
*/
top_waiter = rt_mutex_top_waiter(lock);
/* [13] Drop the locks */
raw_spin_unlock_irqrestore(&task->pi_lock, flags);
raw_spin_unlock(&lock->wait_lock);
/*
* Make the actual exit decisions [12], based on the stored
* values.
*
* We reached the end of the lock chain. Stop right here. No
* point to go back just to figure that out.
*/
if (!next_lock)
goto out_put_task;
/*
* If the current waiter is not the top waiter on the lock,
* then we can stop the chain walk here if we are not in full
* deadlock detection mode.
*/
if (!detect_deadlock && waiter != top_waiter)
goto out_put_task;
goto again;
out_unlock_pi:
raw_spin_unlock_irqrestore(&task->pi_lock, flags);
out_put_task:
put_task_struct(task);
return ret;
}
/*
* Try to take an rt-mutex
*
* Must be called with lock->wait_lock held.
*
* @lock: The lock to be acquired.
* @task: The task which wants to acquire the lock
* @waiter: The waiter that is queued to the lock's wait tree if the
* callsite called task_blocked_on_lock(), otherwise NULL
*/
static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
struct rt_mutex_waiter *waiter)
{
unsigned long flags;
/*
* Before testing whether we can acquire @lock, we set the
* RT_MUTEX_HAS_WAITERS bit in @lock->owner. This forces all
* other tasks which try to modify @lock into the slow path
* and they serialize on @lock->wait_lock.
*
* The RT_MUTEX_HAS_WAITERS bit can have a transitional state
* as explained at the top of this file if and only if:
*
* - There is a lock owner. The caller must fixup the
* transient state if it does a trylock or leaves the lock
* function due to a signal or timeout.
*
* - @task acquires the lock and there are no other
* waiters. This is undone in rt_mutex_set_owner(@task) at
* the end of this function.
*/
mark_rt_mutex_waiters(lock);
/*
* If @lock has an owner, give up.
*/
if (rt_mutex_owner(lock))
return 0;
/*
* If @waiter != NULL, @task has already enqueued the waiter
* into @lock waiter tree. If @waiter == NULL then this is a
* trylock attempt.
*/
if (waiter) {
/*
* If waiter is not the highest priority waiter of
* @lock, give up.
*/
if (waiter != rt_mutex_top_waiter(lock))
return 0;
/*
* We can acquire the lock. Remove the waiter from the
* lock waiters tree.
*/
rt_mutex_dequeue(lock, waiter);
} else {
/*
* If the lock has waiters already we check whether @task is
* eligible to take over the lock.
*
* If there are no other waiters, @task can acquire
* the lock. @task->pi_blocked_on is NULL, so it does
* not need to be dequeued.
*/
if (rt_mutex_has_waiters(lock)) {
/*
* If @task->prio is greater than or equal to
* the top waiter priority (kernel view),
* @task lost.
*/
if (task->prio >= rt_mutex_top_waiter(lock)->prio)
return 0;
/*
* The current top waiter stays enqueued. We
* don't have to change anything in the lock
* waiters order.
*/
} else {
/*
* No waiters. Take the lock without the
* pi_lock dance.@task->pi_blocked_on is NULL
* and we have no waiters to enqueue in @task
* pi waiters tree.
*/
goto takeit;
}
}
/*
* Clear @task->pi_blocked_on. Requires protection by
* @task->pi_lock. Redundant operation for the @waiter == NULL
* case, but conditionals are more expensive than a redundant
* store.
*/
raw_spin_lock_irqsave(&task->pi_lock, flags);
task->pi_blocked_on = NULL;
/*
* Finish the lock acquisition. @task is the new owner. If
* other waiters exist we have to insert the highest priority
* waiter into @task->pi_waiters tree.
*/
if (rt_mutex_has_waiters(lock))
rt_mutex_enqueue_pi(task, rt_mutex_top_waiter(lock));
raw_spin_unlock_irqrestore(&task->pi_lock, flags);
takeit:
/* We got the lock. */
debug_rt_mutex_lock(lock);
/*
* This either preserves the RT_MUTEX_HAS_WAITERS bit if there
* are still waiters or clears it.
*/
rt_mutex_set_owner(lock, task);
rt_mutex_deadlock_account_lock(lock, task);
return 1;
}
/*
* Task blocks on lock.
*
* Prepare waiter and propagate pi chain
*
* This must be called with lock->wait_lock held.
*/
static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
struct rt_mutex_waiter *waiter,
struct task_struct *task,
enum rtmutex_chainwalk chwalk)
{
struct task_struct *owner = rt_mutex_owner(lock);
struct rt_mutex_waiter *top_waiter = waiter;
struct rt_mutex *next_lock;
int chain_walk = 0, res;
unsigned long flags;
/*
* Early deadlock detection. We really don't want the task to
* enqueue on itself just to untangle the mess later. It's not
* only an optimization. We drop the locks, so another waiter
* can come in before the chain walk detects the deadlock. So
* the other will detect the deadlock and return -EDEADLOCK,
* which is wrong, as the other waiter is not in a deadlock
* situation.
*/
if (owner == task)
return -EDEADLK;
raw_spin_lock_irqsave(&task->pi_lock, flags);
__rt_mutex_adjust_prio(task);
waiter->task = task;
waiter->lock = lock;
waiter->prio = task->prio;
/* Get the top priority waiter on the lock */
if (rt_mutex_has_waiters(lock))
top_waiter = rt_mutex_top_waiter(lock);
rt_mutex_enqueue(lock, waiter);
task->pi_blocked_on = waiter;
raw_spin_unlock_irqrestore(&task->pi_lock, flags);
if (!owner)
return 0;
raw_spin_lock_irqsave(&owner->pi_lock, flags);
if (waiter == rt_mutex_top_waiter(lock)) {
rt_mutex_dequeue_pi(owner, top_waiter);
rt_mutex_enqueue_pi(owner, waiter);
__rt_mutex_adjust_prio(owner);
if (owner->pi_blocked_on)
chain_walk = 1;
} else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) {
chain_walk = 1;
}
/* Store the lock on which owner is blocked or NULL */
next_lock = task_blocked_on_lock(owner);
raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
/*
* Even if full deadlock detection is on, if the owner is not
* blocked itself, we can avoid finding this out in the chain
* walk.
*/
if (!chain_walk || !next_lock)
return 0;
/*
* The owner can't disappear while holding a lock,
* so the owner struct is protected by wait_lock.
* Gets dropped in rt_mutex_adjust_prio_chain()!
*/
get_task_struct(owner);
raw_spin_unlock(&lock->wait_lock);
res = rt_mutex_adjust_prio_chain(owner, chwalk, lock,
next_lock, waiter, task);
raw_spin_lock(&lock->wait_lock);
return res;
}
/*
* Remove the top waiter from the current tasks pi waiter tree and
* queue it up.
*
* Called with lock->wait_lock held.
*/
static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
struct rt_mutex *lock)
{
struct rt_mutex_waiter *waiter;
unsigned long flags;
raw_spin_lock_irqsave(¤t->pi_lock, flags);
waiter = rt_mutex_top_waiter(lock);
/*
* Remove it from current->pi_waiters. We do not adjust a
* possible priority boost right now. We execute wakeup in the
* boosted mode and go back to normal after releasing
* lock->wait_lock.
*/
rt_mutex_dequeue_pi(current, waiter);
/*
* As we are waking up the top waiter, and the waiter stays
* queued on the lock until it gets the lock, this lock
* obviously has waiters. Just set the bit here and this has
* the added benefit of forcing all new tasks into the
* slow path making sure no task of lower priority than
* the top waiter can steal this lock.
*/
lock->owner = (void *) RT_MUTEX_HAS_WAITERS;
raw_spin_unlock_irqrestore(¤t->pi_lock, flags);
wake_q_add(wake_q, waiter->task);
}
/*
* Remove a waiter from a lock and give up
*
* Must be called with lock->wait_lock held and
* have just failed to try_to_take_rt_mutex().
*/
static void remove_waiter(struct rt_mutex *lock,
struct rt_mutex_waiter *waiter)
{
bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock));
struct task_struct *owner = rt_mutex_owner(lock);
struct rt_mutex *next_lock;
unsigned long flags;
raw_spin_lock_irqsave(¤t->pi_lock, flags);
rt_mutex_dequeue(lock, waiter);
current->pi_blocked_on = NULL;
raw_spin_unlock_irqrestore(¤t->pi_lock, flags);
/*
* Only update priority if the waiter was the highest priority
* waiter of the lock and there is an owner to update.
*/
if (!owner || !is_top_waiter)
return;
raw_spin_lock_irqsave(&owner->pi_lock, flags);
rt_mutex_dequeue_pi(owner, waiter);
if (rt_mutex_has_waiters(lock))
rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock));
__rt_mutex_adjust_prio(owner);
/* Store the lock on which owner is blocked or NULL */
next_lock = task_blocked_on_lock(owner);
raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
/*
* Don't walk the chain, if the owner task is not blocked
* itself.
*/
if (!next_lock)
return;
/* gets dropped in rt_mutex_adjust_prio_chain()! */
get_task_struct(owner);
raw_spin_unlock(&lock->wait_lock);
rt_mutex_adjust_prio_chain(owner, RT_MUTEX_MIN_CHAINWALK, lock,
next_lock, NULL, current);
raw_spin_lock(&lock->wait_lock);
}
/*
* Recheck the pi chain, in case we got a priority setting
*
* Called from sched_setscheduler
*/
void rt_mutex_adjust_pi(struct task_struct *task)
{
struct rt_mutex_waiter *waiter;
struct rt_mutex *next_lock;
unsigned long flags;
raw_spin_lock_irqsave(&task->pi_lock, flags);
waiter = task->pi_blocked_on;
if (!waiter || (waiter->prio == task->prio &&
!dl_prio(task->prio))) {
raw_spin_unlock_irqrestore(&task->pi_lock, flags);
return;
}
next_lock = waiter->lock;
raw_spin_unlock_irqrestore(&task->pi_lock, flags);
/* gets dropped in rt_mutex_adjust_prio_chain()! */
get_task_struct(task);
rt_mutex_adjust_prio_chain(task, RT_MUTEX_MIN_CHAINWALK, NULL,
next_lock, NULL, task);
}
/**
* __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop
* @lock: the rt_mutex to take
* @state: the state the task should block in (TASK_INTERRUPTIBLE
* or TASK_UNINTERRUPTIBLE)
* @timeout: the pre-initialized and started timer, or NULL for none
* @waiter: the pre-initialized rt_mutex_waiter
*
* lock->wait_lock must be held by the caller.
*/
static int __sched
__rt_mutex_slowlock(struct rt_mutex *lock, int state,
struct hrtimer_sleeper *timeout,
struct rt_mutex_waiter *waiter)
{
int ret = 0;
for (;;) {
/* Try to acquire the lock: */
if (try_to_take_rt_mutex(lock, current, waiter))
break;
/*
* TASK_INTERRUPTIBLE checks for signals and
* timeout. Ignored otherwise.
*/
if (unlikely(state == TASK_INTERRUPTIBLE)) {
/* Signal pending? */
if (signal_pending(current))
ret = -EINTR;
if (timeout && !timeout->task)
ret = -ETIMEDOUT;
if (ret)
break;
}
raw_spin_unlock(&lock->wait_lock);
debug_rt_mutex_print_deadlock(waiter);
schedule_rt_mutex(lock);
raw_spin_lock(&lock->wait_lock);
set_current_state(state);
}
__set_current_state(TASK_RUNNING);
return ret;
}
static void rt_mutex_handle_deadlock(int res, int detect_deadlock,
struct rt_mutex_waiter *w)
{
/*
* If the result is not -EDEADLOCK or the caller requested
* deadlock detection, nothing to do here.
*/
if (res != -EDEADLOCK || detect_deadlock)
return;
/*
* Yell lowdly and stop the task right here.
*/
rt_mutex_print_deadlock(w);
while (1) {
set_current_state(TASK_INTERRUPTIBLE);
schedule();
}
}
/*
* Slow path lock function:
*/
static int __sched
rt_mutex_slowlock(struct rt_mutex *lock, int state,
struct hrtimer_sleeper *timeout,
enum rtmutex_chainwalk chwalk)
{
struct rt_mutex_waiter waiter;
int ret = 0;
debug_rt_mutex_init_waiter(&waiter);
RB_CLEAR_NODE(&waiter.pi_tree_entry);
RB_CLEAR_NODE(&waiter.tree_entry);
raw_spin_lock(&lock->wait_lock);
/* Try to acquire the lock again: */
if (try_to_take_rt_mutex(lock, current, NULL)) {
raw_spin_unlock(&lock->wait_lock);
return 0;
}
set_current_state(state);
/* Setup the timer, when timeout != NULL */
if (unlikely(timeout))
hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);
ret = task_blocks_on_rt_mutex(lock, &waiter, current, chwalk);
if (likely(!ret))
/* sleep on the mutex */
ret = __rt_mutex_slowlock(lock, state, timeout, &waiter);
if (unlikely(ret)) {
__set_current_state(TASK_RUNNING);
if (rt_mutex_has_waiters(lock))
remove_waiter(lock, &waiter);
rt_mutex_handle_deadlock(ret, chwalk, &waiter);
}
/*
* try_to_take_rt_mutex() sets the waiter bit
* unconditionally. We might have to fix that up.
*/
fixup_rt_mutex_waiters(lock);
raw_spin_unlock(&lock->wait_lock);
/* Remove pending timer: */
if (unlikely(timeout))
hrtimer_cancel(&timeout->timer);
debug_rt_mutex_free_waiter(&waiter);
return ret;
}
/*
* Slow path try-lock function:
*/
static inline int rt_mutex_slowtrylock(struct rt_mutex *lock)
{
int ret;
/*
* If the lock already has an owner we fail to get the lock.
* This can be done without taking the @lock->wait_lock as
* it is only being read, and this is a trylock anyway.
*/
if (rt_mutex_owner(lock))
return 0;
/*
* The mutex has currently no owner. Lock the wait lock and
* try to acquire the lock.
*/
raw_spin_lock(&lock->wait_lock);
ret = try_to_take_rt_mutex(lock, current, NULL);
/*
* try_to_take_rt_mutex() sets the lock waiters bit
* unconditionally. Clean this up.
*/
fixup_rt_mutex_waiters(lock);
raw_spin_unlock(&lock->wait_lock);
return ret;
}
/*
* Slow path to release a rt-mutex.
* Return whether the current task needs to undo a potential priority boosting.
*/
static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
struct wake_q_head *wake_q)
{
raw_spin_lock(&lock->wait_lock);
debug_rt_mutex_unlock(lock);
rt_mutex_deadlock_account_unlock(current);
/*
* We must be careful here if the fast path is enabled. If we
* have no waiters queued we cannot set owner to NULL here
* because of:
*
* foo->lock->owner = NULL;
* rtmutex_lock(foo->lock); <- fast path
* free = atomic_dec_and_test(foo->refcnt);
* rtmutex_unlock(foo->lock); <- fast path
* if (free)
* kfree(foo);
* raw_spin_unlock(foo->lock->wait_lock);
*
* So for the fastpath enabled kernel:
*
* Nothing can set the waiters bit as long as we hold
* lock->wait_lock. So we do the following sequence:
*
* owner = rt_mutex_owner(lock);
* clear_rt_mutex_waiters(lock);
* raw_spin_unlock(&lock->wait_lock);
* if (cmpxchg(&lock->owner, owner, 0) == owner)
* return;
* goto retry;
*
* The fastpath disabled variant is simple as all access to
* lock->owner is serialized by lock->wait_lock:
*
* lock->owner = NULL;
* raw_spin_unlock(&lock->wait_lock);
*/
while (!rt_mutex_has_waiters(lock)) {
/* Drops lock->wait_lock ! */
if (unlock_rt_mutex_safe(lock) == true)
return false;
/* Relock the rtmutex and try again */
raw_spin_lock(&lock->wait_lock);
}
/*
* The wakeup next waiter path does not suffer from the above
* race. See the comments there.
*
* Queue the next waiter for wakeup once we release the wait_lock.
*/
mark_wakeup_next_waiter(wake_q, lock);
raw_spin_unlock(&lock->wait_lock);
/* check PI boosting */
return true;
}
/*
* debug aware fast / slowpath lock,trylock,unlock
*
* The atomic acquire/release ops are compiled away, when either the
* architecture does not support cmpxchg or when debugging is enabled.
*/
static inline int
rt_mutex_fastlock(struct rt_mutex *lock, int state,
int (*slowfn)(struct rt_mutex *lock, int state,
struct hrtimer_sleeper *timeout,
enum rtmutex_chainwalk chwalk))
{
if (likely(rt_mutex_cmpxchg(lock, NULL, current))) {
rt_mutex_deadlock_account_lock(lock, current);
return 0;
} else
return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK);
}
static inline int
rt_mutex_timed_fastlock(struct rt_mutex *lock, int state,
struct hrtimer_sleeper *timeout,
enum rtmutex_chainwalk chwalk,
int (*slowfn)(struct rt_mutex *lock, int state,
struct hrtimer_sleeper *timeout,
enum rtmutex_chainwalk chwalk))
{
if (chwalk == RT_MUTEX_MIN_CHAINWALK &&
likely(rt_mutex_cmpxchg(lock, NULL, current))) {
rt_mutex_deadlock_account_lock(lock, current);
return 0;
} else
return slowfn(lock, state, timeout, chwalk);
}
static inline int
rt_mutex_fasttrylock(struct rt_mutex *lock,
int (*slowfn)(struct rt_mutex *lock))
{
if (likely(rt_mutex_cmpxchg(lock, NULL, current))) {
rt_mutex_deadlock_account_lock(lock, current);
return 1;
}
return slowfn(lock);
}
static inline void
rt_mutex_fastunlock(struct rt_mutex *lock,
bool (*slowfn)(struct rt_mutex *lock,
struct wake_q_head *wqh))
{
WAKE_Q(wake_q);
if (likely(rt_mutex_cmpxchg(lock, current, NULL))) {
rt_mutex_deadlock_account_unlock(current);
} else {
bool deboost = slowfn(lock, &wake_q);
wake_up_q(&wake_q);
/* Undo pi boosting if necessary: */
if (deboost)
rt_mutex_adjust_prio(current);
}
}
/**
* rt_mutex_lock - lock a rt_mutex
*
* @lock: the rt_mutex to be locked
*/
void __sched rt_mutex_lock(struct rt_mutex *lock)
{
might_sleep();
rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, rt_mutex_slowlock);
}
EXPORT_SYMBOL_GPL(rt_mutex_lock);
/**
* rt_mutex_lock_interruptible - lock a rt_mutex interruptible
*
* @lock: the rt_mutex to be locked
*
* Returns:
* 0 on success
* -EINTR when interrupted by a signal
*/
int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock)
{
might_sleep();
return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, rt_mutex_slowlock);
}
EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible);
/*
* Futex variant with full deadlock detection.
*/
int rt_mutex_timed_futex_lock(struct rt_mutex *lock,
struct hrtimer_sleeper *timeout)
{
might_sleep();
return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout,
RT_MUTEX_FULL_CHAINWALK,
rt_mutex_slowlock);
}
/**
* rt_mutex_timed_lock - lock a rt_mutex interruptible
* the timeout structure is provided
* by the caller
*
* @lock: the rt_mutex to be locked
* @timeout: timeout structure or NULL (no timeout)
*
* Returns:
* 0 on success
* -EINTR when interrupted by a signal
* -ETIMEDOUT when the timeout expired
*/
int
rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout)
{
might_sleep();
return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout,
RT_MUTEX_MIN_CHAINWALK,
rt_mutex_slowlock);
}
EXPORT_SYMBOL_GPL(rt_mutex_timed_lock);
/**
* rt_mutex_trylock - try to lock a rt_mutex
*
* @lock: the rt_mutex to be locked
*
* This function can only be called in thread context. It's safe to
* call it from atomic regions, but not from hard interrupt or soft
* interrupt context.
*
* Returns 1 on success and 0 on contention
*/
int __sched rt_mutex_trylock(struct rt_mutex *lock)
{
if (WARN_ON(in_irq() || in_nmi() || in_serving_softirq()))
return 0;
return rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock);
}
EXPORT_SYMBOL_GPL(rt_mutex_trylock);
/**
* rt_mutex_unlock - unlock a rt_mutex
*
* @lock: the rt_mutex to be unlocked
*/
void __sched rt_mutex_unlock(struct rt_mutex *lock)
{
rt_mutex_fastunlock(lock, rt_mutex_slowunlock);
}
EXPORT_SYMBOL_GPL(rt_mutex_unlock);
/**
* rt_mutex_futex_unlock - Futex variant of rt_mutex_unlock
* @lock: the rt_mutex to be unlocked
*
* Returns: true/false indicating whether priority adjustment is
* required or not.
*/
bool __sched rt_mutex_futex_unlock(struct rt_mutex *lock,
struct wake_q_head *wqh)
{
if (likely(rt_mutex_cmpxchg(lock, current, NULL))) {
rt_mutex_deadlock_account_unlock(current);
return false;
}
return rt_mutex_slowunlock(lock, wqh);
}
/**
* rt_mutex_destroy - mark a mutex unusable
* @lock: the mutex to be destroyed
*
* This function marks the mutex uninitialized, and any subsequent
* use of the mutex is forbidden. The mutex must not be locked when
* this function is called.
*/
void rt_mutex_destroy(struct rt_mutex *lock)
{
WARN_ON(rt_mutex_is_locked(lock));
#ifdef CONFIG_DEBUG_RT_MUTEXES
lock->magic = NULL;
#endif
}
EXPORT_SYMBOL_GPL(rt_mutex_destroy);
/**
* __rt_mutex_init - initialize the rt lock
*
* @lock: the rt lock to be initialized
*
* Initialize the rt lock to unlocked state.
*
* Initializing of a locked rt lock is not allowed
*/
void __rt_mutex_init(struct rt_mutex *lock, const char *name)
{
lock->owner = NULL;
raw_spin_lock_init(&lock->wait_lock);
lock->waiters = RB_ROOT;
lock->waiters_leftmost = NULL;
debug_rt_mutex_init(lock, name);
}
EXPORT_SYMBOL_GPL(__rt_mutex_init);
/**
* rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a
* proxy owner
*
* @lock: the rt_mutex to be locked
* @proxy_owner:the task to set as owner
*
* No locking. Caller has to do serializing itself
* Special API call for PI-futex support
*/
void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
struct task_struct *proxy_owner)
{
__rt_mutex_init(lock, NULL);
debug_rt_mutex_proxy_lock(lock, proxy_owner);
rt_mutex_set_owner(lock, proxy_owner);
rt_mutex_deadlock_account_lock(lock, proxy_owner);
}
/**
* rt_mutex_proxy_unlock - release a lock on behalf of owner
*
* @lock: the rt_mutex to be locked
*
* No locking. Caller has to do serializing itself
* Special API call for PI-futex support
*/
void rt_mutex_proxy_unlock(struct rt_mutex *lock,
struct task_struct *proxy_owner)
{
debug_rt_mutex_proxy_unlock(lock);
rt_mutex_set_owner(lock, NULL);
rt_mutex_deadlock_account_unlock(proxy_owner);
}
/**
* rt_mutex_start_proxy_lock() - Start lock acquisition for another task
* @lock: the rt_mutex to take
* @waiter: the pre-initialized rt_mutex_waiter
* @task: the task to prepare
*
* Returns:
* 0 - task blocked on lock
* 1 - acquired the lock for task, caller should wake it up
* <0 - error
*
* Special API call for FUTEX_REQUEUE_PI support.
*/
int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
struct rt_mutex_waiter *waiter,
struct task_struct *task)
{
int ret;
raw_spin_lock(&lock->wait_lock);
if (try_to_take_rt_mutex(lock, task, NULL)) {
raw_spin_unlock(&lock->wait_lock);
return 1;
}
/* We enforce deadlock detection for futexes */
ret = task_blocks_on_rt_mutex(lock, waiter, task,
RT_MUTEX_FULL_CHAINWALK);
if (ret && !rt_mutex_owner(lock)) {
/*
* Reset the return value. We might have
* returned with -EDEADLK and the owner
* released the lock while we were walking the
* pi chain. Let the waiter sort it out.
*/
ret = 0;
}
if (unlikely(ret))
remove_waiter(lock, waiter);
raw_spin_unlock(&lock->wait_lock);
debug_rt_mutex_print_deadlock(waiter);
return ret;
}
/**
* rt_mutex_next_owner - return the next owner of the lock
*
* @lock: the rt lock query
*
* Returns the next owner of the lock or NULL
*
* Caller has to serialize against other accessors to the lock
* itself.
*
* Special API call for PI-futex support
*/
struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock)
{
if (!rt_mutex_has_waiters(lock))
return NULL;
return rt_mutex_top_waiter(lock)->task;
}
/**
* rt_mutex_finish_proxy_lock() - Complete lock acquisition
* @lock: the rt_mutex we were woken on
* @to: the timeout, null if none. hrtimer should already have
* been started.
* @waiter: the pre-initialized rt_mutex_waiter
*
* Complete the lock acquisition started our behalf by another thread.
*
* Returns:
* 0 - success
* <0 - error, one of -EINTR, -ETIMEDOUT
*
* Special API call for PI-futex requeue support
*/
int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
struct hrtimer_sleeper *to,
struct rt_mutex_waiter *waiter)
{
int ret;
raw_spin_lock(&lock->wait_lock);
set_current_state(TASK_INTERRUPTIBLE);
/* sleep on the mutex */
ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter);
if (unlikely(ret))
remove_waiter(lock, waiter);
/*
* try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
* have to fix that up.
*/
fixup_rt_mutex_waiters(lock);
raw_spin_unlock(&lock->wait_lock);
return ret;
}
C:\Users\Admin\Desktop\linux-4.2.y-new\linux-4.2.y\kernel\/locking/rtmutex.h
/*
* RT-Mutexes: blocking mutual exclusion locks with PI support
*
* started by Ingo Molnar and Thomas Gleixner:
*
* Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
* Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com>
*
* This file contains macros used solely by rtmutex.c.
* Non-debug version.
*/
#define rt_mutex_deadlock_check(l) (0)
#define rt_mutex_deadlock_account_lock(m, t) do { } while (0)
#define rt_mutex_deadlock_account_unlock(l) do { } while (0)
#define debug_rt_mutex_init_waiter(w) do { } while (0)
#define debug_rt_mutex_free_waiter(w) do { } while (0)
#define debug_rt_mutex_lock(l) do { } while (0)
#define debug_rt_mutex_proxy_lock(l,p) do { } while (0)
#define debug_rt_mutex_proxy_unlock(l) do { } while (0)
#define debug_rt_mutex_unlock(l) do { } while (0)
#define debug_rt_mutex_init(m, n) do { } while (0)
#define debug_rt_mutex_deadlock(d, a ,l) do { } while (0)
#define debug_rt_mutex_print_deadlock(w) do { } while (0)
#define debug_rt_mutex_reset_waiter(w) do { } while (0)
static inline void rt_mutex_print_deadlock(struct rt_mutex_waiter *w)
{
WARN(1, "rtmutex deadlock detected\n");
}
static inline bool debug_rt_mutex_detect_deadlock(struct rt_mutex_waiter *w,
enum rtmutex_chainwalk walk)
{
return walk == RT_MUTEX_FULL_CHAINWALK;
}
C:\Users\Admin\Desktop\linux-4.2.y-new\linux-4.2.y\kernel\/locking/rtmutex_common.h
/*
* RT Mutexes: blocking mutual exclusion locks with PI support
*
* started by Ingo Molnar and Thomas Gleixner:
*
* Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
* Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com>
*
* This file contains the private data structure and API definitions.
*/
#ifndef __KERNEL_RTMUTEX_COMMON_H
#define __KERNEL_RTMUTEX_COMMON_H
#include <linux/rtmutex.h>
/*
* The rtmutex in kernel tester is independent of rtmutex debugging. We
* call schedule_rt_mutex_test() instead of schedule() for the tasks which
* belong to the tester. That way we can delay the wakeup path of those
* threads to provoke lock stealing and testing of complex boosting scenarios.
*/
#ifdef CONFIG_RT_MUTEX_TESTER
extern void schedule_rt_mutex_test(struct rt_mutex *lock);
#define schedule_rt_mutex(_lock) \
do { \
if (!(current->flags & PF_MUTEX_TESTER)) \
schedule(); \
else \
schedule_rt_mutex_test(_lock); \
} while (0)
#else
# define schedule_rt_mutex(_lock) schedule()
#endif
/*
* This is the control structure for tasks blocked on a rt_mutex,
* which is allocated on the kernel stack on of the blocked task.
*
* @tree_entry: pi node to enqueue into the mutex waiters tree
* @pi_tree_entry: pi node to enqueue into the mutex owner waiters tree
* @task: task reference to the blocked task
*/
struct rt_mutex_waiter {
struct rb_node tree_entry;
struct rb_node pi_tree_entry;
struct task_struct *task;
struct rt_mutex *lock;
#ifdef CONFIG_DEBUG_RT_MUTEXES
unsigned long ip;
struct pid *deadlock_task_pid;
struct rt_mutex *deadlock_lock;
#endif
int prio;
};
/*
* Various helpers to access the waiters-tree:
*/
static inline int rt_mutex_has_waiters(struct rt_mutex *lock)
{
return !RB_EMPTY_ROOT(&lock->waiters);
}
static inline struct rt_mutex_waiter *
rt_mutex_top_waiter(struct rt_mutex *lock)
{
struct rt_mutex_waiter *w;
w = rb_entry(lock->waiters_leftmost, struct rt_mutex_waiter,
tree_entry);
BUG_ON(w->lock != lock);
return w;
}
static inline int task_has_pi_waiters(struct task_struct *p)
{
return !RB_EMPTY_ROOT(&p->pi_waiters);
}
static inline struct rt_mutex_waiter *
task_top_pi_waiter(struct task_struct *p)
{
return rb_entry(p->pi_waiters_leftmost, struct rt_mutex_waiter,
pi_tree_entry);
}
/*
* lock->owner state tracking:
*/
#define RT_MUTEX_HAS_WAITERS 1UL
#define RT_MUTEX_OWNER_MASKALL 1UL
static inline struct task_struct *rt_mutex_owner(struct rt_mutex *lock)
{
return (struct task_struct *)
((unsigned long)lock->owner & ~RT_MUTEX_OWNER_MASKALL);
}
/*
* Constants for rt mutex functions which have a selectable deadlock
* detection.
*
* RT_MUTEX_MIN_CHAINWALK: Stops the lock chain walk when there are
* no further PI adjustments to be made.
*
* RT_MUTEX_FULL_CHAINWALK: Invoke deadlock detection with a full
* walk of the lock chain.
*/
enum rtmutex_chainwalk {
RT_MUTEX_MIN_CHAINWALK,
RT_MUTEX_FULL_CHAINWALK,
};
/*
* PI-futex support (proxy locking functions, etc.):
*/
extern struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock);
extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
struct task_struct *proxy_owner);
extern void rt_mutex_proxy_unlock(struct rt_mutex *lock,
struct task_struct *proxy_owner);
extern int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
struct rt_mutex_waiter *waiter,
struct task_struct *task);
extern int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
struct hrtimer_sleeper *to,
struct rt_mutex_waiter *waiter);
extern int rt_mutex_timed_futex_lock(struct rt_mutex *l, struct hrtimer_sleeper *to);
extern bool rt_mutex_futex_unlock(struct rt_mutex *lock,
struct wake_q_head *wqh);
extern void rt_mutex_adjust_prio(struct task_struct *task);
#ifdef CONFIG_DEBUG_RT_MUTEXES
# include "rtmutex-debug.h"
#else
# include "rtmutex.h"
#endif
#endif
C:\Users\Admin\Desktop\linux-4.2.y-new\linux-4.2.y\kernel\/locking/rwsem-spinlock.c
/* rwsem-spinlock.c: R/W semaphores: contention handling functions for
* generic spinlock implementation
*
* Copyright (c) 2001 David Howells (dhowells@redhat.com).
* - Derived partially from idea by Andrea Arcangeli <andrea@suse.de>
* - Derived also from comments by Linus
*/
#include <linux/rwsem.h>
#include <linux/sched.h>
#include <linux/export.h>
enum rwsem_waiter_type {
RWSEM_WAITING_FOR_WRITE,
RWSEM_WAITING_FOR_READ
};
struct rwsem_waiter {
struct list_head list;
struct task_struct *task;
enum rwsem_waiter_type type;
};
int rwsem_is_locked(struct rw_semaphore *sem)
{
int ret = 1;
unsigned long flags;
if (raw_spin_trylock_irqsave(&sem->wait_lock, flags)) {
ret = (sem->count != 0);
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
}
return ret;
}
EXPORT_SYMBOL(rwsem_is_locked);
/*
* initialise the semaphore
*/
void __init_rwsem(struct rw_semaphore *sem, const char *name,
struct lock_class_key *key)
{
#ifdef CONFIG_DEBUG_LOCK_ALLOC
/*
* Make sure we are not reinitializing a held semaphore:
*/
debug_check_no_locks_freed((void *)sem, sizeof(*sem));
lockdep_init_map(&sem->dep_map, name, key, 0);
#endif
sem->count = 0;
raw_spin_lock_init(&sem->wait_lock);
INIT_LIST_HEAD(&sem->wait_list);
}
EXPORT_SYMBOL(__init_rwsem);
/*
* handle the lock release when processes blocked on it that can now run
* - if we come here, then:
* - the 'active count' _reached_ zero
* - the 'waiting count' is non-zero
* - the spinlock must be held by the caller
* - woken process blocks are discarded from the list after having task zeroed
* - writers are only woken if wakewrite is non-zero
*/
static inline struct rw_semaphore *
__rwsem_do_wake(struct rw_semaphore *sem, int wakewrite)
{
struct rwsem_waiter *waiter;
struct task_struct *tsk;
int woken;
waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
if (waiter->type == RWSEM_WAITING_FOR_WRITE) {
if (wakewrite)
/* Wake up a writer. Note that we do not grant it the
* lock - it will have to acquire it when it runs. */
wake_up_process(waiter->task);
goto out;
}
/* grant an infinite number of read locks to the front of the queue */
woken = 0;
do {
struct list_head *next = waiter->list.next;
list_del(&waiter->list);
tsk = waiter->task;
/*
* Make sure we do not wakeup the next reader before
* setting the nil condition to grant the next reader;
* otherwise we could miss the wakeup on the other
* side and end up sleeping again. See the pairing
* in rwsem_down_read_failed().
*/
smp_mb();
waiter->task = NULL;
wake_up_process(tsk);
put_task_struct(tsk);
woken++;
if (next == &sem->wait_list)
break;
waiter = list_entry(next, struct rwsem_waiter, list);
} while (waiter->type != RWSEM_WAITING_FOR_WRITE);
sem->count += woken;
out:
return sem;
}
/*
* wake a single writer
*/
static inline struct rw_semaphore *
__rwsem_wake_one_writer(struct rw_semaphore *sem)
{
struct rwsem_waiter *waiter;
waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
wake_up_process(waiter->task);
return sem;
}
/*
* get a read lock on the semaphore
*/
void __sched __down_read(struct rw_semaphore *sem)
{
struct rwsem_waiter waiter;
struct task_struct *tsk;
unsigned long flags;
raw_spin_lock_irqsave(&sem->wait_lock, flags);
if (sem->count >= 0 && list_empty(&sem->wait_list)) {
/* granted */
sem->count++;
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
goto out;
}
tsk = current;
set_task_state(tsk, TASK_UNINTERRUPTIBLE);
/* set up my own style of waitqueue */
waiter.task = tsk;
waiter.type = RWSEM_WAITING_FOR_READ;
get_task_struct(tsk);
list_add_tail(&waiter.list, &sem->wait_list);
/* we don't need to touch the semaphore struct anymore */
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
/* wait to be given the lock */
for (;;) {
if (!waiter.task)
break;
schedule();
set_task_state(tsk, TASK_UNINTERRUPTIBLE);
}
__set_task_state(tsk, TASK_RUNNING);
out:
;
}
/*
* trylock for reading -- returns 1 if successful, 0 if contention
*/
int __down_read_trylock(struct rw_semaphore *sem)
{
unsigned long flags;
int ret = 0;
raw_spin_lock_irqsave(&sem->wait_lock, flags);
if (sem->count >= 0 && list_empty(&sem->wait_list)) {
/* granted */
sem->count++;
ret = 1;
}
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
return ret;
}
/*
* get a write lock on the semaphore
*/
void __sched __down_write_nested(struct rw_semaphore *sem, int subclass)
{
struct rwsem_waiter waiter;
struct task_struct *tsk;
unsigned long flags;
raw_spin_lock_irqsave(&sem->wait_lock, flags);
/* set up my own style of waitqueue */
tsk = current;
waiter.task = tsk;
waiter.type = RWSEM_WAITING_FOR_WRITE;
list_add_tail(&waiter.list, &sem->wait_list);
/* wait for someone to release the lock */
for (;;) {
/*
* That is the key to support write lock stealing: allows the
* task already on CPU to get the lock soon rather than put
* itself into sleep and waiting for system woke it or someone
* else in the head of the wait list up.
*/
if (sem->count == 0)
break;
set_task_state(tsk, TASK_UNINTERRUPTIBLE);
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
schedule();
raw_spin_lock_irqsave(&sem->wait_lock, flags);
}
/* got the lock */
sem->count = -1;
list_del(&waiter.list);
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
}
void __sched __down_write(struct rw_semaphore *sem)
{
__down_write_nested(sem, 0);
}
/*
* trylock for writing -- returns 1 if successful, 0 if contention
*/
int __down_write_trylock(struct rw_semaphore *sem)
{
unsigned long flags;
int ret = 0;
raw_spin_lock_irqsave(&sem->wait_lock, flags);
if (sem->count == 0) {
/* got the lock */
sem->count = -1;
ret = 1;
}
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
return ret;
}
/*
* release a read lock on the semaphore
*/
void __up_read(struct rw_semaphore *sem)
{
unsigned long flags;
raw_spin_lock_irqsave(&sem->wait_lock, flags);
if (--sem->count == 0 && !list_empty(&sem->wait_list))
sem = __rwsem_wake_one_writer(sem);
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
}
/*
* release a write lock on the semaphore
*/
void __up_write(struct rw_semaphore *sem)
{
unsigned long flags;
raw_spin_lock_irqsave(&sem->wait_lock, flags);
sem->count = 0;
if (!list_empty(&sem->wait_list))
sem = __rwsem_do_wake(sem, 1);
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
}
/*
* downgrade a write lock into a read lock
* - just wake up any readers at the front of the queue
*/
void __downgrade_write(struct rw_semaphore *sem)
{
unsigned long flags;
raw_spin_lock_irqsave(&sem->wait_lock, flags);
sem->count = 1;
if (!list_empty(&sem->wait_list))
sem = __rwsem_do_wake(sem, 0);
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
}
C:\Users\Admin\Desktop\linux-4.2.y-new\linux-4.2.y\kernel\/locking/rwsem-xadd.c
/* rwsem.c: R/W semaphores: contention handling functions
*
* Written by David Howells (dhowells@redhat.com).
* Derived from arch/i386/kernel/semaphore.c
*
* Writer lock-stealing by Alex Shi <alex.shi@intel.com>
* and Michel Lespinasse <walken@google.com>
*
* Optimistic spinning by Tim Chen <tim.c.chen@intel.com>
* and Davidlohr Bueso <davidlohr@hp.com>. Based on mutexes.
*/
#include <linux/rwsem.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/export.h>
#include <linux/sched/rt.h>
#include <linux/osq_lock.h>
#include "rwsem.h"
/*
* Guide to the rw_semaphore's count field for common values.
* (32-bit case illustrated, similar for 64-bit)
*
* 0x0000000X (1) X readers active or attempting lock, no writer waiting
* X = #active_readers + #readers attempting to lock
* (X*ACTIVE_BIAS)
*
* 0x00000000 rwsem is unlocked, and no one is waiting for the lock or
* attempting to read lock or write lock.
*
* 0xffff000X (1) X readers active or attempting lock, with waiters for lock
* X = #active readers + # readers attempting lock
* (X*ACTIVE_BIAS + WAITING_BIAS)
* (2) 1 writer attempting lock, no waiters for lock
* X-1 = #active readers + #readers attempting lock
* ((X-1)*ACTIVE_BIAS + ACTIVE_WRITE_BIAS)
* (3) 1 writer active, no waiters for lock
* X-1 = #active readers + #readers attempting lock
* ((X-1)*ACTIVE_BIAS + ACTIVE_WRITE_BIAS)
*
* 0xffff0001 (1) 1 reader active or attempting lock, waiters for lock
* (WAITING_BIAS + ACTIVE_BIAS)
* (2) 1 writer active or attempting lock, no waiters for lock
* (ACTIVE_WRITE_BIAS)
*
* 0xffff0000 (1) There are writers or readers queued but none active
* or in the process of attempting lock.
* (WAITING_BIAS)
* Note: writer can attempt to steal lock for this count by adding
* ACTIVE_WRITE_BIAS in cmpxchg and checking the old count
*
* 0xfffe0001 (1) 1 writer active, or attempting lock. Waiters on queue.
* (ACTIVE_WRITE_BIAS + WAITING_BIAS)
*
* Note: Readers attempt to lock by adding ACTIVE_BIAS in down_read and checking
* the count becomes more than 0 for successful lock acquisition,
* i.e. the case where there are only readers or nobody has lock.
* (1st and 2nd case above).
*
* Writers attempt to lock by adding ACTIVE_WRITE_BIAS in down_write and
* checking the count becomes ACTIVE_WRITE_BIAS for successful lock
* acquisition (i.e. nobody else has lock or attempts lock). If
* unsuccessful, in rwsem_down_write_failed, we'll check to see if there
* are only waiters but none active (5th case above), and attempt to
* steal the lock.
*
*/
/*
* Initialize an rwsem:
*/
void __init_rwsem(struct rw_semaphore *sem, const char *name,
struct lock_class_key *key)
{
#ifdef CONFIG_DEBUG_LOCK_ALLOC
/*
* Make sure we are not reinitializing a held semaphore:
*/
debug_check_no_locks_freed((void *)sem, sizeof(*sem));
lockdep_init_map(&sem->dep_map, name, key, 0);
#endif
sem->count = RWSEM_UNLOCKED_VALUE;
raw_spin_lock_init(&sem->wait_lock);
INIT_LIST_HEAD(&sem->wait_list);
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
sem->owner = NULL;
osq_lock_init(&sem->osq);
#endif
}
EXPORT_SYMBOL(__init_rwsem);
enum rwsem_waiter_type {
RWSEM_WAITING_FOR_WRITE,
RWSEM_WAITING_FOR_READ
};
struct rwsem_waiter {
struct list_head list;
struct task_struct *task;
enum rwsem_waiter_type type;
};
enum rwsem_wake_type {
RWSEM_WAKE_ANY, /* Wake whatever's at head of wait list */
RWSEM_WAKE_READERS, /* Wake readers only */
RWSEM_WAKE_READ_OWNED /* Waker thread holds the read lock */
};
/*
* handle the lock release when processes blocked on it that can now run
* - if we come here from up_xxxx(), then:
* - the 'active part' of count (&0x0000ffff) reached 0 (but may have changed)
* - the 'waiting part' of count (&0xffff0000) is -ve (and will still be so)
* - there must be someone on the queue
* - the spinlock must be held by the caller
* - woken process blocks are discarded from the list after having task zeroed
* - writers are only woken if downgrading is false
*/
static struct rw_semaphore *
__rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type)
{
struct rwsem_waiter *waiter;
struct task_struct *tsk;
struct list_head *next;
long oldcount, woken, loop, adjustment;
waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
if (waiter->type == RWSEM_WAITING_FOR_WRITE) {
if (wake_type == RWSEM_WAKE_ANY)
/* Wake writer at the front of the queue, but do not
* grant it the lock yet as we want other writers
* to be able to steal it. Readers, on the other hand,
* will block as they will notice the queued writer.
*/
wake_up_process(waiter->task);
goto out;
}
/* Writers might steal the lock before we grant it to the next reader.
* We prefer to do the first reader grant before counting readers
* so we can bail out early if a writer stole the lock.
*/
adjustment = 0;
if (wake_type != RWSEM_WAKE_READ_OWNED) {
adjustment = RWSEM_ACTIVE_READ_BIAS;
try_reader_grant:
oldcount = rwsem_atomic_update(adjustment, sem) - adjustment;
if (unlikely(oldcount < RWSEM_WAITING_BIAS)) {
/* A writer stole the lock. Undo our reader grant. */
if (rwsem_atomic_update(-adjustment, sem) &
RWSEM_ACTIVE_MASK)
goto out;
/* Last active locker left. Retry waking readers. */
goto try_reader_grant;
}
}
/* Grant an infinite number of read locks to the readers at the front
* of the queue. Note we increment the 'active part' of the count by
* the number of readers before waking any processes up.
*/
woken = 0;
do {
woken++;
if (waiter->list.next == &sem->wait_list)
break;
waiter = list_entry(waiter->list.next,
struct rwsem_waiter, list);
} while (waiter->type != RWSEM_WAITING_FOR_WRITE);
adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment;
if (waiter->type != RWSEM_WAITING_FOR_WRITE)
/* hit end of list above */
adjustment -= RWSEM_WAITING_BIAS;
if (adjustment)
rwsem_atomic_add(adjustment, sem);
next = sem->wait_list.next;
loop = woken;
do {
waiter = list_entry(next, struct rwsem_waiter, list);
next = waiter->list.next;
tsk = waiter->task;
/*
* Make sure we do not wakeup the next reader before
* setting the nil condition to grant the next reader;
* otherwise we could miss the wakeup on the other
* side and end up sleeping again. See the pairing
* in rwsem_down_read_failed().
*/
smp_mb();
waiter->task = NULL;
wake_up_process(tsk);
put_task_struct(tsk);
} while (--loop);
sem->wait_list.next = next;
next->prev = &sem->wait_list;
out:
return sem;
}
/*
* Wait for the read lock to be granted
*/
__visible
struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
{
long count, adjustment = -RWSEM_ACTIVE_READ_BIAS;
struct rwsem_waiter waiter;
struct task_struct *tsk = current;
/* set up my own style of waitqueue */
waiter.task = tsk;
waiter.type = RWSEM_WAITING_FOR_READ;
get_task_struct(tsk);
raw_spin_lock_irq(&sem->wait_lock);
if (list_empty(&sem->wait_list))
adjustment += RWSEM_WAITING_BIAS;
list_add_tail(&waiter.list, &sem->wait_list);
/* we're now waiting on the lock, but no longer actively locking */
count = rwsem_atomic_update(adjustment, sem);
/* If there are no active locks, wake the front queued process(es).
*
* If there are no writers and we are first in the queue,
* wake our own waiter to join the existing active readers !
*/
if (count == RWSEM_WAITING_BIAS ||
(count > RWSEM_WAITING_BIAS &&
adjustment != -RWSEM_ACTIVE_READ_BIAS))
sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY);
raw_spin_unlock_irq(&sem->wait_lock);
/* wait to be given the lock */
while (true) {
set_task_state(tsk, TASK_UNINTERRUPTIBLE);
if (!waiter.task)
break;
schedule();
}
__set_task_state(tsk, TASK_RUNNING);
return sem;
}
EXPORT_SYMBOL(rwsem_down_read_failed);
static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem)
{
/*
* Try acquiring the write lock. Check count first in order
* to reduce unnecessary expensive cmpxchg() operations.
*/
if (count == RWSEM_WAITING_BIAS &&
cmpxchg(&sem->count, RWSEM_WAITING_BIAS,
RWSEM_ACTIVE_WRITE_BIAS) == RWSEM_WAITING_BIAS) {
if (!list_is_singular(&sem->wait_list))
rwsem_atomic_update(RWSEM_WAITING_BIAS, sem);
rwsem_set_owner(sem);
return true;
}
return false;
}
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
/*
* Try to acquire write lock before the writer has been put on wait queue.
*/
static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
{
long old, count = READ_ONCE(sem->count);
while (true) {
if (!(count == 0 || count == RWSEM_WAITING_BIAS))
return false;
old = cmpxchg(&sem->count, count, count + RWSEM_ACTIVE_WRITE_BIAS);
if (old == count) {
rwsem_set_owner(sem);
return true;
}
count = old;
}
}
static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
{
struct task_struct *owner;
bool ret = true;
if (need_resched())
return false;
rcu_read_lock();
owner = READ_ONCE(sem->owner);
if (!owner) {
long count = READ_ONCE(sem->count);
/*
* If sem->owner is not set, yet we have just recently entered the
* slowpath with the lock being active, then there is a possibility
* reader(s) may have the lock. To be safe, bail spinning in these
* situations.
*/
if (count & RWSEM_ACTIVE_MASK)
ret = false;
goto done;
}
ret = owner->on_cpu;
done:
rcu_read_unlock();
return ret;
}
static noinline
bool rwsem_spin_on_owner(struct rw_semaphore *sem, struct task_struct *owner)
{
long count;
rcu_read_lock();
while (sem->owner == owner) {
/*
* Ensure we emit the owner->on_cpu, dereference _after_
* checking sem->owner still matches owner, if that fails,
* owner might point to free()d memory, if it still matches,
* the rcu_read_lock() ensures the memory stays valid.
*/
barrier();
/* abort spinning when need_resched or owner is not running */
if (!owner->on_cpu || need_resched()) {
rcu_read_unlock();
return false;
}
cpu_relax_lowlatency();
}
rcu_read_unlock();
if (READ_ONCE(sem->owner))
return true; /* new owner, continue spinning */
/*
* When the owner is not set, the lock could be free or
* held by readers. Check the counter to verify the
* state.
*/
count = READ_ONCE(sem->count);
return (count == 0 || count == RWSEM_WAITING_BIAS);
}
static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
{
struct task_struct *owner;
bool taken = false;
preempt_disable();
/* sem->wait_lock should not be held when doing optimistic spinning */
if (!rwsem_can_spin_on_owner(sem))
goto done;
if (!osq_lock(&sem->osq))
goto done;
while (true) {
owner = READ_ONCE(sem->owner);
if (owner && !rwsem_spin_on_owner(sem, owner))
break;
/* wait_lock will be acquired if write_lock is obtained */
if (rwsem_try_write_lock_unqueued(sem)) {
taken = true;
break;
}
/*
* When there's no owner, we might have preempted between the
* owner acquiring the lock and setting the owner field. If
* we're an RT task that will live-lock because we won't let
* the owner complete.
*/
if (!owner && (need_resched() || rt_task(current)))
break;
/*
* The cpu_relax() call is a compiler barrier which forces
* everything in this loop to be re-loaded. We don't need
* memory barriers as we'll eventually observe the right
* values at the cost of a few extra spins.
*/
cpu_relax_lowlatency();
}
osq_unlock(&sem->osq);
done:
preempt_enable();
return taken;
}
/*
* Return true if the rwsem has active spinner
*/
static inline bool rwsem_has_spinner(struct rw_semaphore *sem)
{
return osq_is_locked(&sem->osq);
}
#else
static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
{
return false;
}
static inline bool rwsem_has_spinner(struct rw_semaphore *sem)
{
return false;
}
#endif
/*
* Wait until we successfully acquire the write lock
*/
__visible
struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem)
{
long count;
bool waiting = true; /* any queued threads before us */
struct rwsem_waiter waiter;
/* undo write bias from down_write operation, stop active locking */
count = rwsem_atomic_update(-RWSEM_ACTIVE_WRITE_BIAS, sem);
/* do optimistic spinning and steal lock if possible */
if (rwsem_optimistic_spin(sem))
return sem;
/*
* Optimistic spinning failed, proceed to the slowpath
* and block until we can acquire the sem.
*/
waiter.task = current;
waiter.type = RWSEM_WAITING_FOR_WRITE;
raw_spin_lock_irq(&sem->wait_lock);
/* account for this before adding a new element to the list */
if (list_empty(&sem->wait_list))
waiting = false;
list_add_tail(&waiter.list, &sem->wait_list);
/* we're now waiting on the lock, but no longer actively locking */
if (waiting) {
count = READ_ONCE(sem->count);
/*
* If there were already threads queued before us and there are
* no active writers, the lock must be read owned; so we try to
* wake any read locks that were queued ahead of us.
*/
if (count > RWSEM_WAITING_BIAS)
sem = __rwsem_do_wake(sem, RWSEM_WAKE_READERS);
} else
count = rwsem_atomic_update(RWSEM_WAITING_BIAS, sem);
/* wait until we successfully acquire the lock */
set_current_state(TASK_UNINTERRUPTIBLE);
while (true) {
if (rwsem_try_write_lock(count, sem))
break;
raw_spin_unlock_irq(&sem->wait_lock);
/* Block until there are no active lockers. */
do {
schedule();
set_current_state(TASK_UNINTERRUPTIBLE);
} while ((count = sem->count) & RWSEM_ACTIVE_MASK);
raw_spin_lock_irq(&sem->wait_lock);
}
__set_current_state(TASK_RUNNING);
list_del(&waiter.list);
raw_spin_unlock_irq(&sem->wait_lock);
return sem;
}
EXPORT_SYMBOL(rwsem_down_write_failed);
/*
* handle waking up a waiter on the semaphore
* - up_read/up_write has decremented the active part of count if we come here
*/
__visible
struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
{
unsigned long flags;
/*
* If a spinner is present, it is not necessary to do the wakeup.
* Try to do wakeup only if the trylock succeeds to minimize
* spinlock contention which may introduce too much delay in the
* unlock operation.
*
* spinning writer up_write/up_read caller
* --------------- -----------------------
* [S] osq_unlock() [L] osq
* MB RMB
* [RmW] rwsem_try_write_lock() [RmW] spin_trylock(wait_lock)
*
* Here, it is important to make sure that there won't be a missed
* wakeup while the rwsem is free and the only spinning writer goes
* to sleep without taking the rwsem. Even when the spinning writer
* is just going to break out of the waiting loop, it will still do
* a trylock in rwsem_down_write_failed() before sleeping. IOW, if
* rwsem_has_spinner() is true, it will guarantee at least one
* trylock attempt on the rwsem later on.
*/
if (rwsem_has_spinner(sem)) {
/*
* The smp_rmb() here is to make sure that the spinner
* state is consulted before reading the wait_lock.
*/
smp_rmb();
if (!raw_spin_trylock_irqsave(&sem->wait_lock, flags))
return sem;
goto locked;
}
raw_spin_lock_irqsave(&sem->wait_lock, flags);
locked:
/* do nothing if list empty */
if (!list_empty(&sem->wait_list))
sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY);
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
return sem;
}
EXPORT_SYMBOL(rwsem_wake);
/*
* downgrade a write lock into a read lock
* - caller incremented waiting part of count and discovered it still negative
* - just wake up any readers at the front of the queue
*/
__visible
struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
{
unsigned long flags;
raw_spin_lock_irqsave(&sem->wait_lock, flags);
/* do nothing if list empty */
if (!list_empty(&sem->wait_list))
sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED);
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
return sem;
}
EXPORT_SYMBOL(rwsem_downgrade_wake);
C:\Users\Admin\Desktop\linux-4.2.y-new\linux-4.2.y\kernel\/locking/rwsem.c
/* kernel/rwsem.c: R/W semaphores, public implementation
*
* Written by David Howells (dhowells@redhat.com).
* Derived from asm-i386/semaphore.h
*/
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/export.h>
#include <linux/rwsem.h>
#include <linux/atomic.h>
#include "rwsem.h"
/*
* lock for reading
*/
void __sched down_read(struct rw_semaphore *sem)
{
might_sleep();
rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_);
LOCK_CONTENDED(sem, __down_read_trylock, __down_read);
}
EXPORT_SYMBOL(down_read);
/*
* trylock for reading -- returns 1 if successful, 0 if contention
*/
int down_read_trylock(struct rw_semaphore *sem)
{
int ret = __down_read_trylock(sem);
if (ret == 1)
rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_);
return ret;
}
EXPORT_SYMBOL(down_read_trylock);
/*
* lock for writing
*/
void __sched down_write(struct rw_semaphore *sem)
{
might_sleep();
rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
rwsem_set_owner(sem);
}
EXPORT_SYMBOL(down_write);
/*
* trylock for writing -- returns 1 if successful, 0 if contention
*/
int down_write_trylock(struct rw_semaphore *sem)
{
int ret = __down_write_trylock(sem);
if (ret == 1) {
rwsem_acquire(&sem->dep_map, 0, 1, _RET_IP_);
rwsem_set_owner(sem);
}
return ret;
}
EXPORT_SYMBOL(down_write_trylock);
/*
* release a read lock
*/
void up_read(struct rw_semaphore *sem)
{
rwsem_release(&sem->dep_map, 1, _RET_IP_);
__up_read(sem);
}
EXPORT_SYMBOL(up_read);
/*
* release a write lock
*/
void up_write(struct rw_semaphore *sem)
{
rwsem_release(&sem->dep_map, 1, _RET_IP_);
rwsem_clear_owner(sem);
__up_write(sem);
}
EXPORT_SYMBOL(up_write);
/*
* downgrade write lock to read lock
*/
void downgrade_write(struct rw_semaphore *sem)
{
/*
* lockdep: a downgraded write will live on as a write
* dependency.
*/
rwsem_clear_owner(sem);
__downgrade_write(sem);
}
EXPORT_SYMBOL(downgrade_write);
#ifdef CONFIG_DEBUG_LOCK_ALLOC
void down_read_nested(struct rw_semaphore *sem, int subclass)
{
might_sleep();
rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_);
LOCK_CONTENDED(sem, __down_read_trylock, __down_read);
}
EXPORT_SYMBOL(down_read_nested);
void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest)
{
might_sleep();
rwsem_acquire_nest(&sem->dep_map, 0, 0, nest, _RET_IP_);
LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
rwsem_set_owner(sem);
}
EXPORT_SYMBOL(_down_write_nest_lock);
void down_read_non_owner(struct rw_semaphore *sem)
{
might_sleep();
__down_read(sem);
}
EXPORT_SYMBOL(down_read_non_owner);
void down_write_nested(struct rw_semaphore *sem, int subclass)
{
might_sleep();
rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_);
LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
rwsem_set_owner(sem);
}
EXPORT_SYMBOL(down_write_nested);
void up_read_non_owner(struct rw_semaphore *sem)
{
__up_read(sem);
}
EXPORT_SYMBOL(up_read_non_owner);
#endif
C:\Users\Admin\Desktop\linux-4.2.y-new\linux-4.2.y\kernel\/locking/rwsem.h
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
static inline void rwsem_set_owner(struct rw_semaphore *sem)
{
sem->owner = current;
}
static inline void rwsem_clear_owner(struct rw_semaphore *sem)
{
sem->owner = NULL;
}
#else
static inline void rwsem_set_owner(struct rw_semaphore *sem)
{
}
static inline void rwsem_clear_owner(struct rw_semaphore *sem)
{
}
#endif
C:\Users\Admin\Desktop\linux-4.2.y-new\linux-4.2.y\kernel\/locking/semaphore.c
/*
* Copyright (c) 2008 Intel Corporation
* Author: Matthew Wilcox <willy@linux.intel.com>
*
* Distributed under the terms of the GNU GPL, version 2
*
* This file implements counting semaphores.
* A counting semaphore may be acquired 'n' times before sleeping.
* See mutex.c for single-acquisition sleeping locks which enforce
* rules which allow code to be debugged more easily.
*/
/*
* Some notes on the implementation:
*
* The spinlock controls access to the other members of the semaphore.
* down_trylock() and up() can be called from interrupt context, so we
* have to disable interrupts when taking the lock. It turns out various
* parts of the kernel expect to be able to use down() on a semaphore in
* interrupt context when they know it will succeed, so we have to use
* irqsave variants for down(), down_interruptible() and down_killable()
* too.
*
* The ->count variable represents how many more tasks can acquire this
* semaphore. If it's zero, there may be tasks waiting on the wait_list.
*/
#include <linux/compiler.h>
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/semaphore.h>
#include <linux/spinlock.h>
#include <linux/ftrace.h>
static noinline void __down(struct semaphore *sem);
static noinline int __down_interruptible(struct semaphore *sem);
static noinline int __down_killable(struct semaphore *sem);
static noinline int __down_timeout(struct semaphore *sem, long timeout);
static noinline void __up(struct semaphore *sem);
/**
* down - acquire the semaphore
* @sem: the semaphore to be acquired
*
* Acquires the semaphore. If no more tasks are allowed to acquire the
* semaphore, calling this function will put the task to sleep until the
* semaphore is released.
*
* Use of this function is deprecated, please use down_interruptible() or
* down_killable() instead.
*/
void down(struct semaphore *sem)
{
unsigned long flags;
raw_spin_lock_irqsave(&sem->lock, flags);
if (likely(sem->count > 0))
sem->count--;
else
__down(sem);
raw_spin_unlock_irqrestore(&sem->lock, flags);
}
EXPORT_SYMBOL(down);
/**
* down_interruptible - acquire the semaphore unless interrupted
* @sem: the semaphore to be acquired
*
* Attempts to acquire the semaphore. If no more tasks are allowed to
* acquire the semaphore, calling this function will put the task to sleep.
* If the sleep is interrupted by a signal, this function will return -EINTR.
* If the semaphore is successfully acquired, this function returns 0.
*/
int down_interruptible(struct semaphore *sem)
{
unsigned long flags;
int result = 0;
raw_spin_lock_irqsave(&sem->lock, flags);
if (likely(sem->count > 0))
sem->count--;
else
result = __down_interruptible(sem);
raw_spin_unlock_irqrestore(&sem->lock, flags);
return result;
}
EXPORT_SYMBOL(down_interruptible);
/**
* down_killable - acquire the semaphore unless killed
* @sem: the semaphore to be acquired
*
* Attempts to acquire the semaphore. If no more tasks are allowed to
* acquire the semaphore, calling this function will put the task to sleep.
* If the sleep is interrupted by a fatal signal, this function will return
* -EINTR. If the semaphore is successfully acquired, this function returns
* 0.
*/
int down_killable(struct semaphore *sem)
{
unsigned long flags;
int result = 0;
raw_spin_lock_irqsave(&sem->lock, flags);
if (likely(sem->count > 0))
sem->count--;
else
result = __down_killable(sem);
raw_spin_unlock_irqrestore(&sem->lock, flags);
return result;
}
EXPORT_SYMBOL(down_killable);
kkkkkk16
最新推荐文章于 2025-07-31 01:30:49 发布