Commit 0f2c3c2b authored by Ingo Molnar, committed by Thomas Gleixner

net: preempt-rt support

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
parent 586a6377
@@ -1674,14 +1674,14 @@ static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu)
 static inline void __netif_tx_lock_bh(struct netdev_queue *txq)
 {
 	spin_lock_bh(&txq->_xmit_lock);
-	txq->xmit_lock_owner = smp_processor_id();
+	txq->xmit_lock_owner = raw_smp_processor_id();
 }
 
 static inline int __netif_tx_trylock(struct netdev_queue *txq)
 {
 	int ok = spin_trylock(&txq->_xmit_lock);
 	if (likely(ok))
-		txq->xmit_lock_owner = smp_processor_id();
+		txq->xmit_lock_owner = raw_smp_processor_id();
 	return ok;
 }
@@ -1715,7 +1715,7 @@ static inline void netif_tx_lock(struct net_device *dev)
 	int cpu;
 
 	spin_lock(&dev->tx_global_lock);
-	cpu = smp_processor_id();
+	cpu = raw_smp_processor_id();
 	for (i = 0; i < dev->num_tx_queues; i++) {
 		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
@@ -1779,7 +1779,7 @@ static inline void netif_tx_disable(struct net_device *dev)
 	int cpu;
 
 	local_bh_disable();
-	cpu = smp_processor_id();
+	cpu = raw_smp_processor_id();
 	for (i = 0; i < dev->num_tx_queues; i++) {
 		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
...
@@ -1888,9 +1888,16 @@ gso:
 	   Either shot noqueue qdisc, it is even simpler 8)
 	 */
 	if (dev->flags & IFF_UP) {
-		int cpu = smp_processor_id(); /* ok because BHs are off */
+		int cpu = raw_smp_processor_id(); /* ok because BHs are off */
 
+		/*
+		 * No need to check for recursion with threaded interrupts:
+		 */
+#ifdef CONFIG_PREEMPT_RT
+		if (1) {
+#else
 		if (txq->xmit_lock_owner != cpu) {
+#endif
 
 			HARD_TX_LOCK(dev, txq, cpu);
@@ -2008,7 +2015,8 @@ EXPORT_SYMBOL(netif_rx_ni);
 static void net_tx_action(struct softirq_action *h)
 {
-	struct softnet_data *sd = &__get_cpu_var(softnet_data);
+	struct softnet_data *sd = &per_cpu(softnet_data,
+					   raw_smp_processor_id());
 
 	if (sd->completion_queue) {
 		struct sk_buff *clist;
@@ -2024,6 +2032,11 @@ static void net_tx_action(struct softirq_action *h)
 			WARN_ON(atomic_read(&skb->users));
 			__kfree_skb(skb);
+			/*
+			 * Safe to reschedule - the list is private
+			 * at this point.
+			 */
+			cond_resched_softirq_context();
 		}
 	}
@@ -2042,6 +2055,22 @@ static void net_tx_action(struct softirq_action *h)
 			head = head->next_sched;
 
 			root_lock = qdisc_lock(q);
+			/*
+			 * We are executing in softirq context here, and
+			 * if softirqs are preemptible, we must avoid
+			 * infinite reactivation of the softirq by
+			 * either the tx handler, or by netif_schedule().
+			 * (it would result in an infinitely looping
+			 *  softirq context)
+			 * So we take the spinlock unconditionally.
+			 */
+#ifdef CONFIG_PREEMPT_SOFTIRQS
+			spin_lock(root_lock);
+			smp_mb__before_clear_bit();
+			clear_bit(__QDISC_STATE_SCHED, &q->state);
+			qdisc_run(q);
+			spin_unlock(root_lock);
+#else
 			if (spin_trylock(root_lock)) {
 				smp_mb__before_clear_bit();
 				clear_bit(__QDISC_STATE_SCHED,
@@ -2058,6 +2087,7 @@ static void net_tx_action(struct softirq_action *h)
 						  &q->state);
 				}
 			}
+#endif
 		}
 	}
 }
@@ -2270,7 +2300,7 @@ int netif_receive_skb(struct sk_buff *skb)
 			skb->dev = orig_dev->master;
 	}
 
-	__get_cpu_var(netdev_rx_stat).total++;
+	per_cpu(netdev_rx_stat, raw_smp_processor_id()).total++;
 	skb_reset_network_header(skb);
 	skb_reset_transport_header(skb);
@@ -2660,9 +2690,10 @@ EXPORT_SYMBOL(napi_gro_frags);
 static int process_backlog(struct napi_struct *napi, int quota)
 {
 	int work = 0;
-	struct softnet_data *queue = &__get_cpu_var(softnet_data);
+	struct softnet_data *queue;
 	unsigned long start_time = jiffies;
 
+	queue = &per_cpu(softnet_data, raw_smp_processor_id());
 	napi->weight = weight_p;
 	do {
 		struct sk_buff *skb;
@@ -2694,7 +2725,7 @@ void __napi_schedule(struct napi_struct *n)
 	local_irq_save(flags);
 	list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list);
-	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
+	raise_softirq_irqoff(NET_RX_SOFTIRQ);
 	local_irq_restore(flags);
 }
 EXPORT_SYMBOL(__napi_schedule);
...
@@ -69,20 +69,20 @@ static void queue_process(struct work_struct *work)
 		txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
 
-		local_irq_save(flags);
+		local_irq_save_nort(flags);
 		__netif_tx_lock(txq, smp_processor_id());
 		if (netif_tx_queue_stopped(txq) ||
 		    netif_tx_queue_frozen(txq) ||
 		    ops->ndo_start_xmit(skb, dev) != NETDEV_TX_OK) {
 			skb_queue_head(&npinfo->txq, skb);
 			__netif_tx_unlock(txq);
-			local_irq_restore(flags);
+			local_irq_restore_nort(flags);
 			schedule_delayed_work(&npinfo->tx_work, HZ/10);
 			return;
 		}
 		__netif_tx_unlock(txq);
-		local_irq_restore(flags);
+		local_irq_restore_nort(flags);
 	}
 }
@@ -153,7 +153,7 @@ static void poll_napi(struct net_device *dev)
 	int budget = 16;
 
 	list_for_each_entry(napi, &dev->napi_list, dev_list) {
-		if (napi->poll_owner != smp_processor_id() &&
+		if (napi->poll_owner != raw_smp_processor_id() &&
 		    spin_trylock(&napi->poll_lock)) {
 			budget = poll_one_napi(dev->npinfo, napi, budget);
 			spin_unlock(&napi->poll_lock);
@@ -214,30 +214,35 @@ static void refill_skbs(void)
 static void zap_completion_queue(void)
 {
-	unsigned long flags;
 	struct softnet_data *sd = &get_cpu_var(softnet_data);
+	struct sk_buff *clist = NULL;
+	unsigned long flags;
 
 	if (sd->completion_queue) {
-		struct sk_buff *clist;
-
 		local_irq_save(flags);
 		clist = sd->completion_queue;
 		sd->completion_queue = NULL;
 		local_irq_restore(flags);
-
-		while (clist != NULL) {
-			struct sk_buff *skb = clist;
-			clist = clist->next;
-			if (skb->destructor) {
-				atomic_inc(&skb->users);
-				dev_kfree_skb_any(skb); /* put this one back */
-			} else {
-				__kfree_skb(skb);
-			}
-		}
 	}
 
+	/*
+	 * Took the list private, can drop our softnet
+	 * reference:
+	 */
 	put_cpu_var(softnet_data);
+
+	while (clist != NULL) {
+		struct sk_buff *skb = clist;
+		clist = clist->next;
+		if (skb->destructor) {
+			atomic_inc(&skb->users);
+			dev_kfree_skb_any(skb); /* put this one back */
+		} else {
+			__kfree_skb(skb);
+		}
+	}
 }
 
 static struct sk_buff *find_skb(struct netpoll *np, int len, int reserve)
@@ -245,13 +250,26 @@ static struct sk_buff *find_skb(struct netpoll *np, int len, int reserve)
 	int count = 0;
 	struct sk_buff *skb;
 
+#ifdef CONFIG_PREEMPT_RT
+	/*
+	 * On -rt skb_pool.lock is schedulable, so if we are
+	 * in an atomic context we just try to dequeue from the
+	 * pool and fail if we cannot get one.
+	 */
+	if (in_atomic() || irqs_disabled())
+		goto pick_atomic;
+#endif
 	zap_completion_queue();
 	refill_skbs();
 repeat:
 	skb = alloc_skb(len, GFP_ATOMIC);
-	if (!skb)
+	if (!skb) {
+#ifdef CONFIG_PREEMPT_RT
+pick_atomic:
+#endif
 		skb = skb_dequeue(&skb_pool);
+	}
 
 	if (!skb) {
 		if (++count < 10) {
@@ -271,7 +289,7 @@ static int netpoll_owner_active(struct net_device *dev)
 	struct napi_struct *napi;
 
 	list_for_each_entry(napi, &dev->napi_list, dev_list) {
-		if (napi->poll_owner == smp_processor_id())
+		if (napi->poll_owner == raw_smp_processor_id())
 			return 1;
 	}
 	return 0;
@@ -297,7 +315,7 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
 		txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
 
-		local_irq_save(flags);
+		local_irq_save_nort(flags);
 		/* try until next clock tick */
 		for (tries = jiffies_to_usecs(1)/USEC_PER_POLL;
 		     tries > 0; --tries) {
@@ -319,7 +337,7 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
 			udelay(USEC_PER_POLL);
 		}
-		local_irq_restore(flags);
+		local_irq_restore_nort(flags);
 	}
 
 	if (status != NETDEV_TX_OK) {
...
@@ -201,7 +201,10 @@ static const struct icmp_control icmp_pointers[NR_ICMP_TYPES+1];
  */
 static struct sock *icmp_sk(struct net *net)
 {
-	return net->ipv4.icmp_sk[smp_processor_id()];
+	/*
+	 * Should be safe on PREEMPT_SOFTIRQS/HARDIRQS to use raw-smp-processor-id:
+	 */
+	return net->ipv4.icmp_sk[raw_smp_processor_id()];
 }
 
 static inline struct sock *icmp_xmit_lock(struct net *net)
...
@@ -204,13 +204,13 @@ struct rt_hash_bucket {
 };
 
 #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \
-	defined(CONFIG_PROVE_LOCKING)
+	defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_PREEMPT_RT)
 /*
  * Instead of using one spinlock for each rt_hash_bucket, we use a table of spinlocks
  * The size of this table is a power of two and depends on the number of CPUS.
  * (on lockdep we have a quite big spinlock_t, so keep the size down there)
  */
-#ifdef CONFIG_LOCKDEP
+#if defined(CONFIG_LOCKDEP) || defined(CONFIG_PREEMPT_RT)
 # define RT_HASH_LOCK_SZ	256
 #else
 # if NR_CPUS >= 32
...
@@ -375,7 +375,7 @@ ip6t_do_table(struct sk_buff *skb,
 	xt_info_rdlock_bh();
 	private = table->private;
-	table_base = private->entries[smp_processor_id()];
+	table_base = private->entries[raw_smp_processor_id()];
 	e = get_entry(table_base, private->hook_entry[hook]);
...
@@ -12,6 +12,7 @@
  */
 #include <linux/bitops.h>
+#include <linux/kallsyms.h>
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
@@ -24,6 +25,7 @@
 #include <linux/init.h>
 #include <linux/rcupdate.h>
 #include <linux/list.h>
+#include <linux/delay.h>
 #include <net/pkt_sched.h>
 
 /* Main transmission queue. */
@@ -78,7 +80,7 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb,
 {
 	int ret;
 
-	if (unlikely(dev_queue->xmit_lock_owner == smp_processor_id())) {
+	if (unlikely(dev_queue->xmit_lock_owner == raw_smp_processor_id())) {
 		/*
 		 * Same CPU holding the lock. It may be a transient
 		 * configuration error, when hard_start_xmit() recurses. We
@@ -141,7 +143,7 @@ static inline int qdisc_restart(struct Qdisc *q)
 	dev = qdisc_dev(q);
 	txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
 
-	HARD_TX_LOCK(dev, txq, smp_processor_id());
+	HARD_TX_LOCK(dev, txq, raw_smp_processor_id());
 	if (!netif_tx_queue_stopped(txq) &&
 	    !netif_tx_queue_frozen(txq))
 		ret = dev_hard_start_xmit(skb, dev, txq);
@@ -713,9 +715,12 @@ void dev_deactivate(struct net_device *dev)
 	/* Wait for outstanding qdisc-less dev_queue_xmit calls. */
 	synchronize_rcu();
 
-	/* Wait for outstanding qdisc_run calls. */
+	/*
+	 * Wait for outstanding qdisc_run calls.
+	 * TODO: shouldnt this be wakeup-based, instead of polling it?
+	 */
 	while (some_qdisc_is_busy(dev))
-		yield();
+		msleep(1);
 }
 
 static void dev_init_scheduler_queue(struct net_device *dev,
...