/*
 * Softnet throttle patch (unified diff, apply from the tree root with -p1).
 *
 * Purpose: bound the time spent on network receive work.
 *
 *  arch/ppc/config.in
 *	Adds the bool option CONFIG_SOFTNET_THROTTLE
 *	("Softnet throttle support").
 *
 *  drivers/net/ibm_emac/ibm_emac_core.c
 *	The RX poll loop takes a deadline of jiffies + 2 on entry and
 *	breaks out, setting 'timeout', once jiffies reaches it.  The
 *	rx_stopped handling after the loop is skipped when 'timeout'
 *	is set.
 *
 *  drivers/net/ibm_emac/ibm_emac_mal.c
 *	The poll routine takes the same jiffies + 2 deadline.  Reaching
 *	it is treated like an exhausted rx_work_limit (done = 0, goto
 *	more_work), and "goto again" is only taken before the deadline.
 *
 *  include/asm-ppc/hardirq.h, include/linux/irq_cpustat.h
 *	Adds the field __ksoftirqd_context next to __ksoftirqd_task and
 *	the accessor ksoftirqd_context(cpu).
 *
 *  kernel/softirq.c
 *	The body of do_softirq() becomes static ___do_softirq(ctx),
 *	which stores ctx in ksoftirqd_context(cpu) after
 *	local_bh_disable().  do_softirq() keeps the in_interrupt()
 *	check and calls ___do_softirq(0).  The softirq_pending() loop
 *	at old line 392 now calls ___do_softirq(1) directly.
 *	NOTE(review): that loop is presumably the ksoftirqd thread body;
 *	the enclosing function is not visible in the hunk - confirm.
 *
 *  include/linux/netdevice.h
 *	Adds softnet_time to struct softnet_data and declares
 *	softnet_throttle_tick(), softnet_throttle_ratio and
 *	softnet_proc_doratio(), or an empty inline
 *	softnet_throttle_tick() when the option is off.
 *
 *  include/linux/sysctl.h, net/core/sysctl_net_core.c
 *	Adds NET_CORE_SOFTNET_THROTTLE_RATIO = 19 and the table entry
 *	"softnet_throttle_ratio" (mode 0644) handled by
 *	softnet_proc_doratio().
 *
 *  kernel/timer.c
 *	Calls softnet_throttle_tick() (enclosing function not visible
 *	in the hunk).
 *
 *  net/core/dev.c
 *	max_softnet_time is computed as
 *	((THROTTLE_SLICE * tb_ticks_per_jiffy) / 100) * softnet_throttle_ratio.
 *	softnet_throttle_tick() zeroes softnet_time for every CPU once
 *	per THROTTLE_SLICE (20) calls.  net_rx_action() adds the
 *	get_tbl() delta of each run to the CPU's softnet_time.  When
 *	ksoftirqd_context(cpu) is zero and softnet_time has reached
 *	max_softnet_time, it skips polling and jumps to the new
 *	softnet_throttle label, which shares the existing time_squeeze++
 *	and __cpu_raise_softirq() tail with softnet_break.
 *	softnet_proc_doratio() clamps the written ratio to 1..100 and
 *	recomputes max_softnet_time.
 *
 * NOTE(review): this copy of the patch was recovered from a text dump
 * that had lost its line structure.  Before applying, check:
 *   - Every bare "#include" below has lost its header name (presumably
 *     stripped together with the angle brackets).  The added lines need
 *     whatever headers declare ctl_table, softnet_throttle_tick(),
 *     tb_ticks_per_jiffy and get_tbl() - TODO confirm against the
 *     original patch.  The context lines must be restored from the
 *     target tree.
 *   - Blank context lines and leading indentation were reconstructed
 *     from the hunk line counts - verify against the target tree.
 *   - The removed/added TX_CLEAN_INTERVAL pair, and the removed/added
 *     blank line pair in net_rx_action(), are textually identical here;
 *     presumably they differed only in whitespace.
 *   - netdevice.h declares softnet_throttle_tick() under
 *     CONFIG_SOFTNET_THROTTLE alone, while dev.c defines it only when
 *     CONFIG_PPC is also defined.
 */
diff -dNur a/arch/ppc/config.in b/arch/ppc/config.in
--- a/arch/ppc/config.in
+++ b/arch/ppc/config.in
@@ -217,6 +217,7 @@
 
 bool 'Preemptible kernel support' CONFIG_PREEMPT
 bool 'Low latency scheduling' CONFIG_LOLAT
+bool 'Softnet throttle support' CONFIG_SOFTNET_THROTTLE
 dep_bool 'Control low latency with sysctl' CONFIG_LOLAT_SYSCTL $CONFIG_LOLAT
 
 if [ "$CONFIG_6xx" = "y" -a "$CONFIG_8260" = "n" ];then
diff -dNur a/drivers/net/ibm_emac/ibm_emac_core.c b/drivers/net/ibm_emac/ibm_emac_core.c
--- a/drivers/net/ibm_emac/ibm_emac_core.c
+++ b/drivers/net/ibm_emac/ibm_emac_core.c
@@ -126,7 +126,7 @@
 #define PHY_POLL_LINK_ON HZ
 #define PHY_POLL_LINK_OFF (HZ / 5)
 
-#define TX_CLEAN_INTERVAL HZ
+#define TX_CLEAN_INTERVAL HZ
 
 /* Please, keep in sync with struct ibm_emac_stats/ibm_emac_error_stats */
 static const char emac_stats_keys[EMAC_ETHTOOL_STATS_COUNT][ETH_GSTRING_LEN] = {
@@ -1308,6 +1308,8 @@
 {
 	struct ocp_enet_private *dev = param;
 	int slot = dev->rx_slot, received = 0;
+	unsigned long time_x = jiffies + 2;
+	int timeout = 0;
 
 	DBG2("%d: poll_rx(%d)" NL, dev->def->index, budget);
 
@@ -1320,6 +1322,11 @@
 		if (ctrl & MAL_RX_CTRL_EMPTY)
 			break;
 
+		if (unlikely(time_after_eq(jiffies, time_x))) {
+			timeout = 1;
+			break;
+		}
+
 		skb = dev->rx_skb[slot];
 		barrier();
 		len = dev->rx_desc[slot].data_len;
@@ -1406,7 +1413,7 @@
 		dev->rx_slot = slot;
 	}
 
-	if (unlikely(budget && dev->commac.rx_stopped)) {
+	if (unlikely(budget && dev->commac.rx_stopped && !timeout)) {
 		struct ocp_func_emac_data *emacdata = dev->def->additions;
 
 		barrier();
diff -dNur a/drivers/net/ibm_emac/ibm_emac_mal.c b/drivers/net/ibm_emac/ibm_emac_mal.c
--- a/drivers/net/ibm_emac/ibm_emac_mal.c
+++ b/drivers/net/ibm_emac/ibm_emac_mal.c
@@ -273,6 +273,7 @@
 {
 	struct ibm_ocp_mal *mal = ndev->priv;
 	struct list_head *l;
+	unsigned long time_x = jiffies + 2;
 	int rx_work_limit = min(ndev->quota, *budget), received = 0, done;
 
 	MAL_DBG2("%d: poll(%d) %d ->" NL, mal->def->index, *budget,
@@ -296,7 +297,7 @@
 			if (n) {
 				received += n;
 				rx_work_limit -= n;
-				if (rx_work_limit <= 0) {
+				if (rx_work_limit <= 0 || time_after_eq(jiffies, time_x)) {
 					done = 0;
 					goto more_work; // XXX What if this is the last one ?
 				}
@@ -323,7 +324,7 @@
 
 		MAL_DBG2("%d: already in poll list" NL, mal->def->index);
 
-		if (rx_work_limit > 0)
+		if (rx_work_limit > 0 && time_before(jiffies, time_x))
 			goto again;
 		else
 			goto more_work;
diff -dNur a/include/asm-ppc/hardirq.h b/include/asm-ppc/hardirq.h
--- a/include/asm-ppc/hardirq.h
+++ b/include/asm-ppc/hardirq.h
@@ -17,6 +17,7 @@
 	unsigned int __local_bh_count;
 	unsigned int __syscall_count;
 	struct task_struct * __ksoftirqd_task;
+	unsigned int __ksoftirqd_context;
 	unsigned int __last_jiffy_stamp;
 	unsigned int __heartbeat_count;
 	unsigned int __heartbeat_reset;
diff -dNur a/include/linux/irq_cpustat.h b/include/linux/irq_cpustat.h
--- a/include/linux/irq_cpustat.h
+++ b/include/linux/irq_cpustat.h
@@ -31,6 +31,7 @@
 #define local_bh_count(cpu) __IRQ_STAT((cpu), __local_bh_count)
 #define syscall_count(cpu) __IRQ_STAT((cpu), __syscall_count)
 #define ksoftirqd_task(cpu) __IRQ_STAT((cpu), __ksoftirqd_task)
+#define ksoftirqd_context(cpu) __IRQ_STAT((cpu), __ksoftirqd_context)
   /* arch dependent irq_stat fields */
 #define nmi_count(cpu) __IRQ_STAT((cpu), __nmi_count) /* i386, ia64 */
 
diff -dNur a/include/linux/netdevice.h b/include/linux/netdevice.h
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -535,11 +535,25 @@
 	struct sk_buff *completion_queue;
 
 	struct net_device blog_dev; /* Sorry. 8) */
+
+#if defined(CONFIG_SOFTNET_THROTTLE)
+	unsigned long softnet_time;
+#endif
 } ____cacheline_aligned;
 
 
 extern struct softnet_data softnet_data[NR_CPUS];
 
+#if defined(CONFIG_SOFTNET_THROTTLE)
+#include
+extern void softnet_throttle_tick(void);
+extern int softnet_throttle_ratio;
+extern int softnet_proc_doratio(ctl_table *, int, struct file *,
+				void *, size_t *);
+#else
+static inline void softnet_throttle_tick(void) { }
+#endif
+
 #define HAVE_NETIF_QUEUE
 
 static inline void __netif_schedule(struct net_device *dev)
diff -dNur a/include/linux/sysctl.h b/include/linux/sysctl.h
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -225,6 +225,7 @@
 	NET_CORE_MOD_CONG=16,
 	NET_CORE_DEV_WEIGHT=17,
 	NET_CORE_SOMAXCONN=18,
+	NET_CORE_SOFTNET_THROTTLE_RATIO=19,
 };
 
 /* /proc/sys/net/ethernet */
diff -dNur a/kernel/softirq.c b/kernel/softirq.c
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -58,16 +58,13 @@
 		wake_up_process(tsk);
 }
 
-asmlinkage void do_softirq()
+static void ___do_softirq(unsigned int ksoftirqd_ctx)
 {
 	int cpu;
 	__u32 pending;
 	unsigned long flags;
 	__u32 mask;
 
-	if (in_interrupt())
-		return;
-
 	local_irq_save(flags);
 
 	cpu = smp_processor_id();
@@ -79,6 +76,7 @@
 
 		mask = ~pending;
 		local_bh_disable();
+		ksoftirqd_context(cpu) = ksoftirqd_ctx;
 restart:
 		/* Reset the pending bitmask before enabling irqs */
 		softirq_pending(cpu) = 0;
@@ -111,6 +109,14 @@
 	local_irq_restore(flags);
 }
 
+asmlinkage void do_softirq()
+{
+	if (in_interrupt())
+		return;
+
+	___do_softirq(0);
+}
+
 /*
  * This function must run with irq disabled!
  */
@@ -392,7 +398,7 @@
 		__set_current_state(TASK_RUNNING);
 
 		while (softirq_pending(cpu)) {
-			do_softirq();
+			___do_softirq(1);
 			if (current->need_resched)
 				schedule();
 		}
diff -dNur a/kernel/timer.c b/kernel/timer.c
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -22,6 +22,7 @@
 #include
 #include
 #include
+#include
 
 #include
 
@@ -683,6 +684,8 @@
 		xuptime.tv_usec -= 1000000;
 	}
 
+	softnet_throttle_tick();
+
 #ifndef CONFIG_SMP
 	/* SMP process accounting uses the local APIC timer */
 
diff -dNur a/net/core/dev.c b/net/core/dev.c
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -108,6 +108,9 @@
 extern int plip_init(void);
 #endif
 
+#if defined(CONFIG_SOFTNET_THROTTLE)
+#include
+#endif
 
 /* This define, if set, will randomly drop a packet when congestion
  * is more than moderate. It helps fairness in the multi-interface
@@ -1618,16 +1621,82 @@
 	return 0;
 }
 
+#if defined(CONFIG_SOFTNET_THROTTLE) && defined(CONFIG_PPC)
+#define THROTTLE_SLICE 20
+
+int softnet_throttle_ratio = 20;
+static int softnet_slice = THROTTLE_SLICE;
+static unsigned long max_softnet_time;
+
+static void softnet_throttle_init(void)
+{
+	max_softnet_time =
+		((THROTTLE_SLICE * tb_ticks_per_jiffy ) / 100) * softnet_throttle_ratio;
+}
+
+#ifdef CONFIG_SYSCTL
+int softnet_proc_doratio(ctl_table *table, int write, struct file *filp,
+			 void *buffer, size_t *lenp)
+{
+	int res = proc_dointvec(table, write, filp, buffer, lenp);
+	if (write && !res) {
+		if (softnet_throttle_ratio < 1)
+			softnet_throttle_ratio = 1;
+		else if (softnet_throttle_ratio > 100)
+			softnet_throttle_ratio = 100;
+
+		softnet_throttle_init();
+	}
+
+	return res;
+}
+#endif
+
+void softnet_throttle_tick(void)
+{
+	if (!--softnet_slice) {
+		int i;
+		for (i = 0; i < NR_CPUS; i++)
+			softnet_data[i].softnet_time = 0;
+		softnet_slice = THROTTLE_SLICE;
+	}
+}
+
+static inline int softnet_need_throttle(int cpu)
+{
+	return !ksoftirqd_context(cpu) &&
+		softnet_data[cpu].softnet_time >= max_softnet_time;
+}
+
+#define SOFTNET_THROTTLE_DECLARE \
+	unsigned long start_tbl = get_tbl()
+
+#define SOFTNET_THROTTLE_INC(cpu) \
+	softnet_data[cpu].softnet_time += get_tbl() - start_tbl
+
+#else
+static inline void softnet_throttle_init(void) {}
+static inline int softnet_need_throttle(int cpu) { return 0; }
+
+#define SOFTNET_THROTTLE_DECLARE ((void)0)
+#define SOFTNET_THROTTLE_INC(cpu) ((void)0)
+
+#endif
+
 static void net_rx_action(struct softirq_action *h)
 {
 	int this_cpu = smp_processor_id();
 	struct softnet_data *queue = &softnet_data[this_cpu];
 	unsigned long start_time = jiffies;
 	int budget = netdev_max_backlog;
+	SOFTNET_THROTTLE_DECLARE;
 
 	br_read_lock(BR_NETPROTO_LOCK);
 	local_irq_disable();
-
+
+	if (softnet_need_throttle(this_cpu))
+		goto softnet_throttle;
+
 	while (!list_empty(&queue->poll_list)) {
 		struct net_device *dev;
 
@@ -1651,12 +1720,17 @@
 			local_irq_disable();
 		}
 	}
+
+	SOFTNET_THROTTLE_INC(this_cpu);
 
 	local_irq_enable();
 	br_read_unlock(BR_NETPROTO_LOCK);
 	return;
 
 softnet_break:
+	SOFTNET_THROTTLE_INC(this_cpu);
+
+softnet_throttle:
 	netdev_rx_stat[this_cpu].time_squeeze++;
 	__cpu_raise_softirq(this_cpu, NET_RX_SOFTIRQ);
 
@@ -2799,6 +2873,8 @@
 		atomic_set(&queue->blog_dev.refcnt, 1);
 	}
 
+	softnet_throttle_init();
+
 #ifdef CONFIG_NET_PROFILE
 	net_profile_init();
 	NET_PROFILE_REGISTER(dev_queue_xmit);
diff -dNur a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -8,6 +8,7 @@
 #include
 #include
 #include
+#include
 
 #ifdef CONFIG_SYSCTL
 
@@ -93,6 +94,11 @@
 	 &sysctl_somaxconn, sizeof(int), 0644, NULL,
 	 &proc_dointvec },
 #endif /* CONFIG_NET */
+#if defined(CONFIG_SOFTNET_THROTTLE)
+	{NET_CORE_SOFTNET_THROTTLE_RATIO, "softnet_throttle_ratio",
+	 &softnet_throttle_ratio, sizeof(int), 0644, NULL,
+	 &softnet_proc_doratio },
+#endif
 	{ 0 }
 };
 #endif