diff options
50 files changed, 675 insertions, 101 deletions
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index ca5cdcd0f0e..35ff17979b3 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -22,6 +22,13 @@ ip_no_pmtu_disc - BOOLEAN min_pmtu - INTEGER default 562 - minimum discovered Path MTU +fwmark_reflect - BOOLEAN + Controls the fwmark of kernel-generated IPv4 reply packets that are not + associated with a socket (for example, TCP RSTs or ICMP echo replies). + If unset, these packets have a fwmark of zero. If set, they have the + fwmark of the packet they are replying to. + Default: 0 + route/max_size - INTEGER Maximum number of routes allowed in the kernel. Increase this when using large numbers of interfaces and/or routes. @@ -1036,6 +1043,13 @@ conf/all/forwarding - BOOLEAN proxy_ndp - BOOLEAN Do proxy ndp. +fwmark_reflect - BOOLEAN + Controls the fwmark of kernel-generated IPv6 reply packets that are not + associated with a socket (for example, TCP RSTs or ICMPv6 echo replies). + If unset, these packets have a fwmark of zero. If set, they have the + fwmark of the packet they are replying to. + Default: 0 + conf/interface/*: Change special settings per interface. 
diff --git a/arch/arm/mach-tegra/pm-irq.c b/arch/arm/mach-tegra/pm-irq.c index 57d21361ca1..a6ca3380311 100644 --- a/arch/arm/mach-tegra/pm-irq.c +++ b/arch/arm/mach-tegra/pm-irq.c @@ -25,6 +25,7 @@ #include <linux/moduleparam.h> #include <linux/seq_file.h> #include <linux/syscore_ops.h> +#include <linux/wakeup_reason.h> #include <mach/iomap.h> @@ -217,7 +218,7 @@ static void tegra_pm_irq_syscore_resume_helper( (wake + 32 * index)); continue; } - + log_wakeup_reason(irq); desc = irq_to_desc(irq); if (!desc || !desc->action || !desc->action->name) { pr_info("Resume caused by WAKE%d, irq %d\n", diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c index 02ee9adff54..eac388e2aaf 100644 --- a/arch/um/os-Linux/start_up.c +++ b/arch/um/os-Linux/start_up.c @@ -16,6 +16,8 @@ #include <sys/ptrace.h> #include <sys/stat.h> #include <sys/wait.h> +#include <sys/time.h> +#include <sys/resource.h> #include <asm/unistd.h> #include "init.h" #include "kern_constants.h" diff --git a/drivers/net/wireless/bcmdhd/wl_cfg80211.c b/drivers/net/wireless/bcmdhd/wl_cfg80211.c index 9f46c7765b0..b16d0bb5031 100644 --- a/drivers/net/wireless/bcmdhd/wl_cfg80211.c +++ b/drivers/net/wireless/bcmdhd/wl_cfg80211.c @@ -4753,6 +4753,9 @@ static s32 wl_setup_wiphy(struct wireless_dev *wdev, struct device *sdiofunc_dev #endif /* AP_SME flag can be advertised to remove patch from wpa_supplicant */ wdev->wiphy->flags |= WIPHY_FLAG_HAVE_AP_SME; +#if defined(CONFIG_PM) + wdev->wiphy->wowlan.flags = WIPHY_WOWLAN_ANY; +#endif WL_DBG(("Registering custom regulatory)\n")); wdev->wiphy->flags |= WIPHY_FLAG_CUSTOM_REGULATORY; wiphy_apply_custom_regulatory(wdev->wiphy, &brcm_regdom); diff --git a/include/linux/fib_rules.h b/include/linux/fib_rules.h index 51da65b68b8..9dcdb6251cb 100644 --- a/include/linux/fib_rules.h +++ b/include/linux/fib_rules.h @@ -49,6 +49,8 @@ enum { FRA_TABLE, /* Extended table id */ FRA_FWMASK, /* mask for netfilter mark */ FRA_OIFNAME, + FRA_UID_START, /* UID range */ + 
FRA_UID_END, __FRA_MAX }; diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 0c997767429..15395001dc5 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -161,6 +161,7 @@ struct ipv6_devconf { __s32 accept_ra_rt_info_max_plen; #endif #endif + __s32 accept_ra_rt_table; __s32 proxy_ndp; __s32 accept_source_route; #ifdef CONFIG_IPV6_OPTIMISTIC_DAD @@ -213,6 +214,7 @@ enum { DEVCONF_DISABLE_IPV6, DEVCONF_ACCEPT_DAD, DEVCONF_FORCE_TLLAO, + DEVCONF_ACCEPT_RA_RT_TABLE, DEVCONF_MAX }; diff --git a/include/linux/prctl.h b/include/linux/prctl.h index a3baeb2c216..1c0b14a7a0b 100644 --- a/include/linux/prctl.h +++ b/include/linux/prctl.h @@ -102,4 +102,10 @@ #define PR_MCE_KILL_GET 34 +/* Sets the timerslack for arbitrary threads + * arg2 slack value, 0 means "use default" + * arg3 pid of the thread whose timer slack needs to be set + */ +#define PR_SET_TIMERSLACK_PID 41 + #endif /* _LINUX_PRCTL_H */ diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 8e872ead88b..97a95808012 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -283,6 +283,8 @@ enum rtattr_type_t { RTA_MP_ALGO, /* no longer used */ RTA_TABLE, RTA_MARK, + RTA_MFC_STATS, /* not used - backported from the future */ + RTA_UID, __RTA_MAX }; diff --git a/include/linux/wakeup_reason.h b/include/linux/wakeup_reason.h new file mode 100644 index 00000000000..7ce50f0debc --- /dev/null +++ b/include/linux/wakeup_reason.h @@ -0,0 +1,23 @@ +/* + * include/linux/wakeup_reason.h + * + * Logs the reason which caused the kernel to resume + * from the suspend mode. + * + * Copyright (C) 2014 Google, Inc. + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _LINUX_WAKEUP_REASON_H +#define _LINUX_WAKEUP_REASON_H + +void log_wakeup_reason(int irq); + +#endif /* _LINUX_WAKEUP_REASON_H */ diff --git a/include/net/addrconf.h b/include/net/addrconf.h index cbc6bb0a683..f48830293d8 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -166,6 +166,8 @@ extern int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr extern int ipv6_chk_acast_addr(struct net *net, struct net_device *dev, const struct in6_addr *addr); +u32 addrconf_rt_table(const struct net_device *dev, u32 default_table); + /* Device notifier */ extern int register_inet6addr_notifier(struct notifier_block *nb); diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 075f1e3a0fe..52e77a366bf 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -23,6 +23,8 @@ struct fib_rule { struct fib_rule __rcu *ctarget; char iifname[IFNAMSIZ]; char oifname[IFNAMSIZ]; + uid_t uid_start; + uid_t uid_end; struct rcu_head rcu; struct net * fr_net; }; @@ -79,7 +81,9 @@ struct fib_rules_ops { [FRA_FWMARK] = { .type = NLA_U32 }, \ [FRA_FWMASK] = { .type = NLA_U32 }, \ [FRA_TABLE] = { .type = NLA_U32 }, \ - [FRA_GOTO] = { .type = NLA_U32 } + [FRA_GOTO] = { .type = NLA_U32 }, \ + [FRA_UID_START] = { .type = NLA_U32 }, \ + [FRA_UID_END] = { .type = NLA_U32 } static inline void fib_rule_get(struct fib_rule *rule) { diff --git a/include/net/flow.h b/include/net/flow.h index 57f15a7f1cd..1beab06a4ce 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -23,6 +23,7 @@ struct flowi_common { #define FLOWI_FLAG_PRECOW_METRICS 0x02 #define FLOWI_FLAG_CAN_SLEEP 0x04 __u32 flowic_secid; + uid_t flowic_uid; }; union flowi_uli { @@ -59,6 +60,7 @@ struct flowi4 { 
#define flowi4_proto __fl_common.flowic_proto #define flowi4_flags __fl_common.flowic_flags #define flowi4_secid __fl_common.flowic_secid +#define flowi4_uid __fl_common.flowic_uid __be32 daddr; __be32 saddr; union flowi_uli uli; @@ -75,7 +77,8 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif, __u32 mark, __u8 tos, __u8 scope, __u8 proto, __u8 flags, __be32 daddr, __be32 saddr, - __be16 dport, __be32 sport) + __be16 dport, __be32 sport, + uid_t uid) { fl4->flowi4_oif = oif; fl4->flowi4_iif = 0; @@ -85,6 +88,7 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif, fl4->flowi4_proto = proto; fl4->flowi4_flags = flags; fl4->flowi4_secid = 0; + fl4->flowi4_uid = uid; fl4->daddr = daddr; fl4->saddr = saddr; fl4->fl4_dport = dport; @@ -102,6 +106,7 @@ struct flowi6 { #define flowi6_proto __fl_common.flowic_proto #define flowi6_flags __fl_common.flowic_flags #define flowi6_secid __fl_common.flowic_secid +#define flowi6_uid __fl_common.flowic_uid struct in6_addr daddr; struct in6_addr saddr; __be32 flowlabel; @@ -145,6 +150,7 @@ struct flowi { #define flowi_proto u.__fl_common.flowic_proto #define flowi_flags u.__fl_common.flowic_flags #define flowi_secid u.__fl_common.flowic_secid +#define flowi_uid u.__fl_common.flowic_uid } __attribute__((__aligned__(BITS_PER_LONG/8))); static inline struct flowi *flowi4_to_flowi(struct flowi4 *fl4) diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index b897d6e6d0a..da4d79faee3 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -87,6 +87,7 @@ struct inet_request_sock { no_srccheck: 1; kmemcheck_bitfield_end(flags); struct ip_options_rcu *opt; + u32 ir_mark; }; static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk) @@ -94,6 +95,15 @@ static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk) return (struct inet_request_sock *)sk; } +static inline u32 inet_request_mark(struct sock *sk, struct sk_buff *skb) +{ + if 
(!sk->sk_mark && sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept) { + return skb->mark; + } else { + return sk->sk_mark; + } +} + struct inet_cork { unsigned int flags; __be32 addr; diff --git a/include/net/ip.h b/include/net/ip.h index aa76c7a4d9c..d7c988fe292 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -165,6 +165,7 @@ struct ip_reply_arg { int csumoffset; /* u16 offset of csum in iov[0].iov_base */ /* -1 if not needed */ int bound_dev_if; + uid_t uid; }; #define IP_REPLY_ARG_NOSRCCHECK 1 @@ -236,6 +237,9 @@ extern void ipfrag_init(void); extern void ip_static_sysctl_init(void); +#define IP4_REPLY_MARK(net, mark) \ + ((net)->ipv4.sysctl_fwmark_reflect ? (mark) : 0) + static inline bool ip_is_fragment(const struct iphdr *iph) { return (iph->frag_off & htons(IP_MF | IP_OFFSET)) != 0; diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 0bca86fc77d..d32527d2715 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -109,6 +109,9 @@ struct frag_hdr { #define IP6_MF 0x0001 +#define IP6_REPLY_MARK(net, mark) \ + ((net)->ipv6.sysctl.fwmark_reflect ? 
(mark) : 0) + #include <net/sock.h> /* sysctls */ diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index d786b4fc02a..76ebd40d524 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -55,6 +55,8 @@ struct netns_ipv4 { int current_rt_cache_rebuild_count; unsigned int sysctl_ping_group_range[2]; + int sysctl_fwmark_reflect; + int sysctl_tcp_fwmark_accept; atomic_t rt_genid; atomic_t dev_addr_genid; diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 81abfcb2eb4..20b76abcb15 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -25,6 +25,7 @@ struct netns_sysctl_ipv6 { int ip6_rt_mtu_expires; int ip6_rt_min_advmss; int icmpv6_time; + int fwmark_reflect; }; struct netns_ipv6 { diff --git a/include/net/route.h b/include/net/route.h index db7b3432f07..5e9519ed80e 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -60,6 +60,7 @@ struct rtable { int rt_iif; int rt_oif; __u32 rt_mark; + uid_t rt_uid; /* Info on neighbour */ __be32 rt_gateway; @@ -146,7 +147,7 @@ static inline struct rtable *ip_route_output_ports(struct net *net, struct flowi flowi4_init_output(fl4, oif, sk ? sk->sk_mark : 0, tos, RT_SCOPE_UNIVERSE, proto, sk ? inet_sk_flowi_flags(sk) : 0, - daddr, saddr, dport, sport); + daddr, saddr, dport, sport, sk ? 
sock_i_uid(sk) : 0); if (sk) security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); return ip_route_output_flow(net, fl4, sk); @@ -250,7 +251,8 @@ static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst, __be32 flow_flags |= FLOWI_FLAG_CAN_SLEEP; flowi4_init_output(fl4, oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, - protocol, flow_flags, dst, src, dport, sport); + protocol, flow_flags, dst, src, dport, sport, + sock_i_uid(sk)); } static inline struct rtable *ip_route_connect(struct flowi4 *fl4, diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 54a36fe288f..0e4298fc19a 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -272,7 +272,7 @@ static void check_for_release(struct cgroup *cgrp); /* * A queue for waiters to do rmdir() cgroup. A tasks will sleep when - * cgroup->count == 0 && list_empty(&cgroup->children) && subsys has some + * list_empty(&cgroup->children) && subsys has some * reference to css->refcnt. In general, this refcnt is expected to goes down * to zero, soon. * @@ -3935,6 +3935,10 @@ static int cgroup_clear_css_refs(struct cgroup *cgrp) struct cgroup_subsys *ss; unsigned long flags; bool failed = false; + + if (atomic_read(&cgrp->count) != 0) + return false; + local_irq_save(flags); for_each_subsys(cgrp->root, ss) { struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id]; @@ -3977,19 +3981,23 @@ static int cgroup_clear_css_refs(struct cgroup *cgrp) return !failed; } -/* checks if all of the css_sets attached to a cgroup have a refcount of 0. - * Must be called with css_set_lock held */ +/* Checks if all of the css_sets attached to a cgroup have a refcount of 0. 
*/ static int cgroup_css_sets_empty(struct cgroup *cgrp) { struct cg_cgroup_link *link; + int retval = 1; + read_lock(&css_set_lock); list_for_each_entry(link, &cgrp->css_sets, cgrp_link_list) { struct css_set *cg = link->cg; - if (atomic_read(&cg->refcount) > 0) - return 0; + if (atomic_read(&cg->refcount) > 0) { + retval = 0; + break; + } } + read_unlock(&css_set_lock); - return 1; + return retval; } static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry) diff --git a/kernel/power/Makefile b/kernel/power/Makefile index 9b224e16b19..a6ef0bed68d 100644 --- a/kernel/power/Makefile +++ b/kernel/power/Makefile @@ -16,3 +16,5 @@ obj-$(CONFIG_FB_EARLYSUSPEND) += fbearlysuspend.o obj-$(CONFIG_SUSPEND_TIME) += suspend_time.o obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o + +obj-$(CONFIG_SUSPEND) += wakeup_reason.o diff --git a/kernel/power/wakeup_reason.c b/kernel/power/wakeup_reason.c new file mode 100644 index 00000000000..9823d9ccde4 --- /dev/null +++ b/kernel/power/wakeup_reason.c @@ -0,0 +1,132 @@ +/* + * kernel/power/wakeup_reason.c + * + * Logs the reasons which caused the kernel to resume from + * the suspend mode. + * + * Copyright (C) 2014 Google, Inc. + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#include <linux/wakeup_reason.h> +#include <linux/kernel.h> +#include <linux/irq.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/kobject.h> +#include <linux/sysfs.h> +#include <linux/init.h> +#include <linux/spinlock.h> +#include <linux/notifier.h> +#include <linux/suspend.h> + + +#define MAX_WAKEUP_REASON_IRQS 32 +static int irq_list[MAX_WAKEUP_REASON_IRQS]; +static int irq_count; +static struct kobject *wakeup_reason; +static spinlock_t resume_reason_lock; + +static ssize_t last_resume_reason_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + int irq_no, buf_offset = 0; + struct irq_desc *desc; + spin_lock(&resume_reason_lock); + for (irq_no = 0; irq_no < irq_count; irq_no++) { + desc = irq_to_desc(irq_list[irq_no]); + if (desc && desc->action && desc->action->name) + buf_offset += sprintf(buf + buf_offset, "%d %s\n", + irq_list[irq_no], desc->action->name); + else + buf_offset += sprintf(buf + buf_offset, "%d\n", + irq_list[irq_no]); + } + spin_unlock(&resume_reason_lock); + return buf_offset; +} + +static struct kobj_attribute resume_reason = __ATTR_RO(last_resume_reason); + +static struct attribute *attrs[] = { + &resume_reason.attr, + NULL, +}; +static struct attribute_group attr_group = { + .attrs = attrs, +}; + +/* + * logs all the wake up reasons to the kernel + * stores the irqs to expose them to the userspace via sysfs + */ +void log_wakeup_reason(int irq) +{ + struct irq_desc *desc; + desc = irq_to_desc(irq); + if (desc && desc->action && desc->action->name) + printk(KERN_INFO "Resume caused by IRQ %d, %s\n", irq, + desc->action->name); + else + printk(KERN_INFO "Resume caused by IRQ %d\n", irq); + + spin_lock(&resume_reason_lock); + irq_list[irq_count++] = irq; + spin_unlock(&resume_reason_lock); +} + +/* Detects a suspend and clears all the previous wake up reasons*/ +static int wakeup_reason_pm_event(struct notifier_block *notifier, + unsigned long pm_event, void *unused) +{ + switch (pm_event) 
{ + case PM_SUSPEND_PREPARE: + spin_lock(&resume_reason_lock); + irq_count = 0; + spin_unlock(&resume_reason_lock); + break; + default: + break; + } + return NOTIFY_DONE; +} + +static struct notifier_block wakeup_reason_pm_notifier_block = { + .notifier_call = wakeup_reason_pm_event, +}; + +/* Initializes the sysfs parameter + * registers the pm_event notifier + */ +int __init wakeup_reason_init(void) +{ + int retval; + spin_lock_init(&resume_reason_lock); + retval = register_pm_notifier(&wakeup_reason_pm_notifier_block); + if (retval) + printk(KERN_WARNING "[%s] failed to register PM notifier %d\n", + __func__, retval); + + wakeup_reason = kobject_create_and_add("wakeup_reasons", kernel_kobj); + if (!wakeup_reason) { + printk(KERN_WARNING "[%s] failed to create a sysfs kobject\n", + __func__); + return 1; + } + retval = sysfs_create_group(wakeup_reason, &attr_group); + if (retval) { + kobject_put(wakeup_reason); + printk(KERN_WARNING "[%s] failed to create a sysfs group %d\n", + __func__, retval); + } + return 0; +} + +late_initcall(wakeup_reason_init); diff --git a/kernel/sys.c b/kernel/sys.c index f24794e7652..35cda542e8c 100755 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -39,6 +39,7 @@ #include <linux/syscore_ops.h> #include <linux/version.h> #include <linux/ctype.h> +#include <linux/sched.h> #include <linux/compat.h> #include <linux/syscalls.h> @@ -1710,6 +1711,7 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, unsigned long, arg4, unsigned long, arg5) { struct task_struct *me = current; + struct task_struct *tsk; unsigned char comm[sizeof(me->comm)]; long error; @@ -1854,6 +1856,26 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, else error = PR_MCE_KILL_DEFAULT; break; + case PR_SET_TIMERSLACK_PID: + if (current->pid != (pid_t)arg3 && + !capable(CAP_SYS_NICE)) + return -EPERM; + rcu_read_lock(); + tsk = find_task_by_pid_ns((pid_t)arg3, &init_pid_ns); + if (tsk == NULL) { + rcu_read_unlock(); + 
return -EINVAL; + } + get_task_struct(tsk); + rcu_read_unlock(); + if (arg2 <= 0) + tsk->timer_slack_ns = + tsk->default_timer_slack_ns; + else + tsk->timer_slack_ns = arg2; + put_task_struct(tsk); + error = 0; + break; default: error = -EINVAL; break; diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 27071ee2a4e..c9f46b01e7f 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -16,6 +16,12 @@ #include <net/sock.h> #include <net/fib_rules.h> +#define INVALID_UID ((uid_t) -1) +#define uid_valid(uid) ((uid) != -1) +#define uid_lte(a, b) ((a) <= (b)) +#define uid_eq(a, b) ((a) == (b)) +#define uid_gte(a, b) ((a) >= (b)) + int fib_default_rule_add(struct fib_rules_ops *ops, u32 pref, u32 table, u32 flags) { @@ -30,6 +36,8 @@ int fib_default_rule_add(struct fib_rules_ops *ops, r->pref = pref; r->table = table; r->flags = flags; + r->uid_start = INVALID_UID; + r->uid_end = INVALID_UID; r->fr_net = hold_net(ops->fro_net); /* The lock is not required here, the list in unreacheable @@ -176,6 +184,23 @@ void fib_rules_unregister(struct fib_rules_ops *ops) } EXPORT_SYMBOL_GPL(fib_rules_unregister); +static inline uid_t fib_nl_uid(struct nlattr *nla) +{ + return nla_get_u32(nla); +} + +static int nla_put_uid(struct sk_buff *skb, int idx, uid_t uid) +{ + return nla_put_u32(skb, idx, uid); +} + +static int fib_uid_range_match(struct flowi *fl, struct fib_rule *rule) +{ + return (!uid_valid(rule->uid_start) && !uid_valid(rule->uid_end)) || + (uid_gte(fl->flowi_uid, rule->uid_start) && + uid_lte(fl->flowi_uid, rule->uid_end)); +} + static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, struct flowi *fl, int flags) { @@ -190,6 +215,9 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, if ((rule->mark ^ fl->flowi_mark) & rule->mark_mask) goto out; + if (!fib_uid_range_match(fl, rule)) + goto out; + ret = ops->match(rule, fl, flags); out: return (rule->flags & FIB_RULE_INVERT) ? 
!ret : ret; @@ -360,6 +388,19 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) } else if (rule->action == FR_ACT_GOTO) goto errout_free; + /* UID start and end must either both be valid or both unspecified. */ + rule->uid_start = rule->uid_end = INVALID_UID; + if (tb[FRA_UID_START] || tb[FRA_UID_END]) { + if (tb[FRA_UID_START] && tb[FRA_UID_END]) { + rule->uid_start = fib_nl_uid(tb[FRA_UID_START]); + rule->uid_end = fib_nl_uid(tb[FRA_UID_END]); + } + if (!uid_valid(rule->uid_start) || + !uid_valid(rule->uid_end) || + !uid_lte(rule->uid_start, rule->uid_end)) + goto errout_free; + } + err = ops->configure(rule, skb, frh, tb); if (err < 0) goto errout_free; @@ -442,7 +483,8 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) if (frh->action && (frh->action != rule->action)) continue; - if (frh->table && (frh_get_table(frh, tb) != rule->table)) + if (frh_get_table(frh, tb) && + (frh_get_table(frh, tb) != rule->table)) continue; if (tb[FRA_PRIORITY] && @@ -465,6 +507,14 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) (rule->mark_mask != nla_get_u32(tb[FRA_FWMASK]))) continue; + if (tb[FRA_UID_START] && + !uid_eq(rule->uid_start, fib_nl_uid(tb[FRA_UID_START]))) + continue; + + if (tb[FRA_UID_END] && + !uid_eq(rule->uid_end, fib_nl_uid(tb[FRA_UID_END]))) + continue; + if (!ops->compare(rule, frh, tb)) continue; @@ -519,7 +569,9 @@ static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops, + nla_total_size(4) /* FRA_PRIORITY */ + nla_total_size(4) /* FRA_TABLE */ + nla_total_size(4) /* FRA_FWMARK */ - + nla_total_size(4); /* FRA_FWMASK */ + + nla_total_size(4) /* FRA_FWMASK */ + + nla_total_size(4) /* FRA_UID_START */ + + nla_total_size(4); /* FRA_UID_END */ if (ops->nlmsg_payload) payload += ops->nlmsg_payload(rule); @@ -577,6 +629,12 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, if (rule->target) NLA_PUT_U32(skb, FRA_GOTO, 
rule->target); + if (uid_valid(rule->uid_start)) + nla_put_uid(skb, FRA_UID_START, rule->uid_start); + + if (uid_valid(rule->uid_end)) + nla_put_uid(skb, FRA_UID_END, rule->uid_end); + if (ops->fill(rule, skb, frh) < 0) goto nla_put_failure; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 92fc5f69f5d..a54817aced3 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -482,6 +482,7 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = { [RTA_METRICS] = { .type = NLA_NESTED }, [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, [RTA_FLOW] = { .type = NLA_U32 }, + [RTA_UID] = { .type = NLA_U32 }, }; static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index cd9a67df0b5..19d18cb46c2 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -334,6 +334,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) struct sock *sk; struct inet_sock *inet; __be32 daddr; + u32 mark = IP4_REPLY_MARK(net, skb->mark); if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb)) return; @@ -346,6 +347,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) icmp_param->data.icmph.checksum = 0; inet->tos = ip_hdr(skb)->tos; + sk->sk_mark = mark; daddr = ipc.addr = ip_hdr(skb)->saddr; ipc.opt = NULL; ipc.tx_flags = 0; @@ -357,6 +359,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) memset(&fl4, 0, sizeof(fl4)); fl4.daddr = daddr; fl4.saddr = rt->rt_spec_dst; + fl4.flowi4_mark = mark; fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); fl4.flowi4_proto = IPPROTO_ICMP; security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); @@ -375,7 +378,7 @@ static struct rtable *icmp_route_lookup(struct net *net, struct flowi4 *fl4, struct sk_buff *skb_in, const struct iphdr *iph, - __be32 saddr, u8 tos, + __be32 saddr, u8 tos, u32 mark, int type, int code, struct icmp_bxm *param) { @@ -387,6 +390,7 @@ static struct rtable 
*icmp_route_lookup(struct net *net, fl4->daddr = (param->replyopts.opt.opt.srr ? param->replyopts.opt.opt.faddr : iph->saddr); fl4->saddr = saddr; + fl4->flowi4_mark = mark; fl4->flowi4_tos = RT_TOS(tos); fl4->flowi4_proto = IPPROTO_ICMP; fl4->fl4_icmp_type = type; @@ -484,6 +488,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) struct flowi4 fl4; __be32 saddr; u8 tos; + u32 mark; struct net *net; struct sock *sk; @@ -580,6 +585,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) tos = icmp_pointers[type].error ? ((iph->tos & IPTOS_TOS_MASK) | IPTOS_PREC_INTERNETCONTROL) : iph->tos; + mark = IP4_REPLY_MARK(net, skb_in->mark); if (ip_options_echo(&icmp_param.replyopts.opt.opt, skb_in)) goto out_unlock; @@ -596,11 +602,12 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) icmp_param.skb = skb_in; icmp_param.offset = skb_network_offset(skb_in); inet_sk(sk)->tos = tos; + sk->sk_mark = mark; ipc.addr = iph->saddr; ipc.opt = &icmp_param.replyopts.opt; ipc.tx_flags = 0; - rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, + rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, mark, type, code, &icmp_param); if (IS_ERR(rt)) goto out_unlock; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index c14d88ad348..2b8e7d7df33 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -358,11 +358,12 @@ struct dst_entry *inet_csk_route_req(struct sock *sk, struct ip_options_rcu *opt = inet_rsk(req)->opt; struct net *net = sock_net(sk); - flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark, + flowi4_init_output(fl4, sk->sk_bound_dev_if, ireq->ir_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, sk->sk_protocol, inet_sk_flowi_flags(sk), (opt && opt->opt.srr) ? 
opt->opt.faddr : ireq->rmt_addr, - ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport); + ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport, + sock_i_uid(sk)); security_req_classify_flow(req, flowi4_to_flowi(fl4)); rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) @@ -391,11 +392,12 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk, struct rtable *rt; fl4 = &newinet->cork.fl.u.ip4; - flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark, + flowi4_init_output(fl4, sk->sk_bound_dev_if, ireq->ir_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, sk->sk_protocol, inet_sk_flowi_flags(sk), (opt && opt->opt.srr) ? opt->opt.faddr : ireq->rmt_addr, - ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport); + ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport, + sock_i_uid(sk)); security_req_classify_flow(req, flowi4_to_flowi(fl4)); rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) @@ -604,6 +606,8 @@ struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req, inet_sk(newsk)->inet_sport = inet_rsk(req)->loc_port; newsk->sk_write_space = sk_stream_write_space; + newsk->sk_mark = inet_rsk(req)->ir_mark; + newicsk->icsk_retransmits = 0; newicsk->icsk_backoff = 0; newicsk->icsk_probes_out = 0; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 8c6563361ab..16ac1635db0 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1487,12 +1487,14 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr, daddr = replyopts.opt.opt.faddr; } - flowi4_init_output(&fl4, arg->bound_dev_if, 0, + flowi4_init_output(&fl4, arg->bound_dev_if, + IP4_REPLY_MARK(sock_net(sk), skb->mark), RT_TOS(ip_hdr(skb)->tos), RT_SCOPE_UNIVERSE, sk->sk_protocol, ip_reply_arg_flowi_flags(arg), daddr, rt->rt_spec_dst, - tcp_hdr(skb)->source, tcp_hdr(skb)->dest); + tcp_hdr(skb)->source, tcp_hdr(skb)->dest, + arg->uid); security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); rt = ip_route_output_key(sock_net(sk), &fl4); if 
(IS_ERR(rt)) diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 45bc0dd517f..b79fa527029 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -247,26 +247,33 @@ int ping_init_sock(struct sock *sk) struct net *net = sock_net(sk); gid_t group = current_egid(); gid_t range[2]; - struct group_info *group_info = get_current_groups(); - int i, j, count = group_info->ngroups; + struct group_info *group_info; + int i, j, count; + int ret = 0; inet_get_ping_group_range_net(net, range, range+1); if (range[0] <= group && group <= range[1]) return 0; + group_info = get_current_groups(); + count = group_info->ngroups; for (i = 0; i < group_info->nblocks; i++) { int cp_count = min_t(int, NGROUPS_PER_BLOCK, count); for (j = 0; j < cp_count; j++) { group = group_info->blocks[i][j]; if (range[0] <= group && group <= range[1]) - return 0; + goto out_release_group; } count -= cp_count; } - return -EACCES; + ret = -EACCES; + +out_release_group: + put_group_info(group_info); + return ret; } EXPORT_SYMBOL_GPL(ping_init_sock); @@ -764,7 +771,8 @@ int ping_v4_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, sk->sk_protocol, - inet_sk_flowi_flags(sk), faddr, saddr, 0, 0); + inet_sk_flowi_flags(sk), faddr, saddr, 0, 0, + sock_i_uid(sk)); security_sk_classify_flow(sk, flowi4_to_flowi(&fl4)); rt = ip_route_output_flow(net, &fl4, sk); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 61714bd5292..415b3a806bb 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -564,7 +564,8 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, RT_SCOPE_UNIVERSE, inet->hdrincl ? 
IPPROTO_RAW : sk->sk_protocol, inet_sk_flowi_flags(sk) | FLOWI_FLAG_CAN_SLEEP, - daddr, saddr, 0, 0); + daddr, saddr, 0, 0, + sock_i_uid(sk)); if (!inet->hdrincl) { err = raw_probe_proto_opt(&fl4, msg); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index b5638545deb..6c58c9238c9 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -741,6 +741,7 @@ static inline int compare_keys(struct rtable *rt1, struct rtable *rt2) (rt1->rt_mark ^ rt2->rt_mark) | (rt1->rt_key_tos ^ rt2->rt_key_tos) | (rt1->rt_route_iif ^ rt2->rt_route_iif) | + (rt1->rt_uid ^ rt2->rt_uid) | (rt1->rt_oif ^ rt2->rt_oif)) == 0; } @@ -1886,6 +1887,7 @@ void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt) fl4.flowi4_oif = rt->dst.dev->ifindex; fl4.flowi4_iif = skb->dev->ifindex; fl4.flowi4_mark = skb->mark; + fl4.flowi4_uid = skb->sk ? sock_i_uid(skb->sk) : 0; rcu_read_lock(); if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res) == 0) @@ -2065,6 +2067,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->rt_iif = dev->ifindex; rth->rt_oif = 0; rth->rt_mark = skb->mark; + rth->rt_uid = 0; rth->rt_gateway = daddr; rth->rt_spec_dst= spec_dst; rth->rt_peer_genid = 0; @@ -2200,6 +2203,7 @@ static int __mkroute_input(struct sk_buff *skb, rth->rt_iif = in_dev->dev->ifindex; rth->rt_oif = 0; rth->rt_mark = skb->mark; + rth->rt_uid = 0; rth->rt_gateway = daddr; rth->rt_spec_dst= spec_dst; rth->rt_peer_genid = 0; @@ -2383,6 +2387,7 @@ local_input: rth->rt_iif = dev->ifindex; rth->rt_oif = 0; rth->rt_mark = skb->mark; + rth->rt_uid = 0; rth->rt_gateway = daddr; rth->rt_spec_dst= spec_dst; rth->rt_peer_genid = 0; @@ -2587,6 +2592,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, rth->rt_iif = orig_oif ? 
: dev_out->ifindex; rth->rt_oif = orig_oif; rth->rt_mark = fl4->flowi4_mark; + rth->rt_uid = fl4->flowi4_uid; rth->rt_gateway = fl4->daddr; rth->rt_spec_dst= fl4->saddr; rth->rt_peer_genid = 0; @@ -2838,6 +2844,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *flp4) rt_is_output_route(rth) && rth->rt_oif == flp4->flowi4_oif && rth->rt_mark == flp4->flowi4_mark && + rth->rt_uid == flp4->flowi4_uid && !((rth->rt_key_tos ^ flp4->flowi4_tos) & (IPTOS_RT_MASK | RTO_ONLINK)) && net_eq(dev_net(rth->dst.dev), net) && @@ -2917,6 +2924,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or rt->rt_iif = ort->rt_iif; rt->rt_oif = ort->rt_oif; rt->rt_mark = ort->rt_mark; + rt->rt_uid = ort->rt_uid; rt->rt_genid = rt_genid(net); rt->rt_flags = ort->rt_flags; @@ -3012,6 +3020,9 @@ static int rt_fill_info(struct net *net, if (rt->rt_mark) NLA_PUT_BE32(skb, RTA_MARK, rt->rt_mark); + if (rt->rt_uid != (uid_t) -1) + NLA_PUT_BE32(skb, RTA_UID, rt->rt_uid); + error = rt->dst.error; if (peer) { inet_peer_refcheck(rt->peer); @@ -3127,6 +3138,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void .flowi4_tos = rtm->rtm_tos, .flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0, .flowi4_mark = mark, + .flowi4_uid = tb[RTA_UID] ? 
nla_get_u32(tb[RTA_UID]) : current_uid(), }; rt = ip_route_output_key(net, &fl4); diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 3bc5c8f7c71..184a40f4564 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -310,6 +310,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, ireq->rmt_port = th->source; ireq->loc_addr = ip_hdr(skb)->daddr; ireq->rmt_addr = ip_hdr(skb)->saddr; + ireq->ir_mark = inet_request_mark(sk, skb); ireq->ecn_ok = ecn_ok; ireq->snd_wscale = tcp_opt.snd_wscale; ireq->sack_ok = tcp_opt.sack_ok; @@ -348,11 +349,12 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, { struct flowi4 fl4; - flowi4_init_output(&fl4, 0, sk->sk_mark, RT_CONN_FLAGS(sk), + flowi4_init_output(&fl4, 0, ireq->ir_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP, inet_sk_flowi_flags(sk), (opt && opt->srr) ? opt->faddr : ireq->rmt_addr, - ireq->loc_addr, th->source, th->dest); + ireq->loc_addr, th->source, th->dest, + sock_i_uid(sk)); security_req_classify_flow(req, flowi4_to_flowi(&fl4)); rt = ip_route_output_key(sock_net(sk), &fl4); if (IS_ERR(rt)) { diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index f8651024ea6..2a4b1815172 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -743,6 +743,20 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = ipv4_ping_group_range, }, + { + .procname = "fwmark_reflect", + .data = &init_net.ipv4.sysctl_fwmark_reflect, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "tcp_fwmark_accept", + .data = &init_net.ipv4.sysctl_tcp_fwmark_accept, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { } }; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 6cdf6a28f6b..0b4a35e03df 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -650,6 +650,12 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) 
arg.iov[0].iov_len, IPPROTO_TCP, 0); arg.csumoffset = offsetof(struct tcphdr, check) / 2; arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0; + /* When socket is gone, all binding information is lost. + * routing might fail in this case. No choice here, if we choose to force + * input interface, we will misroute in case of asymmetric route. + */ + if (sk) + arg.bound_dev_if = sk->sk_bound_dev_if; net = dev_net(skb_dst(skb)->dev); ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr, @@ -1338,6 +1344,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) ireq->rmt_addr = saddr; ireq->no_srccheck = inet_sk(sk)->transparent; ireq->opt = tcp_v4_save_options(sk, skb); + ireq->ir_mark = inet_request_mark(sk, skb); if (security_inet_conn_request(sk, skb, req)) goto drop_and_free; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 1b5a19340a9..55feb88305f 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -928,7 +928,8 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, sk->sk_protocol, inet_sk_flowi_flags(sk)|FLOWI_FLAG_CAN_SLEEP, - faddr, saddr, dport, inet->inet_sport); + faddr, saddr, dport, inet->inet_sport, + sock_i_uid(sk)); security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); rt = ip_route_output_flow(net, fl4, sk); diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index a0b4c5da8d4..e8ee4279fd2 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -86,6 +86,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, xdst->u.rt.rt_iif = fl4->flowi4_iif; xdst->u.rt.rt_oif = fl4->flowi4_oif; xdst->u.rt.rt_mark = fl4->flowi4_mark; + xdst->u.rt.rt_uid = fl4->flowi4_uid; xdst->u.dst.dev = dev; dev_hold(dev); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 1587d0d9295..b10720f2903 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -192,6 +192,7 @@ static struct 
ipv6_devconf ipv6_devconf __read_mostly = { .accept_ra_rt_info_max_plen = 0, #endif #endif + .accept_ra_rt_table = 0, .proxy_ndp = 0, .accept_source_route = 0, /* we do not accept RH0 by default. */ .disable_ipv6 = 0, @@ -226,6 +227,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .accept_ra_rt_info_max_plen = 0, #endif #endif + .accept_ra_rt_table = 0, .proxy_ndp = 0, .accept_source_route = 0, /* we do not accept RH0 by default. */ .disable_ipv6 = 0, @@ -1680,6 +1682,31 @@ static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpad } #endif +u32 addrconf_rt_table(const struct net_device *dev, u32 default_table) { + /* Determines into what table to put autoconf PIO/RIO/default routes + * learned on this device. + * + * - If 0, use the same table for every device. This puts routes into + * one of RT_TABLE_{PREFIX,INFO,DFLT} depending on the type of route + * (but note that these three are currently all equal to + * RT6_TABLE_MAIN). + * - If > 0, use the specified table. + * - If < 0, put routes into table dev->ifindex + (-rt_table). + */ + struct inet6_dev *idev = in6_dev_get(dev); + u32 table; + int sysctl = idev->cnf.accept_ra_rt_table; + if (sysctl == 0) { + table = default_table; + } else if (sysctl > 0) { + table = (u32) sysctl; + } else { + table = (unsigned) dev->ifindex + (-sysctl); + } + in6_dev_put(idev); + return table; +} + /* * Add prefix route. 
*/ @@ -1689,7 +1716,7 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev, unsigned long expires, u32 flags) { struct fib6_config cfg = { - .fc_table = RT6_TABLE_PREFIX, + .fc_table = addrconf_rt_table(dev, RT6_TABLE_PREFIX), .fc_metric = IP6_RT_PRIO_ADDRCONF, .fc_ifindex = dev->ifindex, .fc_expires = expires, @@ -3860,6 +3887,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] = cnf->accept_ra_rt_info_max_plen; #endif #endif + array[DEVCONF_ACCEPT_RA_RT_TABLE] = cnf->accept_ra_rt_table; array[DEVCONF_PROXY_NDP] = cnf->proxy_ndp; array[DEVCONF_ACCEPT_SOURCE_ROUTE] = cnf->accept_source_route; #ifdef CONFIG_IPV6_OPTIMISTIC_DAD @@ -4471,6 +4499,13 @@ static struct addrconf_sysctl_table #endif #endif { + .procname = "accept_ra_rt_table", + .data = &ipv6_devconf.accept_ra_rt_table, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { .procname = "proxy_ndp", .data = &ipv6_devconf.proxy_ndp, .maxlen = sizeof(int), diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 4be7f253a5f..d70560bd2e5 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -702,6 +702,7 @@ int inet6_sk_rebuild_header(struct sock *sk) fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = inet->inet_dport; fl6.fl6_sport = inet->inet_sport; + fl6.flowi6_uid = sock_i_uid(sk); security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); final_p = fl6_update_dst(&fl6, np->opt, &final); diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index b46e9f88ce3..c880af549e6 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -160,6 +160,7 @@ ipv4_connected: fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = inet->inet_dport; fl6.fl6_sport = inet->inet_sport; + fl6.flowi6_uid = sock_i_uid(sk); if (!fl6.flowi6_oif && (addr_type&IPV6_ADDR_MULTICAST)) fl6.flowi6_oif = np->mcast_oif; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 6a12eda8761..a83b79611aa 100644 --- a/net/ipv6/icmp.c +++ 
b/net/ipv6/icmp.c @@ -396,6 +396,7 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) int len; int hlimit; int err = 0; + u32 mark = IP6_REPLY_MARK(net, skb->mark); if ((u8 *)hdr < skb->head || (skb->network_header + sizeof(*hdr)) > skb->tail) @@ -461,6 +462,7 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) ipv6_addr_copy(&fl6.daddr, &hdr->saddr); if (saddr) ipv6_addr_copy(&fl6.saddr, saddr); + fl6.flowi6_mark = mark; fl6.flowi6_oif = iif; fl6.fl6_icmp_type = type; fl6.fl6_icmp_code = code; @@ -469,6 +471,7 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) sk = icmpv6_xmit_lock(net); if (sk == NULL) return; + sk->sk_mark = mark; np = inet6_sk(sk); if (!icmpv6_xrlim_allow(sk, type, &fl6)) @@ -543,6 +546,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) struct dst_entry *dst; int err = 0; int hlimit; + u32 mark = IP6_REPLY_MARK(net, skb->mark); saddr = &ipv6_hdr(skb)->daddr; @@ -559,11 +563,13 @@ static void icmpv6_echo_reply(struct sk_buff *skb) ipv6_addr_copy(&fl6.saddr, saddr); fl6.flowi6_oif = skb->dev->ifindex; fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY; + fl6.flowi6_mark = mark; security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); sk = icmpv6_xmit_lock(net); if (sk == NULL) return; + sk->sk_mark = mark; np = inet6_sk(sk); if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 8a58e8cf664..219023f1839 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -69,9 +69,10 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk, final_p = fl6_update_dst(&fl6, np->opt, &final); ipv6_addr_copy(&fl6.saddr, &treq->loc_addr); fl6.flowi6_oif = sk->sk_bound_dev_if; - fl6.flowi6_mark = sk->sk_mark; + fl6.flowi6_mark = inet_rsk(req)->ir_mark; fl6.fl6_dport = inet_rsk(req)->rmt_port; fl6.fl6_sport = inet_rsk(req)->loc_port; + fl6.flowi6_uid = sock_i_uid(sk); 
security_req_classify_flow(req, flowi6_to_flowi(&fl6)); dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); @@ -222,6 +223,7 @@ int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused) fl6.flowi6_mark = sk->sk_mark; fl6.fl6_sport = inet->inet_sport; fl6.fl6_dport = inet->inet_dport; + fl6.flowi6_uid = sock_i_uid(sk); security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); final_p = fl6_update_dst(&fl6, np->opt, &final); diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index bc30c4b4e48..0082212c3d7 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -158,8 +158,10 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, fl6.flowi6_proto = IPPROTO_ICMPV6; fl6.saddr = np->saddr; fl6.daddr = *daddr; + fl6.flowi6_mark = sk->sk_mark; fl6.fl6_icmp_type = user_icmph.icmp6_type; fl6.fl6_icmp_code = user_icmph.icmp6_code; + fl6.flowi6_uid = sock_i_uid(sk); security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 343852e5c70..913830a4018 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -758,6 +758,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_mark = sk->sk_mark; + fl6.flowi6_uid = sock_i_uid(sk); if (sin6) { if (addr_len < SIN6_LEN_RFC2133) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 6f60d8b64a1..113a70ff709 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -89,13 +89,13 @@ static void ip6_link_failure(struct sk_buff *skb); static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu); #ifdef CONFIG_IPV6_ROUTE_INFO -static struct rt6_info *rt6_add_route_info(struct net *net, +static struct rt6_info *rt6_add_route_info(struct net_device *dev, const struct in6_addr *prefix, int prefixlen, - const struct in6_addr *gwaddr, int ifindex, + const struct in6_addr *gwaddr, unsigned pref); -static struct rt6_info *rt6_get_route_info(struct net *net, +static struct 
rt6_info *rt6_get_route_info(struct net_device *dev, const struct in6_addr *prefix, int prefixlen, - const struct in6_addr *gwaddr, int ifindex); + const struct in6_addr *gwaddr); #endif static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old) @@ -547,7 +547,6 @@ static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict) int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, const struct in6_addr *gwaddr) { - struct net *net = dev_net(dev); struct route_info *rinfo = (struct route_info *) opt; struct in6_addr prefix_buf, *prefix; unsigned int pref; @@ -589,8 +588,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, prefix = &prefix_buf; } - rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr, - dev->ifindex); + rt = rt6_get_route_info(dev, prefix, rinfo->prefix_len, gwaddr); if (rt && !lifetime) { ip6_del_rt(rt); @@ -598,8 +596,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, } if (!rt && lifetime) - rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex, - pref); + rt = rt6_add_route_info(dev, prefix, rinfo->prefix_len, gwaddr, pref); else if (rt) rt->rt6i_flags = RTF_ROUTEINFO | (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref); @@ -1791,15 +1788,16 @@ static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort, } #ifdef CONFIG_IPV6_ROUTE_INFO -static struct rt6_info *rt6_get_route_info(struct net *net, +static struct rt6_info *rt6_get_route_info(struct net_device *dev, const struct in6_addr *prefix, int prefixlen, - const struct in6_addr *gwaddr, int ifindex) + const struct in6_addr *gwaddr) { struct fib6_node *fn; struct rt6_info *rt = NULL; struct fib6_table *table; - table = fib6_get_table(net, RT6_TABLE_INFO); + table = fib6_get_table(dev_net(dev), + addrconf_rt_table(dev, RT6_TABLE_INFO)); if (table == NULL) return NULL; @@ -1809,7 +1807,7 @@ static struct rt6_info *rt6_get_route_info(struct net *net, goto out; for (rt = fn->leaf; rt; rt = 
rt->dst.rt6_next) { - if (rt->rt6i_dev->ifindex != ifindex) + if (rt->rt6i_dev->ifindex != dev->ifindex) continue; if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY)) continue; @@ -1823,21 +1821,21 @@ out: return rt; } -static struct rt6_info *rt6_add_route_info(struct net *net, +static struct rt6_info *rt6_add_route_info(struct net_device *dev, const struct in6_addr *prefix, int prefixlen, - const struct in6_addr *gwaddr, int ifindex, + const struct in6_addr *gwaddr, unsigned pref) { struct fib6_config cfg = { - .fc_table = RT6_TABLE_INFO, + .fc_table = addrconf_rt_table(dev, RT6_TABLE_INFO), .fc_metric = IP6_RT_PRIO_USER, - .fc_ifindex = ifindex, + .fc_ifindex = dev->ifindex, .fc_dst_len = prefixlen, .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | RTF_UP | RTF_PREF(pref), .fc_nlinfo.pid = 0, .fc_nlinfo.nlh = NULL, - .fc_nlinfo.nl_net = net, + .fc_nlinfo.nl_net = dev_net(dev), }; ipv6_addr_copy(&cfg.fc_dst, prefix); @@ -1849,7 +1847,7 @@ static struct rt6_info *rt6_add_route_info(struct net *net, ip6_route_add(&cfg); - return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex); + return rt6_get_route_info(dev, prefix, prefixlen, gwaddr); } #endif @@ -1858,7 +1856,8 @@ struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_dev struct rt6_info *rt; struct fib6_table *table; - table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT); + table = fib6_get_table(dev_net(dev), + addrconf_rt_table(dev, RT6_TABLE_DFLT)); if (table == NULL) return NULL; @@ -1880,7 +1879,7 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr, unsigned int pref) { struct fib6_config cfg = { - .fc_table = RT6_TABLE_DFLT, + .fc_table = addrconf_rt_table(dev, RT6_TABLE_DFLT), .fc_metric = IP6_RT_PRIO_USER, .fc_ifindex = dev->ifindex, .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | @@ -1897,28 +1896,17 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr, return rt6_get_dflt_router(gwaddr, 
dev); } -void rt6_purge_dflt_routers(struct net *net) -{ - struct rt6_info *rt; - struct fib6_table *table; - /* NOTE: Keep consistent with rt6_get_dflt_router */ - table = fib6_get_table(net, RT6_TABLE_DFLT); - if (table == NULL) - return; +int rt6_addrconf_purge(struct rt6_info *rt, void *arg) { + if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) && + (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) + return -1; + return 0; +} -restart: - read_lock_bh(&table->tb6_lock); - for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) { - if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) && - (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) { - dst_hold(&rt->dst); - read_unlock_bh(&table->tb6_lock); - ip6_del_rt(rt); - goto restart; - } - } - read_unlock_bh(&table->tb6_lock); +void rt6_purge_dflt_routers(struct net *net) +{ + fib6_clean_all(net, rt6_addrconf_purge, 0, NULL); } static void rtmsg_to_fib6_config(struct net *net, @@ -2221,6 +2209,7 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { [RTA_IIF] = { .type = NLA_U32 }, [RTA_PRIORITY] = { .type = NLA_U32 }, [RTA_METRICS] = { .type = NLA_NESTED }, + [RTA_UID] = { .type = NLA_U32 }, }; static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -2520,6 +2509,11 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void if (tb[RTA_OIF]) fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]); + if (tb[RTA_UID]) + fl6.flowi6_uid = nla_get_u32(tb[RTA_UID]); + else + fl6.flowi6_uid = (iif ? 
(uid_t) -1 : current_uid()); + if (iif) { struct net_device *dev; dev = __dev_get_by_index(net, iif); diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index ac838965ff3..a6a636d6a0e 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -215,6 +215,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ipv6_addr_type(&ireq6->rmt_addr) & IPV6_ADDR_LINKLOCAL) ireq6->iif = inet6_iif(skb); + ireq->ir_mark = inet_request_mark(sk, skb); + req->expires = 0UL; req->retrans = 0; ireq->ecn_ok = ecn_ok; @@ -241,9 +243,10 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) final_p = fl6_update_dst(&fl6, np->opt, &final); ipv6_addr_copy(&fl6.saddr, &ireq6->loc_addr); fl6.flowi6_oif = sk->sk_bound_dev_if; - fl6.flowi6_mark = sk->sk_mark; + fl6.flowi6_mark = inet_rsk(req)->ir_mark; fl6.fl6_dport = inet_rsk(req)->rmt_port; fl6.fl6_sport = inet_sk(sk)->inet_sport; + fl6.flowi6_uid = sock_i_uid(sk); security_req_classify_flow(req, flowi6_to_flowi(&fl6)); dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 6dcf5e7d661..4c27009d39e 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -47,6 +47,13 @@ static ctl_table ipv6_table_template[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "fwmark_reflect", + .data = &init_net.ipv6.sysctl.fwmark_reflect, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { } }; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index cdbce216521..0c08b0bbca6 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -251,6 +251,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = usin->sin6_port; fl6.fl6_sport = inet->inet_sport; + fl6.flowi6_uid = sock_i_uid(sk); final_p = fl6_update_dst(&fl6, np->opt, &final); @@ -404,6 +405,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct 
inet6_skb_parm *opt, fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = inet->inet_dport; fl6.fl6_sport = inet->inet_sport; + fl6.flowi6_uid = sock_i_uid(sk); security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); dst = ip6_dst_lookup_flow(sk, &fl6, NULL, false); @@ -493,9 +495,10 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, ipv6_addr_copy(&fl6.saddr, &treq->loc_addr); fl6.flowlabel = 0; fl6.flowi6_oif = treq->iif; - fl6.flowi6_mark = sk->sk_mark; + fl6.flowi6_mark = inet_rsk(req)->ir_mark; fl6.fl6_dport = inet_rsk(req)->rmt_port; fl6.fl6_sport = inet_rsk(req)->loc_port; + fl6.flowi6_uid = sock_i_uid(sk); security_req_classify_flow(req, flowi6_to_flowi(&fl6)); opt = np->opt; @@ -1046,7 +1049,9 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr); fl6.flowi6_proto = IPPROTO_TCP; - fl6.flowi6_oif = inet6_iif(skb); + if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL) + fl6.flowi6_oif = inet6_iif(skb); + fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark); fl6.fl6_dport = t1->dest; fl6.fl6_sport = t1->source; security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); @@ -1252,6 +1257,14 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) if (!want_cookie || tmp_opt.tstamp_ok) TCP_ECN_create_request(req, tcp_hdr(skb)); + treq->iif = sk->sk_bound_dev_if; + inet_rsk(req)->ir_mark = inet_request_mark(sk, skb); + + /* So that link locals have meaning */ + if (!sk->sk_bound_dev_if && + ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL) + treq->iif = inet6_iif(skb); + if (!isn) { struct inet_peer *peer = NULL; @@ -1261,12 +1274,6 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) atomic_inc(&skb->users); treq->pktopts = skb; } - treq->iif = sk->sk_bound_dev_if; - - /* So that link locals have meaning */ - if (!sk->sk_bound_dev_if && - ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL) - treq->iif = inet6_iif(skb); if 
(want_cookie) { isn = cookie_v6_init_sequence(sk, skb, &req->mss); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index bb95e8e1c6f..b9bc3ca4995 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1084,6 +1084,7 @@ do_udp_sendmsg: fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex; fl6.flowi6_mark = sk->sk_mark; + fl6.flowi6_uid = sock_i_uid(sk); if (msg->msg_controllen) { opt = &opt_space; diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c index 542af525a87..c6a76a80ee5 100644 --- a/net/netfilter/xt_IDLETIMER.c +++ b/net/netfilter/xt_IDLETIMER.c @@ -42,6 +42,11 @@ #include <linux/skbuff.h> #include <linux/workqueue.h> #include <linux/sysfs.h> +#include <linux/rtc.h> +#include <linux/time.h> +#include <linux/math64.h> +#include <linux/suspend.h> +#include <linux/notifier.h> #include <net/net_namespace.h> struct idletimer_tg_attr { @@ -58,22 +63,65 @@ struct idletimer_tg { struct kobject *kobj; struct idletimer_tg_attr attr; + struct timespec delayed_timer_trigger; + struct timespec last_modified_timer; + struct timespec last_suspend_time; + struct notifier_block pm_nb; + + int timeout; unsigned int refcnt; + bool work_pending; bool send_nl_msg; bool active; }; static LIST_HEAD(idletimer_tg_list); static DEFINE_MUTEX(list_mutex); +static DEFINE_SPINLOCK(timestamp_lock); static struct kobject *idletimer_tg_kobj; +static bool check_for_delayed_trigger(struct idletimer_tg *timer, + struct timespec *ts) +{ + bool state; + struct timespec temp; + spin_lock_bh(&timestamp_lock); + timer->work_pending = false; + if ((ts->tv_sec - timer->last_modified_timer.tv_sec) > timer->timeout || + timer->delayed_timer_trigger.tv_sec != 0) { + state = false; + temp.tv_sec = timer->timeout; + temp.tv_nsec = 0; + if (timer->delayed_timer_trigger.tv_sec != 0) { + temp = timespec_add(timer->delayed_timer_trigger, temp); + ts->tv_sec = temp.tv_sec; + ts->tv_nsec = temp.tv_nsec; + timer->delayed_timer_trigger.tv_sec = 0; + timer->work_pending = true; + 
schedule_work(&timer->work); + } else { + temp = timespec_add(timer->last_modified_timer, temp); + ts->tv_sec = temp.tv_sec; + ts->tv_nsec = temp.tv_nsec; + } + } else { + state = timer->active; + } + spin_unlock_bh(&timestamp_lock); + return state; +} + static void notify_netlink_uevent(const char *iface, struct idletimer_tg *timer) { char iface_msg[NLMSG_MAX_SIZE]; char state_msg[NLMSG_MAX_SIZE]; - char *envp[] = { iface_msg, state_msg, NULL }; + char timestamp_msg[NLMSG_MAX_SIZE]; + char *envp[] = { iface_msg, state_msg, timestamp_msg, NULL }; int res; + struct timespec ts; + uint64_t time_ns; + bool state; res = snprintf(iface_msg, NLMSG_MAX_SIZE, "INTERFACE=%s", iface); @@ -81,12 +129,24 @@ static void notify_netlink_uevent(const char *iface, struct idletimer_tg *timer) pr_err("message too long (%d)", res); return; } + + get_monotonic_boottime(&ts); + state = check_for_delayed_trigger(timer, &ts); res = snprintf(state_msg, NLMSG_MAX_SIZE, "STATE=%s", - timer->active ? "active" : "inactive"); + state ? 
"active" : "inactive"); + if (NLMSG_MAX_SIZE <= res) { pr_err("message too long (%d)", res); return; } + + time_ns = timespec_to_ns(&ts); + res = snprintf(timestamp_msg, NLMSG_MAX_SIZE, "TIME_NS=%llu", time_ns); + if (NLMSG_MAX_SIZE <= res) { + timestamp_msg[0] = '\0'; + pr_err("message too long (%d)", res); + } + pr_debug("putting nlmsg: <%s> <%s>\n", iface_msg, state_msg); kobject_uevent_env(idletimer_tg_kobj, KOBJ_CHANGE, envp); return; @@ -151,9 +211,55 @@ static void idletimer_tg_expired(unsigned long data) struct idletimer_tg *timer = (struct idletimer_tg *) data; pr_debug("timer %s expired\n", timer->attr.attr.name); - + spin_lock_bh(&timestamp_lock); timer->active = false; + timer->work_pending = true; schedule_work(&timer->work); + spin_unlock_bh(&timestamp_lock); +} + +static int idletimer_resume(struct notifier_block *notifier, + unsigned long pm_event, void *unused) +{ + struct timespec ts; + unsigned long time_diff, now = jiffies; + struct idletimer_tg *timer = container_of(notifier, + struct idletimer_tg, pm_nb); + if (!timer) + return NOTIFY_DONE; + switch (pm_event) { + case PM_SUSPEND_PREPARE: + get_monotonic_boottime(&timer->last_suspend_time); + break; + case PM_POST_SUSPEND: + spin_lock_bh(&timestamp_lock); + if (!timer->active) { + spin_unlock_bh(&timestamp_lock); + break; + } + /* since jiffies are not updated when suspended now represents + * the time it would have suspended */ + if (time_after(timer->timer.expires, now)) { + get_monotonic_boottime(&ts); + ts = timespec_sub(ts, timer->last_suspend_time); + time_diff = timespec_to_jiffies(&ts); + if (timer->timer.expires > (time_diff + now)) { + mod_timer_pending(&timer->timer, + (timer->timer.expires - time_diff)); + } else { + del_timer(&timer->timer); + timer->timer.expires = 0; + timer->active = false; + timer->work_pending = true; + schedule_work(&timer->work); + } + } + spin_unlock_bh(&timestamp_lock); + break; + default: + break; + } + return NOTIFY_DONE; } static int idletimer_tg_create(struct 
idletimer_tg_info *info) @@ -189,6 +295,18 @@ static int idletimer_tg_create(struct idletimer_tg_info *info) info->timer->refcnt = 1; info->timer->send_nl_msg = (info->send_nl_msg == 0) ? false : true; info->timer->active = true; + info->timer->timeout = info->timeout; + + info->timer->delayed_timer_trigger.tv_sec = 0; + info->timer->delayed_timer_trigger.tv_nsec = 0; + info->timer->work_pending = false; + get_monotonic_boottime(&info->timer->last_modified_timer); + + info->timer->pm_nb.notifier_call = idletimer_resume; + ret = register_pm_notifier(&info->timer->pm_nb); + if (ret) + printk(KERN_WARNING "[%s] Failed to register pm notifier %d\n", + __func__, ret); mod_timer(&info->timer->timer, msecs_to_jiffies(info->timeout * 1000) + jiffies); @@ -205,6 +323,34 @@ out: return ret; } +static void reset_timer(const struct idletimer_tg_info *info) +{ + unsigned long now = jiffies; + struct idletimer_tg *timer = info->timer; + bool timer_prev; + + spin_lock_bh(&timestamp_lock); + timer_prev = timer->active; + timer->active = true; + /* timer_prev is used to guard overflow problem in time_before*/ + if (!timer_prev || time_before(timer->timer.expires, now)) { + pr_debug("Starting Checkentry timer (Expired, Jiffies): %lu, %lu\n", + timer->timer.expires, now); + /* checks if there is a pending inactive notification*/ + if (timer->work_pending) + timer->delayed_timer_trigger = timer->last_modified_timer; + else { + timer->work_pending = true; + schedule_work(&timer->work); + } + } + + get_monotonic_boottime(&timer->last_modified_timer); + mod_timer(&timer->timer, + msecs_to_jiffies(info->timeout * 1000) + now); + spin_unlock_bh(&timestamp_lock); +} + /* * The actual xt_tables plugin. 
*/ @@ -228,9 +374,7 @@ static unsigned int idletimer_tg_target(struct sk_buff *skb, } /* TODO: Avoid modifying timers on each packet */ - mod_timer(&info->timer->timer, - msecs_to_jiffies(info->timeout * 1000) + now); - + reset_timer(info); return XT_CONTINUE; } @@ -238,7 +382,6 @@ static int idletimer_tg_checkentry(const struct xt_tgchk_param *par) { struct idletimer_tg_info *info = par->targinfo; int ret; - unsigned long now = jiffies; pr_debug("checkentry targinfo %s\n", info->label); @@ -259,17 +402,7 @@ static int idletimer_tg_checkentry(const struct xt_tgchk_param *par) info->timer = __idletimer_tg_find_by_label(info->label); if (info->timer) { info->timer->refcnt++; - info->timer->active = true; - - if (time_before(info->timer->timer.expires, now)) { - schedule_work(&info->timer->work); - pr_debug("Starting Checkentry timer (Expired, Jiffies): %lu, %lu\n", - info->timer->timer.expires, now); - } - - mod_timer(&info->timer->timer, - msecs_to_jiffies(info->timeout * 1000) + now); - + reset_timer(info); pr_debug("increased refcnt of timer %s to %u\n", info->label, info->timer->refcnt); } else { @@ -300,6 +433,7 @@ static void idletimer_tg_destroy(const struct xt_tgdtor_param *par) list_del(&info->timer->entry); del_timer_sync(&info->timer->timer); sysfs_remove_file(idletimer_tg_kobj, &info->timer->attr.attr); + unregister_pm_notifier(&info->timer->pm_nb); kfree(info->timer->attr.attr.name); kfree(info->timer); } else { diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 0a4db0211da..86d7a7afb9d 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1339,7 +1339,8 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, dst_pid = addr->nl_pid; dst_group = ffs(addr->nl_groups); err = -EPERM; - if (dst_group && !netlink_capable(sock, NL_NONROOT_SEND)) + if ((dst_group || dst_pid) && + !netlink_capable(sock, NL_NONROOT_SEND)) goto out; } else { dst_pid = nlk->dst_pid; @@ -2102,6 +2103,7 @@ static void __init 
netlink_add_usersock_entry(void) rcu_assign_pointer(nl_table[NETLINK_USERSOCK].listeners, listeners); nl_table[NETLINK_USERSOCK].module = THIS_MODULE; nl_table[NETLINK_USERSOCK].registered = 1; + nl_table[NETLINK_USERSOCK].nl_nonroot = NL_NONROOT_SEND; netlink_table_ungrab(); } diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 6000792a055..9128cdc0071 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -423,6 +423,13 @@ static int sb_finish_set_opts(struct super_block *sb) if (strncmp(sb->s_type->name, "sysfs", sizeof("sysfs")) == 0) sbsec->flags |= SE_SBLABELSUPP; + /* + * Special handling for rootfs. Is genfs but supports + * setting SELinux context on in-core inodes. + */ + if (strncmp(sb->s_type->name, "rootfs", sizeof("rootfs")) == 0) + sbsec->flags |= SE_SBLABELSUPP; + /* Initialize the root inode. */ rc = inode_doinit_with_dentry(root_inode, root); |
